summaryrefslogtreecommitdiffstats
path: root/man2
diff options
context:
space:
mode:
authorAlejandro Colomar <alx@kernel.org>2024-04-26 15:06:49 +0200
committerAlejandro Colomar <alx@kernel.org>2024-05-02 01:24:19 +0200
commitdcde2f70372b49ec43efc5db864c9ff585d0a2dd (patch)
tree78b9b7425130e4a5858e4c01a524d802423879ed /man2
parent12aca537ce78a41bbcdaf485209691e10f8002d7 (diff)
man/, share/mk/: Move man*/ to man/
This is a scripted change: $ mkdir man/; $ mv man* man/; $ ln -st . man/man*; $ find share/mk/ -type f \ | xargs grep -l '^MANDIR *:=' \ | xargs sed -i '/^MANDIR *:=/s,$,/man,'; $ find share/mk/dist/ -type f \ | xargs grep -l man \ | xargs sed -i 's,man%,man/%,g'; Link: <https://lore.kernel.org/linux-man/YxcV4h+Xn7cd6+q2@pevik/T/> Cc: Petr Vorel <pvorel@suse.cz> Cc: Jakub Wilk <jwilk@jwilk.net> Cc: Stefan Puiu <stefan.puiu@gmail.com> Signed-off-by: Alejandro Colomar <alx@kernel.org>
Diffstat (limited to 'man2')
l---------man21
-rw-r--r--man2/_Exit.21
-rw-r--r--man2/__clone2.21
-rw-r--r--man2/_exit.2138
-rw-r--r--man2/_llseek.21
-rw-r--r--man2/_newselect.21
-rw-r--r--man2/_syscall.2171
-rw-r--r--man2/_sysctl.21
-rw-r--r--man2/accept.2349
-rw-r--r--man2/accept4.21
-rw-r--r--man2/access.2468
-rw-r--r--man2/acct.2136
-rw-r--r--man2/add_key.2298
-rw-r--r--man2/adjtimex.2595
-rw-r--r--man2/afs_syscall.21
-rw-r--r--man2/alarm.281
-rw-r--r--man2/alloc_hugepages.2135
-rw-r--r--man2/arch_prctl.2176
-rw-r--r--man2/arm_fadvise.21
-rw-r--r--man2/arm_fadvise64_64.21
-rw-r--r--man2/arm_sync_file_range.21
-rw-r--r--man2/bdflush.2103
-rw-r--r--man2/bind.2286
-rw-r--r--man2/bpf.21273
-rw-r--r--man2/break.21
-rw-r--r--man2/brk.2153
-rw-r--r--man2/cacheflush.2143
-rw-r--r--man2/capget.2260
-rw-r--r--man2/capset.21
-rw-r--r--man2/chdir.2127
-rw-r--r--man2/chmod.2347
-rw-r--r--man2/chown.2471
-rw-r--r--man2/chown32.21
-rw-r--r--man2/chroot.2166
-rw-r--r--man2/clock_adjtime.21
-rw-r--r--man2/clock_getres.2539
-rw-r--r--man2/clock_gettime.21
-rw-r--r--man2/clock_nanosleep.2253
-rw-r--r--man2/clock_settime.21
-rw-r--r--man2/clone.21949
-rw-r--r--man2/clone2.21
-rw-r--r--man2/clone3.21
-rw-r--r--man2/close.2274
-rw-r--r--man2/close_range.2276
-rw-r--r--man2/connect.2253
-rw-r--r--man2/copy_file_range.2307
-rw-r--r--man2/creat.21
-rw-r--r--man2/create_module.272
-rw-r--r--man2/delete_module.2205
-rw-r--r--man2/dup.2284
-rw-r--r--man2/dup2.21
-rw-r--r--man2/dup3.21
-rw-r--r--man2/epoll_create.2144
-rw-r--r--man2/epoll_create1.21
-rw-r--r--man2/epoll_ctl.2429
-rw-r--r--man2/epoll_pwait.21
-rw-r--r--man2/epoll_pwait2.21
-rw-r--r--man2/epoll_wait.2288
-rw-r--r--man2/eventfd.2446
-rw-r--r--man2/eventfd2.21
-rw-r--r--man2/execve.2889
-rw-r--r--man2/execveat.2220
-rw-r--r--man2/exit.21
-rw-r--r--man2/exit_group.238
-rw-r--r--man2/faccessat.21
-rw-r--r--man2/faccessat2.21
-rw-r--r--man2/fadvise64.21
-rw-r--r--man2/fadvise64_64.21
-rw-r--r--man2/fallocate.2481
-rw-r--r--man2/fanotify_init.2542
-rw-r--r--man2/fanotify_mark.2850
-rw-r--r--man2/fattach.21
-rw-r--r--man2/fchdir.21
-rw-r--r--man2/fchmod.21
-rw-r--r--man2/fchmodat.21
-rw-r--r--man2/fchown.21
-rw-r--r--man2/fchown32.21
-rw-r--r--man2/fchownat.21
-rw-r--r--man2/fcntl.22113
-rw-r--r--man2/fcntl64.21
-rw-r--r--man2/fdatasync.21
-rw-r--r--man2/fdetach.21
-rw-r--r--man2/fgetxattr.21
-rw-r--r--man2/finit_module.21
-rw-r--r--man2/flistxattr.21
-rw-r--r--man2/flock.2267
-rw-r--r--man2/fork.2348
-rw-r--r--man2/free_hugepages.21
-rw-r--r--man2/fremovexattr.21
-rw-r--r--man2/fsetxattr.21
-rw-r--r--man2/fstat.21
-rw-r--r--man2/fstat64.21
-rw-r--r--man2/fstatat.21
-rw-r--r--man2/fstatat64.21
-rw-r--r--man2/fstatfs.21
-rw-r--r--man2/fstatfs64.21
-rw-r--r--man2/fsync.2200
-rw-r--r--man2/ftruncate.21
-rw-r--r--man2/ftruncate64.21
-rw-r--r--man2/futex.21976
-rw-r--r--man2/futimesat.2128
-rw-r--r--man2/get_kernel_syms.288
-rw-r--r--man2/get_mempolicy.2239
-rw-r--r--man2/get_robust_list.2156
-rw-r--r--man2/get_thread_area.21
-rw-r--r--man2/getcpu.2147
-rw-r--r--man2/getcwd.22
-rw-r--r--man2/getdents.2323
-rw-r--r--man2/getdents64.21
-rw-r--r--man2/getdomainname.2122
-rw-r--r--man2/getegid.21
-rw-r--r--man2/getegid32.21
-rw-r--r--man2/geteuid.21
-rw-r--r--man2/geteuid32.21
-rw-r--r--man2/getgid.270
-rw-r--r--man2/getgid32.21
-rw-r--r--man2/getgroups.2219
-rw-r--r--man2/getgroups32.21
-rw-r--r--man2/gethostname.2176
-rw-r--r--man2/getitimer.2278
-rw-r--r--man2/getmsg.21
-rw-r--r--man2/getpagesize.267
-rw-r--r--man2/getpeername.2116
-rw-r--r--man2/getpgid.21
-rw-r--r--man2/getpgrp.21
-rw-r--r--man2/getpid.2150
-rw-r--r--man2/getpmsg.21
-rw-r--r--man2/getppid.21
-rw-r--r--man2/getpriority.2209
-rw-r--r--man2/getrandom.2295
-rw-r--r--man2/getresgid.21
-rw-r--r--man2/getresgid32.21
-rw-r--r--man2/getresuid.270
-rw-r--r--man2/getresuid32.21
-rw-r--r--man2/getrlimit.2853
-rw-r--r--man2/getrusage.2250
-rw-r--r--man2/getsid.275
-rw-r--r--man2/getsockname.285
-rw-r--r--man2/getsockopt.2172
-rw-r--r--man2/gettid.274
-rw-r--r--man2/gettimeofday.2296
-rw-r--r--man2/getuid.280
-rw-r--r--man2/getuid32.21
-rw-r--r--man2/getunwind.287
-rw-r--r--man2/getxattr.2143
-rw-r--r--man2/gtty.21
-rw-r--r--man2/idle.244
-rw-r--r--man2/inb.21
-rw-r--r--man2/inb_p.21
-rw-r--r--man2/init_module.2393
-rw-r--r--man2/inl.21
-rw-r--r--man2/inl_p.21
-rw-r--r--man2/inotify_add_watch.2135
-rw-r--r--man2/inotify_init.297
-rw-r--r--man2/inotify_init1.21
-rw-r--r--man2/inotify_rm_watch.260
-rw-r--r--man2/insb.21
-rw-r--r--man2/insl.21
-rw-r--r--man2/insw.21
-rw-r--r--man2/intro.2115
-rw-r--r--man2/inw.21
-rw-r--r--man2/inw_p.21
-rw-r--r--man2/io_cancel.2106
-rw-r--r--man2/io_destroy.297
-rw-r--r--man2/io_getevents.2137
-rw-r--r--man2/io_setup.2114
-rw-r--r--man2/io_submit.2289
-rw-r--r--man2/ioctl.2231
-rw-r--r--man2/ioctl_console.2915
-rw-r--r--man2/ioctl_fat.2489
-rw-r--r--man2/ioctl_ficlone.21
-rw-r--r--man2/ioctl_ficlonerange.2129
-rw-r--r--man2/ioctl_fideduperange.2200
-rw-r--r--man2/ioctl_fslabel.272
-rw-r--r--man2/ioctl_getfsmap.2351
-rw-r--r--man2/ioctl_iflags.2202
-rw-r--r--man2/ioctl_ns.2342
-rw-r--r--man2/ioctl_pagemap_scan.2206
-rw-r--r--man2/ioctl_pipe.264
-rw-r--r--man2/ioctl_tty.2922
-rw-r--r--man2/ioctl_userfaultfd.21072
-rw-r--r--man2/ioperm.2105
-rw-r--r--man2/iopl.292
-rw-r--r--man2/ioprio_get.21
-rw-r--r--man2/ioprio_set.2362
-rw-r--r--man2/ipc.263
-rw-r--r--man2/isastream.21
-rw-r--r--man2/kcmp.2420
-rw-r--r--man2/kexec_file_load.21
-rw-r--r--man2/kexec_load.2331
-rw-r--r--man2/keyctl.22297
-rw-r--r--man2/kill.2165
-rw-r--r--man2/landlock_add_rule.2131
-rw-r--r--man2/landlock_create_ruleset.2124
-rw-r--r--man2/landlock_restrict_self.2116
-rw-r--r--man2/lchown.21
-rw-r--r--man2/lchown32.21
-rw-r--r--man2/lgetxattr.21
-rw-r--r--man2/link.2425
-rw-r--r--man2/linkat.21
-rw-r--r--man2/listen.2155
-rw-r--r--man2/listxattr.2322
-rw-r--r--man2/llistxattr.21
-rw-r--r--man2/llseek.292
-rw-r--r--man2/lock.21
-rw-r--r--man2/lookup_dcookie.286
-rw-r--r--man2/lremovexattr.21
-rw-r--r--man2/lseek.2252
-rw-r--r--man2/lsetxattr.21
-rw-r--r--man2/lstat.21
-rw-r--r--man2/lstat64.21
-rw-r--r--man2/madvise.2898
-rw-r--r--man2/madvise1.21
-rw-r--r--man2/mbind.2521
-rw-r--r--man2/membarrier.2460
-rw-r--r--man2/memfd_create.2549
-rw-r--r--man2/memfd_secret.2204
-rw-r--r--man2/migrate_pages.2174
-rw-r--r--man2/mincore.2158
-rw-r--r--man2/mkdir.2250
-rw-r--r--man2/mkdirat.21
-rw-r--r--man2/mknod.2302
-rw-r--r--man2/mknodat.21
-rw-r--r--man2/mlock.2507
-rw-r--r--man2/mlock2.21
-rw-r--r--man2/mlockall.21
-rw-r--r--man2/mmap.21037
-rw-r--r--man2/mmap2.285
-rw-r--r--man2/modify_ldt.2196
-rw-r--r--man2/mount.2971
-rw-r--r--man2/mount_setattr.21067
-rw-r--r--man2/move_pages.2253
-rw-r--r--man2/mprotect.2363
-rw-r--r--man2/mpx.21
-rw-r--r--man2/mq_getsetattr.233
-rw-r--r--man2/mq_notify.22
-rw-r--r--man2/mq_open.22
-rw-r--r--man2/mq_timedreceive.22
-rw-r--r--man2/mq_timedsend.22
-rw-r--r--man2/mq_unlink.22
-rw-r--r--man2/mremap.2357
-rw-r--r--man2/msgctl.2424
-rw-r--r--man2/msgget.2217
-rw-r--r--man2/msgop.2684
-rw-r--r--man2/msgrcv.21
-rw-r--r--man2/msgsnd.21
-rw-r--r--man2/msync.2140
-rw-r--r--man2/munlock.21
-rw-r--r--man2/munlockall.21
-rw-r--r--man2/munmap.21
-rw-r--r--man2/name_to_handle_at.21
-rw-r--r--man2/nanosleep.2221
-rw-r--r--man2/newfstatat.21
-rw-r--r--man2/nfsservctl.270
-rw-r--r--man2/nice.2118
-rw-r--r--man2/oldfstat.21
-rw-r--r--man2/oldlstat.21
-rw-r--r--man2/oldolduname.21
-rw-r--r--man2/oldstat.21
-rw-r--r--man2/olduname.21
-rw-r--r--man2/open.21941
-rw-r--r--man2/open_by_handle_at.2787
-rw-r--r--man2/openat.21
-rw-r--r--man2/openat2.2582
-rw-r--r--man2/outb.284
-rw-r--r--man2/outb_p.21
-rw-r--r--man2/outl.21
-rw-r--r--man2/outl_p.21
-rw-r--r--man2/outsb.21
-rw-r--r--man2/outsl.21
-rw-r--r--man2/outsw.21
-rw-r--r--man2/outw.21
-rw-r--r--man2/outw_p.21
-rw-r--r--man2/pause.250
-rw-r--r--man2/pciconfig_iobase.21
-rw-r--r--man2/pciconfig_read.2122
-rw-r--r--man2/pciconfig_write.21
-rw-r--r--man2/perf_event_open.24036
-rw-r--r--man2/perfmonctl.2193
-rw-r--r--man2/personality.2296
-rw-r--r--man2/phys.21
-rw-r--r--man2/pidfd_getfd.2144
-rw-r--r--man2/pidfd_open.2269
-rw-r--r--man2/pidfd_send_signal.2240
-rw-r--r--man2/pipe.2304
-rw-r--r--man2/pipe2.21
-rw-r--r--man2/pivot_root.2409
-rw-r--r--man2/pkey_alloc.2115
-rw-r--r--man2/pkey_free.21
-rw-r--r--man2/pkey_mprotect.21
-rw-r--r--man2/poll.2649
-rw-r--r--man2/posix_fadvise.2227
-rw-r--r--man2/ppoll.21
-rw-r--r--man2/prctl.22577
-rw-r--r--man2/pread.2146
-rw-r--r--man2/pread64.21
-rw-r--r--man2/preadv.21
-rw-r--r--man2/preadv2.21
-rw-r--r--man2/prlimit.21
-rw-r--r--man2/prlimit64.21
-rw-r--r--man2/process_madvise.2200
-rw-r--r--man2/process_vm_readv.2314
-rw-r--r--man2/process_vm_writev.21
-rw-r--r--man2/prof.21
-rw-r--r--man2/pselect.21
-rw-r--r--man2/pselect6.21
-rw-r--r--man2/ptrace.22986
-rw-r--r--man2/putmsg.21
-rw-r--r--man2/putpmsg.21
-rw-r--r--man2/pwrite.21
-rw-r--r--man2/pwrite64.21
-rw-r--r--man2/pwritev.21
-rw-r--r--man2/pwritev2.21
-rw-r--r--man2/query_module.2194
-rw-r--r--man2/quotactl.2806
-rw-r--r--man2/read.2245
-rw-r--r--man2/readahead.299
-rw-r--r--man2/readdir.2116
-rw-r--r--man2/readlink.2331
-rw-r--r--man2/readlinkat.21
-rw-r--r--man2/readv.2427
-rw-r--r--man2/reboot.2241
-rw-r--r--man2/recv.2563
-rw-r--r--man2/recvfrom.21
-rw-r--r--man2/recvmmsg.2276
-rw-r--r--man2/recvmsg.21
-rw-r--r--man2/remap_file_pages.2170
-rw-r--r--man2/removexattr.2100
-rw-r--r--man2/rename.2549
-rw-r--r--man2/renameat.21
-rw-r--r--man2/renameat2.21
-rw-r--r--man2/request_key.2562
-rw-r--r--man2/restart_syscall.2123
-rw-r--r--man2/rmdir.2128
-rw-r--r--man2/rt_sigaction.21
-rw-r--r--man2/rt_sigpending.21
-rw-r--r--man2/rt_sigprocmask.21
-rw-r--r--man2/rt_sigqueueinfo.2195
-rw-r--r--man2/rt_sigreturn.21
-rw-r--r--man2/rt_sigsuspend.21
-rw-r--r--man2/rt_sigtimedwait.21
-rw-r--r--man2/rt_tgsigqueueinfo.21
-rw-r--r--man2/s390_guarded_storage.2162
-rw-r--r--man2/s390_pci_mmio_read.21
-rw-r--r--man2/s390_pci_mmio_write.294
-rw-r--r--man2/s390_runtime_instr.2104
-rw-r--r--man2/s390_sthyi.2133
-rw-r--r--man2/sbrk.21
-rw-r--r--man2/sched_get_priority_max.2112
-rw-r--r--man2/sched_get_priority_min.21
-rw-r--r--man2/sched_getaffinity.21
-rw-r--r--man2/sched_getattr.21
-rw-r--r--man2/sched_getparam.21
-rw-r--r--man2/sched_getscheduler.21
-rw-r--r--man2/sched_rr_get_interval.2110
-rw-r--r--man2/sched_setaffinity.2427
-rw-r--r--man2/sched_setattr.2447
-rw-r--r--man2/sched_setparam.2121
-rw-r--r--man2/sched_setscheduler.2232
-rw-r--r--man2/sched_yield.276
-rw-r--r--man2/seccomp.21245
-rw-r--r--man2/seccomp_unotify.22011
-rw-r--r--man2/security.21
-rw-r--r--man2/select.2765
-rw-r--r--man2/select_tut.2638
-rw-r--r--man2/semctl.2623
-rw-r--r--man2/semget.2434
-rw-r--r--man2/semop.2523
-rw-r--r--man2/semtimedop.21
-rw-r--r--man2/send.2506
-rw-r--r--man2/sendfile.2245
-rw-r--r--man2/sendfile64.21
-rw-r--r--man2/sendmmsg.2232
-rw-r--r--man2/sendmsg.21
-rw-r--r--man2/sendto.21
-rw-r--r--man2/set_mempolicy.2343
-rw-r--r--man2/set_robust_list.21
-rw-r--r--man2/set_thread_area.2229
-rw-r--r--man2/set_tid_address.297
-rw-r--r--man2/setdomainname.21
-rw-r--r--man2/setegid.21
-rw-r--r--man2/seteuid.2134
-rw-r--r--man2/setfsgid.2109
-rw-r--r--man2/setfsgid32.21
-rw-r--r--man2/setfsuid.2127
-rw-r--r--man2/setfsuid32.21
-rw-r--r--man2/setgid.292
-rw-r--r--man2/setgid32.21
-rw-r--r--man2/setgroups.21
-rw-r--r--man2/setgroups32.21
-rw-r--r--man2/sethostname.21
-rw-r--r--man2/setitimer.21
-rw-r--r--man2/setns.2419
-rw-r--r--man2/setpgid.2329
-rw-r--r--man2/setpgrp.21
-rw-r--r--man2/setpriority.21
-rw-r--r--man2/setregid.21
-rw-r--r--man2/setregid32.21
-rw-r--r--man2/setresgid.21
-rw-r--r--man2/setresgid32.21
-rw-r--r--man2/setresuid.2147
-rw-r--r--man2/setresuid32.21
-rw-r--r--man2/setreuid.2193
-rw-r--r--man2/setreuid32.21
-rw-r--r--man2/setrlimit.21
-rw-r--r--man2/setsid.2100
-rw-r--r--man2/setsockopt.21
-rw-r--r--man2/settimeofday.21
-rw-r--r--man2/setuid.2156
-rw-r--r--man2/setuid32.21
-rw-r--r--man2/setup.255
-rw-r--r--man2/setxattr.2159
-rw-r--r--man2/sgetmask.270
-rw-r--r--man2/shmat.21
-rw-r--r--man2/shmctl.2494
-rw-r--r--man2/shmdt.21
-rw-r--r--man2/shmget.2412
-rw-r--r--man2/shmop.2508
-rw-r--r--man2/shutdown.298
-rw-r--r--man2/sigaction.21210
-rw-r--r--man2/sigaltstack.2362
-rw-r--r--man2/signal.2280
-rw-r--r--man2/signalfd.2525
-rw-r--r--man2/signalfd4.21
-rw-r--r--man2/sigpending.2110
-rw-r--r--man2/sigprocmask.2224
-rw-r--r--man2/sigreturn.2151
-rw-r--r--man2/sigsuspend.2131
-rw-r--r--man2/sigtimedwait.21
-rw-r--r--man2/sigwaitinfo.2231
-rw-r--r--man2/socket.2493
-rw-r--r--man2/socketcall.2185
-rw-r--r--man2/socketpair.2116
-rw-r--r--man2/splice.2266
-rw-r--r--man2/spu_create.2276
-rw-r--r--man2/spu_run.2260
-rw-r--r--man2/ssetmask.21
-rw-r--r--man2/stat.2539
-rw-r--r--man2/stat64.21
-rw-r--r--man2/statfs.2389
-rw-r--r--man2/statfs64.21
-rw-r--r--man2/statx.2614
-rw-r--r--man2/stime.273
-rw-r--r--man2/stty.21
-rw-r--r--man2/subpage_prot.2118
-rw-r--r--man2/swapoff.21
-rw-r--r--man2/swapon.2202
-rw-r--r--man2/symlink.2265
-rw-r--r--man2/symlinkat.21
-rw-r--r--man2/sync.2148
-rw-r--r--man2/sync_file_range.2213
-rw-r--r--man2/sync_file_range2.21
-rw-r--r--man2/syncfs.21
-rw-r--r--man2/syscall.2367
-rw-r--r--man2/syscalls.21172
-rw-r--r--man2/sysctl.2160
-rw-r--r--man2/sysfs.297
-rw-r--r--man2/sysinfo.2106
-rw-r--r--man2/syslog.2378
-rw-r--r--man2/tee.2199
-rw-r--r--man2/tgkill.21
-rw-r--r--man2/time.2114
-rw-r--r--man2/timer_create.2487
-rw-r--r--man2/timer_delete.258
-rw-r--r--man2/timer_getoverrun.2134
-rw-r--r--man2/timer_gettime.21
-rw-r--r--man2/timer_settime.2187
-rw-r--r--man2/timerfd_create.2704
-rw-r--r--man2/timerfd_gettime.21
-rw-r--r--man2/timerfd_settime.21
-rw-r--r--man2/times.2222
-rw-r--r--man2/tkill.2130
-rw-r--r--man2/truncate.2251
-rw-r--r--man2/truncate64.21
-rw-r--r--man2/tuxcall.21
-rw-r--r--man2/ugetrlimit.21
-rw-r--r--man2/umask.2149
-rw-r--r--man2/umount.2214
-rw-r--r--man2/umount2.21
-rw-r--r--man2/uname.2134
-rw-r--r--man2/unimplemented.248
-rw-r--r--man2/unlink.2298
-rw-r--r--man2/unlinkat.21
-rw-r--r--man2/unshare.2572
-rw-r--r--man2/uselib.2106
-rw-r--r--man2/userfaultfd.2951
-rw-r--r--man2/ustat.2104
-rw-r--r--man2/utime.2179
-rw-r--r--man2/utimensat.2633
-rw-r--r--man2/utimes.21
-rw-r--r--man2/vfork.2316
-rw-r--r--man2/vhangup.258
-rw-r--r--man2/vm86.258
-rw-r--r--man2/vm86old.21
-rw-r--r--man2/vmsplice.2162
-rw-r--r--man2/vserver.21
-rw-r--r--man2/wait.2720
-rw-r--r--man2/wait3.21
-rw-r--r--man2/wait4.2169
-rw-r--r--man2/waitid.21
-rw-r--r--man2/waitpid.21
-rw-r--r--man2/write.2329
-rw-r--r--man2/writev.21
503 files changed, 1 insertions, 99951 deletions
diff --git a/man2 b/man2
new file mode 120000
index 000000000..ab24e3021
--- /dev/null
+++ b/man2
@@ -0,0 +1 @@
+man/man2
\ No newline at end of file
diff --git a/man2/_Exit.2 b/man2/_Exit.2
deleted file mode 100644
index 9f9d2e763..000000000
--- a/man2/_Exit.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/_exit.2
diff --git a/man2/__clone2.2 b/man2/__clone2.2
deleted file mode 100644
index 68f41a5ba..000000000
--- a/man2/__clone2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/clone.2
diff --git a/man2/_exit.2 b/man2/_exit.2
deleted file mode 100644
index 08854a073..000000000
--- a/man2/_exit.2
+++ /dev/null
@@ -1,138 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Wed Jul 21 23:02:38 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 2001-11-17, aeb
-.\"
-.TH _exit 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-_exit, _Exit \- terminate the calling process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "[[noreturn]] void _exit(int " status );
-.P
-.B #include <stdlib.h>
-.P
-.BI "[[noreturn]] void _Exit(int " status );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR _Exit ():
-.nf
- _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L
-.fi
-.SH DESCRIPTION
-.BR _exit ()
-terminates the calling process "immediately".
-Any open file descriptors belonging to the process are closed.
-Any children of the process are inherited by
-.BR init (1)
-(or by the nearest "subreaper" process as defined through the use of the
-.BR prctl (2)
-.B PR_SET_CHILD_SUBREAPER
-operation).
-The process's parent is sent a
-.B SIGCHLD
-signal.
-.P
-The value
-.I "status & 0xFF"
-is returned to the parent process as the process's exit status, and
-can be collected by the parent using one of the
-.BR wait (2)
-family of calls.
-.P
-The function
-.BR _Exit ()
-is equivalent to
-.BR _exit ().
-.SH RETURN VALUE
-These functions do not return.
-.SH STANDARDS
-.TP
-.BR _exit ()
-POSIX.1-2008.
-.TP
-.BR _Exit ()
-C11, POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.P
-.BR _Exit ()
-was introduced by C99.
-.SH NOTES
-For a discussion on the effects of an exit, the transmission of
-exit status, zombie processes, signals sent, and so on, see
-.BR exit (3).
-.P
-The function
-.BR _exit ()
-is like
-.BR exit (3),
-but does not call any
-functions registered with
-.BR atexit (3)
-or
-.BR on_exit (3).
-Open
-.BR stdio (3)
-streams are not flushed.
-On the other hand,
-.BR _exit ()
-does close open file descriptors, and this may cause an unknown delay,
-waiting for pending output to finish.
-If the delay is undesired,
-it may be useful to call functions like
-.BR tcflush (3)
-before calling
-.BR _exit ().
-Whether any pending I/O is canceled, and which pending I/O may be
-canceled upon
-.BR _exit (),
-is implementation-dependent.
-.SS C library/kernel differences
-The text above in DESCRIPTION describes the traditional effect of
-.BR _exit (),
-which is to terminate a process,
-and these are the semantics specified by POSIX.1 and implemented
-by the C library wrapper function.
-On modern systems, this means termination of all threads in the process.
-.P
-By contrast with the C library wrapper function, the raw Linux
-.BR _exit ()
-system call terminates only the calling thread, and actions such as
-reparenting child processes or sending
-.B SIGCHLD
-to the parent process are performed only if this is
-the last thread in the thread group.
-.\" _exit() is used by pthread_exit() to terminate the calling thread
-.P
-Up to glibc 2.3, the
-.BR _exit ()
-wrapper function invoked the kernel system call of the same name.
-Since glibc 2.3, the wrapper function invokes
-.BR exit_group (2),
-in order to terminate all of the threads in a process.
-.SH SEE ALSO
-.BR execve (2),
-.BR exit_group (2),
-.BR fork (2),
-.BR kill (2),
-.BR wait (2),
-.BR wait4 (2),
-.BR waitpid (2),
-.BR atexit (3),
-.BR exit (3),
-.BR on_exit (3),
-.BR termios (3)
diff --git a/man2/_llseek.2 b/man2/_llseek.2
deleted file mode 100644
index d15dbee55..000000000
--- a/man2/_llseek.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/llseek.2
diff --git a/man2/_newselect.2 b/man2/_newselect.2
deleted file mode 100644
index e17784318..000000000
--- a/man2/_newselect.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/select.2
diff --git a/man2/_syscall.2 b/man2/_syscall.2
deleted file mode 100644
index 0b781a39e..000000000
--- a/man2/_syscall.2
+++ /dev/null
@@ -1,171 +0,0 @@
-.\" Copyright (c) 1993 Michael Haardt (michael@moria.de),
-.\" Fri Apr 2 11:32:09 MET DST 1993
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Tue Jul 6 12:42:46 MDT 1993 <dminer@nyx.cs.du.edu>
-.\" Added "Calling Directly" and supporting paragraphs
-.\"
-.\" Modified Sat Jul 24 15:19:12 1993 by Rik Faith <faith@cs.unc.edu>
-.\"
-.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
-.\" Added explanation of arg stacking when 6 or more args.
-.\"
-.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
-.\"
-.\" 2007-10-23 mtk: created as a new page, by taking the content
-.\" specific to the _syscall() macros from intro(2).
-.\"
-.TH _syscall 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-_syscall \- invoking a system call without library support (OBSOLETE)
-.SH SYNOPSIS
-.nf
-.B #include <linux/unistd.h>
-.P
-A _syscall macro
-.P
-desired system call
-.fi
-.SH DESCRIPTION
-The important thing to know about a system call is its prototype.
-You need to know how many arguments, their types,
-and the function return type.
-There are seven macros that make the actual call into the system easier.
-They have the form:
-.P
-.in +4n
-.EX
-.RI _syscall X ( type , name , type1 , arg1 , type2 , arg2 ,...)
-.EE
-.in
-.P
-where
-.IP
-.I X
-is 0\[en]6, which are the number of arguments taken by the
-system call
-.IP
-.I type
-is the return type of the system call
-.IP
-.I name
-is the name of the system call
-.IP
-.I typeN
-is the Nth argument's type
-.IP
-.I argN
-is the name of the Nth argument
-.P
-These macros create a function called
-.I name
-with the arguments you
-specify.
-Once you include the _syscall() in your source file,
-you call the system call by
-.IR name .
-.SH FILES
-.I /usr/include/linux/unistd.h
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Starting around Linux 2.6.18, the _syscall macros were removed
-from header files supplied to user space.
-Use
-.BR syscall (2)
-instead.
-(Some architectures, notably ia64, never provided the _syscall macros;
-on those architectures,
-.BR syscall (2)
-was always required.)
-.SH NOTES
-The _syscall() macros
-.I "do not"
-produce a prototype.
-You may have to
-create one, especially for C++ users.
-.P
-System calls are not required to return only positive or negative error
-codes.
-You need to read the source to be sure how it will return errors.
-Usually, it is the negative of a standard error code,
-for example,
-.RI \- EPERM .
-The _syscall() macros will return the result
-.I r
-of the system call
-when
-.I r
-is nonnegative, but will return \-1 and set the variable
-.I errno
-to
-.RI \- r
-when
-.I r
-is negative.
-For the error codes, see
-.BR errno (3).
-.P
-When defining a system call, the argument types
-.I must
-be
-passed by-value or by-pointer (for aggregates like structs).
-.\" The preferred way to invoke system calls that glibc does not know
-.\" about yet is via
-.\" .BR syscall (2).
-.\" However, this mechanism can be used only if using a libc
-.\" (such as glibc) that supports
-.\" .BR syscall (2),
-.\" and if the
-.\" .I <sys/syscall.h>
-.\" header file contains the required SYS_foo definition.
-.\" Otherwise, the use of a _syscall macro is required.
-.\"
-.SH EXAMPLES
-.\" SRC BEGIN (_syscall.c)
-.EX
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <linux/unistd.h> /* for _syscallX macros/related stuff */
-#include <linux/kernel.h> /* for struct sysinfo */
-\&
-_syscall1(int, sysinfo, struct sysinfo *, info);
-\&
-int
-main(void)
-{
- struct sysinfo s_info;
- int error;
-\&
- error = sysinfo(&s_info);
- printf("code error = %d\en", error);
- printf("Uptime = %lds\enLoad: 1 min %lu / 5 min %lu / 15 min %lu\en"
- "RAM: total %lu / free %lu / shared %lu\en"
- "Memory in buffers = %lu\enSwap: total %lu / free %lu\en"
- "Number of processes = %d\en",
- s_info.uptime, s_info.loads[0],
- s_info.loads[1], s_info.loads[2],
- s_info.totalram, s_info.freeram,
- s_info.sharedram, s_info.bufferram,
- s_info.totalswap, s_info.freeswap,
- s_info.procs);
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SS Sample output
-.EX
-code error = 0
-Uptime = 502034s
-Load: 1 min 13376 / 5 min 5504 / 15 min 1152
-RAM: total 15343616 / free 827392 / shared 8237056
-Memory in buffers = 5066752
-Swap: total 27881472 / free 24698880
-Number of processes = 40
-.EE
-.SH SEE ALSO
-.BR intro (2),
-.BR syscall (2),
-.BR errno (3)
diff --git a/man2/_sysctl.2 b/man2/_sysctl.2
deleted file mode 100644
index 9e14d4b16..000000000
--- a/man2/_sysctl.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sysctl.2
diff --git a/man2/accept.2 b/man2/accept.2
deleted file mode 100644
index aed8660f3..000000000
--- a/man2/accept.2
+++ /dev/null
@@ -1,349 +0,0 @@
-.\" Copyright (c) 1983, 1990, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-10-21 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998-2000 by Andi Kleen to match Linux 2.2 reality
-.\" Modified 2002-04-23 by Roger Luethi <rl@hellgate.ch>
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2008-12-04, mtk, Add documentation of accept4()
-.\"
-.TH accept 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-accept, accept4 \- accept a connection on a socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int accept(int " sockfd ", struct sockaddr *_Nullable restrict " addr ,
-.BI " socklen_t *_Nullable restrict " addrlen );
-.P
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sys/socket.h>
-.P
-.BI "int accept4(int " sockfd ", struct sockaddr *_Nullable restrict " addr ,
-.BI " socklen_t *_Nullable restrict " addrlen ", int " flags );
-.fi
-.SH DESCRIPTION
-The
-.BR accept ()
-system call is used with connection-based socket types
-.RB ( SOCK_STREAM ,
-.BR SOCK_SEQPACKET ).
-It extracts the first connection request on the queue of pending
-connections for the listening socket,
-.IR sockfd ,
-creates a new connected socket, and returns a new file
-descriptor referring to that socket.
-The newly created socket is not in the listening state.
-The original socket
-.I sockfd
-is unaffected by this call.
-.P
-The argument
-.I sockfd
-is a socket that has been created with
-.BR socket (2),
-bound to a local address with
-.BR bind (2),
-and is listening for connections after a
-.BR listen (2).
-.P
-The argument
-.I addr
-is a pointer to a
-.I sockaddr
-structure.
-This structure is filled in with the address of the peer socket,
-as known to the communications layer.
-The exact format of the address returned
-.I addr
-is determined by the socket's address family (see
-.BR socket (2)
-and the respective protocol man pages).
-When
-.I addr
-is NULL, nothing is filled in; in this case,
-.I addrlen
-is not used, and should also be NULL.
-.P
-The
-.I addrlen
-argument is a value-result argument:
-the caller must initialize it to contain the
-size (in bytes) of the structure pointed to by
-.IR addr ;
-on return it will contain the actual size of the peer address.
-.P
-The returned address is truncated if the buffer provided is too small;
-in this case,
-.I addrlen
-will return a value greater than was supplied to the call.
-.P
-If no pending
-connections are present on the queue, and the socket is not marked as
-nonblocking,
-.BR accept ()
-blocks the caller until a connection is present.
-If the socket is marked
-nonblocking and no pending connections are present on the queue,
-.BR accept ()
-fails with the error
-.B EAGAIN
-or
-.BR EWOULDBLOCK .
-.P
-In order to be notified of incoming connections on a socket, you can use
-.BR select (2),
-.BR poll (2),
-or
-.BR epoll (7).
-A readable event will be delivered when a new connection is attempted and you
-may then call
-.BR accept ()
-to get a socket for that connection.
-Alternatively, you can set the socket to deliver
-.B SIGIO
-when activity occurs on a socket; see
-.BR socket (7)
-for details.
-.P
-If
-.I flags
-is 0, then
-.BR accept4 ()
-is the same as
-.BR accept ().
-The following values can be bitwise ORed in
-.I flags
-to obtain different behavior:
-.TP 16
-.B SOCK_NONBLOCK
-Set the
-.B O_NONBLOCK
-file status flag on the open file description (see
-.BR open (2))
-referred to by the new file descriptor.
-Using this flag saves extra calls to
-.BR fcntl (2)
-to achieve the same result.
-.TP
-.B SOCK_CLOEXEC
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.SH RETURN VALUE
-On success,
-these system calls return a file descriptor
-for the accepted socket (a nonnegative integer).
-On error, \-1 is returned,
-.I errno
-is set to indicate the error, and
-.I addrlen
-is left unchanged.
-.SS Error handling
-Linux
-.BR accept ()
-(and
-.BR accept4 ())
-passes already-pending network errors on the new socket
-as an error code from
-.BR accept ().
-This behavior differs from other BSD socket
-implementations.
-For reliable operation the application should detect
-the network errors defined for the protocol after
-.BR accept ()
-and treat
-them like
-.B EAGAIN
-by retrying.
-In the case of TCP/IP, these are
-.BR ENETDOWN ,
-.BR EPROTO ,
-.BR ENOPROTOOPT ,
-.BR EHOSTDOWN ,
-.BR ENONET ,
-.BR EHOSTUNREACH ,
-.BR EOPNOTSUPP ,
-and
-.BR ENETUNREACH .
-.SH ERRORS
-.TP
-.BR EAGAIN " or " EWOULDBLOCK
-.\" Actually EAGAIN on Linux
-The socket is marked nonblocking and no connections are
-present to be accepted.
-POSIX.1-2001 and POSIX.1-2008
-allow either error to be returned for this case,
-and do not require these constants to have the same value,
-so a portable application should check for both possibilities.
-.TP
-.B EBADF
-.I sockfd
-is not an open file descriptor.
-.TP
-.B ECONNABORTED
-A connection has been aborted.
-.TP
-.B EFAULT
-The
-.I addr
-argument is not in a writable part of the user address space.
-.TP
-.B EINTR
-The system call was interrupted by a signal that was caught
-before a valid connection arrived; see
-.BR signal (7).
-.TP
-.B EINVAL
-Socket is not listening for connections, or
-.I addrlen
-is invalid (e.g., is negative).
-.TP
-.B EINVAL
-.RB ( accept4 ())
-invalid value in
-.IR flags .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOBUFS
-.TQ
-.B ENOMEM
-Not enough free memory.
-This often means that the memory allocation is limited by the socket buffer
-limits, not by the system memory.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.TP
-.B EOPNOTSUPP
-The referenced socket is not of type
-.BR SOCK_STREAM .
-.TP
-.B EPERM
-Firewall rules forbid connection.
-.TP
-.B EPROTO
-Protocol error.
-.P
-In addition, network errors for the new socket and as defined
-for the protocol may be returned.
-Various Linux kernels can
-return other errors such as
-.BR ENOSR ,
-.BR ESOCKTNOSUPPORT ,
-.BR EPROTONOSUPPORT ,
-.BR ETIMEDOUT .
-The value
-.B ERESTARTSYS
-may be seen during a trace.
-.SH VERSIONS
-On Linux, the new socket returned by
-.BR accept ()
-does \fInot\fP inherit file status flags such as
-.B O_NONBLOCK
-and
-.B O_ASYNC
-from the listening socket.
-This behavior differs from the canonical BSD sockets implementation.
-.\" Some testing seems to show that Tru64 5.1 and HP-UX 11 also
-.\" do not inherit file status flags -- MTK Jun 05
-Portable programs should not rely on inheritance or noninheritance
-of file status flags and always explicitly set all required flags on
-the socket returned from
-.BR accept ().
-.SH STANDARDS
-.TP
-.BR accept ()
-POSIX.1-2008.
-.TP
-.BR accept4 ()
-Linux.
-.SH HISTORY
-.TP
-.BR accept ()
-POSIX.1-2001, SVr4, 4.4BSD
-.RB ( accept ()
-first appeared in 4.2BSD).
-.\" The BSD man page documents five possible error returns
-.\" (EBADF, ENOTSOCK, EOPNOTSUPP, EWOULDBLOCK, EFAULT).
-.\" POSIX.1-2001 documents errors
-.\" EAGAIN, EBADF, ECONNABORTED, EINTR, EINVAL, EMFILE,
-.\" ENFILE, ENOBUFS, ENOMEM, ENOTSOCK, EOPNOTSUPP, EPROTO, EWOULDBLOCK.
-.\" In addition, SUSv2 documents EFAULT and ENOSR.
-.TP
-.BR accept4 ()
-Linux 2.6.28,
-glibc 2.10.
-.SH NOTES
-There may not always be a connection waiting after a
-.B SIGIO
-is delivered or
-.BR select (2),
-.BR poll (2),
-or
-.BR epoll (7)
-return a readability event because the connection might have been
-removed by an asynchronous network error or another thread before
-.BR accept ()
-is called.
-If this happens, then the call will block waiting for the next
-connection to arrive.
-To ensure that
-.BR accept ()
-never blocks, the passed socket
-.I sockfd
-needs to have the
-.B O_NONBLOCK
-flag set (see
-.BR socket (7)).
-.P
-For certain protocols which require an explicit confirmation,
-such as DECnet,
-.BR accept ()
-can be thought of as merely dequeuing the next connection request and not
-implying confirmation.
-Confirmation can be implied by
-a normal read or write on the new file descriptor, and rejection can be
-implied by closing the new socket.
-Currently, only DECnet has these semantics on Linux.
-.\"
-.SS The socklen_t type
-In the original BSD sockets implementation (and on other older systems)
-.\" such as Linux libc4 and libc5, SunOS 4, SGI
-the third argument of
-.BR accept ()
-was declared as an \fIint\ *\fP.
-A POSIX.1g draft
-standard wanted to change it into a \fIsize_t\ *\fP;
-.\" SunOS 5 has 'size_t *'
-later POSIX standards and glibc 2.x have
-.IR "socklen_t\ * ".
-.SH EXAMPLES
-See
-.BR bind (2).
-.SH SEE ALSO
-.BR bind (2),
-.BR connect (2),
-.BR listen (2),
-.BR select (2),
-.BR socket (2),
-.BR socket (7)
diff --git a/man2/accept4.2 b/man2/accept4.2
deleted file mode 100644
index 963dfb54c..000000000
--- a/man2/accept4.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/accept.2
diff --git a/man2/access.2 b/man2/access.2
deleted file mode 100644
index 2b3e70e34..000000000
--- a/man2/access.2
+++ /dev/null
@@ -1,468 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\" and Copyright (C) 2004, 2006, 2007, 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-21 Rik Faith (faith@cs.unc.edu)
-.\" Modified 1994-08-21 by Michael Chastain (mec@shell.portal.com):
-.\" Removed note about old kernel (pre-1.1.44) using wrong id on path.
-.\" Modified 1996-03-18 by Martin Schulze (joey@infodrom.north.de):
-.\" Stated more clearly how it behaves with symbolic links.
-.\" Added correction due to Nick Duffek (nsd@bbc.com), aeb, 960426
-.\" Modified 1996-09-07 by Michael Haardt:
-.\" Restrictions for NFS
-.\" Modified 1997-09-09 by Joseph S. Myers <jsm28@cam.ac.uk>
-.\" Modified 1998-01-13 by Michael Haardt:
-.\" Using access is often insecure
-.\" Modified 2001-10-16 by aeb
-.\" Modified 2002-04-23 by Roger Luethi <rl@hellgate.ch>
-.\" Modified 2004-06-23 by Michael Kerrisk
-.\" 2007-06-10, mtk, various parts rewritten, and added BUGS section.
-.\"
-.TH access 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-access, faccessat, faccessat2 \- check user's permissions for a file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int access(const char *" pathname ", int " mode );
-.P
-.BR "#include <fcntl.h>" " /* Definition of " AT_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int faccessat(int " dirfd ", const char *" pathname ", int " \
-mode ", int " flags );
- /* But see C library/kernel differences, below */
-.P
-.BR "#include <fcntl.h>" " /* Definition of " AT_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.B int syscall(SYS_faccessat2,
-.BI " int " dirfd ", const char *" pathname ", int " mode \
-", int " flags );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR faccessat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-.BR access ()
-checks whether the calling process can access the file
-.IR pathname .
-If
-.I pathname
-is a symbolic link, it is dereferenced.
-.P
-The
-.I mode
-specifies the accessibility check(s) to be performed,
-and is either the value
-.BR F_OK ,
-.\" F_OK is defined as 0 on every system that I know of.
-or a mask consisting of the bitwise OR of one or more of
-.BR R_OK ", " W_OK ", and " X_OK .
-.B F_OK
-tests for the existence of the file.
-.BR R_OK ", " W_OK ", and " X_OK
-test whether the file exists and grants read, write, and
-execute permissions, respectively.
-.P
-The check is done using the calling process's
-.I real
-UID and GID, rather than the effective IDs as is done when
-actually attempting an operation (e.g.,
-.BR open (2))
-on the file.
-Similarly, for the root user, the check uses the set of
-permitted capabilities rather than the set of effective
-capabilities; and for non-root users, the check uses an empty set
-of capabilities.
-.P
-This allows set-user-ID programs and capability-endowed programs
-to easily determine the invoking user's authority.
-In other words,
-.BR access ()
-does not answer the "can I read/write/execute this file?" question.
-It answers a slightly different question:
-"(assuming I'm a setuid binary) can
-.I the user who invoked me
-read/write/execute this file?",
-which gives set-user-ID programs the possibility to
-prevent malicious users from causing them to read files
-which users shouldn't be able to read.
-.P
-If the calling process is privileged (i.e., its real UID is zero),
-then an
-.B X_OK
-check is successful for a regular file if execute permission
-is enabled for any of the file owner, group, or other.
-.SS faccessat()
-.BR faccessat ()
-operates in exactly the same way as
-.BR access (),
-except for the differences described here.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR access ()
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR access ()).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-.I flags
-is constructed by ORing together zero or more of the following values:
-.TP
-.B AT_EACCESS
-Perform access checks using the effective user and group IDs.
-By default,
-.BR faccessat ()
-uses the real IDs (like
-.BR access ()).
-.TP
-.BR AT_EMPTY_PATH " (since Linux 5.8)"
-If
-.I pathname
-is an empty string, operate on the file referred to by
-.I dirfd
-(which may have been obtained using the
-.BR open (2)
-.B O_PATH
-flag).
-In this case,
-.I dirfd
-can refer to any type of file, not just a directory.
-If
-.I dirfd
-is
-.BR AT_FDCWD ,
-the call operates on the current working directory.
-This flag is Linux-specific; define
-.B _GNU_SOURCE
-to obtain its definition.
-.TP
-.B AT_SYMLINK_NOFOLLOW
-If
-.I pathname
-is a symbolic link, do not dereference it:
-instead return information about the link itself.
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR faccessat ().
-.\"
-.SS faccessat2()
-The description of
-.BR faccessat ()
-given above corresponds to POSIX.1 and
-to the implementation provided by glibc.
-However, the glibc implementation was an imperfect emulation (see BUGS)
-that papered over the fact that the raw Linux
-.BR faccessat ()
-system call does not have a
-.I flags
-argument.
-To allow for a proper implementation, Linux 5.8 added the
-.BR faccessat2 ()
-system call, which supports the
-.I flags
-argument and allows a correct implementation of the
-.BR faccessat ()
-wrapper function.
-.SH RETURN VALUE
-On success (all requested permissions granted, or
-.I mode
-is
-.B F_OK
-and the file exists), zero is returned.
-On error (at least one bit in
-.I mode
-asked for a permission that is denied, or
-.I mode
-is
-.B F_OK
-and the file does not exist, or some other error occurred),
-\-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The requested access would be denied to the file, or search permission
-is denied for one of the directories in the path prefix of
-.IR pathname .
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.RB ( faccessat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EFAULT
-.I pathname
-points outside your accessible address space.
-.TP
-.B EINVAL
-.I mode
-was incorrectly specified.
-.TP
-.B EINVAL
-.RB ( faccessat ())
-Invalid flag specified in
-.IR flags .
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR pathname .
-.TP
-.B ENAMETOOLONG
-.I pathname
-is too long.
-.TP
-.B ENOENT
-A component of
-.I pathname
-does not exist or is a dangling symbolic link.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component used as a directory in
-.I pathname
-is not, in fact, a directory.
-.TP
-.B ENOTDIR
-.RB ( faccessat ())
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.TP
-.B EPERM
-Write permission was requested to a file that has the immutable flag set.
-See also
-.BR ioctl_iflags (2).
-.TP
-.B EROFS
-Write permission was requested for a file on a read-only filesystem.
-.TP
-.B ETXTBSY
-Write access was requested to an executable which is being
-executed.
-.SH VERSIONS
-If the calling process has appropriate privileges (i.e., is superuser),
-POSIX.1-2001 permits an implementation to indicate success for an
-.B X_OK
-check even if none of the execute file permission bits are set.
-.\" HP-UX 11 and Tru64 5.1 do this.
-Linux does not do this.
-.\"
-.SS C library/kernel differences
-The raw
-.BR faccessat ()
-system call takes only the first three arguments.
-The
-.B AT_EACCESS
-and
-.B AT_SYMLINK_NOFOLLOW
-flags are actually implemented within the glibc wrapper function for
-.BR faccessat ().
-If either of these flags is specified, then the wrapper function employs
-.BR fstatat (2)
-to determine access permissions, but see BUGS.
-.\"
-.SS glibc notes
-On older kernels where
-.BR faccessat ()
-is unavailable (and when the
-.B AT_EACCESS
-and
-.B AT_SYMLINK_NOFOLLOW
-flags are not specified),
-the glibc wrapper function falls back to the use of
-.BR access ().
-When
-.I pathname
-is a relative pathname,
-glibc constructs a pathname based on the symbolic link in
-.I /proc/self/fd
-that corresponds to the
-.I dirfd
-argument.
-.SH STANDARDS
-.TP
-.BR access ()
-.TQ
-.BR faccessat ()
-POSIX.1-2008.
-.TP
-.BR faccessat2 ()
-Linux.
-.SH HISTORY
-.TP
-.BR access ()
-SVr4, 4.3BSD, POSIX.1-2001.
-.TP
-.BR faccessat ()
-Linux 2.6.16,
-glibc 2.4.
-.TP
-.BR faccessat2 ()
-Linux 5.8.
-.SH NOTES
-.BR Warning :
-Using these calls to check if a user is authorized to, for example,
-open a file before actually doing so using
-.BR open (2)
-creates a security hole, because the user might exploit the short time
-interval between checking and opening the file to manipulate it.
-.BR "For this reason, the use of this system call should be avoided" .
-(In the example just described,
-a safer alternative would be to temporarily switch the process's
-effective user ID to the real ID and then call
-.BR open (2).)
-.P
-.BR access ()
-always dereferences symbolic links.
-If you need to check the permissions on a symbolic link, use
-.BR faccessat ()
-with the flag
-.BR AT_SYMLINK_NOFOLLOW .
-.P
-These calls return an error if any of the access types in
-.I mode
-is denied, even if some of the other access types in
-.I mode
-are permitted.
-.P
-A file is accessible only if the permissions on each of the
-directories in the path prefix of
-.I pathname
-grant search (i.e., execute) access.
-If any directory is inaccessible, then the
-.BR access ()
-call fails, regardless of the permissions on the file itself.
-.P
-Only access bits are checked, not the file type or contents.
-Therefore, if a directory is found to be writable,
-it probably means that files can be created in the directory,
-and not that the directory can be written as a file.
-Similarly, a DOS file may be reported as executable, but the
-.BR execve (2)
-call will still fail.
-.P
-These calls
-may not work correctly on NFSv2 filesystems with UID mapping enabled,
-because UID mapping is done on the server and hidden from the client,
-which checks permissions.
-(NFS versions 3 and higher perform the check on the server.)
-Similar problems can occur with FUSE mounts.
-.\"
-.SH BUGS
-Because the Linux kernel's
-.BR faccessat ()
-system call does not support a
-.I flags
-argument, the glibc
-.BR faccessat ()
-wrapper function provided in glibc 2.32 and earlier
-emulates the required functionality using
-a combination of the
-.BR faccessat ()
-system call and
-.BR fstatat (2).
-However, this emulation does not take ACLs into account.
-Starting with glibc 2.33, the wrapper function avoids this bug
-by making use of the
-.BR faccessat2 ()
-system call where it is provided by the underlying kernel.
-.P
-In Linux 2.4 (and earlier) there is some strangeness in the handling of
-.B X_OK
-tests for superuser.
-If all categories of execute permission are disabled
-for a nondirectory file, then the only
-.BR access ()
-test that returns \-1 is when
-.I mode
-is specified as just
-.BR X_OK ;
-if
-.B R_OK
-or
-.B W_OK
-is also specified in
-.IR mode ,
-then
-.BR access ()
-returns 0 for such files.
-.\" This behavior appears to have been an implementation accident.
-Early Linux 2.6 (up to and including Linux 2.6.3)
-also behaved in the same way as Linux 2.4.
-.P
-Before Linux 2.6.20,
-these calls ignored the effect of the
-.B MS_NOEXEC
-flag if it was used to
-.BR mount (2)
-the underlying filesystem.
-Since Linux 2.6.20, the
-.B MS_NOEXEC
-flag is honored.
-.SH SEE ALSO
-.BR chmod (2),
-.BR chown (2),
-.BR open (2),
-.BR setgid (2),
-.BR setuid (2),
-.BR stat (2),
-.BR euidaccess (3),
-.BR credentials (7),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/acct.2 b/man2/acct.2
deleted file mode 100644
index 8c7414bb7..000000000
--- a/man2/acct.2
+++ /dev/null
@@ -1,136 +0,0 @@
-.\" Copyright (c) 1993 Michael Haardt
-.\" (michael@moria.de),
-.\" Fri Apr 2 11:32:09 MET DST 1993
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified 1993-07-22 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1993-08-10 by Alan Cox <iiitac@pyramid.swansea.ac.uk>
-.\" Modified 1998-11-04 by Tigran Aivazian <tigran@sco.com>
-.\" Modified 2004-05-27, 2004-06-17, 2004-06-23 by Michael Kerrisk
-.\"
-.TH acct 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-acct \- switch process accounting on or off
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int acct(const char *_Nullable " filename );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR acct ():
-.nf
- Since glibc 2.21:
-.\" commit 266865c0e7b79d4196e2cc393693463f03c90bd8
- _DEFAULT_SOURCE
- In glibc 2.19 and 2.20:
- _DEFAULT_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
- Up to and including glibc 2.19:
- _BSD_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
-.fi
-.SH DESCRIPTION
-The
-.BR acct ()
-system call enables or disables process accounting.
-If called with the name of an existing file as its argument,
-accounting is turned on,
-and records for each terminating process are appended to
-.I filename
-as it terminates.
-An argument of NULL causes accounting to be turned off.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Write permission is denied for the specified file,
-or search permission is denied for one of the directories
-in the path prefix of
-.I filename
-(see also
-.BR path_resolution (7)),
-or
-.I filename
-is not a regular file.
-.TP
-.B EFAULT
-.I filename
-points outside your accessible address space.
-.TP
-.B EIO
-Error writing to the file
-.IR filename .
-.TP
-.B EISDIR
-.I filename
-is a directory.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR filename .
-.TP
-.B ENAMETOOLONG
-.I filename
-was too long.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOENT
-The specified file does not exist.
-.TP
-.B ENOMEM
-Out of memory.
-.TP
-.B ENOSYS
-BSD process accounting was not enabled when the operating system
-kernel was compiled.
-The kernel configuration parameter controlling this feature is
-.BR CONFIG_BSD_PROCESS_ACCT .
-.TP
-.B ENOTDIR
-A component used as a directory in
-.I filename
-is not in fact a directory.
-.TP
-.B EPERM
-The calling process has insufficient privilege to enable process accounting.
-On Linux, the
-.B CAP_SYS_PACCT
-capability is required.
-.TP
-.B EROFS
-.I filename
-refers to a file on a read-only filesystem.
-.TP
-.B EUSERS
-There are no more free file structures or we ran out of memory.
-.SH STANDARDS
-None.
-.SH HISTORY
-SVr4, 4.3BSD.
-.\" SVr4 documents an EBUSY error condition, but no EISDIR or ENOSYS.
-.\" Also AIX and HP-UX document EBUSY (attempt is made
-.\" to enable accounting when it is already enabled), as does Solaris
-.\" (attempt is made to enable accounting using the same file that is
-.\" currently being used).
-.SH NOTES
-No accounting is produced for programs running when a system crash occurs.
-In particular, nonterminating processes are never accounted for.
-.P
-The structure of the records written to the accounting file is described in
-.BR acct (5).
-.SH SEE ALSO
-.BR acct (5)
diff --git a/man2/add_key.2 b/man2/add_key.2
deleted file mode 100644
index 2b017f680..000000000
--- a/man2/add_key.2
+++ /dev/null
@@ -1,298 +0,0 @@
-.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
-.\" Written by David Howells (dhowells@redhat.com)
-.\" and Copyright (C) 2016 Michael Kerrisk <mtk.man-pages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH add_key 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-add_key \- add a key to the kernel's key management facility
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <keyutils.h>
-.P
-.BI "key_serial_t add_key(const char *" type ", const char *" description ,
-.BI " const void " payload [. plen "], size_t " plen ,
-.BI " key_serial_t " keyring ");"
-.fi
-.P
-.IR Note :
-There is no glibc wrapper for this system call; see NOTES.
-.SH DESCRIPTION
-.BR add_key ()
-creates or updates a key of the given
-.I type
-and
-.IR description ,
-instantiates it with the
-.I payload
-of length
-.IR plen ,
-attaches it to the nominated
-.IR keyring ,
-and returns the key's serial number.
-.P
-The key may be rejected if the provided data is in the wrong format or
-it is invalid in some other way.
-.P
-If the destination
-.I keyring
-already contains a key that matches the specified
-.I type
-and
-.IR description ,
-then, if the key type supports it,
-.\" FIXME The aforementioned phrase begs the question:
-.\" which key types support this?
-that key will be updated rather than a new key being created;
-if not, a new key (with a different ID) will be created
-and it will displace the link to the extant key from the keyring.
-.\" FIXME Perhaps elaborate the implications here? Namely, the new
-.\" key will have a new ID, and if the old key was a keyring that
-.\" is consequently unlinked, then keys that it was anchoring
-.\" will have their reference count decreased by one (and may
-.\" consequently be garbage collected). Is this all correct?
-.P
-The destination
-.I keyring
-serial number may be that of a valid keyring for which the caller has
-.I write
-permission.
-Alternatively, it may be one of the following special keyring IDs:
-.\" FIXME . Perhaps have a separate page describing special keyring IDs?
-.TP
-.B KEY_SPEC_THREAD_KEYRING
-This specifies the caller's thread-specific keyring
-.RB ( thread\-keyring (7)).
-.TP
-.B KEY_SPEC_PROCESS_KEYRING
-This specifies the caller's process-specific keyring
-.RB ( process\-keyring (7)).
-.TP
-.B KEY_SPEC_SESSION_KEYRING
-This specifies the caller's session-specific keyring
-.RB ( session\-keyring (7)).
-.TP
-.B KEY_SPEC_USER_KEYRING
-This specifies the caller's UID-specific keyring
-.RB ( user\-keyring (7)).
-.TP
-.B KEY_SPEC_USER_SESSION_KEYRING
-This specifies the caller's UID-session keyring
-.RB ( user\-session\-keyring (7)).
-.SS Key types
-The key
-.I type
-is a string that specifies the key's type.
-Internally, the kernel defines a number of key types that are
-available in the core key management code.
-Among the types that are available for user-space use
-and can be specified as the
-.I type
-argument to
-.BR add_key ()
-are the following:
-.TP
-.I \[dq]keyring\[dq]
-Keyrings are special key types that may contain links to sequences of other
-keys of any type.
-If this interface is used to create a keyring, then
-.I payload
-should be NULL and
-.I plen
-should be zero.
-.TP
-.I \[dq]user\[dq]
-This is a general purpose key type whose payload may be read and updated
-by user-space applications.
-The key is kept entirely within kernel memory.
-The payload for keys of this type is a blob of arbitrary data
-of up to 32,767 bytes.
-.TP
-.IR \[dq]logon\[dq] " (since Linux 3.3)"
-.\" commit 9f6ed2ca257fa8650b876377833e6f14e272848b
-This key type is essentially the same as
-.IR \[dq]user\[dq] ,
-but it does not permit the key to be read.
-This is suitable for storing payloads
-that you do not want to be readable from user space.
-.IP
-This key type vets the
-.I description
-to ensure that it is qualified by a "service" prefix,
-by checking to ensure that the
-.I description
-contains a ':' that is preceded by other characters.
-.TP
-.IR \[dq]big_key\[dq] " (since Linux 3.13)"
-.\" commit ab3c3587f8cda9083209a61dbe3a4407d3cada10
-This key type is similar to
-.IR \[dq]user\[dq] ,
-but may hold a payload of up to 1\ MiB.
-If the key payload is large enough,
-then it may be stored encrypted in tmpfs
-(which can be swapped out) rather than kernel memory.
-.P
-For further details on these key types, see
-.BR keyrings (7).
-.SH RETURN VALUE
-On success,
-.BR add_key ()
-returns the serial number of the key it created or updated.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The keyring wasn't available for modification by the user.
-.TP
-.B EDQUOT
-The key quota for this user would be exceeded by creating this key or linking
-it to the keyring.
-.TP
-.B EFAULT
-One or more of
-.IR type ,
-.IR description ,
-and
-.I payload
-points outside the process's accessible address space.
-.TP
-.B EINVAL
-The size of the string (including the terminating null byte) specified in
-.I type
-or
-.I description
-exceeded the limit (32 bytes and 4096 bytes respectively).
-.TP
-.B EINVAL
-The payload data was invalid.
-.TP
-.B EINVAL
-.I type
-was
-.I \[dq]logon\[dq]
-and the
-.I description
-was not qualified with a prefix string of the form
-.IR \[dq]service:\[dq] .
-.TP
-.B EKEYEXPIRED
-The keyring has expired.
-.TP
-.B EKEYREVOKED
-The keyring has been revoked.
-.TP
-.B ENOKEY
-The keyring doesn't exist.
-.TP
-.B ENOMEM
-Insufficient memory to create a key.
-.TP
-.B EPERM
-The
-.I type
-started with a period (\[aq].\[aq]).
-Key types that begin with a period are reserved to the implementation.
-.TP
-.B EPERM
-.I type
-was
-.I \[dq]keyring\[dq]
-and the
-.I description
-started with a period (\[aq].\[aq]).
-Keyrings with descriptions (names)
-that begin with a period are reserved to the implementation.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.10.
-.SH NOTES
-glibc does not provide a wrapper for this system call.
-A wrapper is provided in the
-.I libkeyutils
-library.
-(The accompanying package provides the
-.I <keyutils.h>
-header file.)
-When employing the wrapper in that library, link with
-.IR \-lkeyutils .
-.SH EXAMPLES
-The program below creates a key with the type, description, and payload
-specified in its command-line arguments,
-and links that key into the session keyring.
-The following shell session demonstrates the use of the program:
-.P
-.in +4n
-.EX
-$ \fB./a.out user mykey "Some payload"\fP
-Key ID is 64a4dca
-$ \fBgrep \[aq]64a4dca\[aq] /proc/keys\fP
-064a4dca I\-\-Q\-\-\- 1 perm 3f010000 1000 1000 user mykey: 12
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (add_key.c)
-.EX
-#include <keyutils.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-\&
-int
-main(int argc, char *argv[])
-{
- key_serial_t key;
-\&
- if (argc != 4) {
- fprintf(stderr, "Usage: %s type description payload\en",
- argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- key = add_key(argv[1], argv[2], argv[3], strlen(argv[3]),
- KEY_SPEC_SESSION_KEYRING);
- if (key == \-1) {
- perror("add_key");
- exit(EXIT_FAILURE);
- }
-\&
- printf("Key ID is %jx\en", (uintmax_t) key);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.ad l
-.nh
-.BR keyctl (1),
-.BR keyctl (2),
-.BR request_key (2),
-.BR keyctl (3),
-.BR keyrings (7),
-.BR keyutils (7),
-.BR persistent\-keyring (7),
-.BR process\-keyring (7),
-.BR session\-keyring (7),
-.BR thread\-keyring (7),
-.BR user\-keyring (7),
-.BR user\-session\-keyring (7)
-.P
-The kernel source files
-.I Documentation/security/keys/core.rst
-and
-.I Documentation/security/keys/request\-key.rst
-(or, before Linux 4.13, in the files
-.\" commit b68101a1e8f0263dbc7b8375d2a7c57c6216fb76
-.I Documentation/security/keys.txt
-and
-.\" commit 3db38ed76890565772fcca3279cc8d454ea6176b
-.IR Documentation/security/keys\-request\-key.txt ).
diff --git a/man2/adjtimex.2 b/man2/adjtimex.2
deleted file mode 100644
index c7c095f5e..000000000
--- a/man2/adjtimex.2
+++ /dev/null
@@ -1,595 +0,0 @@
-'\" t
-.\" Copyright (c) 1995 Michael Chastain (mec@shell.portal.com), 15 April 1995.
-.\" and Copyright (C) 2014, 2016 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1997-07-30 by Paul Slootman <paul@wurtel.demon.nl>
-.\" Modified 2004-05-27 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH adjtimex 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-adjtimex, clock_adjtime, ntp_adjtime \- tune kernel clock
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/timex.h>
-.P
-.BI "int adjtimex(struct timex *" "buf" );
-.P
-.BI "int clock_adjtime(clockid_t " clk_id ", struct timex *" buf );
-.P
-.BI "int ntp_adjtime(struct timex *" buf );
-.fi
-.SH DESCRIPTION
-Linux uses David L.\& Mills' clock adjustment algorithm (see RFC\ 5905).
-The system call
-.BR adjtimex ()
-reads and optionally sets adjustment parameters for this algorithm.
-It takes a pointer to a
-.I timex
-structure, updates kernel parameters from (selected) field values,
-and returns the same structure updated with the current kernel values.
-This structure is declared as follows:
-.P
-.in +4n
-.EX
-struct timex {
- int modes; /* Mode selector */
- long offset; /* Time offset; nanoseconds, if STA_NANO
- status flag is set, otherwise
- microseconds */
- long freq; /* Frequency offset; see NOTES for units */
- long maxerror; /* Maximum error (microseconds) */
- long esterror; /* Estimated error (microseconds) */
- int status; /* Clock command/status */
- long constant; /* PLL (phase\-locked loop) time constant */
- long precision; /* Clock precision
- (microseconds, read\-only) */
- long tolerance; /* Clock frequency tolerance (read\-only);
- see NOTES for units */
- struct timeval time;
- /* Current time (read\-only, except for
- ADJ_SETOFFSET); upon return, time.tv_usec
- contains nanoseconds, if STA_NANO status
- flag is set, otherwise microseconds */
- long tick; /* Microseconds between clock ticks */
- long ppsfreq; /* PPS (pulse per second) frequency
- (read\-only); see NOTES for units */
- long jitter; /* PPS jitter (read\-only); nanoseconds, if
- STA_NANO status flag is set, otherwise
- microseconds */
- int shift; /* PPS interval duration
- (seconds, read\-only) */
- long stabil; /* PPS stability (read\-only);
- see NOTES for units */
- long jitcnt; /* PPS count of jitter limit exceeded
- events (read\-only) */
- long calcnt; /* PPS count of calibration intervals
- (read\-only) */
- long errcnt; /* PPS count of calibration errors
- (read\-only) */
- long stbcnt; /* PPS count of stability limit exceeded
- events (read\-only) */
- int tai; /* TAI offset, as set by previous ADJ_TAI
- operation (seconds, read\-only,
- since Linux 2.6.26) */
- /* Further padding bytes to allow for future expansion */
-};
-.EE
-.in
-.P
-The
-.I modes
-field determines which parameters, if any, to set.
-(As described later in this page,
-the constants used for
-.BR ntp_adjtime ()
-are equivalent but differently named.)
-It is a bit mask containing a
-bitwise OR
-combination of zero or more of the following bits:
-.TP
-.B ADJ_OFFSET
-Set time offset from
-.IR buf.offset .
-Since Linux 2.6.26,
-.\" commit 074b3b87941c99bc0ce35385b5817924b1ed0c23
-the supplied value is clamped to the range (\-0.5s, +0.5s).
-In older kernels, an
-.B EINVAL
-error occurs if the supplied value is out of range.
-.TP
-.B ADJ_FREQUENCY
-Set frequency offset from
-.IR buf.freq .
-Since Linux 2.6.26,
-.\" commit 074b3b87941c99bc0ce35385b5817924b1ed0c23
-the supplied value is clamped to the range (\-32768000, +32768000).
-In older kernels, an
-.B EINVAL
-error occurs if the supplied value is out of range.
-.TP
-.B ADJ_MAXERROR
-Set maximum time error from
-.IR buf.maxerror .
-.TP
-.B ADJ_ESTERROR
-Set estimated time error from
-.IR buf.esterror .
-.TP
-.B ADJ_STATUS
-Set clock status bits from
-.IR buf.status .
-A description of these bits is provided below.
-.TP
-.B ADJ_TIMECONST
-Set PLL time constant from
-.IR buf.constant .
-If the
-.B STA_NANO
-status flag (see below) is clear, the kernel adds 4 to this value.
-.TP
-.BR ADJ_SETOFFSET " (since Linux 2.6.39)"
-.\" commit 094aa1881fdc1b8889b442eb3511b31f3ec2b762
-.\" Author: Richard Cochran <richardcochran@gmail.com>
-Add
-.I buf.time
-to the current time.
-If
-.I buf.modes
-includes the
-.B ADJ_NANO
-flag, then
-.I buf.time.tv_usec
-is interpreted as a nanosecond value;
-otherwise it is interpreted as microseconds.
-.IP
-The value of
-.I buf.time
-is the sum of its two fields, but the
-field
-.I buf.time.tv_usec
-must always be nonnegative.
-The following example shows how to
-normalize a
-.I timeval
-with nanosecond resolution.
-.IP
-.in +4n
-.EX
-while (buf.time.tv_usec < 0) {
- buf.time.tv_sec \-= 1;
- buf.time.tv_usec += 1000000000;
-}
-.EE
-.in
-.TP
-.BR ADJ_MICRO " (since Linux 2.6.26)"
-.\" commit eea83d896e318bda54be2d2770d2c5d6668d11db
-.\" Author: Roman Zippel <zippel@linux-m68k.org>
-Select microsecond resolution.
-.TP
-.BR ADJ_NANO " (since Linux 2.6.26)"
-.\" commit eea83d896e318bda54be2d2770d2c5d6668d11db
-.\" Author: Roman Zippel <zippel@linux-m68k.org>
-Select nanosecond resolution.
-Only one of
-.B ADJ_MICRO
-and
-.B ADJ_NANO
-should be specified.
-.TP
-.BR ADJ_TAI " (since Linux 2.6.26)"
-.\" commit 153b5d054ac2d98ea0d86504884326b6777f683d
-Set TAI (International Atomic Time) offset from
-.IR buf.constant .
-.IP
-.B ADJ_TAI
-should not be used in conjunction with
-.BR ADJ_TIMECONST ,
-since the latter mode also employs the
-.I buf.constant
-field.
-.IP
-For a complete explanation of TAI
-and the difference between TAI and UTC, see
-.UR http://www.bipm.org/en/bipm/tai/tai.html
-.I BIPM
-.UE
-.TP
-.B ADJ_TICK
-Set tick value from
-.IR buf.tick .
-.P
-Alternatively,
-.I modes
-can be specified as either of the following (multibit mask) values,
-in which case other bits should not be specified in
-.IR modes :
-.\" In general, the other bits are ignored, but ADJ_OFFSET_SINGLESHOT 0x8001
-.\" ORed with ADJ_NANO (0x2000) gives 0xa001 == ADJ_OFFSET_SS_READ!!
-.TP
-.B ADJ_OFFSET_SINGLESHOT
-.\" In user space, ADJ_OFFSET_SINGLESHOT is 0x8001
-.\" In kernel space it is 0x0001, and must be ORed with ADJ_ADJTIME (0x8000)
-Old-fashioned
-.BR adjtime (3):
-(gradually) adjust time by value specified in
-.IR buf.offset ,
-which specifies an adjustment in microseconds.
-.TP
-.BR ADJ_OFFSET_SS_READ " (functional since Linux 2.6.28)"
-.\" In user space, ADJ_OFFSET_SS_READ is 0xa001
-.\" In kernel space there is ADJ_OFFSET_READONLY (0x2000) ORed with
-.\" ADJ_ADJTIME (0x8000) and ADJ_OFFSET_SINGLESHOT (0x0001) to give 0xa001
-Return (in
-.IR buf.offset )
-the remaining amount of time to be adjusted after an earlier
-.B ADJ_OFFSET_SINGLESHOT
-operation.
-This feature was added in Linux 2.6.24,
-.\" commit 52bfb36050c8529d9031d2c2513b281a360922ec
-but did not work correctly
-.\" commit 916c7a855174e3b53d182b97a26b2e27a29726a1
-until Linux 2.6.28.
-.P
-Ordinary users are restricted to a value of either 0 or
-.B ADJ_OFFSET_SS_READ
-for
-.IR modes .
-Only the superuser may set any parameters.
-.P
-The
-.I buf.status
-field is a bit mask that is used to set and/or retrieve status
-bits associated with the NTP implementation.
-Some bits in the mask are both readable and settable,
-while others are read-only.
-.TP
-.BR STA_PLL " (read-write)"
-Enable phase-locked loop (PLL) updates via
-.BR ADJ_OFFSET .
-.TP
-.BR STA_PPSFREQ " (read-write)"
-Enable PPS (pulse-per-second) frequency discipline.
-.TP
-.BR STA_PPSTIME " (read-write)"
-Enable PPS time discipline.
-.TP
-.BR STA_FLL " (read-write)"
-Select frequency-locked loop (FLL) mode.
-.TP
-.BR STA_INS " (read-write)"
-Insert a leap second after the last second of the UTC day,
-thus extending the last minute of the day by one second.
-Leap-second insertion will occur each day, so long as this flag remains set.
-.\" John Stultz:
-.\" Usually this is written as extending the day by one second,
-.\" which is represented as:
-.\" 23:59:59
-.\" 23:59:60
-.\" 00:00:00
-.\"
-.\" But since posix cannot represent 23:59:60, we repeat the last second:
-.\" 23:59:59 + TIME_INS
-.\" 23:59:59 + TIME_OOP
-.\" 00:00:00 + TIME_WAIT
-.\"
-.TP
-.BR STA_DEL " (read-write)"
-Delete a leap second at the last second of the UTC day.
-.\" John Stultz:
-.\" Similarly the progression here is:
-.\" 23:59:57 + TIME_DEL
-.\" 23:59:58 + TIME_DEL
-.\" 00:00:00 + TIME_WAIT
-Leap second deletion will occur each day, so long as this flag
-remains set.
-.\" FIXME Does there need to be a statement that it is nonsensical to set
-.\" both STA_INS and STA_DEL?
-.TP
-.BR STA_UNSYNC " (read-write)"
-Clock unsynchronized.
-.TP
-.BR STA_FREQHOLD " (read-write)"
-Hold frequency.
-.\" Following text from John Stultz:
-Normally adjustments made via
-.B ADJ_OFFSET
-result in dampened frequency adjustments also being made.
-So a single call corrects the current offset,
-but as offsets in the same direction are made repeatedly,
-the small frequency adjustments will accumulate to fix the long-term skew.
-.IP
-This flag prevents the small frequency adjustment from being made
-when correcting for an
-.B ADJ_OFFSET
-value.
-.\" According to the Kernel Application Program Interface document,
-.\" STA_FREQHOLD is not used by the NTP version 4 daemon
-.TP
-.BR STA_PPSSIGNAL " (read-only)"
-A valid PPS (pulse-per-second) signal is present.
-.TP
-.BR STA_PPSJITTER " (read-only)"
-PPS signal jitter exceeded.
-.TP
-.BR STA_PPSWANDER " (read-only)"
-PPS signal wander exceeded.
-.TP
-.BR STA_PPSERROR " (read-only)"
-PPS signal calibration error.
-.TP
-.BR STA_CLOCKERR " (read-only)"
-Clock hardware fault.
-.\" Not set in current kernel (4.5), but checked in a few places
-.TP
-.BR STA_NANO " (read-only; since Linux 2.6.26)"
-.\" commit eea83d896e318bda54be2d2770d2c5d6668d11db
-.\" Author: Roman Zippel <zippel@linux-m68k.org>
-Resolution (0 = microseconds, 1 = nanoseconds).
-Set via
-.BR ADJ_NANO ,
-cleared via
-.BR ADJ_MICRO .
-.TP
-.BR STA_MODE " (since Linux 2.6.26)"
-.\" commit eea83d896e318bda54be2d2770d2c5d6668d11db
-.\" Author: Roman Zippel <zippel@linux-m68k.org>
-Mode (0 = Phase Locked Loop, 1 = Frequency Locked Loop).
-.TP
-.BR STA_CLK " (read-only; since Linux 2.6.26)"
-.\" commit eea83d896e318bda54be2d2770d2c5d6668d11db
-.\" Author: Roman Zippel <zippel@linux-m68k.org>
-Clock source (0 = A, 1 = B); currently unused.
-.P
-Attempts to set read-only
-.I status
-bits are silently ignored.
-.\"
-.SS clock_adjtime ()
-The
-.BR clock_adjtime ()
-system call (added in Linux 2.6.39) behaves like
-.BR adjtimex ()
-but takes an additional
-.I clk_id
-argument to specify the particular clock on which to act.
-.SS ntp_adjtime ()
-The
-.BR ntp_adjtime ()
-library function
-(described in the NTP "Kernel Application Program API", KAPI)
-is a more portable interface for performing the same task as
-.BR adjtimex ().
-Other than the following points, it is identical to
-.BR adjtimex ():
-.IP \[bu] 3
-The constants used in
-.I modes
-are prefixed with "MOD_" rather than "ADJ_", and have the same suffixes (thus,
-.BR MOD_OFFSET ,
-.BR MOD_FREQUENCY ,
-and so on), other than the exceptions noted in the following points.
-.IP \[bu]
-.B MOD_CLKA
-is the synonym for
-.BR ADJ_OFFSET_SINGLESHOT .
-.IP \[bu]
-.B MOD_CLKB
-is the synonym for
-.BR ADJ_TICK .
-.IP \[bu]
-There is no synonym for
-.BR ADJ_OFFSET_SS_READ ,
-which is not described in the KAPI.
-.SH RETURN VALUE
-On success,
-.BR adjtimex ()
-and
-.BR ntp_adjtime ()
-return the clock state; that is, one of the following values:
-.TP 12
-.B TIME_OK
-Clock synchronized, no leap second adjustment pending.
-.TP
-.B TIME_INS
-Indicates that a leap second will be added at the end of the UTC day.
-.TP
-.B TIME_DEL
-Indicates that a leap second will be deleted at the end of the UTC day.
-.TP
-.B TIME_OOP
-Insertion of a leap second is in progress.
-.TP
-.B TIME_WAIT
-A leap-second insertion or deletion has been completed.
-This value will be returned until the next
-.B ADJ_STATUS
-operation clears the
-.B STA_INS
-and
-.B STA_DEL
-flags.
-.TP
-.B TIME_ERROR
-The system clock is not synchronized to a reliable server.
-This value is returned when any of the following holds true:
-.RS
-.IP \[bu] 3
-Either
-.B STA_UNSYNC
-or
-.B STA_CLOCKERR
-is set.
-.IP \[bu]
-.B STA_PPSSIGNAL
-is clear and either
-.B STA_PPSFREQ
-or
-.B STA_PPSTIME
-is set.
-.IP \[bu]
-.B STA_PPSTIME
-and
-.B STA_PPSJITTER
-are both set.
-.IP \[bu]
-.B STA_PPSFREQ
-is set and either
-.B STA_PPSWANDER
-or
-.B STA_PPSJITTER
-is set.
-.RE
-.IP
-The symbolic name
-.B TIME_BAD
-is a synonym for
-.BR TIME_ERROR ,
-provided for backward compatibility.
-.P
-Note that starting with Linux 3.4,
-.\" commit 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d changed to asynchronous
-.\" operation, so we can no longer rely on the return code.
-the call operates asynchronously and the return value usually will
-not reflect a state change caused by the call itself.
-.P
-On failure, these calls return \-1 and set
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I buf
-does not point to writable memory.
-.TP
-.BR EINVAL " (before Linux 2.6.26)"
-An attempt was made to set
-.I buf.freq
-to a value outside the range (\-33554432, +33554432).
-.\" From a quick glance, it appears there was no clamping or range check
-.\" for buf.freq before Linux 2.0
-.TP
-.BR EINVAL " (before Linux 2.6.26)"
-An attempt was made to set
-.I buf.offset
-to a value outside the permitted range.
-Before Linux 2.0, the permitted range was (\-131072, +131072).
-From Linux 2.0 onwards, the permitted range was (\-512000, +512000).
-.TP
-.B EINVAL
-An attempt was made to set
-.I buf.status
-to a value other than those listed above.
-.TP
-.B EINVAL
-The
-.I clk_id
-given to
-.BR clock_adjtime ()
-is invalid for one of two reasons.
-Either the System-V style hard-coded
-positive clock ID value is out of range, or the dynamic
-.I clk_id
-does not refer to a valid instance of a clock object.
-See
-.BR clock_gettime (2)
-for a discussion of dynamic clocks.
-.TP
-.B EINVAL
-An attempt was made to set
-.I buf.tick
-to a value outside the range
-.RB 900000/ HZ
-to
-.RB 1100000/ HZ ,
-where
-.B HZ
-is the system timer interrupt frequency.
-.TP
-.B ENODEV
-The hot-pluggable device (like USB for example) represented by a
-dynamic
-.I clk_id
-has disappeared after its character device was opened.
-See
-.BR clock_gettime (2)
-for a discussion of dynamic clocks.
-.TP
-.B EOPNOTSUPP
-The given
-.I clk_id
-does not support adjustment.
-.TP
-.B EPERM
-.I buf.modes
-is neither 0 nor
-.BR ADJ_OFFSET_SS_READ ,
-and the caller does not have sufficient privilege.
-Under Linux, the
-.B CAP_SYS_TIME
-capability is required.
-.SH ATTRIBUTES
-For an explanation of the terms used in this section, see
-.BR attributes (7).
-.TS
-allbox;
-lbx lb lb
-l l l.
-Interface Attribute Value
-T{
-.na
-.nh
-.BR \%ntp_adjtime ()
-T} Thread safety MT-Safe
-.TE
-.SH STANDARDS
-.TP
-.BR adjtimex ()
-.TQ
-.BR clock_adjtime ()
-Linux.
-.P
-The preferred API for the NTP daemon is
-.BR ntp_adjtime ().
-.SH NOTES
-In struct
-.IR timex ,
-.IR freq ,
-.IR ppsfreq ,
-and
-.I stabil
-are ppm (parts per million) with a 16-bit fractional part,
-which means that a value of 1 in one of those fields
-actually means 2\[ha]\-16 ppm, and 2\[ha]16=65536 is 1 ppm.
-This is the case for both input values (in the case of
-.IR freq )
-and output values.
-.P
-The leap-second processing triggered by
-.B STA_INS
-and
-.B STA_DEL
-is done by the kernel in timer context.
-Thus, it will take one tick into the second
-for the leap second to be inserted or deleted.
-.SH SEE ALSO
-.BR clock_gettime (2),
-.BR clock_settime (2),
-.BR settimeofday (2),
-.BR adjtime (3),
-.BR ntp_gettime (3),
-.BR capabilities (7),
-.BR time (7),
-.BR adjtimex (8),
-.BR hwclock (8)
-.P
-.ad l
-.UR http://www.slac.stanford.edu/comp/unix/\:package/\:rtems/\:src/\:ssrlApps/\:ntpNanoclock/\:api.htm
-NTP "Kernel Application Program Interface"
-.UE
diff --git a/man2/afs_syscall.2 b/man2/afs_syscall.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/afs_syscall.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/alarm.2 b/man2/alarm.2
deleted file mode 100644
index 4a4f0fd37..000000000
--- a/man2/alarm.2
+++ /dev/null
@@ -1,81 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Wed Jul 21 19:42:57 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Sun Jul 21 21:25:26 1996 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified Wed Nov 6 03:46:05 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\"
-.TH alarm 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-alarm \- set an alarm clock for delivery of a signal
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "unsigned int alarm(unsigned int " seconds );
-.fi
-.SH DESCRIPTION
-.BR alarm ()
-arranges for a
-.B SIGALRM
-signal to be delivered to the calling process in
-.I seconds
-seconds.
-.P
-If
-.I seconds
-is zero, any pending alarm is canceled.
-.P
-In any event any previously set
-.BR alarm ()
-is canceled.
-.SH RETURN VALUE
-.BR alarm ()
-returns the number of seconds remaining until any previously scheduled
-alarm was due to be delivered, or zero if there was no previously
-scheduled alarm.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.SH NOTES
-.BR alarm ()
-and
-.BR setitimer (2)
-share the same timer; calls to one will interfere with use of the
-other.
-.P
-Alarms created by
-.BR alarm ()
-are preserved across
-.BR execve (2)
-and are not inherited by children created via
-.BR fork (2).
-.P
-.BR sleep (3)
-may be implemented using
-.BR SIGALRM ;
-mixing calls to
-.BR alarm ()
-and
-.BR sleep (3)
-is a bad idea.
-.P
-Scheduling delays can, as ever, cause the execution of the process to
-be delayed by an arbitrary amount of time.
-.SH SEE ALSO
-.BR gettimeofday (2),
-.BR pause (2),
-.BR select (2),
-.BR setitimer (2),
-.BR sigaction (2),
-.BR signal (2),
-.BR timer_create (2),
-.BR timerfd_create (2),
-.BR sleep (3),
-.BR time (7)
diff --git a/man2/alloc_hugepages.2 b/man2/alloc_hugepages.2
deleted file mode 100644
index 8e4b7ee1b..000000000
--- a/man2/alloc_hugepages.2
+++ /dev/null
@@ -1,135 +0,0 @@
-.\" Copyright 2003 Andries E. Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH alloc_hugepages 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-alloc_hugepages, free_hugepages \- allocate or free huge pages
-.SH SYNOPSIS
-.nf
-.BI "void *syscall(SYS_alloc_hugepages, int " key ", void " addr [. len "], \
-size_t " len ,
-.BI " int " prot ", int " flag );
-.\" asmlinkage unsigned long sys_alloc_hugepages(int key, unsigned long addr,
-.\" unsigned long len, int prot, int flag);
-.BI "int syscall(SYS_free_hugepages, void *" addr );
-.\" asmlinkage int sys_free_hugepages(unsigned long addr);
-.fi
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The system calls
-.BR alloc_hugepages ()
-and
-.BR free_hugepages ()
-were introduced in Linux 2.5.36 and removed again in Linux 2.5.54.
-They existed only on i386 and ia64 (when built with
-.BR CONFIG_HUGETLB_PAGE ).
-In Linux 2.4.20, the syscall numbers exist,
-but the calls fail with the error
-.BR ENOSYS .
-.P
-On i386 the memory management hardware knows about ordinary pages (4\ KiB)
-and huge pages (2 or 4\ MiB).
-Similarly ia64 knows about huge pages of
-several sizes.
-These system calls serve to map huge pages into the
-process's memory or to free them again.
-Huge pages are locked into memory, and are not swapped.
-.P
-The
-.I key
-argument is an identifier.
-When zero the pages are private, and
-not inherited by children.
-When positive the pages are shared with other applications using the same
-.IR key ,
-and inherited by child processes.
-.P
-The
-.I addr
-argument of
-.BR free_hugepages ()
-tells which page is being freed: it was the return value of a
-call to
-.BR alloc_hugepages ().
-(The memory is actually freed only once all users have released it.)
-The
-.I addr
-argument of
-.BR alloc_hugepages ()
-is a hint that the kernel may or may not follow.
-Addresses must be properly aligned.
-.P
-The
-.I len
-argument is the length of the required segment.
-It must be a multiple of the huge page size.
-.P
-The
-.I prot
-argument specifies the memory protection of the segment.
-It is one of
-.BR PROT_READ ,
-.BR PROT_WRITE ,
-.BR PROT_EXEC .
-.P
-The
-.I flag
-argument is ignored, unless
-.I key
-is positive.
-In that case, if
-.I flag
-is
-.BR IPC_CREAT ,
-then a new huge page segment is created when none
-with the given key existed.
-If this flag is not set, then
-.B ENOENT
-is returned when no segment with the given key exists.
-.SH RETURN VALUE
-On success,
-.BR alloc_hugepages ()
-returns the allocated virtual address, and
-.BR free_hugepages ()
-returns zero.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B ENOSYS
-The system call is not supported on this kernel.
-.SH FILES
-.TP
-.I /proc/sys/vm/nr_hugepages
-Number of configured hugetlb pages.
-This can be read and written.
-.TP
-.I /proc/meminfo
-Gives info on the number of configured hugetlb pages and on their size
-in the three variables HugePages_Total, HugePages_Free, Hugepagesize.
-.SH STANDARDS
-Linux on Intel processors.
-.SH HISTORY
-These system calls are gone;
-they existed only in Linux 2.5.36 through to Linux 2.5.54.
-.SH NOTES
-Now the hugetlbfs filesystem can be used instead.
-Memory backed by huge pages (if the CPU supports them) is obtained by
-using
-.BR mmap (2)
-to map files in this virtual filesystem.
-.P
-The maximal number of huge pages can be specified using the
-.B hugepages=
-boot parameter.
-.\".P
-.\" requires CONFIG_HUGETLB_PAGE (under "Processor type and features")
-.\" and CONFIG_HUGETLBFS (under "Filesystems").
-.\" mount \-t hugetlbfs hugetlbfs /huge
-.\" SHM_HUGETLB
diff --git a/man2/arch_prctl.2 b/man2/arch_prctl.2
deleted file mode 100644
index 680d36395..000000000
--- a/man2/arch_prctl.2
+++ /dev/null
@@ -1,176 +0,0 @@
-.\" Copyright (C) 2003 Andi Kleen
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH arch_prctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-arch_prctl \- set architecture-specific thread state
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <asm/prctl.h>" " /* Definition of " ARCH_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_arch_prctl, int " op ", unsigned long " addr );
-.BI "int syscall(SYS_arch_prctl, int " op ", unsigned long *" addr );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR arch_prctl (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR arch_prctl ()
-sets architecture-specific process or thread state.
-.I op
-selects an operation
-and passes argument
-.I addr
-to it;
-.I addr
-is interpreted as either an
-.I "unsigned long"
-for the "set" operations, or as an
-.IR "unsigned long\ *" ,
-for the "get" operations.
-.P
-Subfunctions for both x86 and x86-64 are:
-.TP
-.BR ARCH_SET_CPUID " (since Linux 4.12)"
-.\" commit e9ea1e7f53b852147cbd568b0568c7ad97ec21a3
-Enable
-.RI ( "addr != 0" )
-or disable
-.RI ( "addr == 0" )
-the
-.I cpuid
-instruction for the calling thread.
-The instruction is enabled by default.
-If disabled, any execution of a
-.I cpuid
-instruction will instead generate a
-.B SIGSEGV
-signal.
-This feature can be used to emulate
-.I cpuid
-results that differ from what the underlying
-hardware would have produced (e.g., in a paravirtualization setting).
-.IP
-The
-.B ARCH_SET_CPUID
-setting is preserved across
-.BR fork (2)
-and
-.BR clone (2)
-but reset to the default (i.e.,
-.I cpuid
-enabled) on
-.BR execve (2).
-.TP
-.BR ARCH_GET_CPUID " (since Linux 4.12)"
-Return the setting of the flag manipulated by
-.B ARCH_SET_CPUID
-as the result of the system call (1 for enabled, 0 for disabled).
-.I addr
-is ignored.
-.P
-Subfunctions for x86-64 only are:
-.TP
-.B ARCH_SET_FS
-Set the 64-bit base for the
-.I FS
-register to
-.IR addr .
-.TP
-.B ARCH_GET_FS
-Return the 64-bit base value for the
-.I FS
-register of the calling thread in the
-.I unsigned long
-pointed to by
-.IR addr .
-.TP
-.B ARCH_SET_GS
-Set the 64-bit base for the
-.I GS
-register to
-.IR addr .
-.TP
-.B ARCH_GET_GS
-Return the 64-bit base value for the
-.I GS
-register of the calling thread in the
-.I unsigned long
-pointed to by
-.IR addr .
-.SH RETURN VALUE
-On success,
-.BR arch_prctl ()
-returns 0; on error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I addr
-points to an unmapped address or is outside the process address space.
-.TP
-.B EINVAL
-.I op
-is not a valid operation.
-.TP
-.B ENODEV
-.B ARCH_SET_CPUID
-was requested, but the underlying hardware does not support CPUID faulting.
-.TP
-.B EPERM
-.I addr
-is outside the process address space.
-.\" .SH AUTHOR
-.\" Man page written by Andi Kleen.
-.SH STANDARDS
-Linux/x86-64.
-.SH NOTES
-.BR arch_prctl ()
-is supported only on Linux/x86-64 for 64-bit programs currently.
-.P
-The 64-bit base changes when a new 32-bit segment selector is loaded.
-.P
-.B ARCH_SET_GS
-is disabled in some kernels.
-.P
-Context switches for 64-bit segment bases are rather expensive.
-As an optimization, if a 32-bit TLS base address is used,
-.BR arch_prctl ()
-may use a real TLS entry as if
-.BR set_thread_area (2)
-had been called, instead of manipulating the segment base register directly.
-Memory in the first 2\ GB of address space can be allocated by using
-.BR mmap (2)
-with the
-.B MAP_32BIT
-flag.
-.P
-Because of the aforementioned optimization, using
-.BR arch_prctl ()
-and
-.BR set_thread_area (2)
-in the same thread is dangerous, as they may overwrite each other's
-TLS entries.
-.P
-.I FS
-may be already used by the threading library.
-Programs that use
-.B ARCH_SET_FS
-directly are very likely to crash.
-.SH SEE ALSO
-.BR mmap (2),
-.BR modify_ldt (2),
-.BR prctl (2),
-.BR set_thread_area (2)
-.P
-AMD X86-64 Programmer's manual
diff --git a/man2/arm_fadvise.2 b/man2/arm_fadvise.2
deleted file mode 100644
index 53f54a121..000000000
--- a/man2/arm_fadvise.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/posix_fadvise.2
diff --git a/man2/arm_fadvise64_64.2 b/man2/arm_fadvise64_64.2
deleted file mode 100644
index 53f54a121..000000000
--- a/man2/arm_fadvise64_64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/posix_fadvise.2
diff --git a/man2/arm_sync_file_range.2 b/man2/arm_sync_file_range.2
deleted file mode 100644
index ad7a1e6c7..000000000
--- a/man2/arm_sync_file_range.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sync_file_range.2
diff --git a/man2/bdflush.2 b/man2/bdflush.2
deleted file mode 100644
index 8627a42a2..000000000
--- a/man2/bdflush.2
+++ /dev/null
@@ -1,103 +0,0 @@
-.\" Copyright (c) 1995 Michael Chastain (mec@shell.portal.com), 15 April 1995.
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH bdflush 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-bdflush \- start, flush, or tune buffer-dirty-flush daemon
-.SH SYNOPSIS
-.nf
-.B #include <sys/kdaemon.h>
-.P
-.BI "[[deprecated]] int bdflush(int " func ", long *" address );
-.BI "[[deprecated]] int bdflush(int " func ", long " data );
-.fi
-.SH DESCRIPTION
-.IR Note :
-Since Linux 2.6,
-.\" As noted in changes in the 2.5.12 source
-this system call is deprecated and does nothing.
-It is likely to disappear altogether in a future kernel release.
-Nowadays, the task performed by
-.BR bdflush ()
-is handled by the kernel
-.I pdflush
-thread.
-.P
-.BR bdflush ()
-starts, flushes, or tunes the buffer-dirty-flush daemon.
-Only a privileged process (one with the
-.B CAP_SYS_ADMIN
-capability) may call
-.BR bdflush ().
-.P
-If
-.I func
-is negative or 0, and no daemon has been started, then
-.BR bdflush ()
-enters the daemon code and never returns.
-.P
-If
-.I func
-is 1,
-some dirty buffers are written to disk.
-.P
-If
-.I func
-is 2 or more and is even (low bit is 0), then
-.I address
-is the address of a long word,
-and the tuning parameter numbered
-.RI "(" "func" "\-2)/2"
-is returned to the caller in that address.
-.P
-If
-.I func
-is 3 or more and is odd (low bit is 1), then
-.I data
-is a long word,
-and the kernel sets tuning parameter numbered
-.RI "(" "func" "\-3)/2"
-to that value.
-.P
-The set of parameters, their values, and their valid ranges
-are defined in the Linux kernel source file
-.IR fs/buffer.c .
-.SH RETURN VALUE
-If
-.I func
-is negative or 0 and the daemon successfully starts,
-.BR bdflush ()
-never returns.
-Otherwise, the return value is 0 on success and \-1 on failure, with
-.I errno
-set to indicate the error.
-.SH ERRORS
-.TP
-.B EBUSY
-An attempt was made to enter the daemon code after
-another process has already entered.
-.TP
-.B EFAULT
-.I address
-points outside your accessible address space.
-.TP
-.B EINVAL
-An attempt was made to read or write an invalid parameter number,
-or to write an invalid value to a parameter.
-.TP
-.B EPERM
-Caller does not have the
-.B CAP_SYS_ADMIN
-capability.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Since glibc 2.23, glibc no longer supports this obsolete system call.
-.SH SEE ALSO
-.BR sync (1),
-.BR fsync (2),
-.BR sync (2)
diff --git a/man2/bind.2 b/man2/bind.2
deleted file mode 100644
index 2fec135d9..000000000
--- a/man2/bind.2
+++ /dev/null
@@ -1,286 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\" and Copyright 2005-2007, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Portions extracted from /usr/include/sys/socket.h, which does not have
-.\" any authorship information in it. It is probably available under the GPL.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\"
-.\" Other portions are from the 6.9 (Berkeley) 3/10/91 man page:
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" Modified Mon Oct 21 23:05:29 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998 by Andi Kleen
-.\" $Id: bind.2,v 1.3 1999/04/23 19:56:07 freitag Exp $
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH bind 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-bind \- bind a name to a socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int bind(int " sockfd ", const struct sockaddr *" addr ,
-.BI " socklen_t " addrlen );
-.fi
-.SH DESCRIPTION
-When a socket is created with
-.BR socket (2),
-it exists in a name space (address family) but has no address assigned to it.
-.BR bind ()
-assigns the address specified by
-.I addr
-to the socket referred to by the file descriptor
-.IR sockfd .
-.I addrlen
-specifies the size, in bytes, of the address structure pointed to by
-.IR addr .
-Traditionally, this operation is called \[lq]assigning a name to a socket\[rq].
-.P
-It is normally necessary to assign a local address using
-.BR bind ()
-before a
-.B SOCK_STREAM
-socket may receive connections (see
-.BR accept (2)).
-.P
-The rules used in name binding vary between address families.
-Consult the manual entries in Section 7 for detailed information.
-For
-.BR AF_INET ,
-see
-.BR ip (7);
-for
-.BR AF_INET6 ,
-see
-.BR ipv6 (7);
-for
-.BR AF_UNIX ,
-see
-.BR unix (7);
-for
-.BR AF_APPLETALK ,
-see
-.BR ddp (7);
-for
-.BR AF_PACKET ,
-see
-.BR packet (7);
-for
-.BR AF_X25 ,
-see
-.BR x25 (7);
-and for
-.BR AF_NETLINK ,
-see
-.BR netlink (7).
-.P
-The actual structure passed for the
-.I addr
-argument will depend on the address family.
-The
-.I sockaddr
-structure is defined as something like:
-.P
-.in +4n
-.EX
-struct sockaddr {
- sa_family_t sa_family;
- char sa_data[14];
-};
-.EE
-.in
-.P
-The only purpose of this structure is to cast the structure
-pointer passed in
-.I addr
-in order to avoid compiler warnings.
-See EXAMPLES below.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-.\" e.g., privileged port in AF_INET domain
-The address is protected, and the user is not the superuser.
-.TP
-.B EADDRINUSE
-The given address is already in use.
-.TP
-.B EADDRINUSE
-(Internet domain sockets)
-The port number was specified as zero in the socket address structure,
-but, upon attempting to bind to an ephemeral port,
-it was determined that all port numbers in the ephemeral port range
-are currently in use.
-See the discussion of
-.I /proc/sys/net/ipv4/ip_local_port_range
-.RB "in " ip (7).
-.TP
-.B EBADF
-.I sockfd
-is not a valid file descriptor.
-.TP
-.B EINVAL
-The socket is already bound to an address.
-.\" This may change in the future: see
-.\" .I linux/unix/sock.c for details.
-.TP
-.B EINVAL
-.I addrlen
-is wrong, or
-.I addr
-is not a valid address for this socket's domain.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.P
-The following errors are specific to UNIX domain
-.RB ( AF_UNIX )
-sockets:
-.TP
-.B EACCES
-Search permission is denied on a component of the path prefix.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EADDRNOTAVAIL
-A nonexistent interface was requested or the requested
-address was not local.
-.TP
-.B EFAULT
-.I addr
-points outside the user's accessible address space.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR addr .
-.TP
-.B ENAMETOOLONG
-.I addr
-is too long.
-.TP
-.B ENOENT
-A component in the directory prefix of the socket pathname does not exist.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component of the path prefix is not a directory.
-.TP
-.B EROFS
-The socket inode would reside on a read-only filesystem.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.4BSD
-.RB ( bind ()
-first appeared in 4.2BSD).
-.\" SVr4 documents an additional
-.\" .B ENOSR
-.\" general error condition, and
-.\" additional
-.\" .B EIO
-.\" and
-.\" .B EISDIR
-.\" UNIX-domain error conditions.
-.SH BUGS
-The transparent proxy options are not described.
-.\" FIXME Document transparent proxy options
-.SH EXAMPLES
-An example of the use of
-.BR bind ()
-with Internet domain sockets can be found in
-.BR getaddrinfo (3).
-.P
-The following example shows how to bind a stream socket in the UNIX
-.RB ( AF_UNIX )
-domain, and accept connections:
-.\" listen.7 refers to this example.
-.\" accept.7 refers to this example.
-.\" unix.7 refers to this example.
-.P
-.\" SRC BEGIN (bind.c)
-.EX
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <unistd.h>
-\&
-#define MY_SOCK_PATH "/somepath"
-#define LISTEN_BACKLOG 50
-\&
-#define handle_error(msg) \e
- do { perror(msg); exit(EXIT_FAILURE); } while (0)
-\&
-int
-main(void)
-{
- int sfd, cfd;
- socklen_t peer_addr_size;
- struct sockaddr_un my_addr, peer_addr;
-\&
- sfd = socket(AF_UNIX, SOCK_STREAM, 0);
- if (sfd == \-1)
- handle_error("socket");
-\&
- memset(&my_addr, 0, sizeof(my_addr));
- my_addr.sun_family = AF_UNIX;
- strncpy(my_addr.sun_path, MY_SOCK_PATH,
- sizeof(my_addr.sun_path) \- 1);
-\&
- if (bind(sfd, (struct sockaddr *) &my_addr,
- sizeof(my_addr)) == \-1)
- handle_error("bind");
-\&
- if (listen(sfd, LISTEN_BACKLOG) == \-1)
- handle_error("listen");
-\&
- /* Now we can accept incoming connections one
- at a time using accept(2). */
-\&
- peer_addr_size = sizeof(peer_addr);
- cfd = accept(sfd, (struct sockaddr *) &peer_addr,
- &peer_addr_size);
- if (cfd == \-1)
- handle_error("accept");
-\&
- /* Code to deal with incoming connection(s)... */
-\&
- if (close(sfd) == \-1)
- handle_error("close");
-\&
- if (unlink(MY_SOCK_PATH) == \-1)
- handle_error("unlink");
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR accept (2),
-.BR connect (2),
-.BR getsockname (2),
-.BR listen (2),
-.BR socket (2),
-.BR getaddrinfo (3),
-.BR getifaddrs (3),
-.BR ip (7),
-.BR ipv6 (7),
-.BR path_resolution (7),
-.BR socket (7),
-.BR unix (7)
diff --git a/man2/bpf.2 b/man2/bpf.2
deleted file mode 100644
index 69e1cd933..000000000
--- a/man2/bpf.2
+++ /dev/null
@@ -1,1273 +0,0 @@
-.\" Copyright (C) 2015 Alexei Starovoitov <ast@kernel.org>
-.\" and Copyright (C) 2015 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH bpf 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-bpf \- perform a command on an extended BPF map or program
-.SH SYNOPSIS
-.nf
-.B #include <linux/bpf.h>
-.P
-.BI "int bpf(int " cmd ", union bpf_attr *" attr ", unsigned int " size );
-.fi
-.SH DESCRIPTION
-The
-.BR bpf ()
-system call performs a range of operations related to extended
-Berkeley Packet Filters.
-Extended BPF (or eBPF) is similar to
-the original ("classic") BPF (cBPF) used to filter network packets.
-For both cBPF and eBPF programs,
-the kernel statically analyzes the programs before loading them,
-in order to ensure that they cannot harm the running system.
-.P
-eBPF extends cBPF in multiple ways, including the ability to call
-a fixed set of in-kernel helper functions
-.\" See 'enum bpf_func_id' in include/uapi/linux/bpf.h
-(via the
-.B BPF_CALL
-opcode extension provided by eBPF)
-and access shared data structures such as eBPF maps.
-.\"
-.SS Extended BPF Design/Architecture
-eBPF maps are a generic data structure for storage of different data types.
-Data types are generally treated as binary blobs, so a user just specifies
-the size of the key and the size of the value at map-creation time.
-In other words, a key/value for a given map can have an arbitrary structure.
-.P
-A user process can create multiple maps (with key/value-pairs being
-opaque bytes of data) and access them via file descriptors.
-Different eBPF programs can access the same maps in parallel.
-It's up to the user process and eBPF program to decide what they store
-inside maps.
-.P
-There's one special map type, called a program array.
-This type of map stores file descriptors referring to other eBPF programs.
-When a lookup in the map is performed, the program flow is
-redirected in-place to the beginning of another eBPF program and does not
-return back to the calling program.
-The level of nesting has a fixed limit of 32,
-.\" Defined by the kernel constant MAX_TAIL_CALL_CNT in include/linux/bpf.h
-so that infinite loops cannot be crafted.
-At run time, the program file descriptors stored in the map can be modified,
-so program functionality can be altered based on specific requirements.
-All programs referred to in a program-array map must
-have been previously loaded into the kernel via
-.BR bpf ().
-If a map lookup fails, the current program continues its execution.
-See
-.B BPF_MAP_TYPE_PROG_ARRAY
-below for further details.
-.P
-Generally, eBPF programs are loaded by the user process and automatically
-unloaded when the process exits.
-In some cases, for example,
-.BR tc\-bpf (8),
-the program will continue to stay alive inside the kernel even after the
-process that loaded the program exits.
-In that case,
-the tc subsystem holds a reference to the eBPF program after the
-file descriptor has been closed by the user-space program.
-Thus, whether a specific program continues to live inside the kernel
-depends on how it is further attached to a given kernel subsystem
-after it was loaded via
-.BR bpf ().
-.P
-Each eBPF program is a set of instructions that is safe to run until
-its completion.
-An in-kernel verifier statically determines that the eBPF program
-terminates and is safe to execute.
-During verification, the kernel increments reference counts for each of
-the maps that the eBPF program uses,
-so that the attached maps can't be removed until the program is unloaded.
-.P
-eBPF programs can be attached to different events.
-These events can be the arrival of network packets, tracing
-events, classification events by network queueing disciplines
-(for eBPF programs attached to a
-.BR tc (8)
-classifier), and other types that may be added in the future.
-A new event triggers execution of the eBPF program, which
-may store information about the event in eBPF maps.
-Beyond storing data, eBPF programs may call a fixed set of
-in-kernel helper functions.
-.P
-The same eBPF program can be attached to multiple events and different
-eBPF programs can access the same map:
-.P
-.in +4n
-.EX
-tracing tracing tracing packet packet packet
-event A event B event C on eth0 on eth1 on eth2
- | | | | | \[ha]
- | | | | v |
- \-\-> tracing <\-\- tracing socket tc ingress tc egress
- prog_1 prog_2 prog_3 classifier action
- | | | | prog_4 prog_5
- |\-\-\- \-\-\-\-\-| |\-\-\-\-\-\-| map_3 | |
- map_1 map_2 \-\-| map_4 |\-\-
-.EE
-.in
-.\"
-.SS Arguments
-The operation to be performed by the
-.BR bpf ()
-system call is determined by the
-.I cmd
-argument.
-Each operation takes an accompanying argument,
-provided via
-.IR attr ,
-which is a pointer to a union of type
-.I bpf_attr
-(see below).
-The unused fields and padding must be zeroed out before the call.
-The
-.I size
-argument is the size of the union pointed to by
-.IR attr .
-.P
-The value provided in
-.I cmd
-is one of the following:
-.TP
-.B BPF_MAP_CREATE
-Create a map and return a file descriptor that refers to the map.
-The close-on-exec file descriptor flag (see
-.BR fcntl (2))
-is automatically enabled for the new file descriptor.
-.TP
-.B BPF_MAP_LOOKUP_ELEM
-Look up an element by key in a specified map and return its value.
-.TP
-.B BPF_MAP_UPDATE_ELEM
-Create or update an element (key/value pair) in a specified map.
-.TP
-.B BPF_MAP_DELETE_ELEM
-Look up and delete an element by key in a specified map.
-.TP
-.B BPF_MAP_GET_NEXT_KEY
-Look up an element by key in a specified map and return the key
-of the next element.
-.TP
-.B BPF_PROG_LOAD
-Verify and load an eBPF program,
-returning a new file descriptor associated with the program.
-The close-on-exec file descriptor flag (see
-.BR fcntl (2))
-is automatically enabled for the new file descriptor.
-.IP
-The
-.I bpf_attr
-union consists of various anonymous structures that are used by different
-.BR bpf ()
-commands:
-.P
-.in +4n
-.EX
-union bpf_attr {
- struct { /* Used by BPF_MAP_CREATE */
- __u32 map_type;
- __u32 key_size; /* size of key in bytes */
- __u32 value_size; /* size of value in bytes */
- __u32 max_entries; /* maximum number of entries
- in a map */
- };
-\&
- struct { /* Used by BPF_MAP_*_ELEM and BPF_MAP_GET_NEXT_KEY
- commands */
- __u32 map_fd;
- __aligned_u64 key;
- union {
- __aligned_u64 value;
- __aligned_u64 next_key;
- };
- __u64 flags;
- };
-\&
- struct { /* Used by BPF_PROG_LOAD */
- __u32 prog_type;
- __u32 insn_cnt;
- __aligned_u64 insns; /* \[aq]const struct bpf_insn *\[aq] */
- __aligned_u64 license; /* \[aq]const char *\[aq] */
- __u32 log_level; /* verbosity level of verifier */
- __u32 log_size; /* size of user buffer */
- __aligned_u64 log_buf; /* user supplied \[aq]char *\[aq]
- buffer */
- __u32 kern_version;
- /* checked when prog_type=kprobe
- (since Linux 4.1) */
-.\" commit 2541517c32be2531e0da59dfd7efc1ce844644f5
- };
-} __attribute__((aligned(8)));
-.EE
-.in
-.\"
-.SS eBPF maps
-Maps are a generic data structure for storage of different types of data.
-They allow sharing of data between eBPF kernel programs,
-and also between kernel and user-space applications.
-.P
-Each map type has the following attributes:
-.IP \[bu] 3
-type
-.IP \[bu]
-maximum number of elements
-.IP \[bu]
-key size in bytes
-.IP \[bu]
-value size in bytes
-.P
-The following wrapper functions demonstrate how various
-.BR bpf ()
-commands can be used to access the maps.
-The functions use the
-.I cmd
-argument to invoke different operations.
-.TP
-.B BPF_MAP_CREATE
-The
-.B BPF_MAP_CREATE
-command creates a new map,
-returning a new file descriptor that refers to the map.
-.IP
-.in +4n
-.EX
-int
-bpf_create_map(enum bpf_map_type map_type,
- unsigned int key_size,
- unsigned int value_size,
- unsigned int max_entries)
-{
- union bpf_attr attr = {
- .map_type = map_type,
- .key_size = key_size,
- .value_size = value_size,
- .max_entries = max_entries
- };
-\&
- return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
-}
-.EE
-.in
-.IP
-The new map has the type specified by
-.IR map_type ,
-and attributes as specified in
-.IR key_size ,
-.IR value_size ,
-and
-.IR max_entries .
-On success, this operation returns a file descriptor.
-On error, \-1 is returned and
-.I errno
-is set to
-.BR EINVAL ,
-.BR EPERM ,
-or
-.BR ENOMEM .
-.IP
-The
-.I key_size
-and
-.I value_size
-attributes will be used by the verifier during program loading
-to check that the program is calling
-.BR bpf_map_*_elem ()
-helper functions with a correctly initialized
-.I key
-and to check that the program doesn't access the map element
-.I value
-beyond the specified
-.IR value_size .
-For example, when a map is created with a
-.I key_size
-of 8 and the eBPF program calls
-.IP
-.in +4n
-.EX
-bpf_map_lookup_elem(map_fd, fp \- 4)
-.EE
-.in
-.IP
-the program will be rejected,
-since the in-kernel helper function
-.IP
-.in +4n
-.EX
-bpf_map_lookup_elem(map_fd, void *key)
-.EE
-.in
-.IP
-expects to read 8 bytes from the location pointed to by
-.IR key ,
-but the
-.I fp\ \-\ 4
-(where
-.I fp
-is the top of the stack)
-starting address will cause out-of-bounds stack access.
-.IP
-Similarly, when a map is created with a
-.I value_size
-of 1 and the eBPF program contains
-.IP
-.in +4n
-.EX
-value = bpf_map_lookup_elem(...);
-*(u32 *) value = 1;
-.EE
-.in
-.IP
-the program will be rejected, since it accesses the
-.I value
-pointer beyond the specified 1 byte
-.I value_size
-limit.
-.IP
-Currently, the following values are supported for
-.IR map_type :
-.IP
-.in +4n
-.EX
-enum bpf_map_type {
- BPF_MAP_TYPE_UNSPEC, /* Reserve 0 as invalid map type */
- BPF_MAP_TYPE_HASH,
- BPF_MAP_TYPE_ARRAY,
- BPF_MAP_TYPE_PROG_ARRAY,
- BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- BPF_MAP_TYPE_PERCPU_HASH,
- BPF_MAP_TYPE_PERCPU_ARRAY,
- BPF_MAP_TYPE_STACK_TRACE,
- BPF_MAP_TYPE_CGROUP_ARRAY,
- BPF_MAP_TYPE_LRU_HASH,
- BPF_MAP_TYPE_LRU_PERCPU_HASH,
- BPF_MAP_TYPE_LPM_TRIE,
- BPF_MAP_TYPE_ARRAY_OF_MAPS,
- BPF_MAP_TYPE_HASH_OF_MAPS,
- BPF_MAP_TYPE_DEVMAP,
- BPF_MAP_TYPE_SOCKMAP,
- BPF_MAP_TYPE_CPUMAP,
- BPF_MAP_TYPE_XSKMAP,
- BPF_MAP_TYPE_SOCKHASH,
- BPF_MAP_TYPE_CGROUP_STORAGE,
- BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- BPF_MAP_TYPE_QUEUE,
- BPF_MAP_TYPE_STACK,
- /* See /usr/include/linux/bpf.h for the full list. */
-};
-.EE
-.in
-.IP
-.I map_type
-selects one of the available map implementations in the kernel.
-.\" FIXME We need an explanation of why one might choose each of
-.\" these map implementations
-For all map types,
-eBPF programs access maps with the same
-.BR bpf_map_lookup_elem ()
-and
-.BR bpf_map_update_elem ()
-helper functions.
-Further details of the various map types are given below.
-.TP
-.B BPF_MAP_LOOKUP_ELEM
-The
-.B BPF_MAP_LOOKUP_ELEM
-command looks up an element with a given
-.I key
-in the map referred to by the file descriptor
-.IR fd .
-.IP
-.in +4n
-.EX
-int
-bpf_lookup_elem(int fd, const void *key, void *value)
-{
- union bpf_attr attr = {
- .map_fd = fd,
- .key = ptr_to_u64(key),
- .value = ptr_to_u64(value),
- };
-\&
- return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
-}
-.EE
-.in
-.IP
-If an element is found,
-the operation returns zero and stores the element's value into
-.IR value ,
-which must point to a buffer of
-.I value_size
-bytes.
-.IP
-If no element is found, the operation returns \-1 and sets
-.I errno
-to
-.BR ENOENT .
-.TP
-.B BPF_MAP_UPDATE_ELEM
-The
-.B BPF_MAP_UPDATE_ELEM
-command
-creates or updates an element with a given
-.I key/value
-in the map referred to by the file descriptor
-.IR fd .
-.IP
-.in +4n
-.EX
-int
-bpf_update_elem(int fd, const void *key, const void *value,
- uint64_t flags)
-{
- union bpf_attr attr = {
- .map_fd = fd,
- .key = ptr_to_u64(key),
- .value = ptr_to_u64(value),
- .flags = flags,
- };
-\&
- return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
-}
-.EE
-.in
-.IP
-The
-.I flags
-argument should be specified as one of the following:
-.RS
-.TP
-.B BPF_ANY
-Create a new element or update an existing element.
-.TP
-.B BPF_NOEXIST
-Create a new element only if it did not exist.
-.TP
-.B BPF_EXIST
-Update an existing element.
-.RE
-.IP
-On success, the operation returns zero.
-On error, \-1 is returned and
-.I errno
-is set to
-.BR EINVAL ,
-.BR EPERM ,
-.BR ENOMEM ,
-or
-.BR E2BIG .
-.B E2BIG
-indicates that the number of elements in the map reached the
-.I max_entries
-limit specified at map creation time.
-.B EEXIST
-will be returned if
-.I flags
-specifies
-.B BPF_NOEXIST
-and the element with
-.I key
-already exists in the map.
-.B ENOENT
-will be returned if
-.I flags
-specifies
-.B BPF_EXIST
-and the element with
-.I key
-doesn't exist in the map.
-.TP
-.B BPF_MAP_DELETE_ELEM
-The
-.B BPF_MAP_DELETE_ELEM
-command
-deletes the element whose key is
-.I key
-from the map referred to by the file descriptor
-.IR fd .
-.IP
-.in +4n
-.EX
-int
-bpf_delete_elem(int fd, const void *key)
-{
- union bpf_attr attr = {
- .map_fd = fd,
- .key = ptr_to_u64(key),
- };
-\&
- return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
-}
-.EE
-.in
-.IP
-On success, zero is returned.
-If the element is not found, \-1 is returned and
-.I errno
-is set to
-.BR ENOENT .
-.TP
-.B BPF_MAP_GET_NEXT_KEY
-The
-.B BPF_MAP_GET_NEXT_KEY
-command looks up an element by
-.I key
-in the map referred to by the file descriptor
-.I fd
-and sets the
-.I next_key
-pointer to the key of the next element.
-.IP
-.in +4n
-.EX
-int
-bpf_get_next_key(int fd, const void *key, void *next_key)
-{
- union bpf_attr attr = {
- .map_fd = fd,
- .key = ptr_to_u64(key),
- .next_key = ptr_to_u64(next_key),
- };
-\&
- return bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
-}
-.EE
-.in
-.IP
-If
-.I key
-is found, the operation returns zero and sets the
-.I next_key
-pointer to the key of the next element.
-If
-.I key
-is not found, the operation returns zero and sets the
-.I next_key
-pointer to the key of the first element.
-If
-.I key
-is the last element, \-1 is returned and
-.I errno
-is set to
-.BR ENOENT .
-Other possible
-.I errno
-values are
-.BR ENOMEM ,
-.BR EFAULT ,
-.BR EPERM ,
-and
-.BR EINVAL .
-This method can be used to iterate over all elements in the map.
-.TP
-.B close(map_fd)
-Delete the map referred to by the file descriptor
-.IR map_fd .
-When the user-space program that created a map exits, all maps will
-be deleted automatically (but see NOTES).
-.\"
-.SS eBPF map types
-The following map types are supported:
-.TP
-.B BPF_MAP_TYPE_HASH
-.\" commit 0f8e4bd8a1fc8c4185f1630061d0a1f2d197a475
-Hash-table maps have the following characteristics:
-.RS
-.IP \[bu] 3
-Maps are created and destroyed by user-space programs.
-Both user-space and eBPF programs
-can perform lookup, update, and delete operations.
-.IP \[bu]
-The kernel takes care of allocating and freeing key/value pairs.
-.IP \[bu]
-The
-.BR map_update_elem ()
-helper will fail to insert a new element when the
-.I max_entries
-limit is reached.
-(This ensures that eBPF programs cannot exhaust memory.)
-.IP \[bu]
-.BR map_update_elem ()
-replaces existing elements atomically.
-.RE
-.IP
-Hash-table maps are
-optimized for speed of lookup.
-.TP
-.B BPF_MAP_TYPE_ARRAY
-.\" commit 28fbcfa08d8ed7c5a50d41a0433aad222835e8e3
-Array maps have the following characteristics:
-.RS
-.IP \[bu] 3
-Optimized for fastest possible lookup.
-In the future the verifier/JIT compiler
-may recognize lookup() operations that employ a constant key
-and optimize them into a constant pointer.
-It is possible to optimize a non-constant
-key into direct pointer arithmetic as well, since pointers and
-.I value_size
-are constant for the life of the eBPF program.
-In other words,
-.BR array_map_lookup_elem ()
-may be 'inlined' by the verifier/JIT compiler
-while preserving concurrent access to this map from user space.
-.IP \[bu]
-All array elements are pre-allocated and zero-initialized at init time.
-.IP \[bu]
-The key is an array index, and must be exactly four bytes.
-.IP \[bu]
-.BR map_delete_elem ()
-fails with the error
-.BR EINVAL ,
-since elements cannot be deleted.
-.IP \[bu]
-.BR map_update_elem ()
-replaces elements in a
-.B nonatomic
-fashion;
-for atomic updates, a hash-table map should be used instead.
-There is however one special case that can also be used with arrays:
-the atomic built-in
-.B __sync_fetch_and_add()
-can be used on 32 and 64 bit atomic counters.
-For example, it can be
-applied on the whole value itself if it represents a single counter,
-or in case of a structure containing multiple counters, it could be
-used on individual counters.
-This is quite often useful for aggregation and accounting of events.
-.RE
-.IP
-Among the uses for array maps are the following:
-.RS
-.IP \[bu] 3
-As "global" eBPF variables: an array of 1 element whose key is (index) 0
-and where the value is a collection of 'global' variables which
-eBPF programs can use to keep state between events.
-.IP \[bu]
-Aggregation of tracing events into a fixed set of buckets.
-.IP \[bu]
-Accounting of networking events, for example, number of packets and packet
-sizes.
-.RE
-.TP
-.BR BPF_MAP_TYPE_PROG_ARRAY " (since Linux 4.2)"
-A program array map is a special kind of array map whose map values
-contain only file descriptors referring to other eBPF programs.
-Thus, both the
-.I key_size
-and
-.I value_size
-must be exactly four bytes.
-This map is used in conjunction with the
-.BR bpf_tail_call ()
-helper.
-.IP
-This means that an eBPF program with a program array map attached to it
-can call from kernel side into
-.IP
-.in +4n
-.EX
-void bpf_tail_call(void *context, void *prog_map,
- unsigned int index);
-.EE
-.in
-.IP
-and therefore replace its own program flow with the one from the program
-at the given program array slot, if present.
-This can be regarded as kind of a jump table to a different eBPF program.
-The invoked program will then reuse the same stack.
-When a jump into the new program has been performed,
-it won't return to the old program anymore.
-.IP
-If no eBPF program is found at the given index of the program array
-(because the map slot doesn't contain a valid program file descriptor,
-the specified lookup index/key is out of bounds,
-or the limit of 32
-.\" MAX_TAIL_CALL_CNT
-nested calls has been exceeded),
-execution continues with the current eBPF program.
-This can be used as a fall-through for default cases.
-.IP
-A program array map is useful, for example, in tracing or networking, to
-handle individual system calls or protocols in their own subprograms and
-use their identifiers as an individual map index.
-This approach may result in performance benefits,
-and also makes it possible to overcome the maximum
-instruction limit of a single eBPF program.
-In dynamic environments,
-a user-space daemon might atomically replace individual subprograms
-at run-time with newer versions to alter overall program behavior,
-for instance, if global policies change.
-.\"
-.SS eBPF programs
-The
-.B BPF_PROG_LOAD
-command is used to load an eBPF program into the kernel.
-The return value for this command is a new file descriptor associated
-with this eBPF program.
-.P
-.in +4n
-.EX
-char bpf_log_buf[LOG_BUF_SIZE];
-\&
-int
-bpf_prog_load(enum bpf_prog_type type,
- const struct bpf_insn *insns, int insn_cnt,
- const char *license)
-{
- union bpf_attr attr = {
- .prog_type = type,
- .insns = ptr_to_u64(insns),
- .insn_cnt = insn_cnt,
- .license = ptr_to_u64(license),
- .log_buf = ptr_to_u64(bpf_log_buf),
- .log_size = LOG_BUF_SIZE,
- .log_level = 1,
- };
-\&
- return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
-}
-.EE
-.in
-.P
-.I prog_type
-is one of the available program types:
-.IP
-.in +4n
-.EX
-enum bpf_prog_type {
- BPF_PROG_TYPE_UNSPEC, /* Reserve 0 as invalid
- program type */
- BPF_PROG_TYPE_SOCKET_FILTER,
- BPF_PROG_TYPE_KPROBE,
- BPF_PROG_TYPE_SCHED_CLS,
- BPF_PROG_TYPE_SCHED_ACT,
- BPF_PROG_TYPE_TRACEPOINT,
- BPF_PROG_TYPE_XDP,
- BPF_PROG_TYPE_PERF_EVENT,
- BPF_PROG_TYPE_CGROUP_SKB,
- BPF_PROG_TYPE_CGROUP_SOCK,
- BPF_PROG_TYPE_LWT_IN,
- BPF_PROG_TYPE_LWT_OUT,
- BPF_PROG_TYPE_LWT_XMIT,
- BPF_PROG_TYPE_SOCK_OPS,
- BPF_PROG_TYPE_SK_SKB,
- BPF_PROG_TYPE_CGROUP_DEVICE,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_RAW_TRACEPOINT,
- BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- BPF_PROG_TYPE_LWT_SEG6LOCAL,
- BPF_PROG_TYPE_LIRC_MODE2,
- BPF_PROG_TYPE_SK_REUSEPORT,
- BPF_PROG_TYPE_FLOW_DISSECTOR,
- /* See /usr/include/linux/bpf.h for the full list. */
-};
-.EE
-.in
-.P
-For further details of eBPF program types, see below.
-.P
-The remaining fields of
-.I bpf_attr
-are set as follows:
-.IP \[bu] 3
-.I insns
-is an array of
-.I "struct bpf_insn"
-instructions.
-.IP \[bu]
-.I insn_cnt
-is the number of instructions in the program referred to by
-.IR insns .
-.IP \[bu]
-.I license
-is a license string, which must be GPL compatible to call helper functions
-marked
-.IR gpl_only .
-(The licensing rules are the same as for kernel modules,
-so that also dual licenses, such as "Dual BSD/GPL", may be used.)
-.IP \[bu]
-.I log_buf
-is a pointer to a caller-allocated buffer in which the in-kernel
-verifier can store the verification log.
-This log is a multi-line string that can be checked by
-the program author in order to understand how the verifier came to
-the conclusion that the eBPF program is unsafe.
-The format of the output can change at any time as the verifier evolves.
-.IP \[bu]
-.I log_size
-is the size of the buffer pointed to by
-.IR log_buf .
-If the size of the buffer is not large enough to store all
-verifier messages, \-1 is returned and
-.I errno
-is set to
-.BR ENOSPC .
-.IP \[bu]
-.I log_level
-is the verbosity level of the verifier.
-A value of zero means that the verifier will not provide a log;
-in this case,
-.I log_buf
-must be a null pointer, and
-.I log_size
-must be zero.
-.P
-Applying
-.BR close (2)
-to the file descriptor returned by
-.B BPF_PROG_LOAD
-will unload the eBPF program (but see NOTES).
-.P
-Maps are accessible from eBPF programs and are used to exchange data between
-eBPF programs and between eBPF programs and user-space programs.
-For example,
-eBPF programs can process various events (like kprobe, packets) and
-store their data into a map,
-and user-space programs can then fetch data from the map.
-Conversely, user-space programs can use a map as a configuration mechanism,
-populating the map with values checked by the eBPF program,
-which then modifies its behavior on the fly according to those values.
-.\"
-.\"
-.SS eBPF program types
-The eBPF program type
-.RI ( prog_type )
-determines the subset of kernel helper functions that the program
-may call.
-The program type also determines the program input (context)\[em]the
-format of
-.I "struct bpf_context"
-(which is the data blob passed into the eBPF program as the first argument).
-.\"
-.\" FIXME
-.\" Somewhere in this page we need a general introduction to the
-.\" bpf_context. For example, how does a BPF program access the
-.\" context?
-.P
-For example, a tracing program does not have the exact same
-subset of helper functions as a socket filter program
-(though they may have some helpers in common).
-Similarly,
-the input (context) for a tracing program is a set of register values,
-while for a socket filter it is a network packet.
-.P
-The set of functions available to eBPF programs of a given type may increase
-in the future.
-.P
-The following program types are supported:
-.TP
-.BR BPF_PROG_TYPE_SOCKET_FILTER " (since Linux 3.19)"
-Currently, the set of functions for
-.B BPF_PROG_TYPE_SOCKET_FILTER
-is:
-.IP
-.in +4n
-.EX
-bpf_map_lookup_elem(map_fd, void *key)
- /* look up key in a map_fd */
-bpf_map_update_elem(map_fd, void *key, void *value)
- /* update key/value */
-bpf_map_delete_elem(map_fd, void *key)
- /* delete key in a map_fd */
-.EE
-.in
-.IP
-The
-.I bpf_context
-argument is a pointer to a
-.IR "struct __sk_buff" .
-.\" FIXME: We need some text here to explain how the program
-.\" accesses __sk_buff.
-.\" See 'struct __sk_buff' and commit 9bac3d6d548e5
-.\"
-.\" Alexei commented:
-.\" Actually now in case of SOCKET_FILTER, SCHED_CLS, SCHED_ACT
-.\" the program can now access skb fields.
-.\"
-.TP
-.BR BPF_PROG_TYPE_KPROBE " (since Linux 4.1)"
-.\" commit 2541517c32be2531e0da59dfd7efc1ce844644f5
-[To be documented]
-.\" FIXME Document this program type
-.\" Describe allowed helper functions for this program type
-.\" Describe bpf_context for this program type
-.\"
-.\" FIXME We need text here to describe 'kern_version'
-.TP
-.BR BPF_PROG_TYPE_SCHED_CLS " (since Linux 4.1)"
-.\" commit 96be4325f443dbbfeb37d2a157675ac0736531a1
-.\" commit e2e9b6541dd4b31848079da80fe2253daaafb549
-[To be documented]
-.\" FIXME Document this program type
-.\" Describe allowed helper functions for this program type
-.\" Describe bpf_context for this program type
-.TP
-.BR BPF_PROG_TYPE_SCHED_ACT " (since Linux 4.1)"
-.\" commit 94caee8c312d96522bcdae88791aaa9ebcd5f22c
-.\" commit a8cb5f556b567974d75ea29c15181c445c541b1f
-[To be documented]
-.\" FIXME Document this program type
-.\" Describe allowed helper functions for this program type
-.\" Describe bpf_context for this program type
-.SS Events
-Once a program is loaded, it can be attached to an event.
-Various kernel subsystems have different ways to do so.
-.P
-Since Linux 3.19,
-.\" commit 89aa075832b0da4402acebd698d0411dcc82d03e
-the following call will attach the program
-.I prog_fd
-to the socket
-.IR sockfd ,
-which was created by an earlier call to
-.BR socket (2):
-.P
-.in +4n
-.EX
-setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_BPF,
- &prog_fd, sizeof(prog_fd));
-.EE
-.in
-.P
-Since Linux 4.1,
-.\" commit 2541517c32be2531e0da59dfd7efc1ce844644f5
-the following call may be used to attach
-the eBPF program referred to by the file descriptor
-.I prog_fd
-to a perf event file descriptor,
-.IR event_fd ,
-that was created by a previous call to
-.BR perf_event_open (2):
-.P
-.in +4n
-.EX
-ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-.EE
-.in
-.\"
-.\"
-.SH RETURN VALUE
-For a successful call, the return value depends on the operation:
-.TP
-.B BPF_MAP_CREATE
-The new file descriptor associated with the eBPF map.
-.TP
-.B BPF_PROG_LOAD
-The new file descriptor associated with the eBPF program.
-.TP
-All other commands
-Zero.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B E2BIG
-The eBPF program is too large or a map reached the
-.I max_entries
-limit (maximum number of elements).
-.TP
-.B EACCES
-For
-.BR BPF_PROG_LOAD ,
-even though all program instructions are valid, the program has been
-rejected because it was deemed unsafe.
-This may be because it may have
-accessed a disallowed memory region or an uninitialized stack/register or
-because the function constraints don't match the actual types or because
-there was a misaligned memory access.
-In this case, it is recommended to call
-.BR bpf ()
-again with
-.I log_level = 1
-and examine
-.I log_buf
-for the specific reason provided by the verifier.
-.TP
-.B EAGAIN
-For
-.BR BPF_PROG_LOAD ,
-indicates that needed resources are blocked.
-This happens when the verifier detects pending signals
-while it is checking the validity of the bpf program.
-In this case, just call
-.BR bpf ()
-again with the same parameters.
-.TP
-.B EBADF
-.I fd
-is not an open file descriptor.
-.TP
-.B EFAULT
-One of the pointers
-.RI ( key
-or
-.I value
-or
-.I log_buf
-or
-.IR insns )
-is outside the accessible address space.
-.TP
-.B EINVAL
-The value specified in
-.I cmd
-is not recognized by this kernel.
-.TP
-.B EINVAL
-For
-.BR BPF_MAP_CREATE ,
-either
-.I map_type
-or attributes are invalid.
-.TP
-.B EINVAL
-For
-.B BPF_MAP_*_ELEM
-commands,
-some of the fields of
-.I "union bpf_attr"
-that are not used by this command
-are not set to zero.
-.TP
-.B EINVAL
-For
-.BR BPF_PROG_LOAD ,
-indicates an attempt to load an invalid program.
-eBPF programs can be deemed
-invalid due to unrecognized instructions, the use of reserved fields, jumps
-out of range, infinite loops or calls of unknown functions.
-.TP
-.B ENOENT
-For
-.B BPF_MAP_LOOKUP_ELEM
-or
-.BR BPF_MAP_DELETE_ELEM ,
-indicates that the element with the given
-.I key
-was not found.
-.TP
-.B ENOMEM
-Cannot allocate sufficient memory.
-.TP
-.B EPERM
-The call was made without sufficient privilege
-(without the
-.B CAP_SYS_ADMIN
-capability).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.18.
-.SH NOTES
-Prior to Linux 4.4, all
-.BR bpf ()
-commands require the caller to have the
-.B CAP_SYS_ADMIN
-capability.
-From Linux 4.4 onwards,
-.\" commit 1be7f75d1668d6296b80bf35dcf6762393530afc
-an unprivileged user may create limited programs of type
-.B BPF_PROG_TYPE_SOCKET_FILTER
-and associated maps.
-However they may not store kernel pointers within
-the maps and are presently limited to the following helper functions:
-.\" [Linux 5.6] mtk: The list of available functions is, I think, governed
-.\" by the check in net/core/filter.c::bpf_base_func_proto().
-.IP \[bu] 3
-get_random
-.PD 0
-.IP \[bu]
-get_smp_processor_id
-.IP \[bu]
-tail_call
-.IP \[bu]
-ktime_get_ns
-.PD
-.P
-Unprivileged access may be blocked by writing the value 1 to the file
-.IR /proc/sys/kernel/unprivileged_bpf_disabled .
-.P
-eBPF objects (maps and programs) can be shared between processes.
-For example, after
-.BR fork (2),
-the child inherits file descriptors referring to the same eBPF objects.
-In addition, file descriptors referring to eBPF objects can be
-transferred over UNIX domain sockets.
-File descriptors referring to eBPF objects can be duplicated
-in the usual way, using
-.BR dup (2)
-and similar calls.
-An eBPF object is deallocated only after all file descriptors
-referring to the object have been closed.
-.P
-eBPF programs can be written in a restricted C that is compiled (using the
-.B clang
-compiler) into eBPF bytecode.
-Various features are omitted from this restricted C, such as loops,
-global variables, variadic functions, floating-point numbers,
-and passing structures as function arguments.
-Some examples can be found in the
-.I samples/bpf/*_kern.c
-files in the kernel source tree.
-.\" There are also examples for the tc classifier, in the iproute2
-.\" project, in examples/bpf
-.P
-The kernel contains a just-in-time (JIT) compiler that translates
-eBPF bytecode into native machine code for better performance.
-Before Linux 4.15,
-the JIT compiler is disabled by default,
-but its operation can be controlled by writing one of the
-following integer strings to the file
-.IR /proc/sys/net/core/bpf_jit_enable :
-.TP
-.B 0
-Disable JIT compilation (default).
-.TP
-.B 1
-Normal compilation.
-.TP
-.B 2
-Debugging mode.
-The generated opcodes are dumped in hexadecimal into the kernel log.
-These opcodes can then be disassembled using the program
-.I tools/net/bpf_jit_disasm.c
-provided in the kernel source tree.
-.P
-Since Linux 4.15,
-.\" commit 290af86629b25ffd1ed6232c4e9107da031705cb
-the kernel may be configured with the
-.B CONFIG_BPF_JIT_ALWAYS_ON
-option.
-In this case, the JIT compiler is always enabled, and the
-.I bpf_jit_enable
-is initialized to 1 and is immutable.
-(This kernel configuration option was provided as a mitigation for
-one of the Spectre attacks against the BPF interpreter.)
-.P
-The JIT compiler for eBPF is currently
-.\" Last reviewed in Linux 4.18-rc by grepping for BPF_ALU64 in arch/
-.\" and by checking the documentation for bpf_jit_enable in
-.\" Documentation/sysctl/net.txt
-available for the following architectures:
-.IP \[bu] 3
-x86-64 (since Linux 3.18; cBPF since Linux 3.0);
-.\" commit 0a14842f5a3c0e88a1e59fac5c3025db39721f74
-.PD 0
-.IP \[bu]
-ARM32 (since Linux 3.18; cBPF since Linux 3.4);
-.\" commit ddecdfcea0ae891f782ae853771c867ab51024c2
-.IP \[bu]
-SPARC 32 (since Linux 3.18; cBPF since Linux 3.5);
-.\" commit 2809a2087cc44b55e4377d7b9be3f7f5d2569091
-.IP \[bu]
-ARM-64 (since Linux 3.18);
-.\" commit e54bcde3d69d40023ae77727213d14f920eb264a
-.IP \[bu]
-s390 (since Linux 4.1; cBPF since Linux 3.7);
-.\" commit c10302efe569bfd646b4c22df29577a4595b4580
-.IP \[bu]
-PowerPC 64 (since Linux 4.8; cBPF since Linux 3.1);
-.\" commit 0ca87f05ba8bdc6791c14878464efc901ad71e99
-.\" commit 156d0e290e969caba25f1851c52417c14d141b24
-.IP \[bu]
-SPARC 64 (since Linux 4.12);
-.\" commit 7a12b5031c6b947cc13918237ae652b536243b76
-.IP \[bu]
-x86-32 (since Linux 4.18);
-.\" commit 03f5781be2c7b7e728d724ac70ba10799cc710d7
-.IP \[bu]
-MIPS 64 (since Linux 4.18; cBPF since Linux 3.16);
-.\" commit c6610de353da5ca6eee5b8960e838a87a90ead0c
-.\" commit f381bf6d82f032b7410185b35d000ea370ac706b
-.IP \[bu]
-riscv (since Linux 5.1).
-.\" commit 2353ecc6f91fd15b893fa01bf85a1c7a823ee4f2
-.PD
-.SH EXAMPLES
-.\" SRC BEGIN (bpf.c)
-.EX
-/* bpf+sockets example:
- * 1. create array map of 256 elements
- * 2. load program that counts number of packets received
- * r0 = skb\->data[ETH_HLEN + offsetof(struct iphdr, protocol)]
- * map[r0]++
- * 3. attach prog_fd to raw socket via setsockopt()
- * 4. print number of received TCP/UDP packets every second
- */
-int
-main(int argc, char *argv[])
-{
- int sock, map_fd, prog_fd, key;
- long long value = 0, tcp_cnt, udp_cnt;
-\&
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key),
- sizeof(value), 256);
- if (map_fd < 0) {
- printf("failed to create map \[aq]%s\[aq]\en", strerror(errno));
- /* likely not run as root */
- return 1;
- }
-\&
- struct bpf_insn prog[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* r6 = r1 */
- BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol)),
- /* r0 = ip\->proto */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, \-4),
- /* *(u32 *)(fp \- 4) = r0 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* r2 = fp */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, \-4), /* r2 = r2 \- 4 */
- BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* r1 = map_fd */
- BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
- /* r0 = map_lookup(r1, r2) */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- /* if (r0 == 0) goto pc+2 */
- BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
- BPF_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0),
- /* lock *(u64 *) r0 += r1 */
-.\" == atomic64_add
- BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */
- BPF_EXIT_INSN(), /* return r0 */
- };
-\&
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
- sizeof(prog) / sizeof(prog[0]), "GPL");
-\&
- sock = open_raw_sock("lo");
-\&
- assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
- sizeof(prog_fd)) == 0);
-\&
- for (;;) {
- key = IPPROTO_TCP;
- assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
- key = IPPROTO_UDP;
- assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0);
- printf("TCP %lld UDP %lld packets\en", tcp_cnt, udp_cnt);
- sleep(1);
- }
-\&
- return 0;
-}
-.EE
-.\" SRC END
-.P
-Some complete working code can be found in the
-.I samples/bpf
-directory in the kernel source tree.
-.SH SEE ALSO
-.BR seccomp (2),
-.BR bpf\-helpers (7),
-.BR socket (7),
-.BR tc (8),
-.BR tc\-bpf (8)
-.P
-Both classic and extended BPF are explained in the kernel source file
-.IR Documentation/networking/filter.txt .
diff --git a/man2/break.2 b/man2/break.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/break.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/brk.2 b/man2/brk.2
deleted file mode 100644
index 40ba0f302..000000000
--- a/man2/brk.2
+++ /dev/null
@@ -1,153 +0,0 @@
-.\" Copyright (c) 1993 Michael Haardt, (michael@moria.de)
-.\" and Copyright 2006, 2008, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Fri Apr 2 11:32:09 MET DST 1993
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified Wed Jul 21 19:52:58 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Sun Aug 21 17:40:38 1994 by Rik Faith <faith@cs.unc.edu>
-.\"
-.TH brk 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-brk, sbrk \- change data segment size
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int brk(void *" addr );
-.BI "void *sbrk(intptr_t " increment );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR brk (),
-.BR sbrk ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- || ((_XOPEN_SOURCE >= 500) &&
- ! (_POSIX_C_SOURCE >= 200112L))
-.\" (_XOPEN_SOURCE >= 500 ||
-.\" _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED) &&
- From glibc 2.12 to glibc 2.19:
- _BSD_SOURCE || _SVID_SOURCE
- || ((_XOPEN_SOURCE >= 500) &&
- ! (_POSIX_C_SOURCE >= 200112L))
-.\" (_XOPEN_SOURCE >= 500 ||
-.\" _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED) &&
- Before glibc 2.12:
- _BSD_SOURCE || _SVID_SOURCE || _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
-.fi
-.SH DESCRIPTION
-.BR brk ()
-and
-.BR sbrk ()
-change the location of the
-.IR "program break" ,
-which defines the end of the process's data segment
-(i.e., the program break is the first location after the end of the
-uninitialized data segment).
-Increasing the program break has the effect of
-allocating memory to the process;
-decreasing the break deallocates memory.
-.P
-.BR brk ()
-sets the end of the data segment to the value specified by
-.IR addr ,
-when that value is reasonable, the system has enough memory,
-and the process does not exceed its maximum data size (see
-.BR setrlimit (2)).
-.P
-.BR sbrk ()
-increments the program's data space by
-.I increment
-bytes.
-Calling
-.BR sbrk ()
-with an
-.I increment
-of 0 can be used to find the current location of the program break.
-.SH RETURN VALUE
-On success,
-.BR brk ()
-returns zero.
-On error, \-1 is returned, and
-.I errno
-is set to
-.BR ENOMEM .
-.P
-On success,
-.BR sbrk ()
-returns the previous program break.
-(If the break was increased,
-then this value is a pointer to the start of the newly allocated memory.)
-On error,
-.I "(void\ *)\ \-1"
-is returned, and
-.I errno
-is set to
-.BR ENOMEM .
-.SH STANDARDS
-None.
-.SH HISTORY
-4.3BSD; SUSv1, marked LEGACY in SUSv2, removed in POSIX.1-2001.
-.\"
-.\" .BR brk ()
-.\" and
-.\" .BR sbrk ()
-.\" are not defined in the C Standard and are deliberately excluded from the
-.\" POSIX.1-1990 standard (see paragraphs B.1.1.1.3 and B.8.3.3).
-.SH NOTES
-Avoid using
-.BR brk ()
-and
-.BR sbrk ():
-the
-.BR malloc (3)
-memory allocation package is the
-portable and comfortable way of allocating memory.
-.P
-Various systems use various types for the argument of
-.BR sbrk ().
-Common are \fIint\fP, \fIssize_t\fP, \fIptrdiff_t\fP, \fIintptr_t\fP.
-.\" One sees
-.\" \fIint\fP (e.g., XPGv4, DU 4.0, HP-UX 11, FreeBSD 4.0, OpenBSD 3.2),
-.\" \fIssize_t\fP (OSF1 2.0, Irix 5.3, 6.5),
-.\" \fIptrdiff_t\fP (libc4, libc5, ulibc, glibc 2.0, 2.1),
-.\" \fIintptr_t\fP (e.g., XPGv5, AIX, SunOS 5.8, 5.9, FreeBSD 4.7, NetBSD 1.6,
-.\" Tru64 5.1, glibc2.2).
-.SS C library/kernel differences
-The return value described above for
-.BR brk ()
-is the behavior provided by the glibc wrapper function for the Linux
-.BR brk ()
-system call.
-(On most other implementations, the return value from
-.BR brk ()
-is the same; this return value was also specified in SUSv2.)
-However,
-the actual Linux system call returns the new program break on success.
-On failure, the system call returns the current break.
-The glibc wrapper function does some work
-(i.e., checks whether the new break is less than
-.IR addr )
-to provide the 0 and \-1 return values described above.
-.P
-On Linux,
-.BR sbrk ()
-is implemented as a library function that uses the
-.BR brk ()
-system call, and does some internal bookkeeping so that it can
-return the old break value.
-.SH SEE ALSO
-.BR execve (2),
-.BR getrlimit (2),
-.BR end (3),
-.BR malloc (3)
diff --git a/man2/cacheflush.2 b/man2/cacheflush.2
deleted file mode 100644
index bc38355e6..000000000
--- a/man2/cacheflush.2
+++ /dev/null
@@ -1,143 +0,0 @@
-.\" Written by Ralf Baechle (ralf@waldorf-gmbh.de),
-.\" Copyright (c) 1994, 1995 Waldorf GMBH
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH cacheflush 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-cacheflush \- flush contents of instruction and/or data cache
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/cachectl.h>
-.P
-.BI "int cacheflush(void " addr [. nbytes "], int "nbytes ", int "cache );
-.fi
-.P
-.IR Note :
-On some architectures,
-there is no glibc wrapper for this system call; see NOTES.
-.SH DESCRIPTION
-.BR cacheflush ()
-flushes the contents of the indicated cache(s) for the
-user addresses in the range
-.I addr
-to
-.IR (addr+nbytes\-1) .
-.I cache
-may be one of:
-.TP
-.B ICACHE
-Flush the instruction cache.
-.TP
-.B DCACHE
-Write back to memory and invalidate the affected valid cache lines.
-.TP
-.B BCACHE
-Same as
-.BR (ICACHE|DCACHE) .
-.SH RETURN VALUE
-.BR cacheflush ()
-returns 0 on success.
-On error, it returns \-1 and sets
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Some or all of the address range
-.I addr
-to
-.I (addr+nbytes\-1)
-is not accessible.
-.TP
-.B EINVAL
-.I cache
-is not one of
-.BR ICACHE ,
-.BR DCACHE ,
-or
-.B BCACHE
-(but see BUGS).
-.SH VERSIONS
-.BR cacheflush ()
-should not be used in programs intended to be portable.
-On Linux, this call first appeared on the MIPS architecture;
-nowadays, Linux also provides a
-.BR cacheflush ()
-system call on some other architectures, but with different arguments.
-.SS Architecture-specific variants
-glibc provides a wrapper for this system call,
-with the prototype shown in SYNOPSIS,
-for the following architectures:
-ARC, CSKY, MIPS, and NIOS2.
-.P
-On some other architectures,
-Linux provides this system call, with different arguments:
-.TP
-M68K:
-.nf
-.BI "int cacheflush(unsigned long " addr ", int " scope ", int " cache ,
-.BI " unsigned long " len );
-.fi
-.TP
-SH:
-.nf
-.BI "int cacheflush(unsigned long " addr ", unsigned long " len ", int " op );
-.fi
-.TP
-NDS32:
-.nf
-.BI "int cacheflush(unsigned int " start ", unsigned int " end ", int " cache );
-.fi
-.P
-On the above architectures,
-glibc does not provide a wrapper for this system call; call it using
-.BR syscall (2).
-.SS GCC alternative
-Unless you need the finer grained control that this system call provides,
-you probably want to use the GCC built-in function
-.BR __builtin___clear_cache (),
-which provides a portable interface
-across platforms supported by GCC and compatible compilers:
-.P
-.in +4n
-.EX
-.BI "void __builtin___clear_cache(void *" begin ", void *" end );
-.EE
-.in
-.P
-On platforms that don't require instruction cache flushes,
-.BR __builtin___clear_cache ()
-has no effect.
-.P
-.IR Note :
-On some GCC-compatible compilers,
-the prototype for this built-in function uses
-.I char *
-instead of
-.I void *
-for the parameters.
-.SH STANDARDS
-Historically, this system call was available on all MIPS UNIX variants
-including RISC/os, IRIX, Ultrix, NetBSD, OpenBSD, and FreeBSD
-(and also on some non-UNIX MIPS operating systems), so that
-the existence of this call in MIPS operating systems is a de-facto
-standard.
-.SH BUGS
-Linux kernels older than Linux 2.6.11 ignore the
-.I addr
-and
-.I nbytes
-arguments and always flush the whole cache,
-making this function fairly expensive.
-.P
-This function always behaves as if
-.B BCACHE
-has been passed for the
-.I cache
-argument and does not do any error checking on the
-.I cache
-argument.
diff --git a/man2/capget.2 b/man2/capget.2
deleted file mode 100644
index feaa36d1d..000000000
--- a/man2/capget.2
+++ /dev/null
@@ -1,260 +0,0 @@
-.\" Copyright: written by Andrew Morgan <morgan@kernel.org>
-.\" and Copyright 2006, 2008, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.\" Modified by David A. Wheeler <dwheeler@ida.org>
-.\" Modified 2004-05-27, mtk
-.\" Modified 2004-06-21, aeb
-.\" Modified 2008-04-28, morgan of kernel.org
-.\" Update in line with addition of file capabilities and
-.\" 64-bit capability sets in Linux 2.6.2[45].
-.\" Modified 2009-01-26, andi kleen
-.\"
-.TH capget 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-capget, capset \- set/get capabilities of thread(s)
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/capability.h>" " /* Definition of " CAP_* " and"
-.BR " _LINUX_CAPABILITY_*" " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_capget, cap_user_header_t " hdrp ,
-.BI " cap_user_data_t " datap );
-.BI "int syscall(SYS_capset, cap_user_header_t " hdrp ,
-.BI " const cap_user_data_t " datap );
-.fi
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-These two system calls are the raw kernel interface for getting and
-setting thread capabilities.
-Not only are these system calls specific to Linux,
-but the kernel API is likely to change and use of
-these system calls (in particular the format of the
-.I cap_user_*_t
-types) is subject to extension with each kernel revision,
-but old programs will keep working.
-.P
-The portable interfaces are
-.BR cap_set_proc (3)
-and
-.BR cap_get_proc (3);
-if possible, you should use those interfaces in applications; see NOTES.
-.\"
-.SS Current details
-Now that you have been warned, some current kernel details.
-The structures are defined as follows.
-.P
-.in +4n
-.EX
-#define _LINUX_CAPABILITY_VERSION_1 0x19980330
-#define _LINUX_CAPABILITY_U32S_1 1
-\&
- /* V2 added in Linux 2.6.25; deprecated */
-#define _LINUX_CAPABILITY_VERSION_2 0x20071026
-.\" commit e338d263a76af78fe8f38a72131188b58fceb591
-.\" Added 64 bit capability support
-#define _LINUX_CAPABILITY_U32S_2 2
-\&
- /* V3 added in Linux 2.6.26 */
-#define _LINUX_CAPABILITY_VERSION_3 0x20080522
-.\" commit ca05a99a54db1db5bca72eccb5866d2a86f8517f
-#define _LINUX_CAPABILITY_U32S_3 2
-\&
-typedef struct __user_cap_header_struct {
- __u32 version;
- int pid;
-} *cap_user_header_t;
-\&
-typedef struct __user_cap_data_struct {
- __u32 effective;
- __u32 permitted;
- __u32 inheritable;
-} *cap_user_data_t;
-.EE
-.in
-.P
-The
-.IR effective ,
-.IR permitted ,
-and
-.I inheritable
-fields are bit masks of the capabilities defined in
-.BR capabilities (7).
-Note that the
-.B CAP_*
-values are bit indexes and need to be bit-shifted before ORing into
-the bit fields.
-To define the structures for passing to the system call, you have to use the
-.I struct __user_cap_header_struct
-and
-.I struct __user_cap_data_struct
-names because the typedefs are only pointers.
-.P
-Kernels prior to Linux 2.6.25 prefer
-32-bit capabilities with version
-.BR _LINUX_CAPABILITY_VERSION_1 .
-Linux 2.6.25 added 64-bit capability sets, with version
-.BR _LINUX_CAPABILITY_VERSION_2 .
-There was, however, an API glitch, and Linux 2.6.26 added
-.B _LINUX_CAPABILITY_VERSION_3
-to fix the problem.
-.P
-Note that 64-bit capabilities use
-.I datap[0]
-and
-.IR datap[1] ,
-whereas 32-bit capabilities use only
-.IR datap[0] .
-.P
-On kernels that support file capabilities (VFS capabilities support),
-these system calls behave slightly differently.
-This support was added as an option in Linux 2.6.24,
-and became fixed (nonoptional) in Linux 2.6.33.
-.P
-For
-.BR capget ()
-calls, one can probe the capabilities of any process by specifying its
-process ID with the
-.I hdrp\->pid
-field value.
-.P
-For details on the data, see
-.BR capabilities (7).
-.\"
-.SS With VFS capabilities support
-VFS capabilities employ a file extended attribute (see
-.BR xattr (7))
-to allow capabilities to be attached to executables.
-This privilege model obsoletes kernel support for one process
-asynchronously setting the capabilities of another.
-That is, on kernels that have VFS capabilities support, when calling
-.BR capset (),
-the only permitted values for
-.I hdrp\->pid
-are 0 or, equivalently, the value returned by
-.BR gettid (2).
-.\"
-.SS Without VFS capabilities support
-On older kernels that do not provide VFS capabilities support,
-.BR capset ()
-can, if the caller has the
-.B CAP_SETPCAP
-capability, be used to change not only the caller's own capabilities,
-but also the capabilities of other threads.
-The call operates on the capabilities of the thread specified by the
-.I pid
-field of
-.I hdrp
-when that is nonzero, or on the capabilities of the calling thread if
-.I pid
-is 0.
-If
-.I pid
-refers to a single-threaded process, then
-.I pid
-can be specified as a traditional process ID;
-operating on a thread of a multithreaded process requires a thread ID
-of the type returned by
-.BR gettid (2).
-For
-.BR capset (),
-.I pid
-can also be: \-1, meaning perform the change on all threads except the
-caller and
-.BR init (1);
-or a value less than \-1, in which case the change is applied
-to all members of the process group whose ID is \-\fIpid\fP.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-The calls fail with the error
-.BR EINVAL ,
-and set the
-.I version
-field of
-.I hdrp
-to the kernel preferred value of
-.B _LINUX_CAPABILITY_VERSION_?
-when an unsupported
-.I version
-value is specified.
-In this way, one can probe what the current
-preferred capability revision is.
-.SH ERRORS
-.TP
-.B EFAULT
-Bad memory address.
-.I hdrp
-must not be NULL.
-.I datap
-may be NULL only when the user is trying to determine the preferred
-capability version format supported by the kernel.
-.TP
-.B EINVAL
-One of the arguments was invalid.
-.TP
-.B EPERM
-An attempt was made to add a capability to the permitted set, or to set
-a capability in the effective set that is not in the
-permitted set.
-.TP
-.B EPERM
-An attempt was made to add a capability to the inheritable set, and either:
-.RS
-.IP \[bu] 3
-that capability was not in the caller's bounding set; or
-.IP \[bu]
-the capability was not in the caller's permitted set
-and the caller lacked the
-.B CAP_SETPCAP
-capability in its effective set.
-.RE
-.TP
-.B EPERM
-The caller attempted to use
-.BR capset ()
-to modify the capabilities of a thread other than itself,
-but lacked sufficient privilege.
-For kernels supporting VFS
-capabilities, this is never permitted.
-For kernels lacking VFS
-support, the
-.B CAP_SETPCAP
-capability is required.
-(A bug in kernels before Linux 2.6.11 meant that this error could also
-occur if a thread without this capability tried to change its
-own capabilities by specifying the
-.I pid
-field as a nonzero value (i.e., the value returned by
-.BR getpid (2))
-instead of 0.)
-.TP
-.B ESRCH
-No such thread.
-.SH STANDARDS
-Linux.
-.SH NOTES
-The portable interface to the capability querying and setting
-functions is provided by the
-.I libcap
-library and is available here:
-.br
-.UR http://git.kernel.org/cgit\:/linux\:/kernel\:/git\:/morgan\:\:/libcap.git
-.UE
-.SH SEE ALSO
-.BR clone (2),
-.BR gettid (2),
-.BR capabilities (7)
diff --git a/man2/capset.2 b/man2/capset.2
deleted file mode 100644
index 9e829cbd8..000000000
--- a/man2/capset.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/capget.2
diff --git a/man2/chdir.2 b/man2/chdir.2
deleted file mode 100644
index d33efbdfe..000000000
--- a/man2/chdir.2
+++ /dev/null
@@ -1,127 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-21 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-04-15 by Michael Chastain <mec@shell.portal.com>:
-.\" Added 'fchdir'. Fixed bugs in error section.
-.\" Modified 1996-10-21 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1997-08-21 by Joseph S. Myers <jsm28@cam.ac.uk>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH chdir 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-chdir, fchdir \- change working directory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int chdir(const char *" path );
-.BI "int fchdir(int " fd );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR fchdir ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.12: */ _POSIX_C_SOURCE >= 200809L
- || /* glibc up to and including 2.19: */ _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-.BR chdir ()
-changes the current working directory of the calling process to the
-directory specified in
-.IR path .
-.P
-.BR fchdir ()
-is identical to
-.BR chdir ();
-the only difference is that the directory is given as an
-open file descriptor.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Depending on the filesystem, other errors can be returned.
-The more
-general errors for
-.BR chdir ()
-are listed below:
-.TP
-.B EACCES
-Search permission is denied for one of the components of
-.IR path .
-(See also
-.BR path_resolution (7).)
-.TP
-.B EFAULT
-.I path
-points outside your accessible address space.
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR path .
-.TP
-.B ENAMETOOLONG
-.I path
-is too long.
-.TP
-.B ENOENT
-The directory specified in
-.I path
-does not exist.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component of
-.I path
-is not a directory.
-.P
-The general errors for
-.BR fchdir ()
-are listed below:
-.TP
-.B EACCES
-Search permission was denied on the directory open on
-.IR fd .
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.B ENOTDIR
-.I fd
-does not refer to a directory.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.4BSD.
-.SH NOTES
-The current working directory is the starting point for interpreting
-relative pathnames (those not starting with \[aq]/\[aq]).
-.P
-A child process created via
-.BR fork (2)
-inherits its parent's current working directory.
-The current working directory is left unchanged by
-.BR execve (2).
-.SH SEE ALSO
-.BR chroot (2),
-.BR getcwd (3),
-.BR path_resolution (7)
diff --git a/man2/chmod.2 b/man2/chmod.2
deleted file mode 100644
index 255f76b1c..000000000
--- a/man2/chmod.2
+++ /dev/null
@@ -1,347 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\" and Copyright (C) 2006, 2014 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-21 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1997-01-12 by Michael Haardt
-.\" <michael@cantor.informatik.rwth-aachen.de>: NFS details
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH chmod 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-chmod, fchmod, fchmodat \- change permissions of a file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/stat.h>
-.P
-.BI "int chmod(const char *" pathname ", mode_t " mode );
-.BI "int fchmod(int " fd ", mode_t " mode );
-.P
-.BR "#include <fcntl.h>" " /* Definition of AT_* constants */"
-.B #include <sys/stat.h>
-.P
-.BI "int fchmodat(int " dirfd ", const char *" pathname ", mode_t " \
-mode ", int " flags );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.nf
-.BR fchmod ():
- Since glibc 2.24:
- _POSIX_C_SOURCE >= 199309L
-.\" || (_XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED)
- glibc 2.19 to glibc 2.23:
- _POSIX_C_SOURCE
- glibc 2.16 to glibc 2.19:
- _BSD_SOURCE || _POSIX_C_SOURCE
- glibc 2.12 to glibc 2.16:
- _BSD_SOURCE || _XOPEN_SOURCE >= 500
- || _POSIX_C_SOURCE >= 200809L
- glibc 2.11 and earlier:
- _BSD_SOURCE || _XOPEN_SOURCE >= 500
-.\" || (_XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED)
-.fi
-.P
-.BR fchmodat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-The
-.BR chmod ()
-and
-.BR fchmod ()
-system calls change a file's mode bits.
-(The file mode consists of the file permission bits plus the set-user-ID,
-set-group-ID, and sticky bits.)
-These system calls differ only in how the file is specified:
-.IP \[bu] 3
-.BR chmod ()
-changes the mode of the file whose pathname is given in
-.IR pathname ,
-which is dereferenced if it is a symbolic link.
-.IP \[bu]
-.BR fchmod ()
-changes the mode of the file referred to by the open file descriptor
-.IR fd .
-.P
-The new file mode is specified in
-.IR mode ,
-which is a bit mask created by ORing together zero or
-more of the following:
-.TP 18
-.BR S_ISUID " (04000)"
-set-user-ID (set process effective user ID on
-.BR execve (2))
-.TP
-.BR S_ISGID " (02000)"
-set-group-ID (set process effective group ID on
-.BR execve (2);
-mandatory locking, as described in
-.BR fcntl (2);
-take a new file's group from parent directory, as described in
-.BR chown (2)
-and
-.BR mkdir (2))
-.TP
-.BR S_ISVTX " (01000)"
-sticky bit (restricted deletion flag, as described in
-.BR unlink (2))
-.TP
-.BR S_IRUSR " (00400)"
-read by owner
-.TP
-.BR S_IWUSR " (00200)"
-write by owner
-.TP
-.BR S_IXUSR " (00100)"
-execute/search by owner ("search" applies for directories,
-and means that entries within the directory can be accessed)
-.TP
-.BR S_IRGRP " (00040)"
-read by group
-.TP
-.BR S_IWGRP " (00020)"
-write by group
-.TP
-.BR S_IXGRP " (00010)"
-execute/search by group
-.TP
-.BR S_IROTH " (00004)"
-read by others
-.TP
-.BR S_IWOTH " (00002)"
-write by others
-.TP
-.BR S_IXOTH " (00001)"
-execute/search by others
-.P
-The effective UID of the calling process must match the owner of the file,
-or the process must be privileged (Linux: it must have the
-.B CAP_FOWNER
-capability).
-.P
-If the calling process is not privileged (Linux: does not have the
-.B CAP_FSETID
-capability), and the group of the file does not match
-the effective group ID of the process or one of its
-supplementary group IDs, the
-.B S_ISGID
-bit will be turned off,
-but this will not cause an error to be returned.
-.P
-As a security measure, depending on the filesystem,
-the set-user-ID and set-group-ID execution bits
-may be turned off if a file is written.
-(On Linux, this occurs if the writing process does not have the
-.B CAP_FSETID
-capability.)
-On some filesystems, only the superuser can set the sticky bit,
-which may have a special meaning.
-For the sticky bit, and for set-user-ID and set-group-ID bits on
-directories, see
-.BR inode (7).
-.P
-On NFS filesystems, restricting the permissions will immediately influence
-already open files, because the access control is done on the server, but
-open files are maintained by the client.
-Widening the permissions may be
-delayed for other clients if attribute caching is enabled on them.
-.\"
-.\"
-.SS fchmodat()
-The
-.BR fchmodat ()
-system call operates in exactly the same way as
-.BR chmod (),
-except for the differences described here.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR chmod ()
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR chmod ()).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-.I flags
-can either be 0, or include the following flag:
-.TP
-.B AT_SYMLINK_NOFOLLOW
-If
-.I pathname
-is a symbolic link, do not dereference it:
-instead operate on the link itself.
-This flag is not currently implemented.
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR fchmodat ().
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Depending on the filesystem,
-errors other than those listed below can be returned.
-.P
-The more general errors for
-.BR chmod ()
-are listed below:
-.TP
-.B EACCES
-Search permission is denied on a component of the path prefix.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.RB ( fchmod ())
-The file descriptor
-.I fd
-is not valid.
-.TP
-.B EBADF
-.RB ( fchmodat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EFAULT
-.I pathname
-points outside your accessible address space.
-.TP
-.B EINVAL
-.RB ( fchmodat ())
-Invalid flag specified in
-.IR flags .
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR pathname .
-.TP
-.B ENAMETOOLONG
-.I pathname
-is too long.
-.TP
-.B ENOENT
-The file does not exist.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component of the path prefix is not a directory.
-.TP
-.B ENOTDIR
-.RB ( fchmodat ())
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.TP
-.B ENOTSUP
-.RB ( fchmodat ())
-.I flags
-specified
-.BR AT_SYMLINK_NOFOLLOW ,
-which is not supported.
-.TP
-.B EPERM
-The effective UID does not match the owner of the file,
-and the process is not privileged (Linux: it does not have the
-.B CAP_FOWNER
-capability).
-.TP
-.B EPERM
-The file is marked immutable or append-only.
-(See
-.BR ioctl_iflags (2).)
-.TP
-.B EROFS
-The named file resides on a read-only filesystem.
-.SH VERSIONS
-.SS C library/kernel differences
-The GNU C library
-.BR fchmodat ()
-wrapper function implements the POSIX-specified
-interface described in this page.
-This interface differs from the underlying Linux system call, which does
-.I not
-have a
-.I flags
-argument.
-.SS glibc notes
-On older kernels where
-.BR fchmodat ()
-is unavailable, the glibc wrapper function falls back to the use of
-.BR chmod ().
-When
-.I pathname
-is a relative pathname,
-glibc constructs a pathname based on the symbolic link in
-.I /proc/self/fd
-that corresponds to the
-.I dirfd
-argument.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR chmod ()
-.TQ
-.BR fchmod ()
-4.4BSD, SVr4, POSIX.1-2001.
-.TP
-.BR fchmodat ()
-POSIX.1-2008.
-Linux 2.6.16,
-glibc 2.4.
-.SH SEE ALSO
-.BR chmod (1),
-.BR chown (2),
-.BR execve (2),
-.BR open (2),
-.BR stat (2),
-.BR inode (7),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/chown.2 b/man2/chown.2
deleted file mode 100644
index ffba3acaf..000000000
--- a/man2/chown.2
+++ /dev/null
@@ -1,471 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\" and Copyright (c) 1998 Andries Brouwer (aeb@cwi.nl)
-.\" and Copyright (c) 2006, 2007, 2008, 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-21 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-07-09 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 1996-11-06 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1997-05-18 by Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2007-07-08, mtk, added an example program; updated SYNOPSIS
-.\" 2008-05-08, mtk, Describe rules governing ownership of new files
-.\" (bsdgroups versus sysvgroups, and the effect of the parent
-.\" directory's set-group-ID mode bit).
-.\"
-.TH chown 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-chown, fchown, lchown, fchownat \- change ownership of a file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int chown(const char *" pathname ", uid_t " owner ", gid_t " group );
-.BI "int fchown(int " fd ", uid_t " owner ", gid_t " group );
-.BI "int lchown(const char *" pathname ", uid_t " owner ", gid_t " group );
-.P
-.BR "#include <fcntl.h> " "/* Definition of AT_* constants */"
-.B #include <unistd.h>
-.P
-.BI "int fchownat(int " dirfd ", const char *" pathname ,
-.BI " uid_t " owner ", gid_t " group ", int " flags );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR fchown (),
-.BR lchown ():
-.nf
- /* Since glibc 2.12: */ _POSIX_C_SOURCE >= 200809L
- || _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.P
-.BR fchownat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-These system calls change the owner and group of a file.
-The
-.BR chown (),
-.BR fchown (),
-and
-.BR lchown ()
-system calls differ only in how the file is specified:
-.IP \[bu] 3
-.BR chown ()
-changes the ownership of the file specified by
-.IR pathname ,
-which is dereferenced if it is a symbolic link.
-.IP \[bu]
-.BR fchown ()
-changes the ownership of the file referred to by the open file descriptor
-.IR fd .
-.IP \[bu]
-.BR lchown ()
-is like
-.BR chown (),
-but does not dereference symbolic links.
-.P
-Only a privileged process (Linux: one with the
-.B CAP_CHOWN
-capability) may change the owner of a file.
-The owner of a file may change the group of the file
-to any group of which that owner is a member.
-A privileged process (Linux: with
-.BR CAP_CHOWN )
-may change the group arbitrarily.
-.P
-If the
-.I owner
-or
-.I group
-is specified as \-1, then that ID is not changed.
-.P
-When the owner or group of an executable file is
-changed by an unprivileged user, the
-.B S_ISUID
-and
-.B S_ISGID
-mode bits are cleared.
-POSIX does not specify whether
-this also should happen when root does the
-.BR chown ();
-the Linux behavior depends on the kernel version,
-and since Linux 2.2.13, root is treated like other users.
-.\" In Linux 2.0 kernels, superuser was like everyone else
-.\" In Linux 2.2, up to Linux 2.2.12, these bits were not cleared for superuser.
-.\" Since Linux 2.2.13, superuser is once more like everyone else.
-In case of a non-group-executable file (i.e., one for which the
-.B S_IXGRP
-bit is not set) the
-.B S_ISGID
-bit indicates mandatory locking, and is not cleared by a
-.BR chown ().
-.P
-When the owner or group of an executable file is changed (by any user),
-all capability sets for the file are cleared.
-.\"
-.SS fchownat()
-The
-.BR fchownat ()
-system call operates in exactly the same way as
-.BR chown (),
-except for the differences described here.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR chown ()
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR chown ()).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-The
-.I flags
-argument is a bit mask created by ORing together
-0 or more of the following values:
-.TP
-.BR AT_EMPTY_PATH " (since Linux 2.6.39)"
-.\" commit 65cfc6722361570bfe255698d9cd4dccaf47570d
-If
-.I pathname
-is an empty string, operate on the file referred to by
-.I dirfd
-(which may have been obtained using the
-.BR open (2)
-.B O_PATH
-flag).
-In this case,
-.I dirfd
-can refer to any type of file, not just a directory.
-If
-.I dirfd
-is
-.BR AT_FDCWD ,
-the call operates on the current working directory.
-This flag is Linux-specific; define
-.B _GNU_SOURCE
-.\" Before glibc 2.16, defining _ATFILE_SOURCE sufficed
-to obtain its definition.
-.TP
-.B AT_SYMLINK_NOFOLLOW
-If
-.I pathname
-is a symbolic link, do not dereference it:
-instead operate on the link itself, like
-.BR lchown ().
-(By default,
-.BR fchownat ()
-dereferences symbolic links, like
-.BR chown ().)
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR fchownat ().
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Depending on the filesystem,
-errors other than those listed below can be returned.
-.P
-The more general errors for
-.BR chown ()
-are listed below.
-.TP
-.B EACCES
-Search permission is denied on a component of the path prefix.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.RB ( fchown ())
-.I fd
-is not a valid open file descriptor.
-.TP
-.B EBADF
-.RB ( fchownat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EFAULT
-.I pathname
-points outside your accessible address space.
-.TP
-.B EINVAL
-.RB ( fchownat ())
-Invalid flag specified in
-.IR flags .
-.TP
-.B EIO
-.RB ( fchown ())
-A low-level I/O error occurred while modifying the inode.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR pathname .
-.TP
-.B ENAMETOOLONG
-.I pathname
-is too long.
-.TP
-.B ENOENT
-The file does not exist.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component of the path prefix is not a directory.
-.TP
-.B ENOTDIR
-.RB ( fchownat ())
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.TP
-.B EPERM
-The calling process did not have the required permissions
-(see above) to change owner and/or group.
-.TP
-.B EPERM
-The file is marked immutable or append-only.
-(See
-.BR ioctl_iflags (2).)
-.TP
-.B EROFS
-The named file resides on a read-only filesystem.
-.SH VERSIONS
-The 4.4BSD version can be
-used only by the superuser (that is, ordinary users cannot give away files).
-.\" chown():
-.\" SVr4 documents EINVAL, EINTR, ENOLINK and EMULTIHOP returns, but no
-.\" ENOMEM. POSIX.1 does not document ENOMEM or ELOOP error conditions.
-.\" fchown():
-.\" SVr4 documents additional EINVAL, EIO, EINTR, and ENOLINK
-.\" error conditions.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR chown ()
-.TQ
-.BR fchown ()
-.TQ
-.BR lchown ()
-4.4BSD, SVr4, POSIX.1-2001.
-.TP
-.BR fchownat ()
-POSIX.1-2008.
-Linux 2.6.16,
-glibc 2.4.
-.SH NOTES
-.SS Ownership of new files
-When a new file is created (by, for example,
-.BR open (2)
-or
-.BR mkdir (2)),
-its owner is made the same as the filesystem user ID of the
-creating process.
-The group of the file depends on a range of factors,
-including the type of filesystem,
-the options used to mount the filesystem,
-and whether or not the set-group-ID mode bit is enabled
-on the parent directory.
-If the filesystem supports the
-.B "\-o\ grpid"
-(or, synonymously
-.BR "\-o\ bsdgroups" )
-and
-.B "\-o\ nogrpid"
-(or, synonymously
-.BR "\-o\ sysvgroups" )
-.BR mount (8)
-options, then the rules are as follows:
-.IP \[bu] 3
-If the filesystem is mounted with
-.BR "\-o\ grpid" ,
-then the group of a new file is made
-the same as that of the parent directory.
-.IP \[bu]
-If the filesystem is mounted with
-.B \-o\ nogrpid
-and the set-group-ID bit is disabled on the parent directory,
-then the group of a new file is made the same as the
-process's filesystem GID.
-.IP \[bu]
-If the filesystem is mounted with
-.B \-o\ nogrpid
-and the set-group-ID bit is enabled on the parent directory,
-then the group of a new file is made
-the same as that of the parent directory.
-.P
-As at Linux 4.12,
-the
-.B \-o\ grpid
-and
-.B \-o\ nogrpid
-mount options are supported by ext2, ext3, ext4, and XFS.
-Filesystems that don't support these mount options follow the
-.B \-o\ nogrpid
-rules.
-.SS glibc notes
-On older kernels where
-.BR fchownat ()
-is unavailable, the glibc wrapper function falls back to the use of
-.BR chown ()
-and
-.BR lchown ().
-When
-.I pathname
-is a relative pathname,
-glibc constructs a pathname based on the symbolic link in
-.I /proc/self/fd
-that corresponds to the
-.I dirfd
-argument.
-.SS NFS
-The
-.BR chown ()
-semantics are deliberately violated on NFS filesystems
-which have UID mapping enabled.
-Additionally, the semantics of all system
-calls which access the file contents are violated, because
-.BR chown ()
-may cause immediate access revocation on already open files.
-Client side
-caching may lead to a delay between the time when ownership has
-been changed to allow access for a user and the time when the file can
-actually be accessed by the user on other clients.
-.SS Historical details
-The original Linux
-.BR chown (),
-.BR fchown (),
-and
-.BR lchown ()
-system calls supported only 16-bit user and group IDs.
-Subsequently, Linux 2.4 added
-.BR chown32 (),
-.BR fchown32 (),
-and
-.BR lchown32 (),
-supporting 32-bit IDs.
-The glibc
-.BR chown (),
-.BR fchown (),
-and
-.BR lchown ()
-wrapper functions transparently deal with the variations across kernel versions.
-.P
-Before Linux 2.1.81 (except 2.1.46),
-.BR chown ()
-did not follow symbolic links.
-Since Linux 2.1.81,
-.BR chown ()
-does follow symbolic links, and there is a new system call
-.BR lchown ()
-that does not follow symbolic links.
-Since Linux 2.1.86, this new call (that has the same semantics
-as the old
-.BR chown ())
-has got the same syscall number, and
-.BR chown ()
-got the newly introduced number.
-.SH EXAMPLES
-The following program changes the ownership of the file named in
-its second command-line argument to the value specified in its
-first command-line argument.
-The new owner can be specified either as a numeric user ID,
-or as a username (which is converted to a user ID by using
-.BR getpwnam (3)
-to perform a lookup in the system password file).
-.SS Program source
-.\" SRC BEGIN (chown.c)
-.EX
-#include <pwd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- char *endptr;
- uid_t uid;
- struct passwd *pwd;
-\&
- if (argc != 3 || argv[1][0] == \[aq]\e0\[aq]) {
- fprintf(stderr, "%s <owner> <file>\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- uid = strtol(argv[1], &endptr, 10); /* Allow a numeric string */
-\&
- if (*endptr != \[aq]\e0\[aq]) { /* Was not pure numeric string */
- pwd = getpwnam(argv[1]); /* Try getting UID for username */
- if (pwd == NULL) {
- perror("getpwnam");
- exit(EXIT_FAILURE);
- }
-\&
- uid = pwd\->pw_uid;
- }
-\&
- if (chown(argv[2], uid, \-1) == \-1) {
- perror("chown");
- exit(EXIT_FAILURE);
- }
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR chgrp (1),
-.BR chown (1),
-.BR chmod (2),
-.BR flock (2),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/chown32.2 b/man2/chown32.2
deleted file mode 100644
index f0a5635ae..000000000
--- a/man2/chown32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/chown.2
diff --git a/man2/chroot.2 b/man2/chroot.2
deleted file mode 100644
index 72a75fb40..000000000
--- a/man2/chroot.2
+++ /dev/null
@@ -1,166 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-21 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1994-08-21 by Michael Chastain <mec@shell.portal.com>
-.\" Modified 1996-06-13 by aeb
-.\" Modified 1996-11-06 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1997-08-21 by Joseph S. Myers <jsm28@cam.ac.uk>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH chroot 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-chroot \- change root directory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int chroot(const char *" path );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR chroot ():
-.nf
- Since glibc 2.2.2:
- _XOPEN_SOURCE && ! (_POSIX_C_SOURCE >= 200112L)
- || /* Since glibc 2.20: */ _DEFAULT_SOURCE
- || /* glibc <= 2.19: */ _BSD_SOURCE
- Before glibc 2.2.2:
- none
-.fi
-.SH DESCRIPTION
-.BR chroot ()
-changes the root directory of the calling process to that specified in
-.IR path .
-This directory will be used for pathnames beginning with \fI/\fP.
-The root directory is inherited by all children of the calling process.
-.P
-Only a privileged process (Linux: one with the
-.B CAP_SYS_CHROOT
-capability in its user namespace) may call
-.BR chroot ().
-.P
-This call changes an ingredient in the pathname resolution process
-and does nothing else.
-In particular, it is not intended to be used
-for any kind of security purpose, neither to fully sandbox a process nor
-to restrict filesystem system calls.
-In the past,
-.BR chroot ()
-has been used by daemons to restrict themselves prior to passing paths
-supplied by untrusted users to system calls such as
-.BR open (2).
-However, if a folder is moved out of the chroot directory, an attacker
-can exploit that to get out of the chroot directory as well.
-The easiest way to do that is to
-.BR chdir (2)
-to the to-be-moved directory, wait for it to be moved out, then open a
-path like ../../../etc/passwd.
-.P
-.\" This is how the "slightly trickier variation" works:
-.\" https://github.com/QubesOS/qubes-secpack/blob/master/QSBs/qsb-014-2015.txt#L142
-A slightly
-trickier variation also works under some circumstances if
-.BR chdir (2)
-is not permitted.
-If a daemon allows a "chroot directory" to be specified,
-that usually means that if you want to prevent remote users from accessing
-files outside the chroot directory, you must ensure that folders are never
-moved out of it.
-.P
-This call does not change the current working directory,
-so that after the call \[aq]\fI.\fP\[aq] can
-be outside the tree rooted at \[aq]\fI/\fP\[aq].
-In particular, the superuser can escape from a "chroot jail"
-by doing:
-.P
-.in +4n
-.EX
-mkdir foo; chroot foo; cd ..
-.EE
-.in
-.P
-This call does not close open file descriptors, and such file
-descriptors may allow access to files outside the chroot tree.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Depending on the filesystem, other errors can be returned.
-The more general errors are listed below:
-.TP
-.B EACCES
-Search permission is denied on a component of the path prefix.
-(See also
-.BR path_resolution (7).)
-.\" Also search permission is required on the final component,
-.\" maybe just to guarantee that it is a directory?
-.TP
-.B EFAULT
-.I path
-points outside your accessible address space.
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR path .
-.TP
-.B ENAMETOOLONG
-.I path
-is too long.
-.TP
-.B ENOENT
-The file does not exist.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component of
-.I path
-is not a directory.
-.TP
-.B EPERM
-The caller has insufficient privilege.
-.SH STANDARDS
-None.
-.SH HISTORY
-SVr4, 4.4BSD, SUSv2 (marked LEGACY).
-This function is not part of POSIX.1-2001.
-.\" SVr4 documents additional EINTR, ENOLINK and EMULTIHOP error conditions.
-.\" X/OPEN does not document EIO, ENOMEM or EFAULT error conditions.
-.SH NOTES
-A child process created via
-.BR fork (2)
-inherits its parent's root directory.
-The root directory is left unchanged by
-.BR execve (2).
-.P
-The magic symbolic link,
-.IR /proc/ pid /root ,
-can be used to discover a process's root directory; see
-.BR proc (5)
-for details.
-.P
-FreeBSD has a stronger
-.BR jail ()
-system call.
-.SH SEE ALSO
-.BR chroot (1),
-.BR chdir (2),
-.BR pivot_root (2),
-.BR path_resolution (7),
-.BR switch_root (8)
diff --git a/man2/clock_adjtime.2 b/man2/clock_adjtime.2
deleted file mode 100644
index b08b9c801..000000000
--- a/man2/clock_adjtime.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/adjtimex.2
diff --git a/man2/clock_getres.2 b/man2/clock_getres.2
deleted file mode 100644
index 9c98d12e9..000000000
--- a/man2/clock_getres.2
+++ /dev/null
@@ -1,539 +0,0 @@
-'\" t
-.\" Copyright (c) 2003 Nick Clifford (zaf@nrc.co.nz), Jan 25, 2003
-.\" Copyright (c) 2003 Andries Brouwer (aeb@cwi.nl), Aug 24, 2003
-.\" Copyright (c) 2020 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2003-08-23 Martin Schulze <joey@infodrom.org> improvements
-.\" 2003-08-24 aeb, large parts rewritten
-.\" 2004-08-06 Christoph Lameter <clameter@sgi.com>, SMP note
-.\"
-.TH clock_getres 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-clock_getres, clock_gettime, clock_settime \- clock and time functions
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc ),
-since glibc 2.17
-.P
-Before glibc 2.17,
-Real-time library
-.RI ( librt ", " \-lrt )
-.SH SYNOPSIS
-.nf
-.B #include <time.h>
-.P
-.BI "int clock_getres(clockid_t " clockid ", struct timespec *_Nullable " res );
-.P
-.BI "int clock_gettime(clockid_t " clockid ", struct timespec *" tp );
-.BI "int clock_settime(clockid_t " clockid ", const struct timespec *" tp );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR clock_getres (),
-.BR clock_gettime (),
-.BR clock_settime ():
-.nf
- _POSIX_C_SOURCE >= 199309L
-.fi
-.SH DESCRIPTION
-The function
-.BR clock_getres ()
-finds the resolution (precision) of the specified clock
-.IR clockid ,
-and, if
-.I res
-is non-NULL, stores it in the \fIstruct timespec\fP pointed to by
-.IR res .
-The resolution of clocks depends on the implementation and cannot be
-configured by a particular process.
-If the time value pointed to by the argument
-.I tp
-of
-.BR clock_settime ()
-is not a multiple of
-.IR res ,
-then it is truncated to a multiple of
-.IR res .
-.P
-The functions
-.BR clock_gettime ()
-and
-.BR clock_settime ()
-retrieve and set the time of the specified clock
-.IR clockid .
-.P
-The
-.I res
-and
-.I tp
-arguments are
-.BR timespec (3)
-structures.
-.P
-The
-.I clockid
-argument is the identifier of the particular clock on which to act.
-A clock may be system-wide and hence visible for all processes, or
-per-process if it measures time only within a single process.
-.P
-All implementations support the system-wide real-time clock,
-which is identified by
-.BR CLOCK_REALTIME .
-Its time represents seconds and nanoseconds since the Epoch.
-When its time is changed, timers for a relative interval are
-unaffected, but timers for an absolute point in time are affected.
-.P
-More clocks may be implemented.
-The interpretation of the
-corresponding time values and the effect on timers is unspecified.
-.P
-Sufficiently recent versions of glibc and the Linux kernel
-support the following clocks:
-.TP
-.B CLOCK_REALTIME
-A settable system-wide clock that measures real (i.e., wall-clock) time.
-Setting this clock requires appropriate privileges.
-This clock is affected by discontinuous jumps in the system time
-(e.g., if the system administrator manually changes the clock),
-and by frequency adjustments performed by NTP and similar applications via
-.BR adjtime (3),
-.BR adjtimex (2),
-.BR clock_adjtime (2),
-and
-.BR ntp_adjtime (3).
-This clock normally counts the number of seconds since
-1970-01-01 00:00:00 Coordinated Universal Time (UTC)
-except that it ignores leap seconds;
-near a leap second it is typically adjusted by NTP
-to stay roughly in sync with UTC.
-.TP
-.BR CLOCK_REALTIME_ALARM " (since Linux 3.0; Linux-specific)"
-Like
-.BR CLOCK_REALTIME ,
-but not settable.
-See
-.BR timer_create (2)
-for further details.
-.TP
-.BR CLOCK_REALTIME_COARSE " (since Linux 2.6.32; Linux-specific)"
-.\" Added in commit da15cfdae03351c689736f8d142618592e3cebc3
-A faster but less precise version of
-.BR CLOCK_REALTIME .
-This clock is not settable.
-Use when you need very fast, but not fine-grained timestamps.
-Requires per-architecture support,
-and probably also architecture support for this flag in the
-.BR vdso (7).
-.TP
-.BR CLOCK_TAI " (since Linux 3.10; Linux-specific)"
-.\" commit 1ff3c9677bff7e468e0c487d0ffefe4e901d33f4
-A nonsettable system-wide clock derived from wall-clock time
-but counting leap seconds.
-This clock does
-not experience discontinuities or frequency adjustments caused by
-inserting leap seconds as
-.B CLOCK_REALTIME
-does.
-.IP
-The acronym TAI refers to International Atomic Time.
-.TP
-.B CLOCK_MONOTONIC
-A nonsettable system-wide clock that
-represents monotonic time since\[em]as described
-by POSIX\[em]"some unspecified point in the past".
-On Linux, that point corresponds to the number of seconds that the system
-has been running since it was booted.
-.IP
-The
-.B CLOCK_MONOTONIC
-clock is not affected by discontinuous jumps in the system time
-(e.g., if the system administrator manually changes the clock),
-but is affected by frequency adjustments.
-This clock does not count time that the system is suspended.
-All
-.B CLOCK_MONOTONIC
-variants guarantee that the time returned by consecutive calls will not go
-backwards, but successive calls may\[em]depending on the architecture\[em]return
-identical (not-increased) time values.
-.TP
-.BR CLOCK_MONOTONIC_COARSE " (since Linux 2.6.32; Linux-specific)"
-.\" Added in commit da15cfdae03351c689736f8d142618592e3cebc3
-A faster but less precise version of
-.BR CLOCK_MONOTONIC .
-Use when you need very fast, but not fine-grained timestamps.
-Requires per-architecture support,
-and probably also architecture support for this flag in the
-.BR vdso (7).
-.TP
-.BR CLOCK_MONOTONIC_RAW " (since Linux 2.6.28; Linux-specific)"
-.\" Added in commit 2d42244ae71d6c7b0884b5664cf2eda30fb2ae68, John Stultz
-Similar to
-.BR CLOCK_MONOTONIC ,
-but provides access to a raw hardware-based time
-that is not subject to frequency adjustments.
-This clock does not count time that the system is suspended.
-.TP
-.BR CLOCK_BOOTTIME " (since Linux 2.6.39; Linux-specific)"
-.\" commit 7fdd7f89006dd5a4c702fa0ce0c272345fa44ae0
-.\" commit 70a08cca1227dc31c784ec930099a4417a06e7d0
-A nonsettable system-wide clock that is identical to
-.BR CLOCK_MONOTONIC ,
-except that it also includes any time that the system is suspended.
-This allows applications to get a suspend-aware monotonic clock
-without having to deal with the complications of
-.BR CLOCK_REALTIME ,
-which may have discontinuities if the time is changed using
-.BR settimeofday (2)
-or similar.
-.TP
-.BR CLOCK_BOOTTIME_ALARM " (since Linux 3.0; Linux-specific)"
-Like
-.BR CLOCK_BOOTTIME .
-See
-.BR timer_create (2)
-for further details.
-.TP
-.BR CLOCK_PROCESS_CPUTIME_ID " (since Linux 2.6.12)"
-This is a clock that measures CPU time consumed by this process
-(i.e., CPU time consumed by all threads in the process).
-On Linux, this clock is not settable.
-.TP
-.BR CLOCK_THREAD_CPUTIME_ID " (since Linux 2.6.12)"
-This is a clock that measures CPU time consumed by this thread.
-On Linux, this clock is not settable.
-.P
-Linux also implements dynamic clock instances as described below.
-.SS Dynamic clocks
-In addition to the hard-coded System-V style clock IDs described above,
-Linux also supports
-POSIX clock operations on certain character devices.
-Such devices are
-called "dynamic" clocks, and are supported since Linux 2.6.39.
-.P
-Using the appropriate macros, open file
-descriptors may be converted into clock IDs and passed to
-.BR clock_gettime (),
-.BR clock_settime (),
-and
-.BR clock_adjtime (2).
-The following example shows how to convert a file descriptor into a
-dynamic clock ID.
-.P
-.in +4n
-.EX
-#define CLOCKFD 3
-#define FD_TO_CLOCKID(fd) ((\[ti](clockid_t) (fd) << 3) | CLOCKFD)
-#define CLOCKID_TO_FD(clk) ((unsigned int) \[ti]((clk) >> 3))
-\&
-struct timespec ts;
-clockid_t clkid;
-int fd;
-\&
-fd = open("/dev/ptp0", O_RDWR);
-clkid = FD_TO_CLOCKID(fd);
-clock_gettime(clkid, &ts);
-.EE
-.in
-.SH RETURN VALUE
-.BR clock_gettime (),
-.BR clock_settime (),
-and
-.BR clock_getres ()
-return 0 for success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-.BR clock_settime ()
-does not have write permission for the dynamic POSIX
-clock device indicated.
-.TP
-.B EFAULT
-.I tp
-points outside the accessible address space.
-.TP
-.B EINVAL
-The
-.I clockid
-specified is invalid for one of two reasons.
-Either the System-V style
-hard coded positive value is out of range, or the dynamic clock ID
-does not refer to a valid instance of a clock object.
-.\" Linux also gives this error on attempts to set CLOCK_PROCESS_CPUTIME_ID
-.\" and CLOCK_THREAD_CPUTIME_ID, when probably the proper error should be
-.\" EPERM.
-.TP
-.B EINVAL
-.RB ( clock_settime ())
-.I tp.tv_sec
-is negative or
-.I tp.tv_nsec
-is outside the range [0, 999,999,999].
-.TP
-.B EINVAL
-The
-.I clockid
-specified in a call to
-.BR clock_settime ()
-is not a settable clock.
-.TP
-.BR EINVAL " (since Linux 4.3)"
-.\" commit e1d7ba8735551ed79c7a0463a042353574b96da3
-A call to
-.BR clock_settime ()
-with a
-.I clockid
-of
-.B CLOCK_REALTIME
-attempted to set the time to a value less than
-the current value of the
-.B CLOCK_MONOTONIC
-clock.
-.TP
-.B ENODEV
-The hot-pluggable device (like USB for example) represented by a
-dynamic
-.I clockid
-has disappeared after its character device was opened.
-.TP
-.B ENOTSUP
-The operation is not supported by the dynamic POSIX clock device
-specified.
-.TP
-.B EOVERFLOW
-The timestamp would not fit in
-.I time_t
-range.
-This can happen if an executable with 32-bit
-.I time_t
-is run on a 64-bit kernel when the time is 2038-01-19 03:14:08 UTC or later.
-However, when the system time is out of
-.I time_t
-range in other situations, the behavior is undefined.
-.TP
-.B EPERM
-.BR clock_settime ()
-does not have permission to set the clock indicated.
-.SH ATTRIBUTES
-For an explanation of the terms used in this section, see
-.BR attributes (7).
-.TS
-allbox;
-lbx lb lb
-l l l.
-Interface Attribute Value
-T{
-.na
-.nh
-.BR clock_getres (),
-.BR clock_gettime (),
-.BR clock_settime ()
-T} Thread safety MT-Safe
-.TE
-.SH VERSIONS
-POSIX.1 specifies the following:
-.RS
-.P
-Setting the value of the
-.B CLOCK_REALTIME
-clock via
-.BR clock_settime ()
-shall have no effect on threads that are blocked waiting for a relative time
-service based upon this clock, including the
-.BR nanosleep ()
-function; nor on the expiration of relative timers based upon this clock.
-Consequently, these time services shall expire when the requested relative
-interval elapses, independently of the new or old value of the clock.
-.RE
-.P
-According to POSIX.1-2001, a process with "appropriate privileges" may set the
-.B CLOCK_PROCESS_CPUTIME_ID
-and
-.B CLOCK_THREAD_CPUTIME_ID
-clocks using
-.BR clock_settime ().
-On Linux, these clocks are not settable
-(i.e., no process has "appropriate privileges").
-.\" See http://bugzilla.kernel.org/show_bug.cgi?id=11972
-.SS C library/kernel differences
-On some architectures, an implementation of
-.BR clock_gettime ()
-is provided in the
-.BR vdso (7).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SUSv2.
-Linux 2.6.
-.P
-On POSIX systems on which these functions are available, the symbol
-.B _POSIX_TIMERS
-is defined in \fI<unistd.h>\fP to a value greater than 0.
-POSIX.1-2008 makes these functions mandatory.
-.P
-The symbols
-.BR _POSIX_MONOTONIC_CLOCK ,
-.BR _POSIX_CPUTIME ,
-.B _POSIX_THREAD_CPUTIME
-indicate that
-.BR CLOCK_MONOTONIC ,
-.BR CLOCK_PROCESS_CPUTIME_ID ,
-.B CLOCK_THREAD_CPUTIME_ID
-are available.
-(See also
-.BR sysconf (3).)
-.\"
-.SS Historical note for SMP systems
-Before Linux added kernel support for
-.B CLOCK_PROCESS_CPUTIME_ID
-and
-.BR CLOCK_THREAD_CPUTIME_ID ,
-glibc implemented these clocks on many platforms using timer
-registers from the CPUs
-(TSC on i386, AR.ITC on Itanium).
-These registers may differ between CPUs and as a consequence
-these clocks may return
-.B bogus results
-if a process is migrated to another CPU.
-.P
-If the CPUs in an SMP system have different clock sources, then
-there is no way to maintain a correlation between the timer registers since
-each CPU will run at a slightly different frequency.
-If that is the case, then
-.I clock_getcpuclockid(0)
-will return
-.B ENOENT
-to signify this condition.
-The two clocks will then be useful only if it
-can be ensured that a process stays on a certain CPU.
-.P
-The processors in an SMP system do not start all at exactly the same
-time and therefore the timer registers are typically running at an offset.
-Some architectures include code that attempts to limit these offsets on bootup.
-However, the code cannot guarantee to accurately tune the offsets.
-glibc contains no provisions to deal with these offsets (unlike the Linux
-kernel).
-Typically these offsets are small and therefore the effects may be
-negligible in most cases.
-.P
-Since glibc 2.4,
-the wrapper functions for the system calls described in this page avoid
-the abovementioned problems by employing the kernel implementation of
-.B CLOCK_PROCESS_CPUTIME_ID
-and
-.BR CLOCK_THREAD_CPUTIME_ID ,
-on systems that provide such an implementation
-(i.e., Linux 2.6.12 and later).
-.SH EXAMPLES
-The program below demonstrates the use of
-.BR clock_gettime ()
-and
-.BR clock_getres ()
-with various clocks.
-This is an example of what we might see when running the program:
-.P
-.in +4n
-.EX
-$ \fB./clock_times x\fP
-CLOCK_REALTIME : 1585985459.446 (18356 days + 7h 30m 59s)
- resolution: 0.000000001
-CLOCK_TAI : 1585985496.447 (18356 days + 7h 31m 36s)
- resolution: 0.000000001
-CLOCK_MONOTONIC: 52395.722 (14h 33m 15s)
- resolution: 0.000000001
-CLOCK_BOOTTIME : 72691.019 (20h 11m 31s)
- resolution: 0.000000001
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (clock_getres.c)
-.EX
-/* clock_times.c
-\&
- Licensed under GNU General Public License v2 or later.
-*/
-#define _XOPEN_SOURCE 600
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-\&
-#define SECS_IN_DAY (24 * 60 * 60)
-\&
-static void
-displayClock(clockid_t clock, const char *name, bool showRes)
-{
- long days;
- struct timespec ts;
-\&
- if (clock_gettime(clock, &ts) == \-1) {
- perror("clock_gettime");
- exit(EXIT_FAILURE);
- }
-\&
- printf("%\-15s: %10jd.%03ld (", name,
- (intmax_t) ts.tv_sec, ts.tv_nsec / 1000000);
-\&
- days = ts.tv_sec / SECS_IN_DAY;
- if (days > 0)
- printf("%ld days + ", days);
-\&
- printf("%2dh %2dm %2ds",
- (int) (ts.tv_sec % SECS_IN_DAY) / 3600,
- (int) (ts.tv_sec % 3600) / 60,
- (int) ts.tv_sec % 60);
- printf(")\en");
-\&
- if (clock_getres(clock, &ts) == \-1) {
- perror("clock_getres");
- exit(EXIT_FAILURE);
- }
-\&
- if (showRes)
- printf(" resolution: %10jd.%09ld\en",
- (intmax_t) ts.tv_sec, ts.tv_nsec);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- bool showRes = argc > 1;
-\&
- displayClock(CLOCK_REALTIME, "CLOCK_REALTIME", showRes);
-#ifdef CLOCK_TAI
- displayClock(CLOCK_TAI, "CLOCK_TAI", showRes);
-#endif
- displayClock(CLOCK_MONOTONIC, "CLOCK_MONOTONIC", showRes);
-#ifdef CLOCK_BOOTTIME
- displayClock(CLOCK_BOOTTIME, "CLOCK_BOOTTIME", showRes);
-#endif
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR date (1),
-.BR gettimeofday (2),
-.BR settimeofday (2),
-.BR time (2),
-.BR adjtime (3),
-.BR clock_getcpuclockid (3),
-.BR ctime (3),
-.BR ftime (3),
-.BR pthread_getcpuclockid (3),
-.BR sysconf (3),
-.BR timespec (3),
-.BR time (7),
-.BR time_namespaces (7),
-.BR vdso (7),
-.BR hwclock (8)
diff --git a/man2/clock_gettime.2 b/man2/clock_gettime.2
deleted file mode 100644
index 5a599b4b9..000000000
--- a/man2/clock_gettime.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/clock_getres.2
diff --git a/man2/clock_nanosleep.2 b/man2/clock_nanosleep.2
deleted file mode 100644
index 7715ec385..000000000
--- a/man2/clock_nanosleep.2
+++ /dev/null
@@ -1,253 +0,0 @@
-.\" Copyright (c) 2008, Linux Foundation, written by Michael Kerrisk
-.\" <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH clock_nanosleep 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-clock_nanosleep \- high-resolution sleep with specifiable clock
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc ),
-since glibc 2.17
-.P
-Before glibc 2.17,
-Real-time library
-.RI ( librt ", " \-lrt )
-.SH SYNOPSIS
-.B #include <time.h>
-.nf
-.P
-.BI "int clock_nanosleep(clockid_t " clockid ", int " flags ,
-.BI " const struct timespec *" t ,
-.BI " struct timespec *_Nullable " remain );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR clock_nanosleep ():
-.nf
- _POSIX_C_SOURCE >= 200112L
-.fi
-.SH DESCRIPTION
-Like
-.BR nanosleep (2),
-.BR clock_nanosleep ()
-allows the calling thread to sleep for an interval specified
-with nanosecond precision.
-It differs in allowing the caller to select the clock against
-which the sleep interval is to be measured,
-and in allowing the sleep interval to be specified as
-either an absolute or a relative value.
-.P
-The time values passed to and returned by this call are specified using
-.BR timespec (3)
-structures.
-.P
-The
-.I clockid
-argument specifies the clock against which the sleep interval
-is to be measured.
-This argument can have one of the following values:
-.\" Look in time/posix-timers.c (kernel 5.6 sources) for the
-.\" 'struct k_clock' structures that have an 'nsleep' method
-.TP
-.B CLOCK_REALTIME
-A settable system-wide real-time clock.
-.TP
-.BR CLOCK_TAI " (since Linux 3.10)"
-A system-wide clock derived from wall-clock time but counting leap seconds.
-.TP
-.B CLOCK_MONOTONIC
-A nonsettable, monotonically increasing clock that measures time
-since some unspecified point in the past that does not change after
-system startup.
-.\" On Linux this clock measures time since boot.
-.TP
-.BR CLOCK_BOOTTIME " (since Linux 2.6.39)"
-Identical to
-.BR CLOCK_MONOTONIC ,
-except that it also includes any time that the system is suspended.
-.TP
-.B CLOCK_PROCESS_CPUTIME_ID
-A settable per-process clock that measures CPU time consumed
-by all threads in the process.
-.\" There is some trickery between glibc and the kernel
-.\" to deal with the CLOCK_PROCESS_CPUTIME_ID case.
-.P
-See
-.BR clock_getres (2)
-for further details on these clocks.
-In addition, the CPU clock IDs returned by
-.BR clock_getcpuclockid (3)
-and
-.BR pthread_getcpuclockid (3)
-can also be passed in
-.IR clockid .
-.\" Sleeping against CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM
-.\" is also possible (tested), with CAP_WAKE_ALARM, but I'm not
-.\" sure if this is useful or needs to be documented.
-.P
-If
-.I flags
-is 0, then the value specified in
-.I t
-is interpreted as an interval relative to the current
-value of the clock specified by
-.IR clockid .
-.P
-If
-.I flags
-is
-.BR TIMER_ABSTIME ,
-then
-.I t
-is interpreted as an absolute time as measured by the clock,
-.IR clockid .
-If
-.I t
-is less than or equal to the current value of the clock,
-then
-.BR clock_nanosleep ()
-returns immediately without suspending the calling thread.
-.P
-.BR clock_nanosleep ()
-suspends the execution of the calling thread
-until either at least the time specified by
-.I t
-has elapsed,
-or a signal is delivered that causes a signal handler to be called or
-that terminates the process.
-.P
-If the call is interrupted by a signal handler,
-.BR clock_nanosleep ()
-fails with the error
-.BR EINTR .
-In addition, if
-.I remain
-is not NULL, and
-.I flags
-was not
-.BR TIMER_ABSTIME ,
-it returns the remaining unslept time in
-.IR remain .
-This value can then be used to call
-.BR clock_nanosleep ()
-again and complete a (relative) sleep.
-.SH RETURN VALUE
-On successfully sleeping for the requested interval,
-.BR clock_nanosleep ()
-returns 0.
-If the call is interrupted by a signal handler or encounters an error,
-then it returns one of the positive error numbers listed in ERRORS.
-.SH ERRORS
-.TP
-.B EFAULT
-.I t
-or
-.I remain
-specified an invalid address.
-.TP
-.B EINTR
-The sleep was interrupted by a signal handler; see
-.BR signal (7).
-.TP
-.B EINVAL
-The value in the
-.I tv_nsec
-field was not in the range [0, 999999999] or
-.I tv_sec
-was negative.
-.TP
-.B EINVAL
-.I clockid
-was invalid.
-.RB ( CLOCK_THREAD_CPUTIME_ID
-is not a permitted value for
-.IR clockid .)
-.TP
-.B ENOTSUP
-The kernel does not support sleeping against this
-.IR clockid .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-Linux 2.6,
-glibc 2.1.
-.SH NOTES
-If the interval specified in
-.I t
-is not an exact multiple of the granularity of the underlying clock (see
-.BR time (7)),
-then the interval will be rounded up to the next multiple.
-Furthermore, after the sleep completes, there may still be a delay before
-the CPU becomes free to once again execute the calling thread.
-.P
-Using an absolute timer is useful for preventing
-timer drift problems of the type described in
-.BR nanosleep (2).
-(Such problems are exacerbated in programs that try to restart
-a relative sleep that is repeatedly interrupted by signals.)
-To perform a relative sleep that avoids these problems, call
-.BR clock_gettime (2)
-for the desired clock,
-add the desired interval to the returned time value,
-and then call
-.BR clock_nanosleep ()
-with the
-.B TIMER_ABSTIME
-flag.
-.P
-.BR clock_nanosleep ()
-is never restarted after being interrupted by a signal handler,
-regardless of the use of the
-.BR sigaction (2)
-.B SA_RESTART
-flag.
-.P
-The
-.I remain
-argument is unused, and unnecessary, when
-.I flags
-is
-.BR TIMER_ABSTIME .
-(An absolute sleep can be restarted using the same
-.I t
-argument.)
-.P
-POSIX.1 specifies that
-.BR clock_nanosleep ()
-has no effect on signal dispositions or the signal mask.
-.P
-POSIX.1 specifies that after changing the value of the
-.B CLOCK_REALTIME
-clock via
-.BR clock_settime (2),
-the new clock value shall be used to determine the time
-at which a thread blocked on an absolute
-.BR clock_nanosleep ()
-will wake up;
-if the new clock value falls past the end of the sleep interval, then the
-.BR clock_nanosleep ()
-call will return immediately.
-.P
-POSIX.1 specifies that
-changing the value of the
-.B CLOCK_REALTIME
-clock via
-.BR clock_settime (2)
-shall have no effect on a thread that is blocked on a relative
-.BR clock_nanosleep ().
-.SH SEE ALSO
-.BR clock_getres (2),
-.BR nanosleep (2),
-.BR restart_syscall (2),
-.BR timer_create (2),
-.BR sleep (3),
-.BR timespec (3),
-.BR usleep (3),
-.BR time (7)
diff --git a/man2/clock_settime.2 b/man2/clock_settime.2
deleted file mode 100644
index 5a599b4b9..000000000
--- a/man2/clock_settime.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/clock_getres.2
diff --git a/man2/clone.2 b/man2/clone.2
deleted file mode 100644
index e88c54eeb..000000000
--- a/man2/clone.2
+++ /dev/null
@@ -1,1949 +0,0 @@
-'\" t
-.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
-.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013, 2019
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
-.\" New man page (copied from 'fork.2').
-.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
-.\" Modified 26 Jun 2001 by Michael Kerrisk
-.\" Mostly upgraded to Linux 2.4.x
-.\" Added prototype for sys_clone() plus description
-.\" Added CLONE_THREAD with a brief description of thread groups
-.\" Added CLONE_PARENT and revised entire page to remove ambiguity
-.\" between "calling process" and "parent process"
-.\" Added CLONE_PTRACE and CLONE_VFORK
-.\" Added EPERM and EINVAL error codes
-.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
-.\" various other minor tidy ups and clarifications.
-.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
-.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added description for CLONE_NEWNS, which was added in Linux 2.4.19
-.\" Slightly rephrased, aeb.
-.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
-.\" Modified 1 Jan 2004 - various updates, aeb
-.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
-.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
-.\" wrapper under BUGS.
-.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
-.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
-.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
-.\" 2008-11-18, mtk, document CLONE_NEWPID
-.\" 2008-11-19, mtk, document CLONE_NEWUTS
-.\" 2008-11-19, mtk, document CLONE_NEWIPC
-.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
-.\"
-.TH clone 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-clone, __clone2, clone3 \- create a child process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-/* Prototype for the glibc wrapper function */
-.P
-.B #define _GNU_SOURCE
-.B #include <sched.h>
-.P
-.BI "int clone(int (*" "fn" ")(void *_Nullable), void *" stack \
-", int " flags ,
-.BI " void *_Nullable " "arg" ", ..." \
-" \fR/*\fP" " pid_t *_Nullable " parent_tid ,
-.BI " void *_Nullable " tls ,
-.BI " pid_t *_Nullable " child_tid " \fR*/\fP );"
-.P
-/* For the prototype of the raw clone() system call, see NOTES */
-.P
-.BR "#include <linux/sched.h>" " /* Definition of " "struct clone_args" " */"
-.BR "#include <sched.h>" " /* Definition of " CLONE_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "long syscall(SYS_clone3, struct clone_args *" cl_args ", size_t " size );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR clone3 (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-These system calls
-create a new ("child") process, in a manner similar to
-.BR fork (2).
-.P
-By contrast with
-.BR fork (2),
-these system calls provide more precise control over what pieces of execution
-context are shared between the calling process and the child process.
-For example, using these system calls, the caller can control whether
-or not the two processes share the virtual address space,
-the table of file descriptors, and the table of signal handlers.
-These system calls also allow the new child process to be placed
-in separate
-.BR namespaces (7).
-.P
-Note that in this manual
-page, "calling process" normally corresponds to "parent process".
-But see the descriptions of
-.B CLONE_PARENT
-and
-.B CLONE_THREAD
-below.
-.P
-This page describes the following interfaces:
-.IP \[bu] 3
-The glibc
-.BR clone ()
-wrapper function and the underlying system call on which it is based.
-The main text describes the wrapper function;
-the differences for the raw system call
-are described toward the end of this page.
-.IP \[bu]
-The newer
-.BR clone3 ()
-system call.
-.P
-In the remainder of this page, the terminology "the clone call" is used
-when noting details that apply to all of these interfaces.
-.\"
-.SS The clone() wrapper function
-When the child process is created with the
-.BR clone ()
-wrapper function,
-it commences execution by calling the function pointed to by the argument
-.IR fn .
-(This differs from
-.BR fork (2),
-where execution continues in the child from the point
-of the
-.BR fork (2)
-call.)
-The
-.I arg
-argument is passed as the argument of the function
-.IR fn .
-.P
-When the
-.IR fn ( arg )
-function returns, the child process terminates.
-The integer returned by
-.I fn
-is the exit status for the child process.
-The child process may also terminate explicitly by calling
-.BR exit (2)
-or after receiving a fatal signal.
-.P
-The
-.I stack
-argument specifies the location of the stack used by the child process.
-Since the child and calling process may share memory,
-it is not possible for the child process to execute in the
-same stack as the calling process.
-The calling process must therefore
-set up memory space for the child stack and pass a pointer to this
-space to
-.BR clone ().
-Stacks grow downward on all processors that run Linux
-(except the HP PA processors), so
-.I stack
-usually points to the topmost address of the memory space set up for
-the child stack.
-Note that
-.BR clone ()
-does not provide a means whereby the caller can inform the kernel of the
-size of the stack area.
-.P
-The remaining arguments to
-.BR clone ()
-are discussed below.
-.\"
-.SS clone3()
-The
-.BR clone3 ()
-system call provides a superset of the functionality of the older
-.BR clone ()
-interface.
-It also provides a number of API improvements, including:
-space for additional flags bits;
-cleaner separation in the use of various arguments;
-and the ability to specify the size of the child's stack area.
-.P
-As with
-.BR fork (2),
-.BR clone3 ()
-returns in both the parent and the child.
-It returns 0 in the child process and returns the PID of the child
-in the parent.
-.P
-The
-.I cl_args
-argument of
-.BR clone3 ()
-is a structure of the following form:
-.P
-.in +4n
-.EX
-struct clone_args {
- u64 flags; /* Flags bit mask */
- u64 pidfd; /* Where to store PID file descriptor
- (\fIint *\fP) */
- u64 child_tid; /* Where to store child TID,
- in child\[aq]s memory (\fIpid_t *\fP) */
- u64 parent_tid; /* Where to store child TID,
- in parent\[aq]s memory (\fIpid_t *\fP) */
- u64 exit_signal; /* Signal to deliver to parent on
- child termination */
- u64 stack; /* Pointer to lowest byte of stack */
- u64 stack_size; /* Size of stack */
- u64 tls; /* Location of new TLS */
- u64 set_tid; /* Pointer to a \fIpid_t\fP array
- (since Linux 5.5) */
- u64 set_tid_size; /* Number of elements in \fIset_tid\fP
- (since Linux 5.5) */
- u64 cgroup; /* File descriptor for target cgroup
- of child (since Linux 5.7) */
-};
-.EE
-.in
-.P
-The
-.I size
-argument that is supplied to
-.BR clone3 ()
-should be initialized to the size of this structure.
-(The existence of the
-.I size
-argument permits future extensions to the
-.I clone_args
-structure.)
-.P
-The stack for the child process is specified via
-.IR cl_args.stack ,
-which points to the lowest byte of the stack area,
-and
-.IR cl_args.stack_size ,
-which specifies the size of the stack in bytes.
-In the case where the
-.B CLONE_VM
-flag (see below) is specified, a stack must be explicitly allocated
-and specified.
-Otherwise, these two fields can be specified as NULL and 0,
-which causes the child to use the same stack area as the parent
-(in the child's own virtual address space).
-.P
-The remaining fields in the
-.I cl_args
-argument are discussed below.
-.\"
-.SS Equivalence between clone() and clone3() arguments
-Unlike the older
-.BR clone ()
-interface, where arguments are passed individually, in the newer
-.BR clone3 ()
-interface the arguments are packaged into the
-.I clone_args
-structure shown above.
-This structure allows for a superset of the information passed via the
-.BR clone ()
-arguments.
-.P
-The following table shows the equivalence between the arguments of
-.BR clone ()
-and the fields in the
-.I clone_args
-argument supplied to
-.BR clone3 ():
-.RS 4
-.TS
-lb lb lb
-l l l
-li li l.
-clone() clone3() Notes
- \fIcl_args\fP field
-flags & \[ti]0xff flags T{
-For most flags; details below
-T}
-parent_tid pidfd See CLONE_PIDFD
-child_tid child_tid See CLONE_CHILD_SETTID
-parent_tid parent_tid See CLONE_PARENT_SETTID
-flags & 0xff exit_signal
-stack stack
-\fP---\fP stack_size
-tls tls See CLONE_SETTLS
-\fP---\fP set_tid See below for details
-\fP---\fP set_tid_size
-\fP---\fP cgroup See CLONE_INTO_CGROUP
-.TE
-.RE
-.\"
-.SS The child termination signal
-When the child process terminates, a signal may be sent to the parent.
-The termination signal is specified in the low byte of
-.I flags
-.RB ( clone ())
-or in
-.I cl_args.exit_signal
-.RB ( clone3 ()).
-If this signal is specified as anything other than
-.BR SIGCHLD ,
-then the parent process must specify the
-.B __WALL
-or
-.B __WCLONE
-options when waiting for the child with
-.BR wait (2).
-If no signal (i.e., zero) is specified, then the parent process is not signaled
-when the child terminates.
-.\"
-.SS The set_tid array
-By default, the kernel chooses the next sequential PID for the new
-process in each of the PID namespaces where it is present.
-When creating a process with
-.BR clone3 (),
-the
-.I set_tid
-array (available since Linux 5.5)
-can be used to select specific PIDs for the process in some
-or all of the PID namespaces where it is present.
-If the PID of the newly created process should be set only for the current
-PID namespace or in the newly created PID namespace (if
-.I flags
-contains
-.BR CLONE_NEWPID )
-then the first element in the
-.I set_tid
-array has to be the desired PID and
-.I set_tid_size
-needs to be 1.
-.P
-If the PID of the newly created process should have a certain value in
-multiple PID namespaces, then the
-.I set_tid
-array can have multiple entries.
-The first entry defines the PID in the most
-deeply nested PID namespace and each of the following entries contains
-the PID in the
-corresponding ancestor PID namespace.
-The number of PID namespaces in which a PID
-should be set is defined by
-.I set_tid_size
-which cannot be larger than the number of currently nested PID namespaces.
-.P
-To create a process with the following PIDs in a PID namespace hierarchy:
-.RS 4
-.TS
-lb lb lb
-l l l.
-PID NS level Requested PID Notes
-0 31496 Outermost PID namespace
-1 42
-2 7 Innermost PID namespace
-.TE
-.RE
-.P
-Set the array to:
-.P
-.in +4n
-.EX
-set_tid[0] = 7;
-set_tid[1] = 42;
-set_tid[2] = 31496;
-set_tid_size = 3;
-.EE
-.in
-.P
-If only the PIDs in the two innermost PID namespaces
-need to be specified, set the array to:
-.P
-.in +4n
-.EX
-set_tid[0] = 7;
-set_tid[1] = 42;
-set_tid_size = 2;
-.EE
-.in
-.P
-The PID in the PID namespaces outside the two innermost PID namespaces
-is selected the same way as any other PID is selected.
-.P
-The
-.I set_tid
-feature requires
-.B CAP_SYS_ADMIN
-or
-(since Linux 5.9)
-.\" commit 124ea650d3072b005457faed69909221c2905a1f
-.\" commit 1caef81da05a84a40dbf02110e967ce6d1135ff6
-.B CAP_CHECKPOINT_RESTORE
-in all owning user namespaces of the target PID namespaces.
-.P
-Callers may only choose a PID greater than 1 in a given PID namespace
-if an
-.B init
-process (i.e., a process with PID 1) already exists in that namespace.
-Otherwise the PID
-entry for this PID namespace must be 1.
-.\"
-.SS The flags mask
-Both
-.BR clone ()
-and
-.BR clone3 ()
-allow a flags bit mask that modifies their behavior
-and allows the caller to specify what is shared between the calling process
-and the child process.
-This bit mask\[em]the
-.I flags
-argument of
-.BR clone ()
-or the
-.I cl_args.flags
-field passed to
-.BR clone3 ()\[em]is
-referred to as the
-.I flags
-mask in the remainder of this page.
-.P
-The
-.I flags
-mask is specified as a bitwise OR of zero or more of
-the constants listed below.
-Except as noted below, these flags are available
-(and have the same effect) in both
-.BR clone ()
-and
-.BR clone3 ().
-.TP
-.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
-Clear (zero) the child thread ID at the location pointed to by
-.I child_tid
-.RB ( clone ())
-or
-.I cl_args.child_tid
-.RB ( clone3 ())
-in child memory when the child exits, and do a wakeup on the futex
-at that address.
-The address involved may be changed by the
-.BR set_tid_address (2)
-system call.
-This is used by threading libraries.
-.TP
-.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
-Store the child thread ID at the location pointed to by
-.I child_tid
-.RB ( clone ())
-or
-.I cl_args.child_tid
-.RB ( clone3 ())
-in the child's memory.
-The store operation completes before the clone call
-returns control to user space in the child process.
-(Note that the store operation may not have completed before the clone call
-returns in the parent process, which is relevant if the
-.B CLONE_VM
-flag is also employed.)
-.TP
-.BR CLONE_CLEAR_SIGHAND " (since Linux 5.5)"
-.\" commit b612e5df4587c934bd056bf05f4a1deca4de4f75
-By default, signal dispositions in the child thread are the same as
-in the parent.
-If this flag is specified,
-then all signals that are handled in the parent
-(and not set to
-.BR SIG_IGN )
-are reset to their default dispositions
-.RB ( SIG_DFL )
-in the child.
-.IP
-Specifying this flag together with
-.B CLONE_SIGHAND
-is nonsensical and disallowed.
-.TP
-.BR CLONE_DETACHED " (historical)"
-For a while (during the Linux 2.5 development series)
-.\" added in Linux 2.5.32; removed in Linux 2.6.0-test4
-there was a
-.B CLONE_DETACHED
-flag,
-which caused the parent not to receive a signal when the child terminated.
-Ultimately, the effect of this flag was subsumed under the
-.B CLONE_THREAD
-flag and by the time Linux 2.6.0 was released, this flag had no effect.
-Starting in Linux 2.6.2, the need to give this flag together with
-.B CLONE_THREAD
-disappeared.
-.IP
-This flag is still defined, but it is usually ignored when calling
-.BR clone ().
-However, see the description of
-.B CLONE_PIDFD
-for some exceptions.
-.TP
-.BR CLONE_FILES " (since Linux 2.0)"
-If
-.B CLONE_FILES
-is set, the calling process and the child process share the same file
-descriptor table.
-Any file descriptor created by the calling process or by the child
-process is also valid in the other process.
-Similarly, if one of the processes closes a file descriptor,
-or changes its associated flags (using the
-.BR fcntl (2)
-.B F_SETFD
-operation), the other process is also affected.
-If a process sharing a file descriptor table calls
-.BR execve (2),
-its file descriptor table is duplicated (unshared).
-.IP
-If
-.B CLONE_FILES
-is not set, the child process inherits a copy of all file descriptors
-opened in the calling process at the time of the clone call.
-Subsequent operations that open or close file descriptors,
-or change file descriptor flags,
-performed by either the calling
-process or the child process do not affect the other process.
-Note, however,
-that the duplicated file descriptors in the child refer to the same
-open file descriptions as the corresponding file descriptors
-in the calling process,
-and thus share file offsets and file status flags (see
-.BR open (2)).
-.TP
-.BR CLONE_FS " (since Linux 2.0)"
-If
-.B CLONE_FS
-is set, the caller and the child process share the same filesystem
-information.
-This includes the root of the filesystem, the current
-working directory, and the umask.
-Any call to
-.BR chroot (2),
-.BR chdir (2),
-or
-.BR umask (2)
-performed by the calling process or the child process also affects the
-other process.
-.IP
-If
-.B CLONE_FS
-is not set, the child process works on a copy of the filesystem
-information of the calling process at the time of the clone call.
-Calls to
-.BR chroot (2),
-.BR chdir (2),
-or
-.BR umask (2)
-performed later by one of the processes do not affect the other process.
-.TP
-.BR CLONE_INTO_CGROUP " (since Linux 5.7)"
-.\" commit ef2c41cf38a7559bbf91af42d5b6a4429db8fc68
-By default, a child process is placed in the same version 2
-cgroup as its parent.
-The
-.B CLONE_INTO_CGROUP
-flag allows the child process to be created in a different version 2 cgroup.
-(Note that
-.B CLONE_INTO_CGROUP
-has effect only for version 2 cgroups.)
-.IP
-In order to place the child process in a different cgroup,
-the caller specifies
-.B CLONE_INTO_CGROUP
-in
-.I cl_args.flags
-and passes a file descriptor that refers to a version 2 cgroup in the
-.I cl_args.cgroup
-field.
-(This file descriptor can be obtained by opening a cgroup v2 directory
-using either the
-.B O_RDONLY
-or the
-.B O_PATH
-flag.)
-Note that all of the usual restrictions (described in
-.BR cgroups (7))
-on placing a process into a version 2 cgroup apply.
-.IP
-Among the possible use cases for
-.B CLONE_INTO_CGROUP
-are the following:
-.RS
-.IP \[bu] 3
-Spawning a process into a cgroup different from the parent's cgroup
-makes it possible for a service manager to directly spawn new
-services into dedicated cgroups.
-This eliminates the accounting
-jitter that would be caused if the child process was first created in the
-same cgroup as the parent and then
-moved into the target cgroup.
-Furthermore, spawning the child process directly into a target cgroup
-is significantly cheaper than moving the child process into
-the target cgroup after it has been created.
-.IP \[bu]
-The
-.B CLONE_INTO_CGROUP
-flag also allows the creation of
-frozen child processes by spawning them into a frozen cgroup.
-(See
-.BR cgroups (7)
-for a description of the freezer controller.)
-.IP \[bu]
-For threaded applications (or even thread implementations which
-make use of cgroups to limit individual threads), it is possible to
-establish a fixed cgroup layout before spawning each thread
-directly into its target cgroup.
-.RE
-.TP
-.BR CLONE_IO " (since Linux 2.6.25)"
-If
-.B CLONE_IO
-is set, then the new process shares an I/O context with
-the calling process.
-If this flag is not set, then (as with
-.BR fork (2))
-the new process has its own I/O context.
-.IP
-.\" The following based on text from Jens Axboe
-The I/O context is the I/O scope of the disk scheduler (i.e.,
-what the I/O scheduler uses to model scheduling of a process's I/O).
-If processes share the same I/O context,
-they are treated as one by the I/O scheduler.
-As a consequence, they get to share disk time.
-For some I/O schedulers,
-.\" the anticipatory and CFQ scheduler
-if two processes share an I/O context,
-they will be allowed to interleave their disk access.
-If several threads are doing I/O on behalf of the same process
-.RB ( aio_read (3),
-for instance), they should employ
-.B CLONE_IO
-to get better I/O performance.
-.\" with CFQ and AS.
-.IP
-If the kernel is not configured with the
-.B CONFIG_BLOCK
-option, this flag is a no-op.
-.TP
-.BR CLONE_NEWCGROUP " (since Linux 4.6)"
-Create the process in a new cgroup namespace.
-If this flag is not set, then (as with
-.BR fork (2))
-the process is created in the same cgroup namespace as the calling process.
-.IP
-For further information on cgroup namespaces, see
-.BR cgroup_namespaces (7).
-.IP
-Only a privileged process
-.RB ( CAP_SYS_ADMIN )
-can employ
-.BR CLONE_NEWCGROUP .
-.\"
-.TP
-.BR CLONE_NEWIPC " (since Linux 2.6.19)"
-If
-.B CLONE_NEWIPC
-is set, then create the process in a new IPC namespace.
-If this flag is not set, then (as with
-.BR fork (2)),
-the process is created in the same IPC namespace as
-the calling process.
-.IP
-For further information on IPC namespaces, see
-.BR ipc_namespaces (7).
-.IP
-Only a privileged process
-.RB ( CAP_SYS_ADMIN )
-can employ
-.BR CLONE_NEWIPC .
-This flag can't be specified in conjunction with
-.BR CLONE_SYSVSEM .
-.TP
-.BR CLONE_NEWNET " (since Linux 2.6.24)"
-(The implementation of this flag was completed only
-by about Linux 2.6.29.)
-.IP
-If
-.B CLONE_NEWNET
-is set, then create the process in a new network namespace.
-If this flag is not set, then (as with
-.BR fork (2))
-the process is created in the same network namespace as
-the calling process.
-.IP
-For further information on network namespaces, see
-.BR network_namespaces (7).
-.IP
-Only a privileged process
-.RB ( CAP_SYS_ADMIN )
-can employ
-.BR CLONE_NEWNET .
-.TP
-.BR CLONE_NEWNS " (since Linux 2.4.19)"
-If
-.B CLONE_NEWNS
-is set, the cloned child is started in a new mount namespace,
-initialized with a copy of the namespace of the parent.
-If
-.B CLONE_NEWNS
-is not set, the child lives in the same mount
-namespace as the parent.
-.IP
-For further information on mount namespaces, see
-.BR namespaces (7)
-and
-.BR mount_namespaces (7).
-.IP
-Only a privileged process
-.RB ( CAP_SYS_ADMIN )
-can employ
-.BR CLONE_NEWNS .
-It is not permitted to specify both
-.B CLONE_NEWNS
-and
-.B CLONE_FS
-.\" See https://lwn.net/Articles/543273/
-in the same clone call.
-.TP
-.BR CLONE_NEWPID " (since Linux 2.6.24)"
-.\" This explanation draws a lot of details from
-.\" http://lwn.net/Articles/259217/
-.\" Authors: Pavel Emelyanov <xemul@openvz.org>
-.\" and Kir Kolyshkin <kir@openvz.org>
-.\"
-.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
-.\" Author: Pavel Emelyanov <xemul@openvz.org>
-If
-.B CLONE_NEWPID
-is set, then create the process in a new PID namespace.
-If this flag is not set, then (as with
-.BR fork (2))
-the process is created in the same PID namespace as
-the calling process.
-.IP
-For further information on PID namespaces, see
-.BR namespaces (7)
-and
-.BR pid_namespaces (7).
-.IP
-Only a privileged process
-.RB ( CAP_SYS_ADMIN )
-can employ
-.BR CLONE_NEWPID .
-This flag can't be specified in conjunction with
-.BR CLONE_THREAD .
-.TP
-.B CLONE_NEWUSER
-(This flag first became meaningful for
-.BR clone ()
-in Linux 2.6.23,
-the current
-.BR clone ()
-semantics were merged in Linux 3.5,
-and the final pieces to make the user namespaces completely usable were
-merged in Linux 3.8.)
-.IP
-If
-.B CLONE_NEWUSER
-is set, then create the process in a new user namespace.
-If this flag is not set, then (as with
-.BR fork (2))
-the process is created in the same user namespace as the calling process.
-.IP
-For further information on user namespaces, see
-.BR namespaces (7)
-and
-.BR user_namespaces (7).
-.IP
-Before Linux 3.8, use of
-.B CLONE_NEWUSER
-required that the caller have three capabilities:
-.BR CAP_SYS_ADMIN ,
-.BR CAP_SETUID ,
-and
-.BR CAP_SETGID .
-.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
-Starting with Linux 3.8,
-no privileges are needed to create a user namespace.
-.IP
-This flag can't be specified in conjunction with
-.B CLONE_THREAD
-or
-.BR CLONE_PARENT .
-For security reasons,
-.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
-.\" https://lwn.net/Articles/543273/
-.\" The fix actually went into Linux 3.9 and into Linux 3.8.3. However, user namespaces
-.\" were, for practical purposes, unusable in earlier Linux 3.8.x because of the
-.\" various filesystems that didn't support userns.
-.B CLONE_NEWUSER
-cannot be specified in conjunction with
-.BR CLONE_FS .
-.TP
-.BR CLONE_NEWUTS " (since Linux 2.6.19)"
-If
-.B CLONE_NEWUTS
-is set, then create the process in a new UTS namespace,
-whose identifiers are initialized by duplicating the identifiers
-from the UTS namespace of the calling process.
-If this flag is not set, then (as with
-.BR fork (2))
-the process is created in the same UTS namespace as
-the calling process.
-.IP
-For further information on UTS namespaces, see
-.BR uts_namespaces (7).
-.IP
-Only a privileged process
-.RB ( CAP_SYS_ADMIN )
-can employ
-.BR CLONE_NEWUTS .
-.TP
-.BR CLONE_PARENT " (since Linux 2.3.12)"
-If
-.B CLONE_PARENT
-is set, then the parent of the new child (as returned by
-.BR getppid (2))
-will be the same as that of the calling process.
-.IP
-If
-.B CLONE_PARENT
-is not set, then (as with
-.BR fork (2))
-the child's parent is the calling process.
-.IP
-Note that it is the parent process, as returned by
-.BR getppid (2),
-which is signaled when the child terminates, so that
-if
-.B CLONE_PARENT
-is set, then the parent of the calling process, rather than the
-calling process itself, is signaled.
-.IP
-The
-.B CLONE_PARENT
-flag can't be used in clone calls by the
-global init process (PID 1 in the initial PID namespace)
-and init processes in other PID namespaces.
-This restriction prevents the creation of multi-rooted process trees
-as well as the creation of unreapable zombies in the initial PID namespace.
-.TP
-.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
-Store the child thread ID at the location pointed to by
-.I parent_tid
-.RB ( clone ())
-or
-.I cl_args.parent_tid
-.RB ( clone3 ())
-in the parent's memory.
-(In Linux 2.5.32-2.5.48 there was a flag
-.B CLONE_SETTID
-that did this.)
-The store operation completes before the clone call
-returns control to user space.
-.TP
-.BR CLONE_PID " (Linux 2.0 to Linux 2.5.15)"
-If
-.B CLONE_PID
-is set, the child process is created with the same process ID as
-the calling process.
-This is good for hacking the system, but otherwise
-of not much use.
-From Linux 2.3.21 onward, this flag could be
-specified only by the system boot process (PID 0).
-The flag disappeared completely from the kernel sources in Linux 2.5.16.
-Subsequently, the kernel silently ignored this bit if it was specified in the
-.I flags
-mask.
-Much later, the same bit was recycled for use as the
-.B CLONE_PIDFD
-flag.
-.TP
-.BR CLONE_PIDFD " (since Linux 5.2)"
-.\" commit b3e5838252665ee4cfa76b82bdf1198dca81e5be
-If this flag is specified,
-a PID file descriptor referring to the child process is allocated
-and placed at a specified location in the parent's memory.
-The close-on-exec flag is set on this new file descriptor.
-PID file descriptors can be used for the purposes described in
-.BR pidfd_open (2).
-.RS
-.IP \[bu] 3
-When using
-.BR clone3 (),
-the PID file descriptor is placed at the location pointed to by
-.IR cl_args.pidfd .
-.IP \[bu]
-When using
-.BR clone (),
-the PID file descriptor is placed at the location pointed to by
-.IR parent_tid .
-Since the
-.I parent_tid
-argument is used to return the PID file descriptor,
-.B CLONE_PIDFD
-cannot be used with
-.B CLONE_PARENT_SETTID
-when calling
-.BR clone ().
-.RE
-.IP
-It is currently not possible to use this flag together with
-.BR CLONE_THREAD .
-This means that the process identified by the PID file descriptor
-will always be a thread group leader.
-.IP
-If the obsolete
-.B CLONE_DETACHED
-flag is specified alongside
-.B CLONE_PIDFD
-when calling
-.BR clone (),
-an error is returned.
-An error also results if
-.B CLONE_DETACHED
-is specified when calling
-.BR clone3 ().
-This error behavior ensures that the bit corresponding to
-.B CLONE_DETACHED
-can be reused for further PID file descriptor features in the future.
-.TP
-.BR CLONE_PTRACE " (since Linux 2.2)"
-If
-.B CLONE_PTRACE
-is specified, and the calling process is being traced,
-then trace the child also (see
-.BR ptrace (2)).
-.TP
-.BR CLONE_SETTLS " (since Linux 2.5.32)"
-The TLS (Thread Local Storage) descriptor is set to
-.IR tls .
-.IP
-The interpretation of
-.I tls
-and the resulting effect is architecture dependent.
-On x86,
-.I tls
-is interpreted as a
-.I struct user_desc\~*
-(see
-.BR set_thread_area (2)).
-On x86-64 it is the new value to be set for the %fs base register
-(see the
-.B ARCH_SET_FS
-argument to
-.BR arch_prctl (2)).
-On architectures with a dedicated TLS register, it is the new value
-of that register.
-.IP
-Use of this flag requires detailed knowledge and generally it
-should not be used except in libraries implementing threading.
-.TP
-.BR CLONE_SIGHAND " (since Linux 2.0)"
-If
-.B CLONE_SIGHAND
-is set, the calling process and the child process share the same table of
-signal handlers.
-If the calling process or child process calls
-.BR sigaction (2)
-to change the behavior associated with a signal, the behavior is
-changed in the other process as well.
-However, the calling process and the child
-process still have distinct signal masks and sets of pending
-signals.
-So, one of them may block or unblock signals using
-.BR sigprocmask (2)
-without affecting the other process.
-.IP
-If
-.B CLONE_SIGHAND
-is not set, the child process inherits a copy of the signal handlers
-of the calling process at the time of the clone call.
-Calls to
-.BR sigaction (2)
-performed later by one of the processes have no effect on the other
-process.
-.IP
-Since Linux 2.6.0,
-.\" Precisely: Linux 2.6.0-test6
-the
-.I flags
-mask must also include
-.B CLONE_VM
-if
-.B CLONE_SIGHAND
-is specified.
-.TP
-.BR CLONE_STOPPED " (since Linux 2.6.0)"
-.\" Precisely: Linux 2.6.0-test2
-If
-.B CLONE_STOPPED
-is set, then the child is initially stopped (as though it was sent a
-.B SIGSTOP
-signal), and must be resumed by sending it a
-.B SIGCONT
-signal.
-.IP
-This flag was
-.I deprecated
-from Linux 2.6.25 onward,
-and was
-.I removed
-altogether in Linux 2.6.38.
-Since then, the kernel silently ignores it without error.
-.\" glibc 2.8 removed this defn from bits/sched.h
-Starting with Linux 4.6, the same bit was reused for the
-.B CLONE_NEWCGROUP
-flag.
-.TP
-.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
-If
-.B CLONE_SYSVSEM
-is set, then the child and the calling process share
-a single list of System V semaphore adjustment
-.RI ( semadj )
-values (see
-.BR semop (2)).
-In this case, the shared list accumulates
-.I semadj
-values across all processes sharing the list,
-and semaphore adjustments are performed only when the last process
-that is sharing the list terminates (or ceases sharing the list using
-.BR unshare (2)).
-If this flag is not set, then the child has a separate
-.I semadj
-list that is initially empty.
-.TP
-.BR CLONE_THREAD " (since Linux 2.4.0)"
-.\" Precisely: Linux 2.6.0-test8
-If
-.B CLONE_THREAD
-is set, the child is placed in the same thread group as the calling process.
-To make the remainder of the discussion of
-.B CLONE_THREAD
-more readable, the term "thread" is used to refer to the
-processes within a thread group.
-.IP
-Thread groups were a feature added in Linux 2.4 to support the
-POSIX threads notion of a set of threads that share a single PID.
-Internally, this shared PID is the so-called
-thread group identifier (TGID) for the thread group.
-Since Linux 2.4, calls to
-.BR getpid (2)
-return the TGID of the caller.
-.IP
-The threads within a group can be distinguished by their (system-wide)
-unique thread IDs (TID).
-A new thread's TID is available as the function result
-returned to the caller,
-and a thread can obtain
-its own TID using
-.BR gettid (2).
-.IP
-When a clone call is made without specifying
-.BR CLONE_THREAD ,
-then the resulting thread is placed in a new thread group
-whose TGID is the same as the thread's TID.
-This thread is the
-.I leader
-of the new thread group.
-.IP
-A new thread created with
-.B CLONE_THREAD
-has the same parent process as the process that made the clone call
-(i.e., like
-.BR CLONE_PARENT ),
-so that calls to
-.BR getppid (2)
-return the same value for all of the threads in a thread group.
-When a
-.B CLONE_THREAD
-thread terminates, the thread that created it is not sent a
-.B SIGCHLD
-(or other termination) signal;
-nor can the status of such a thread be obtained
-using
-.BR wait (2).
-(The thread is said to be
-.IR detached .)
-.IP
-After all of the threads in a thread group terminate,
-the parent process of the thread group is sent a
-.B SIGCHLD
-(or other termination) signal.
-.IP
-If any of the threads in a thread group performs an
-.BR execve (2),
-then all threads other than the thread group leader are terminated,
-and the new program is executed in the thread group leader.
-.IP
-If one of the threads in a thread group creates a child using
-.BR fork (2),
-then any thread in the group can
-.BR wait (2)
-for that child.
-.IP
-Since Linux 2.5.35, the
-.I flags
-mask must also include
-.B CLONE_SIGHAND
-if
-.B CLONE_THREAD
-is specified
-(and note that, since Linux 2.6.0,
-.\" Precisely: Linux 2.6.0-test6
-.B CLONE_SIGHAND
-also requires
-.B CLONE_VM
-to be included).
-.IP
-Signal dispositions and actions are process-wide:
-if an unhandled signal is delivered to a thread, then
-it will affect (terminate, stop, continue, be ignored in)
-all members of the thread group.
-.IP
-Each thread has its own signal mask, as set by
-.BR sigprocmask (2).
-.IP
-A signal may be process-directed or thread-directed.
-A process-directed signal is targeted at a thread group (i.e., a TGID),
-and is delivered to an arbitrarily selected thread from among those
-that are not blocking the signal.
-A signal may be process-directed because it was generated by the kernel
-for reasons other than a hardware exception, or because it was sent using
-.BR kill (2)
-or
-.BR sigqueue (3).
-A thread-directed signal is targeted at (i.e., delivered to)
-a specific thread.
-A signal may be thread-directed because it was sent using
-.BR tgkill (2)
-or
-.BR pthread_sigqueue (3),
-or because the thread executed a machine language instruction that triggered
-a hardware exception
-(e.g., invalid memory access triggering
-.B SIGSEGV
-or a floating-point exception triggering
-.BR SIGFPE ).
-.IP
-A call to
-.BR sigpending (2)
-returns a signal set that is the union of the pending process-directed
-signals and the signals that are pending for the calling thread.
-.IP
-If a process-directed signal is delivered to a thread group,
-and the thread group has installed a handler for the signal, then
-the handler is invoked in exactly one, arbitrarily selected
-member of the thread group that has not blocked the signal.
-If multiple threads in a group are waiting to accept the same signal using
-.BR sigwaitinfo (2),
-the kernel will arbitrarily select one of these threads
-to receive the signal.
-.TP
-.BR CLONE_UNTRACED " (since Linux 2.5.46)"
-If
-.B CLONE_UNTRACED
-is specified, then a tracing process cannot force
-.B CLONE_PTRACE
-on this child process.
-.TP
-.BR CLONE_VFORK " (since Linux 2.2)"
-If
-.B CLONE_VFORK
-is set, the execution of the calling process is suspended
-until the child releases its virtual memory
-resources via a call to
-.BR execve (2)
-or
-.BR _exit (2)
-(as with
-.BR vfork (2)).
-.IP
-If
-.B CLONE_VFORK
-is not set, then both the calling process and the child are schedulable
-after the call, and an application should not rely on execution occurring
-in any particular order.
-.TP
-.BR CLONE_VM " (since Linux 2.0)"
-If
-.B CLONE_VM
-is set, the calling process and the child process run in the same memory
-space.
-In particular, memory writes performed by the calling process
-or by the child process are also visible in the other process.
-Moreover, any memory mapping or unmapping performed with
-.BR mmap (2)
-or
-.BR munmap (2)
-by the child or calling process also affects the other process.
-.IP
-If
-.B CLONE_VM
-is not set, the child process runs in a separate copy of the memory
-space of the calling process at the time of the clone call.
-Memory writes or file mappings/unmappings performed by one of the
-processes do not affect the other, as with
-.BR fork (2).
-.IP
-If the
-.B CLONE_VM
-flag is specified and the
-.B CLONE_VFORK
-flag is not specified,
-then any alternate signal stack that was established by
-.BR sigaltstack (2)
-is cleared in the child process.
-.SH RETURN VALUE
-.\" gettid(2) returns current->pid;
-.\" getpid(2) returns current->tgid;
-On success, the thread ID of the child process is returned
-in the caller's thread of execution.
-On failure, \-1 is returned
-in the caller's context, no child process is created, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.BR EACCES " (" clone3 "() only)"
-.B CLONE_INTO_CGROUP
-was specified in
-.IR cl_args.flags ,
-but the restrictions (described in
-.BR cgroups (7))
-on placing the child process into the version 2 cgroup referred to by
-.I cl_args.cgroup
-are not met.
-.TP
-.B EAGAIN
-Too many processes are already running; see
-.BR fork (2).
-.TP
-.BR EBUSY " (" clone3 "() only)"
-.B CLONE_INTO_CGROUP
-was specified in
-.IR cl_args.flags ,
-but the file descriptor specified in
-.I cl_args.cgroup
-refers to a version 2 cgroup in which a domain controller is enabled.
-.TP
-.BR EEXIST " (" clone3 "() only)"
-One (or more) of the PIDs specified in
-.I set_tid
-already exists in the corresponding PID namespace.
-.TP
-.B EINVAL
-Both
-.B CLONE_SIGHAND
-and
-.B CLONE_CLEAR_SIGHAND
-were specified in the
-.I flags
-mask.
-.TP
-.B EINVAL
-.B CLONE_SIGHAND
-was specified in the
-.I flags
-mask, but
-.B CLONE_VM
-was not.
-(Since Linux 2.6.0.)
-.\" Precisely: Linux 2.6.0-test6
-.TP
-.B EINVAL
-.B CLONE_THREAD
-was specified in the
-.I flags
-mask, but
-.B CLONE_SIGHAND
-was not.
-(Since Linux 2.5.35.)
-.\" .TP
-.\" .B EINVAL
-.\" Precisely one of
-.\" .B CLONE_DETACHED
-.\" and
-.\" .B CLONE_THREAD
-.\" was specified.
-.\" (Since Linux 2.6.0-test6.)
-.TP
-.B EINVAL
-.B CLONE_THREAD
-was specified in the
-.I flags
-mask, but the current process previously called
-.BR unshare (2)
-with the
-.B CLONE_NEWPID
-flag or used
-.BR setns (2)
-to reassociate itself with a PID namespace.
-.TP
-.B EINVAL
-.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
-Both
-.B CLONE_FS
-and
-.B CLONE_NEWNS
-were specified in the
-.I flags
-mask.
-.TP
-.BR EINVAL " (since Linux 3.9)"
-Both
-.B CLONE_NEWUSER
-and
-.B CLONE_FS
-were specified in the
-.I flags
-mask.
-.TP
-.B EINVAL
-Both
-.B CLONE_NEWIPC
-and
-.B CLONE_SYSVSEM
-were specified in the
-.I flags
-mask.
-.TP
-.B EINVAL
-.B CLONE_NEWPID
-and one (or both) of
-.B CLONE_THREAD
-or
-.B CLONE_PARENT
-were specified in the
-.I flags
-mask.
-.TP
-.B EINVAL
-.B CLONE_NEWUSER
-and
-.B CLONE_THREAD
-were specified in the
-.I flags
-mask.
-.TP
-.BR EINVAL " (since Linux 2.6.32)"
-.\" commit 123be07b0b399670a7cc3d82fef0cb4f93ef885c
-.B CLONE_PARENT
-was specified, and the caller is an init process.
-.TP
-.B EINVAL
-Returned by the glibc
-.BR clone ()
-wrapper function when
-.I fn
-or
-.I stack
-is specified as NULL.
-.TP
-.B EINVAL
-.B CLONE_NEWIPC
-was specified in the
-.I flags
-mask,
-but the kernel was not configured with the
-.B CONFIG_SYSVIPC
-and
-.B CONFIG_IPC_NS
-options.
-.TP
-.B EINVAL
-.B CLONE_NEWNET
-was specified in the
-.I flags
-mask,
-but the kernel was not configured with the
-.B CONFIG_NET_NS
-option.
-.TP
-.B EINVAL
-.B CLONE_NEWPID
-was specified in the
-.I flags
-mask,
-but the kernel was not configured with the
-.B CONFIG_PID_NS
-option.
-.TP
-.B EINVAL
-.B CLONE_NEWUSER
-was specified in the
-.I flags
-mask,
-but the kernel was not configured with the
-.B CONFIG_USER_NS
-option.
-.TP
-.B EINVAL
-.B CLONE_NEWUTS
-was specified in the
-.I flags
-mask,
-but the kernel was not configured with the
-.B CONFIG_UTS_NS
-option.
-.TP
-.B EINVAL
-.I stack
-is not aligned to a suitable boundary for this architecture.
-For example, on aarch64,
-.I stack
-must be a multiple of 16.
-.TP
-.BR EINVAL " (" clone3 "() only)"
-.B CLONE_DETACHED
-was specified in the
-.I flags
-mask.
-.TP
-.BR EINVAL " (" clone "() only)"
-.B CLONE_PIDFD
-was specified together with
-.B CLONE_DETACHED
-in the
-.I flags
-mask.
-.TP
-.B EINVAL
-.B CLONE_PIDFD
-was specified together with
-.B CLONE_THREAD
-in the
-.I flags
-mask.
-.TP
-.BR EINVAL " (" clone "() only)"
-.B CLONE_PIDFD
-was specified together with
-.B CLONE_PARENT_SETTID
-in the
-.I flags
-mask.
-.TP
-.BR EINVAL " (" clone3 "() only)"
-.I set_tid_size
-is greater than the number of nested PID namespaces.
-.TP
-.BR EINVAL " (" clone3 "() only)"
-One of the PIDs specified in
-.I set_tid
-was invalid.
-.TP
-.BR EINVAL " (" clone3 "() only)"
-.\" commit 7f192e3cd316ba58c88dfa26796cf77789dd9872
-.B CLONE_THREAD
-or
-.B CLONE_PARENT
-was specified in the
-.I flags
-mask, but a signal was specified in
-.IR exit_signal .
-.TP
-.BR EINVAL " (AArch64 only, Linux 4.6 and earlier)"
-.I stack
-was not aligned to a 128-bit boundary.
-.TP
-.B ENOMEM
-Cannot allocate sufficient memory to allocate a task structure for the
-child, or to copy those parts of the caller's context that need to be
-copied.
-.TP
-.BR ENOSPC " (since Linux 3.7)"
-.\" commit f2302505775fd13ba93f034206f1e2a587017929
-.B CLONE_NEWPID
-was specified in the
-.I flags
-mask,
-but the limit on the nesting depth of PID namespaces
-would have been exceeded; see
-.BR pid_namespaces (7).
-.TP
-.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
-.B CLONE_NEWUSER
-was specified in the
-.I flags
-mask, and the call would cause the limit on the number of
-nested user namespaces to be exceeded.
-See
-.BR user_namespaces (7).
-.IP
-From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
-.BR EUSERS .
-.TP
-.BR ENOSPC " (since Linux 4.9)"
-One of the values in the
-.I flags
-mask specified the creation of a new user namespace,
-but doing so would have caused the limit defined by the corresponding file in
-.I /proc/sys/user
-to be exceeded.
-For further details, see
-.BR namespaces (7).
-.TP
-.BR EOPNOTSUPP " (" clone3 "() only)"
-.B CLONE_INTO_CGROUP
-was specified in
-.IR cl_args.flags ,
-but the file descriptor specified in
-.I cl_args.cgroup
-refers to a version 2 cgroup that is in the
-.I domain invalid
-state.
-.TP
-.B EPERM
-.BR CLONE_NEWCGROUP ,
-.BR CLONE_NEWIPC ,
-.BR CLONE_NEWNET ,
-.BR CLONE_NEWNS ,
-.BR CLONE_NEWPID ,
-or
-.B CLONE_NEWUTS
-was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
-.TP
-.B EPERM
-.B CLONE_PID
-was specified by a process other than process 0.
-(This error occurs only on Linux 2.5.15 and earlier.)
-.TP
-.B EPERM
-.B CLONE_NEWUSER
-was specified in the
-.I flags
-mask,
-but either the effective user ID or the effective group ID of the caller
-does not have a mapping in the parent namespace (see
-.BR user_namespaces (7)).
-.TP
-.BR EPERM " (since Linux 3.9)"
-.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
-.B CLONE_NEWUSER
-was specified in the
-.I flags
-mask and the caller is in a chroot environment
-.\" FIXME What is the rationale for this restriction?
-(i.e., the caller's root directory does not match the root directory
-of the mount namespace in which it resides).
-.TP
-.BR EPERM " (" clone3 "() only)"
-.I set_tid_size
-was greater than zero, and the caller lacks the
-.B CAP_SYS_ADMIN
-capability in one or more of the user namespaces that own the
-corresponding PID namespaces.
-.TP
-.BR ERESTARTNOINTR " (since Linux 2.6.17)"
-.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
-System call was interrupted by a signal and will be restarted.
-(This can be seen only during a trace.)
-.TP
-.BR EUSERS " (Linux 3.11 to Linux 4.8)"
-.B CLONE_NEWUSER
-was specified in the
-.I flags
-mask,
-and the limit on the number of nested user namespaces would be exceeded.
-See the discussion of the
-.B ENOSPC
-error above.
-.SH VERSIONS
-The glibc
-.BR clone ()
-wrapper function makes some changes
-in the memory pointed to by
-.I stack
-(changes required to set the stack up correctly for the child)
-.I before
-invoking the
-.BR clone ()
-system call.
-So, in cases where
-.BR clone ()
-is used to recursively create children,
-do not use the buffer employed for the parent's stack
-as the stack of the child.
-.P
-On i386,
-.BR clone ()
-should not be called through vsyscall, but directly through
-.IR "int $0x80" .
-.SS C library/kernel differences
-The raw
-.BR clone ()
-system call corresponds more closely to
-.BR fork (2)
-in that execution in the child continues from the point of the
-call.
-As such, the
-.I fn
-and
-.I arg
-arguments of the
-.BR clone ()
-wrapper function are omitted.
-.P
-In contrast to the glibc wrapper, the raw
-.BR clone ()
-system call accepts NULL as a
-.I stack
-argument (and
-.BR clone3 ()
-likewise allows
-.I cl_args.stack
-to be NULL).
-In this case, the child uses a duplicate of the parent's stack.
-(Copy-on-write semantics ensure that the child gets separate copies
-of stack pages when either process modifies the stack.)
-In this case, for correct operation, the
-.B CLONE_VM
-option should not be specified.
-(If the child
-.I shares
-the parent's memory because of the use of the
-.B CLONE_VM
-flag,
-then no copy-on-write duplication occurs and chaos is likely to result.)
-.P
-The order of the arguments also differs in the raw system call,
-and there are variations in the arguments across architectures,
-as detailed in the following paragraphs.
-.P
-The raw system call interface on x86-64 and some other architectures
-(including sh, tile, and alpha) is:
-.P
-.in +4n
-.EX
-.BI "long clone(unsigned long " flags ", void *" stack ,
-.BI " int *" parent_tid ", int *" child_tid ,
-.BI " unsigned long " tls );
-.EE
-.in
-.P
-On x86-32, and several other common architectures
-(including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
-and MIPS),
-.\" CONFIG_CLONE_BACKWARDS
-the order of the last two arguments is reversed:
-.P
-.in +4n
-.EX
-.BI "long clone(unsigned long " flags ", void *" stack ,
-.BI " int *" parent_tid ", unsigned long " tls ,
-.BI " int *" child_tid );
-.EE
-.in
-.P
-On the cris and s390 architectures,
-.\" CONFIG_CLONE_BACKWARDS2
-the order of the first two arguments is reversed:
-.P
-.in +4n
-.EX
-.BI "long clone(void *" stack ", unsigned long " flags ,
-.BI " int *" parent_tid ", int *" child_tid ,
-.BI " unsigned long " tls );
-.EE
-.in
-.P
-On the microblaze architecture,
-.\" CONFIG_CLONE_BACKWARDS3
-an additional argument is supplied:
-.P
-.in +4n
-.EX
-.BI "long clone(unsigned long " flags ", void *" stack ,
-.BI " int " stack_size , "\fR /* Size of stack */"
-.BI " int *" parent_tid ", int *" child_tid ,
-.BI " unsigned long " tls );
-.EE
-.in
-.\"
-.SS blackfin, m68k, and sparc
-.\" Mike Frysinger noted in a 2013 mail:
-.\" these arches don't define __ARCH_WANT_SYS_CLONE:
-.\" blackfin ia64 m68k sparc
-The argument-passing conventions on
-blackfin, m68k, and sparc are different from the descriptions above.
-For details, see the kernel (and glibc) source.
-.SS ia64
-On ia64, a different interface is used:
-.P
-.in +4n
-.EX
-.BI "int __clone2(int (*" "fn" ")(void *),"
-.BI " void *" stack_base ", size_t " stack_size ,
-.BI " int " flags ", void *" "arg" ", ..."
-.BI " /* pid_t *" parent_tid ", struct user_desc *" tls ,
-.BI " pid_t *" child_tid " */ );"
-.EE
-.in
-.P
-The prototype shown above is for the glibc wrapper function;
-for the system call itself,
-the prototype can be described as follows (it is identical to the
-.BR clone ()
-prototype on microblaze):
-.P
-.in +4n
-.EX
-.BI "long clone2(unsigned long " flags ", void *" stack_base ,
-.BI " int " stack_size , "\fR /* Size of stack */"
-.BI " int *" parent_tid ", int *" child_tid ,
-.BI " unsigned long " tls );
-.EE
-.in
-.P
-.BR __clone2 ()
-operates in the same way as
-.BR clone (),
-except that
-.I stack_base
-points to the lowest address of the child's stack area,
-and
-.I stack_size
-specifies the size of the stack pointed to by
-.IR stack_base .
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR clone3 ()
-Linux 5.3.
-.\" There is no entry for
-.\" .BR clone ()
-.\" in libc5.
-.\" glibc2 provides
-.\" .BR clone ()
-.\" as described in this manual page.
-.SS Linux 2.4 and earlier
-In the Linux 2.4.x series,
-.B CLONE_THREAD
-generally does not make the parent of the new thread the same
-as the parent of the calling process.
-However, from Linux 2.4.7 to Linux 2.4.18 the
-.B CLONE_THREAD
-flag implied the
-.B CLONE_PARENT
-flag (as in Linux 2.6.0 and later).
-.P
-In Linux 2.4 and earlier,
-.BR clone ()
-does not take arguments
-.IR parent_tid ,
-.IR tls ,
-and
-.IR child_tid .
-.SH NOTES
-One use of these system calls
-is to implement threads: multiple flows of control in a program that
-run concurrently in a shared address space.
-.P
-The
-.BR kcmp (2)
-system call can be used to test whether two processes share various
-resources such as a file descriptor table,
-System V semaphore undo operations, or a virtual address space.
-.P
-Handlers registered using
-.BR pthread_atfork (3)
-are not executed during a clone call.
-.SH BUGS
-GNU C library versions 2.3.4 up to and including 2.24
-contained a wrapper function for
-.BR getpid (2)
-that performed caching of PIDs.
-This caching relied on support in the glibc wrapper for
-.BR clone (),
-but limitations in the implementation
-meant that the cache was not up to date in some circumstances.
-In particular,
-if a signal was delivered to the child immediately after the
-.BR clone ()
-call, then a call to
-.BR getpid (2)
-in a handler for the signal could return the PID
-of the calling process ("the parent"),
-if the clone wrapper had not yet had a chance to update the PID
-cache in the child.
-(This discussion ignores the case where the child was created using
-.BR CLONE_THREAD ,
-when
-.BR getpid (2)
-.I should
-return the same value in the child and in the process that called
-.BR clone (),
-since the caller and the child are in the same thread group.
-The stale-cache problem also does not occur if the
-.I flags
-argument includes
-.BR CLONE_VM .)
-To get the truth, it was sometimes necessary to use code such as the following:
-.P
-.in +4n
-.EX
-#include <syscall.h>
-\&
-pid_t mypid;
-\&
-mypid = syscall(SYS_getpid);
-.EE
-.in
-.\" See also the following bug reports
-.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
-.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
-.P
-Because of the stale-cache problem, as well as other problems noted in
-.BR getpid (2),
-the PID caching feature was removed in glibc 2.25.
-.SH EXAMPLES
-The following program demonstrates the use of
-.BR clone ()
-to create a child process that executes in a separate UTS namespace.
-The child changes the hostname in its UTS namespace.
-Both parent and child then display the system hostname,
-making it possible to see that the hostname
-differs in the UTS namespaces of the parent and child.
-For an example of the use of this program, see
-.BR setns (2).
-.P
-Within the sample program, we allocate the memory that is to
-be used for the child's stack using
-.BR mmap (2)
-rather than
-.BR malloc (3)
-for the following reasons:
-.IP \[bu] 3
-.BR mmap (2)
-allocates a block of memory that starts on a page
-boundary and is a multiple of the page size.
-This is useful if we want to establish a guard page (a page with protection
-.BR PROT_NONE )
-at the end of the stack using
-.BR mprotect (2).
-.IP \[bu]
-We can specify the
-.B MAP_STACK
-flag to request a mapping that is suitable for a stack.
-For the moment, this flag is a no-op on Linux,
-but it exists and has effect on some other systems,
-so we should include it for portability.
-.SS Program source
-.\" SRC BEGIN (clone.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/utsname.h>
-#include <sys/wait.h>
-#include <unistd.h>
-\&
-static int /* Start function for cloned child */
-childFunc(void *arg)
-{
- struct utsname uts;
-\&
- /* Change hostname in UTS namespace of child. */
-\&
- if (sethostname(arg, strlen(arg)) == \-1)
- err(EXIT_FAILURE, "sethostname");
-\&
- /* Retrieve and display hostname. */
-\&
- if (uname(&uts) == \-1)
- err(EXIT_FAILURE, "uname");
- printf("uts.nodename in child: %s\en", uts.nodename);
-\&
- /* Keep the namespace open for a while, by sleeping.
- This allows some experimentation\-\-for example, another
- process might join the namespace. */
-\&
- sleep(200);
-\&
- return 0; /* Child terminates now */
-}
-\&
-#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
-\&
-int
-main(int argc, char *argv[])
-{
- char *stack; /* Start of stack buffer */
- char *stackTop; /* End of stack buffer */
- pid_t pid;
- struct utsname uts;
-\&
- if (argc < 2) {
- fprintf(stderr, "Usage: %s <child\-hostname>\en", argv[0]);
- exit(EXIT_SUCCESS);
- }
-\&
- /* Allocate memory to be used for the stack of the child. */
-\&
- stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
- if (stack == MAP_FAILED)
- err(EXIT_FAILURE, "mmap");
-\&
- stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
-\&
- /* Create child that has its own UTS namespace;
- child commences execution in childFunc(). */
-\&
- pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
- if (pid == \-1)
- err(EXIT_FAILURE, "clone");
- printf("clone() returned %jd\en", (intmax_t) pid);
-\&
- /* Parent falls through to here */
-\&
- sleep(1); /* Give child time to change its hostname */
-\&
- /* Display hostname in parent\[aq]s UTS namespace. This will be
- different from hostname in child\[aq]s UTS namespace. */
-\&
- if (uname(&uts) == \-1)
- err(EXIT_FAILURE, "uname");
- printf("uts.nodename in parent: %s\en", uts.nodename);
-\&
- if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
- err(EXIT_FAILURE, "waitpid");
- printf("child has terminated\en");
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR fork (2),
-.BR futex (2),
-.BR getpid (2),
-.BR gettid (2),
-.BR kcmp (2),
-.BR mmap (2),
-.BR pidfd_open (2),
-.BR set_thread_area (2),
-.BR set_tid_address (2),
-.BR setns (2),
-.BR tkill (2),
-.BR unshare (2),
-.BR wait (2),
-.BR capabilities (7),
-.BR namespaces (7),
-.BR pthreads (7)
diff --git a/man2/clone2.2 b/man2/clone2.2
deleted file mode 100644
index 68f41a5ba..000000000
--- a/man2/clone2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/clone.2
diff --git a/man2/clone3.2 b/man2/clone3.2
deleted file mode 100644
index 68f41a5ba..000000000
--- a/man2/clone3.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/clone.2
diff --git a/man2/close.2 b/man2/close.2
deleted file mode 100644
index cc6d2f43e..000000000
--- a/man2/close.2
+++ /dev/null
@@ -1,274 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\" and Copyright (C) 2016 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Wed Jul 21 22:40:25 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Sat Feb 18 15:27:48 1995 by Michael Haardt
-.\" Modified Sun Apr 14 11:40:50 1996 by Andries Brouwer <aeb@cwi.nl>:
-.\" corrected description of effect on locks (thanks to
-.\" Tigran Aivazian <tigran@sco.com>).
-.\" Modified Fri Jan 31 16:21:46 1997 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2000-07-22 by Nicolás Lichtmaier <nick@debian.org>
-.\" added note about close(2) not guaranteeing that data is safe on close.
-.\"
-.TH close 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-close \- close a file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int close(int " fd );
-.fi
-.SH DESCRIPTION
-.BR close ()
-closes a file descriptor, so that it no longer refers to any file and
-may be reused.
-Any record locks (see
-.BR fcntl (2))
-held on the file it was associated with,
-and owned by the process,
-are removed regardless of the file descriptor that was used to obtain the lock.
-This has some unfortunate consequences
-and one should be extra careful when using advisory record locking.
-See
-.BR fcntl (2)
-for discussion of the risks and consequences
-as well as for the (probably preferred) open file description locks.
-.P
-If
-.I fd
-is the last file descriptor referring to the underlying
-open file description (see
-.BR open (2)),
-the resources associated with the open file description are freed;
-if the file descriptor was the last reference to a file which has been
-removed using
-.BR unlink (2),
-the file is deleted.
-.SH RETURN VALUE
-.BR close ()
-returns zero on success.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-isn't a valid open file descriptor.
-.TP
-.B EINTR
-.\" Though, it's in doubt whether this error can ever occur; see
-.\" https://lwn.net/Articles/576478/ "Returning EINTR from close()"
-The
-.BR close ()
-call was interrupted by a signal; see
-.BR signal (7).
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B ENOSPC
-.TQ
-.B EDQUOT
-On NFS, these errors are not normally reported against the first write
-which exceeds the available storage space, but instead against a
-subsequent
-.BR write (2),
-.BR fsync (2),
-or
-.BR close ().
-.P
-See NOTES for a discussion of why
-.BR close ()
-should not be retried after an error.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.\" SVr4 documents an additional ENOLINK error condition.
-.SH NOTES
-A successful close does not guarantee that the data has been successfully
-saved to disk, as the kernel uses the buffer cache to defer writes.
-Typically, filesystems do not flush buffers when a file is closed.
-If you need to be sure that
-the data is physically stored on the underlying disk, use
-.BR fsync (2).
-(It will depend on the disk hardware at this point.)
-.P
-The close-on-exec file descriptor flag can be used to ensure
-that a file descriptor is automatically closed upon a successful
-.BR execve (2);
-see
-.BR fcntl (2)
-for details.
-.\"
-.SS Multithreaded processes and close()
-It is probably unwise to close file descriptors while
-they may be in use by system calls in
-other threads in the same process.
-Since a file descriptor may be reused,
-there are some obscure race conditions
-that may cause unintended side effects.
-.\" Date: Tue, 4 Sep 2007 13:57:35 +0200
-.\" From: Fredrik Noring <noring@nocrew.org>
-.\" One such race involves signals and ERESTARTSYS. If a file descriptor
-.\" in use by a system call is closed and then reused by e.g. an
-.\" independent open() in some unrelated thread, before the original system
-.\" call has restarted after ERESTARTSYS, the original system call will
-.\" later restart with the reused file descriptor. This is most likely a
-.\" serious programming error.
-.P
-Furthermore, consider the following scenario where two threads are
-performing operations on the same file descriptor:
-.IP (1) 5
-One thread is blocked in an I/O system call on the file descriptor.
-For example, it is trying to
-.BR write (2)
-to a pipe that is already full, or trying to
-.BR read (2)
-from a stream socket which currently has no available data.
-.IP (2)
-Another thread closes the file descriptor.
-.P
-The behavior in this situation varies across systems.
-On some systems, when the file descriptor is closed,
-the blocking system call returns immediately with an error.
-.P
-On Linux (and possibly some other systems), the behavior is different:
-the blocking I/O system call holds a reference to the underlying
-open file description, and this reference keeps the description open
-until the I/O system call completes.
-.\" 'struct file' in kernel-speak
-(See
-.BR open (2)
-for a discussion of open file descriptions.)
-Thus, the blocking system call in the first thread may successfully
-complete after the
-.BR close ()
-in the second thread.
-.\"
-.SS Dealing with error returns from close()
-A careful programmer will check the return value of
-.BR close (),
-since it is quite possible that errors on a previous
-.BR write (2)
-operation are reported only on the final
-.BR close ()
-that releases the open file description.
-Failing to check the return value when closing a file may lead to
-.I silent
-loss of data.
-This can especially be observed with NFS and with disk quota.
-.P
-Note, however, that a failure return should be used only for
-diagnostic purposes (i.e., a warning to the application that there
-may still be I/O pending or there may have been failed I/O)
-or remedial purposes
-(e.g., writing the file once more or creating a backup).
-.P
-Retrying the
-.BR close ()
-after a failure return is the wrong thing to do,
-.\" The file descriptor is released early in close();
-.\" close() ==> __close_fd():
-.\" __put_unused_fd() ==> __clear_open_fd()
-.\" return filp_close(file, files);
-.\"
-.\" The errors are returned by filp_close() after the FD has been
-.\" cleared for re-use.
-since this may cause a reused file descriptor
-from another thread to be closed.
-This can occur because the Linux kernel
-.I always
-releases the file descriptor early in the close
-operation, freeing it for reuse;
-the steps that may return an error,
-.\" filp_close()
-such as flushing data to the filesystem or device,
-occur only later in the close operation.
-.P
-Many other implementations similarly always close the file descriptor
-.\" FreeBSD documents this explicitly. From the look of the source code
-.\" SVR4, ancient SunOS, later Solaris, and AIX all do this.
-(except in the case of
-.BR EBADF ,
-meaning that the file descriptor was invalid)
-even if they subsequently report an error on return from
-.BR close ().
-POSIX.1 is currently silent on this point,
-but there are plans to mandate this behavior in the next major release
-.\" Issue 8
-of the standard.
-.P
-A careful programmer who wants to know about I/O errors may precede
-.BR close ()
-with a call to
-.BR fsync (2).
-.P
-The
-.B EINTR
-error is a somewhat special case.
-Regarding the
-.B EINTR
-error, POSIX.1-2008 says:
-.P
-.RS
-If
-.BR close ()
-is interrupted by a signal that is to be caught, it shall return \-1 with
-.I errno
-set to
-.B EINTR
-and the state of
-.I fildes
-is unspecified.
-.RE
-.P
-This permits the behavior that occurs on Linux and
-many other implementations, where,
-as with other errors that may be reported by
-.BR close (),
-the file descriptor is guaranteed to be closed.
-However, it also permits another possibility:
-that the implementation returns an
-.B EINTR
-error and keeps the file descriptor open.
-(According to its documentation, HP-UX's
-.BR close ()
-does this.)
-The caller must then once more use
-.BR close ()
-to close the file descriptor, to avoid file descriptor leaks.
-This divergence in implementation behaviors provides
-a difficult hurdle for portable applications, since on many implementations,
-.BR close ()
-must not be called again after an
-.B EINTR
-error, and on at least one,
-.BR close ()
-must be called again.
-There are plans to address this conundrum for
-the next major release of the POSIX.1 standard.
-.\" FIXME . for later review when Issue 8 is one day released...
-.\" POSIX proposes further changes for EINTR
-.\" http://austingroupbugs.net/tag_view_page.php?tag_id=8
-.\" http://austingroupbugs.net/view.php?id=529
-.\"
-.\" FIXME .
-.\" Review the following glibc bug later
-.\" https://sourceware.org/bugzilla/show_bug.cgi?id=14627
-.SH SEE ALSO
-.BR close_range (2),
-.BR fcntl (2),
-.BR fsync (2),
-.BR open (2),
-.BR shutdown (2),
-.BR unlink (2),
-.BR fclose (3)
diff --git a/man2/close_range.2 b/man2/close_range.2
deleted file mode 100644
index c5252d660..000000000
--- a/man2/close_range.2
+++ /dev/null
@@ -1,276 +0,0 @@
-.\" Copyright (c) 2020 Stephen Kitt <steve@sk2.org>
-.\" and Copyright (c) 2021 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH close_range 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-close_range \- close all file descriptors in a given range
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <unistd.h>
-.P
-.BR "#include <linux/close_range.h>" " /* Definition of " CLOSE_RANGE_*
-.BR "" " constants */"
-.P
-.BI "int close_range(unsigned int " first ", unsigned int " last \
-", int " flags );
-.fi
-.SH DESCRIPTION
-The
-.BR close_range ()
-system call closes all open file descriptors from
-.I first
-to
-.I last
-(inclusive).
-.P
-Errors closing a given file descriptor are currently ignored.
-.P
-.I flags
-is a bit mask containing 0 or more of the following:
-.TP
-.BR CLOSE_RANGE_CLOEXEC " (since Linux 5.11)"
-Set the close-on-exec flag on the specified file descriptors,
-rather than immediately closing them.
-.TP
-.B CLOSE_RANGE_UNSHARE
-Unshare the specified file descriptors from any other processes
-before closing them,
-avoiding races with other threads sharing the file descriptor table.
-.SH RETURN VALUE
-On success,
-.BR close_range ()
-returns 0.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I flags
-is not valid, or
-.I first
-is greater than
-.IR last .
-.P
-The following can occur with
-.B CLOSE_RANGE_UNSHARE
-(when constructing the new descriptor table):
-.TP
-.B EMFILE
-The number of open file descriptors exceeds the limit specified in
-.I /proc/sys/fs/nr_open
-(see
-.BR proc (5)).
-This error can occur in situations where that limit was lowered before
-a call to
-.BR close_range ()
-where the
-.B CLOSE_RANGE_UNSHARE
-flag is specified.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.SH STANDARDS
-None.
-.SH HISTORY
-FreeBSD.
-Linux 5.9,
-glibc 2.34.
-.SH NOTES
-.SS Closing all open file descriptors
-.\" 278a5fbaed89dacd04e9d052f4594ffd0e0585de
-To avoid blindly closing file descriptors
-in the range of possible file descriptors,
-this is sometimes implemented (on Linux)
-by listing open file descriptors in
-.I /proc/self/fd/
-and calling
-.BR close (2)
-on each one.
-.BR close_range ()
-can take care of this without requiring
-.I /proc
-and within a single system call,
-which provides significant performance benefits.
-.SS Closing file descriptors before exec
-.\" 60997c3d45d9a67daf01c56d805ae4fec37e0bd8
-File descriptors can be closed safely using
-.P
-.in +4n
-.EX
-/* we don't want anything past stderr here */
-close_range(3, \[ti]0U, CLOSE_RANGE_UNSHARE);
-execve(....);
-.EE
-.in
-.P
-.B CLOSE_RANGE_UNSHARE
-is conceptually equivalent to
-.P
-.in +4n
-.EX
-unshare(CLONE_FILES);
-close_range(first, last, 0);
-.EE
-.in
-.P
-but can be more efficient:
-if the unshared range extends past
-the current maximum number of file descriptors allocated
-in the caller's file descriptor table
-(the common case when
-.I last
-is \[ti]0U),
-the kernel will unshare a new file descriptor table for the caller up to
-.IR first ,
-copying as few file descriptors as possible.
-This avoids subsequent
-.BR close (2)
-calls entirely;
-the whole operation is complete once the table is unshared.
-.SS Closing files on \fBexec\fP
-.\" 582f1fb6b721facf04848d2ca57f34468da1813e
-This is particularly useful in cases where multiple
-.RB pre- exec
-setup steps risk conflicting with each other.
-For example, setting up a
-.BR seccomp (2)
-profile can conflict with a
-.BR close_range ()
-call:
-if the file descriptors are closed before the
-.BR seccomp (2)
-profile is set up,
-the profile setup can't use them itself,
-or control their closure;
-if the file descriptors are closed afterwards,
-the seccomp profile can't block the
-.BR close_range ()
-call or any fallbacks.
-Using
-.B CLOSE_RANGE_CLOEXEC
-avoids this:
-the descriptors can be marked before the
-.BR seccomp (2)
-profile is set up,
-and the profile can control access to
-.BR close_range ()
-without affecting the calling process.
-.SH EXAMPLES
-The program shown below opens the files named in its command-line arguments,
-displays the list of files that it has opened
-(by iterating through the entries in
-.IR /proc/PID/fd ),
-uses
-.BR close_range ()
-to close all file descriptors greater than or equal to 3,
-and then once more displays the process's list of open files.
-The following example demonstrates the use of the program:
-.P
-.in +4n
-.EX
-$ \fBtouch /tmp/a /tmp/b /tmp/c\fP
-$ \fB./a.out /tmp/a /tmp/b /tmp/c\fP
-/tmp/a opened as FD 3
-/tmp/b opened as FD 4
-/tmp/c opened as FD 5
-/proc/self/fd/0 ==> /dev/pts/1
-/proc/self/fd/1 ==> /dev/pts/1
-/proc/self/fd/2 ==> /dev/pts/1
-/proc/self/fd/3 ==> /tmp/a
-/proc/self/fd/4 ==> /tmp/b
-/proc/self/fd/5 ==> /tmp/c
-/proc/self/fd/6 ==> /proc/9005/fd
-========= About to call close_range() =======
-/proc/self/fd/0 ==> /dev/pts/1
-/proc/self/fd/1 ==> /dev/pts/1
-/proc/self/fd/2 ==> /dev/pts/1
-/proc/self/fd/3 ==> /proc/9005/fd
-.EE
-.in
-.P
-Note that the lines showing the pathname
-.I /proc/9005/fd
-result from the calls to
-.BR opendir (3).
-.SS Program source
-\&
-.\" SRC BEGIN (close_range.c)
-.EX
-#define _GNU_SOURCE
-#include <dirent.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-/* Print the target of each symbolic link found in /proc/self/fd */
-\&
-static void
-show_fds(void)
-{
- DIR *dirp;
- char path[PATH_MAX], target[PATH_MAX];
- ssize_t len; /* Length of link target, or negative on error */
- struct dirent *dp;
-\&
- dirp = opendir("/proc/self/fd");
- if (dirp == NULL) {
- perror("opendir");
- exit(EXIT_FAILURE);
- }
-\&
- for (;;) {
- dp = readdir(dirp);
- if (dp == NULL) /* No more directory entries */
- break;
-\&
- if (dp\->d_type == DT_LNK) {
- snprintf(path, sizeof(path), "/proc/self/fd/%s",
- dp\->d_name);
-\&
- len = readlink(path, target, sizeof(target)); /* No null byte added */
- printf("%s ==> %.*s\en", path, len < 0 ? 0 : (int) len, target);
- }
- }
-\&
- closedir(dirp);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
-\&
- for (size_t j = 1; j < argc; j++) { /* Open each file named in argv */
- fd = open(argv[j], O_RDONLY);
- if (fd == \-1) {
- perror(argv[j]);
- exit(EXIT_FAILURE);
- }
- printf("%s opened as FD %d\en", argv[j], fd);
- }
-\&
- show_fds(); /* List the open FDs before the call */
-\&
- printf("========= About to call close_range() =======\en");
-\&
- if (close_range(3, \[ti]0U, 0) == \-1) { /* Close FD 3 and above */
- perror("close_range");
- exit(EXIT_FAILURE);
- }
-\&
- show_fds(); /* List the FDs that remain open */
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR close (2)
diff --git a/man2/connect.2 b/man2/connect.2
deleted file mode 100644
index dc4d51c59..000000000
--- a/man2/connect.2
+++ /dev/null
@@ -1,253 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\" Portions extracted from /usr/include/sys/socket.h, which does not have
-.\" any authorship information in it. It is probably available under the GPL.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\"
-.\" Other portions are from the 6.9 (Berkeley) 3/10/91 man page:
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998, 1999 by Andi Kleen
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH connect 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-connect \- initiate a connection on a socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int connect(int " sockfd ", const struct sockaddr *" addr ,
-.BI " socklen_t " addrlen );
-.fi
-.SH DESCRIPTION
-The
-.BR connect ()
-system call connects the socket referred to by the file descriptor
-.I sockfd
-to the address specified by
-.IR addr .
-The
-.I addrlen
-argument specifies the size of
-.IR addr .
-The format of the address in
-.I addr
-is determined by the address space of the socket
-.IR sockfd ;
-see
-.BR socket (2)
-for further details.
-.P
-If the socket
-.I sockfd
-is of type
-.BR SOCK_DGRAM ,
-then
-.I addr
-is the address to which datagrams are sent by default, and the only
-address from which datagrams are received.
-If the socket is of type
-.B SOCK_STREAM
-or
-.BR SOCK_SEQPACKET ,
-this call attempts to make a connection to the socket that is bound
-to the address specified by
-.IR addr .
-.P
-Some protocol sockets (e.g., UNIX domain stream sockets)
-may successfully
-.BR connect ()
-only once.
-.P
-Some protocol sockets
-(e.g., datagram sockets in the UNIX and Internet domains)
-may use
-.BR connect ()
-multiple times to change their association.
-.P
-Some protocol sockets
-(e.g., TCP sockets as well as datagram sockets in the UNIX and
-Internet domains)
-may dissolve the association by connecting to an address with the
-.I sa_family
-member of
-.I sockaddr
-set to
-.BR AF_UNSPEC ;
-thereafter, the socket can be connected to another address.
-.RB ( AF_UNSPEC
-is supported since Linux 2.2.)
-.SH RETURN VALUE
-If the connection or binding succeeds, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-The following are general socket errors only.
-There may be other domain-specific error codes.
-.TP
-.B EACCES
-For UNIX domain sockets, which are identified by pathname:
-Write permission is denied on the socket file,
-or search permission is denied for one of the directories
-in the path prefix.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EACCES
-.TQ
-.B EPERM
-The user tried to connect to a broadcast address without having the socket
-broadcast flag enabled or the connection request failed because of a local
-firewall rule.
-.TP
-.B EACCES
-It can also be returned if an SELinux policy denied a connection (for
-example, if there is a policy saying that an HTTP proxy can only
-connect to ports associated with HTTP servers, and the proxy tries to
-connect to a different port).
-.TP
-.B EADDRINUSE
-Local address is already in use.
-.TP
-.B EADDRNOTAVAIL
-(Internet domain sockets)
-The socket referred to by
-.I sockfd
-had not previously been bound to an address and,
-upon attempting to bind it to an ephemeral port,
-it was determined that all port numbers in the ephemeral port range
-are currently in use.
-See the discussion of
-.I /proc/sys/net/ipv4/ip_local_port_range
-in
-.BR ip (7).
-.TP
-.B EAFNOSUPPORT
-The passed address didn't have the correct address family in its
-.I sa_family
-field.
-.TP
-.B EAGAIN
-For UNIX domain sockets, the socket is nonblocking, and the
-connection cannot be completed immediately.
-For other socket families, there are insufficient entries in the routing cache.
-.TP
-.B EALREADY
-The socket is nonblocking and a previous connection attempt has not yet
-been completed.
-.TP
-.B EBADF
-.I sockfd
-is not a valid open file descriptor.
-.TP
-.B ECONNREFUSED
-A
-.BR connect ()
-on a stream socket found no one listening on the remote address.
-.TP
-.B EFAULT
-The socket structure address is outside the user's address space.
-.TP
-.B EINPROGRESS
-The socket is nonblocking and the connection cannot be completed immediately.
-(UNIX domain sockets fail with
-.B EAGAIN
-instead.)
-It is possible to
-.BR select (2)
-or
-.BR poll (2)
-for completion by selecting the socket for writing.
-After
-.BR select (2)
-indicates writability, use
-.BR getsockopt (2)
-to read the
-.B SO_ERROR
-option at level
-.B SOL_SOCKET
-to determine whether
-.BR connect ()
-completed successfully
-.RB ( SO_ERROR
-is zero) or unsuccessfully
-.RB ( SO_ERROR
-is one of the usual error codes listed here,
-explaining the reason for the failure).
-.TP
-.B EINTR
-The system call was interrupted by a signal that was caught; see
-.BR signal (7).
-.\" For TCP, the connection will complete asynchronously.
-.\" See http://lkml.org/lkml/2005/7/12/254
-.TP
-.B EISCONN
-The socket is already connected.
-.TP
-.B ENETUNREACH
-Network is unreachable.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.TP
-.B EPROTOTYPE
-The socket type does not support the requested communications protocol.
-This error can occur, for example,
-on an attempt to connect a UNIX domain datagram socket to a stream socket.
-.TP
-.B ETIMEDOUT
-Timeout while attempting connection.
-The server may be too
-busy to accept new connections.
-Note that for IP sockets the timeout may
-be very long when syncookies are enabled on the server.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.4BSD,
-.RB ( connect ()
-first appeared in 4.2BSD).
-.\" SVr4 documents the additional
-.\" general error codes
-.\" .BR EADDRNOTAVAIL ,
-.\" .BR EINVAL ,
-.\" .BR EAFNOSUPPORT ,
-.\" .BR EALREADY ,
-.\" .BR EINTR ,
-.\" .BR EPROTOTYPE ,
-.\" and
-.\" .BR ENOSR .
-.\" It also
-.\" documents many additional error conditions not described here.
-.SH NOTES
-If
-.BR connect ()
-fails, consider the state of the socket as unspecified.
-Portable applications should close the socket and create a new one for
-reconnecting.
-.SH EXAMPLES
-An example of the use of
-.BR connect ()
-is shown in
-.BR getaddrinfo (3).
-.SH SEE ALSO
-.BR accept (2),
-.BR bind (2),
-.BR getsockname (2),
-.BR listen (2),
-.BR socket (2),
-.BR path_resolution (7),
-.BR selinux (8)
diff --git a/man2/copy_file_range.2 b/man2/copy_file_range.2
deleted file mode 100644
index 913d94345..000000000
--- a/man2/copy_file_range.2
+++ /dev/null
@@ -1,307 +0,0 @@
-.\"This manpage is Copyright (C) 2015 Anna Schumaker <Anna.Schumaker@Netapp.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH copy_file_range 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-copy_file_range \- Copy a range of data from one file to another
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #define _GNU_SOURCE
-.B #define _FILE_OFFSET_BITS 64
-.B #include <unistd.h>
-.P
-.BI "ssize_t copy_file_range(int " fd_in ", off_t *_Nullable " off_in ,
-.BI " int " fd_out ", off_t *_Nullable " off_out ,
-.BI " size_t " len ", unsigned int " flags );
-.fi
-.SH DESCRIPTION
-The
-.BR copy_file_range ()
-system call performs an in-kernel copy between two file descriptors
-without the additional cost of transferring data from the kernel to user space
-and then back into the kernel.
-It copies up to
-.I len
-bytes of data from the source file descriptor
-.I fd_in
-to the target file descriptor
-.IR fd_out ,
-overwriting any data that exists within the requested range of the target file.
-.P
-The following semantics apply for
-.IR off_in ,
-and similar statements apply to
-.IR off_out :
-.IP \[bu] 3
-If
-.I off_in
-is NULL, then bytes are read from
-.I fd_in
-starting from the file offset, and the file offset is
-adjusted by the number of bytes copied.
-.IP \[bu]
-If
-.I off_in
-is not NULL, then
-.I off_in
-must point to a buffer that specifies the starting
-offset where bytes from
-.I fd_in
-will be read.
-The file offset of
-.I fd_in
-is not changed, but
-.I off_in
-is adjusted appropriately.
-.P
-.I fd_in
-and
-.I fd_out
-can refer to the same file.
-If they refer to the same file, then the source and target ranges are not
-allowed to overlap.
-.P
-The
-.I flags
-argument is provided to allow for future extensions
-and currently must be set to 0.
-.SH RETURN VALUE
-Upon successful completion,
-.BR copy_file_range ()
-will return the number of bytes copied between files.
-This could be less than the length originally requested.
-If the file offset of
-.I fd_in
-is at or past the end of file, no bytes are copied, and
-.BR copy_file_range ()
-returns zero.
-.P
-On error,
-.BR copy_file_range ()
-returns \-1 and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-One or more file descriptors are not valid.
-.TP
-.B EBADF
-.I fd_in
-is not open for reading; or
-.I fd_out
-is not open for writing.
-.TP
-.B EBADF
-The
-.B O_APPEND
-flag is set for the open file description (see
-.BR open (2))
-referred to by the file descriptor
-.IR fd_out .
-.TP
-.B EFBIG
-An attempt was made to write at a position past the maximum file offset the
-kernel supports.
-.TP
-.B EFBIG
-An attempt was made to write a range that exceeds the allowed maximum file size.
-The maximum file size differs between filesystem implementations and can be
-different from the maximum allowed file offset.
-.TP
-.B EFBIG
-An attempt was made to write beyond the process's file size resource limit.
-This may also result in the process receiving a
-.B SIGXFSZ
-signal.
-.TP
-.B EINVAL
-The
-.I flags
-argument is not 0.
-.TP
-.B EINVAL
-.I fd_in
-and
-.I fd_out
-refer to the same file and the source and target ranges overlap.
-.TP
-.B EINVAL
-Either
-.I fd_in
-or
-.I fd_out
-is not a regular file.
-.TP
-.B EIO
-A low-level I/O error occurred while copying.
-.TP
-.B EISDIR
-Either
-.I fd_in
-or
-.I fd_out
-refers to a directory.
-.TP
-.B ENOMEM
-Out of memory.
-.TP
-.B ENOSPC
-There is not enough space on the target filesystem to complete the copy.
-.TP
-.BR EOPNOTSUPP " (since Linux 5.19)"
-.\" commit 868f9f2f8e004bfe0d3935b1976f625b2924893b
-The filesystem does not support this operation.
-.TP
-.B EOVERFLOW
-The requested source or destination range is too large to represent in the
-specified data types.
-.TP
-.B EPERM
-.I fd_out
-refers to an immutable file.
-.TP
-.B ETXTBSY
-Either
-.I fd_in
-or
-.I fd_out
-refers to an active swap file.
-.TP
-.BR EXDEV " (before Linux 5.3)"
-.\" commit 5dae222a5ff0c269730393018a5539cc970a4726
-The files referred to by
-.IR fd_in " and " fd_out
-are not on the same filesystem.
-.TP
-.BR EXDEV " (since Linux 5.19)"
-.\" commit 868f9f2f8e004bfe0d3935b1976f625b2924893b
-The files referred to by
-.IR fd_in " and " fd_out
-are not on the same filesystem,
-and the source and target filesystems are not of the same type,
-or do not support cross-filesystem copy.
-.SH VERSIONS
-A major rework of the kernel implementation occurred in Linux 5.3.
-Areas of the API that weren't clearly defined were clarified and the API bounds
-are much more strictly checked than on earlier kernels.
-.P
-Since Linux 5.19,
-cross-filesystem copies can be achieved
-when both filesystems are of the same type,
-and that filesystem implements support for it.
-See BUGS for behavior prior to Linux 5.19.
-.P
-Applications should target the behaviour and requirements of Linux 5.19,
-which were also backported to earlier stable kernels.
-.SH STANDARDS
-Linux, GNU.
-.SH HISTORY
-Linux 4.5,
-but glibc 2.27 provides a user-space
-emulation when it is not available.
-.\" https://sourceware.org/git/?p=glibc.git;a=commit;f=posix/unistd.h;h=bad7a0c81f501fbbcc79af9eaa4b8254441c4a1f
-.SH NOTES
-If
-.I fd_in
-is a sparse file, then
-.BR copy_file_range ()
-may expand any holes existing in the requested range.
-Users may benefit from calling
-.BR copy_file_range ()
-in a loop, and using the
-.BR lseek (2)
-.B SEEK_DATA
-and
-.B SEEK_HOLE
-operations to find the locations of data segments.
-.P
-.BR copy_file_range ()
-gives filesystems an opportunity to implement "copy acceleration" techniques,
-such as the use of reflinks (i.e., two or more inodes that share
-pointers to the same copy-on-write disk blocks)
-or server-side-copy (in the case of NFS).
-.P
-.B _FILE_OFFSET_BITS
-should be defined to be 64 in code that uses non-null
-.I off_in
-or
-.I off_out
-or that takes the address of
-.BR copy_file_range ,
-if the code is intended to be portable
-to traditional 32-bit x86 and ARM platforms where
-.BR off_t 's
-width defaults to 32 bits.
-.SH BUGS
-In Linux 5.3 to Linux 5.18,
-cross-filesystem copies were implemented by the kernel,
-if the operation was not supported by individual filesystems.
-However, on some virtual filesystems,
-the call failed to copy, while still reporting success.
-.SH EXAMPLES
-.\" SRC BEGIN (copy_file_range.c)
-.EX
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/stat.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int fd_in, fd_out;
- off_t len, ret; /* Bytes left to copy; result of the last call */
- struct stat stat;
-\&
- if (argc != 3) {
- fprintf(stderr, "Usage: %s <source> <destination>\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- fd_in = open(argv[1], O_RDONLY);
- if (fd_in == \-1) {
- perror("open (argv[1])");
- exit(EXIT_FAILURE);
- }
-\&
- if (fstat(fd_in, &stat) == \-1) {
- perror("fstat");
- exit(EXIT_FAILURE);
- }
-\&
- len = stat.st_size; /* Copy the whole source file */
-\&
- fd_out = open(argv[2], O_CREAT | O_WRONLY | O_TRUNC, 0644);
- if (fd_out == \-1) {
- perror("open (argv[2])");
- exit(EXIT_FAILURE);
- }
-\&
- do { /* NULL offsets: read and write at the file offsets, which advance */
- ret = copy_file_range(fd_in, NULL, fd_out, NULL, len, 0);
- if (ret == \-1) {
- perror("copy_file_range");
- exit(EXIT_FAILURE);
- }
-\&
- len \-= ret; /* A call may copy fewer bytes than requested */
- } while (len > 0 && ret > 0); /* ret == 0: source is at end of file */
-\&
- close(fd_in);
- close(fd_out);
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR lseek (2),
-.BR sendfile (2),
-.BR splice (2)
diff --git a/man2/creat.2 b/man2/creat.2
deleted file mode 100644
index 604e1213a..000000000
--- a/man2/creat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/open.2
diff --git a/man2/create_module.2 b/man2/create_module.2
deleted file mode 100644
index 1121713a8..000000000
--- a/man2/create_module.2
+++ /dev/null
@@ -1,72 +0,0 @@
-.\" Copyright (C) 1996 Free Software Foundation, Inc.
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.\" 2006-02-09, some reformatting by Luc Van Oostenryck; some
-.\" reformatting and rewordings by mtk
-.\"
-.TH create_module 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-create_module \- create a loadable module entry
-.SH SYNOPSIS
-.nf
-.B #include <linux/module.h>
-.P
-.BI "[[deprecated]] caddr_t create_module(const char *" name ", size_t " size );
-.fi
-.SH DESCRIPTION
-.IR Note :
-This system call is present only before Linux 2.6.
-.P
-.BR create_module ()
-attempts to create a loadable module entry and reserve the kernel memory
-that will be needed to hold the module.
-This system call requires privilege.
-.SH RETURN VALUE
-On success, returns the kernel address at which the module will reside.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EEXIST
-A module by that name already exists.
-.TP
-.B EFAULT
-.I name
-is outside the program's accessible address space.
-.TP
-.B EINVAL
-The requested size is too small even for the module header information.
-.TP
-.B ENOMEM
-The kernel could not allocate a contiguous block of memory large
-enough for the module.
-.TP
-.B ENOSYS
-.BR create_module ()
-is not supported in this version of the kernel
-(e.g., Linux 2.6 or later).
-.TP
-.B EPERM
-The caller was not privileged
-(did not have the
-.B CAP_SYS_MODULE
-capability).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Removed in Linux 2.6.
-.\" Removed in Linux 2.5.48
-.P
-This obsolete system call is not supported by glibc.
-No declaration is provided in glibc headers, but, through a quirk of history,
-glibc versions before glibc 2.23 did export an ABI for this system call.
-Therefore, in order to employ this system call,
-it was sufficient to manually declare the interface in your code;
-alternatively, you could invoke the system call using
-.BR syscall (2).
-.SH SEE ALSO
-.BR delete_module (2),
-.BR init_module (2),
-.BR query_module (2)
diff --git a/man2/delete_module.2 b/man2/delete_module.2
deleted file mode 100644
index e9c432e84..000000000
--- a/man2/delete_module.2
+++ /dev/null
@@ -1,205 +0,0 @@
-.\" Copyright (C) 2012 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH delete_module 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-delete_module \- unload a kernel module
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <fcntl.h>" " /* Definition of " O_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_delete_module, const char *" name ", unsigned int " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR delete_module (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR delete_module ()
-system call attempts to remove the unused loadable module entry
-identified by
-.IR name .
-If the module has an
-.I exit
-function, then that function is executed before unloading the module.
-The
-.I flags
-argument is used to modify the behavior of the system call,
-as described below.
-This system call requires privilege.
-.P
-Module removal is attempted according to the following rules:
-.IP (1) 5
-If there are other loaded modules that depend on
-(i.e., refer to symbols defined in) this module,
-then the call fails.
-.IP (2)
-Otherwise, if the reference count for the module
-(i.e., the number of processes currently using the module)
-is zero, then the module is immediately unloaded.
-.IP (3)
-If a module has a nonzero reference count,
-then the behavior depends on the bits set in
-.IR flags .
-In normal usage (see NOTES), the
-.B O_NONBLOCK
-flag is always specified, and the
-.B O_TRUNC
-flag may additionally be specified.
-.\" O_TRUNC == KMOD_REMOVE_FORCE in kmod library
-.\" O_NONBLOCK == KMOD_REMOVE_NOWAIT in kmod library
-.IP
-The various combinations for
-.I flags
-have the following effect:
-.RS
-.TP
-.B flags == O_NONBLOCK
-The call returns immediately, with an error.
-.TP
-.B flags == (O_NONBLOCK | O_TRUNC)
-The module is unloaded immediately,
-regardless of whether it has a nonzero reference count.
-.TP
-.B (flags & O_NONBLOCK) == 0
-If
-.I flags
-does not specify
-.BR O_NONBLOCK ,
-the following steps occur:
-.RS
-.IP \[bu] 3
-The module is marked so that no new references are permitted.
-.IP \[bu]
-If the module's reference count is nonzero,
-the caller is placed in an uninterruptible sleep state
-.RB ( TASK_UNINTERRUPTIBLE )
-until the reference count is zero, at which point the call unblocks.
-.IP \[bu]
-The module is unloaded in the usual way.
-.RE
-.RE
-.P
-The
-.B O_TRUNC
-flag has one further effect on the rules described above.
-By default, if a module has an
-.I init
-function but no
-.I exit
-function, then an attempt to remove the module fails.
-However, if
-.B O_TRUNC
-was specified, this requirement is bypassed.
-.P
-Using the
-.B O_TRUNC
-flag is dangerous!
-If the kernel was not built with
-.BR CONFIG_MODULE_FORCE_UNLOAD ,
-this flag is silently ignored.
-(Normally,
-.B CONFIG_MODULE_FORCE_UNLOAD
-is enabled.)
-Using this flag taints the kernel (TAINT_FORCED_RMMOD).
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBUSY
-The module is not "live"
-(i.e., it is still being initialized or is already marked for removal);
-or, the module has
-an
-.I init
-function but has no
-.I exit
-function, and
-.B O_TRUNC
-was not specified in
-.IR flags .
-.TP
-.B EFAULT
-.I name
-refers to a location outside the process's accessible address space.
-.TP
-.B ENOENT
-No module by that name exists.
-.TP
-.B EPERM
-The caller was not privileged
-(did not have the
-.B CAP_SYS_MODULE
-capability),
-or module unloading is disabled
-(see
-.I /proc/sys/kernel/modules_disabled
-in
-.BR proc (5)).
-.TP
-.B EWOULDBLOCK
-Other modules depend on this module;
-or,
-.B O_NONBLOCK
-was specified in
-.IR flags ,
-but the reference count of this module is nonzero and
-.B O_TRUNC
-was not specified in
-.IR flags .
-.SH STANDARDS
-Linux.
-.SH HISTORY
-The
-.BR delete_module ()
-system call is not supported by glibc.
-No declaration is provided in glibc headers, but, through a quirk of history,
-glibc versions before glibc 2.23 did export an ABI for this system call.
-Therefore, in order to employ this system call,
-it is (before glibc 2.23) sufficient to
-manually declare the interface in your code;
-alternatively, you can invoke the system call using
-.BR syscall (2).
-.SS Linux 2.4 and earlier
-In Linux 2.4 and earlier, the system call took only one argument:
-.P
-.BI " int delete_module(const char *" name );
-.P
-If
-.I name
-is NULL, all unused modules marked auto-clean are removed.
-.P
-Some further details of differences in the behavior of
-.BR delete_module ()
-in Linux 2.4 and earlier are
-.I not
-currently explained in this manual page.
-.SH NOTES
-The uninterruptible sleep that may occur if
-.B O_NONBLOCK
-is omitted from
-.I flags
-is considered undesirable, because the sleeping process is left
-in an unkillable state.
-As at Linux 3.7, specifying
-.B O_NONBLOCK
-is optional, but in future kernels it is likely to become mandatory.
-.SH SEE ALSO
-.BR create_module (2),
-.BR init_module (2),
-.BR query_module (2),
-.BR lsmod (8),
-.BR modprobe (8),
-.BR rmmod (8)
diff --git a/man2/dup.2 b/man2/dup.2
deleted file mode 100644
index 4948e6dd4..000000000
--- a/man2/dup.2
+++ /dev/null
@@ -1,284 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\" and Copyright (C) 2005, 2008 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-21, Rik Faith <faith@cs.unc.edu>
-.\" Modified 1994-08-21, Michael Chastain <mec@shell.portal.com>:
-.\" Fixed typos.
-.\" Modified 1997-01-31, Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2002-09-28, aeb
-.\" 2009-01-12, mtk, reordered text in DESCRIPTION and added some
-.\" details for dup2().
-.\" 2008-10-09, mtk: add description of dup3()
-.\"
-.TH dup 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-dup, dup2, dup3 \- duplicate a file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int dup(int " oldfd );
-.BI "int dup2(int " oldfd ", int " newfd );
-.P
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.BR "#include <fcntl.h>" " /* Definition of " O_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int dup3(int " oldfd ", int " newfd ", int " flags );
-.fi
-.SH DESCRIPTION
-The
-.BR dup ()
-system call allocates a new file descriptor that refers to the same
-open file description as the descriptor
-.IR oldfd .
-(For an explanation of open file descriptions, see
-.BR open (2).)
-The new file descriptor number is guaranteed to be the lowest-numbered
-file descriptor that was unused in the calling process.
-.P
-After a successful return,
-the old and new file descriptors may be used interchangeably.
-Since the two file descriptors refer to the same open file description,
-they share file offset and file status flags;
-for example, if the file offset is modified by using
-.BR lseek (2)
-on one of the file descriptors,
-the offset is also changed for the other file descriptor.
-.P
-The two file descriptors do not share file descriptor flags
-(the close-on-exec flag).
-The close-on-exec flag
-.RB ( FD_CLOEXEC ;
-see
-.BR fcntl (2))
-for the duplicate descriptor is off.
-.\"
-.SS dup2()
-The
-.BR dup2 ()
-system call performs the same task as
-.BR dup (),
-but instead of using the lowest-numbered unused file descriptor,
-it uses the file descriptor number specified in
-.IR newfd .
-In other words,
-the file descriptor
-.I newfd
-is adjusted so that it now refers to the same open file description as
-.IR oldfd .
-.P
-If the file descriptor
-.I newfd
-was previously open, it is closed before being reused;
-the close is performed silently
-(i.e., any errors during the close are not reported by
-.BR dup2 ()).
-.P
-The steps of closing and reusing the file descriptor
-.I newfd
-are performed
-.IR atomically .
-This is important, because trying to implement equivalent functionality using
-.BR close (2)
-and
-.BR dup ()
-would be
-subject to race conditions, whereby
-.I newfd
-might be reused between the two steps.
-Such reuse could happen because the main program is interrupted
-by a signal handler that allocates a file descriptor,
-or because a parallel thread allocates a file descriptor.
-.P
-Note the following points:
-.IP \[bu] 3
-If
-.I oldfd
-is not a valid file descriptor, then the call fails, and
-.I newfd
-is not closed.
-.IP \[bu]
-If
-.I oldfd
-is a valid file descriptor, and
-.I newfd
-has the same value as
-.IR oldfd ,
-then
-.BR dup2 ()
-does nothing, and returns
-.IR newfd .
-.\"
-.SS dup3()
-.BR dup3 ()
-is the same as
-.BR dup2 (),
-except that:
-.IP \[bu] 3
-The caller can force the close-on-exec flag to be set
-for the new file descriptor by specifying
-.B O_CLOEXEC
-in
-.IR flags .
-See the description of the same flag in
-.BR open (2)
-for reasons why this may be useful.
-.IP \[bu]
-.\" Ulrich Drepper, LKML, 2008-10-09:
-.\" We deliberately decided on this change. Otherwise, what is the
-.\" result of dup3(fd, fd, O_CLOEXEC)?
-If
-.I oldfd
-equals
-.IR newfd ,
-then
-.BR dup3 ()
-fails with the error
-.BR EINVAL .
-.SH RETURN VALUE
-On success, these system calls
-return the new file descriptor.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I oldfd
-isn't an open file descriptor.
-.TP
-.B EBADF
-.I newfd
-is out of the allowed range for file descriptors (see the discussion of
-.B RLIMIT_NOFILE
-in
-.BR getrlimit (2)).
-.TP
-.B EBUSY
-(Linux only) This may be returned by
-.BR dup2 ()
-or
-.BR dup3 ()
-during a race condition with
-.BR open (2)
-and
-.BR dup ().
-.TP
-.B EINTR
-The
-.BR dup2 ()
-or
-.BR dup3 ()
-call was interrupted by a signal; see
-.BR signal (7).
-.TP
-.B EINVAL
-.RB ( dup3 ())
-.I flags
-contains an invalid value.
-.TP
-.B EINVAL
-.RB ( dup3 ())
-.I oldfd
-was equal to
-.IR newfd .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached
-(see the discussion of
-.B RLIMIT_NOFILE
-in
-.BR getrlimit (2)).
-.SH STANDARDS
-.TP
-.BR dup ()
-.TQ
-.BR dup2 ()
-POSIX.1-2008.
-.TP
-.BR dup3 ()
-Linux.
-.SH HISTORY
-.TP
-.BR dup ()
-.TQ
-.BR dup2 ()
-POSIX.1-2001, SVr4, 4.3BSD.
-.\" SVr4 documents additional
-.\" EINTR and ENOLINK error conditions. POSIX.1 adds EINTR.
-.\" The EBUSY return is Linux-specific.
-.TP
-.BR dup3 ()
-Linux 2.6.27,
-glibc 2.9.
-.SH NOTES
-The error returned by
-.BR dup2 ()
-is different from that returned by
-.BR fcntl( "..., " F_DUPFD ", ..." )
-when
-.I newfd
-is out of range.
-On some systems,
-.BR dup2 ()
-also sometimes returns
-.B EINVAL
-like
-.BR F_DUPFD .
-.P
-If
-.I newfd
-was open, any errors that would have been reported at
-.BR close (2)
-time are lost.
-If this is of concern,
-then\[em]unless the program is single-threaded and does not allocate
-file descriptors in signal handlers\[em]the correct approach is
-.I not
-to close
-.I newfd
-before calling
-.BR dup2 (),
-because of the race condition described above.
-Instead, code such as the following could be used:
-.P
-.in +4n
-.EX
-/* Obtain a duplicate of \[aq]newfd\[aq] that can subsequently
- be used to check for close() errors; an EBADF error
- means that \[aq]newfd\[aq] was not open. */
-\&
-tmpfd = dup(newfd);
-if (tmpfd == \-1 && errno != EBADF) {
- /* Handle unexpected dup() error. */
-}
-\&
-/* Atomically duplicate \[aq]oldfd\[aq] on \[aq]newfd\[aq]. */
-\&
-if (dup2(oldfd, newfd) == \-1) {
- /* Handle dup2() error. */
-}
-\&
-/* Now check for close() errors on the file originally
- referred to by \[aq]newfd\[aq]. */
-\&
-if (tmpfd != \-1) {
- if (close(tmpfd) == \-1) {
- /* Handle errors from close. */
- }
-}
-.EE
-.in
-.SH SEE ALSO
-.BR close (2),
-.BR fcntl (2),
-.BR open (2),
-.BR pidfd_getfd (2)
diff --git a/man2/dup2.2 b/man2/dup2.2
deleted file mode 100644
index 49a65c65d..000000000
--- a/man2/dup2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/dup.2
diff --git a/man2/dup3.2 b/man2/dup3.2
deleted file mode 100644
index 49a65c65d..000000000
--- a/man2/dup3.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/dup.2
diff --git a/man2/epoll_create.2 b/man2/epoll_create.2
deleted file mode 100644
index f0327e8ba..000000000
--- a/man2/epoll_create.2
+++ /dev/null
@@ -1,144 +0,0 @@
-.\" Copyright (C) 2003 Davide Libenzi
-.\" and Copyright 2008, 2009, 2012 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Davide Libenzi <davidel@xmailserver.org>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2005-04-04 by Marko Kohtala <marko.kohtala@gmail.com>
-.\" 2008-10-10, mtk: add description of epoll_create1()
-.\"
-.TH epoll_create 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-epoll_create, epoll_create1 \- open an epoll file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/epoll.h>
-.P
-.BI "int epoll_create(int " size );
-.BI "int epoll_create1(int " flags );
-.fi
-.SH DESCRIPTION
-.BR epoll_create ()
-creates a new
-.BR epoll (7)
-instance.
-Since Linux 2.6.8, the
-.I size
-argument is ignored, but must be greater than zero; see HISTORY.
-.P
-.BR epoll_create ()
-returns a file descriptor referring to the new epoll instance.
-This file descriptor is used for all the subsequent calls to the
-.B epoll
-interface.
-When no longer required, the file descriptor returned by
-.BR epoll_create ()
-should be closed by using
-.BR close (2).
-When all file descriptors referring to an epoll instance have been closed,
-the kernel destroys the instance
-and releases the associated resources for reuse.
-.SS epoll_create1()
-If
-.I flags
-is 0, then, other than the fact that the obsolete
-.I size
-argument is dropped,
-.BR epoll_create1 ()
-is the same as
-.BR epoll_create ().
-The following value can be included in
-.I flags
-to obtain different behavior:
-.TP
-.B EPOLL_CLOEXEC
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.SH RETURN VALUE
-On success,
-these system calls
-return a file descriptor (a nonnegative integer).
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I size
-is not positive.
-.TP
-.B EINVAL
-.RB ( epoll_create1 ())
-Invalid value specified in
-.IR flags .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOMEM
-There was insufficient memory to create the kernel object.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR epoll_create ()
-Linux 2.6,
-glibc 2.3.2.
-.\" To be precise: kernel 2.5.44.
-.\" The interface should be finalized by Linux kernel 2.5.66.
-.TP
-.BR epoll_create1 ()
-Linux 2.6.27,
-glibc 2.9.
-.P
-In the initial
-.BR epoll_create ()
-implementation, the
-.I size
-argument informed the kernel of the number of file descriptors
-that the caller expected to add to the
-.B epoll
-instance.
-The kernel used this information as a hint for the amount of
-space to initially allocate in internal data structures describing events.
-(If necessary, the kernel would allocate more space
-if the caller's usage exceeded the hint given in
-.IR size .)
-Nowadays,
-this hint is no longer required
-(the kernel dynamically sizes the required data structures
-without needing the hint), but
-.I size
-must still be greater than zero,
-in order to ensure backward compatibility when new
-.B epoll
-applications are run on older kernels.
-.P
-Prior to Linux 2.6.29,
-.\" commit 9df04e1f25effde823a600e755b51475d438f56b
-a
-.I /proc/sys/fs/epoll/max_user_instances
-kernel parameter limited live epolls for each real user ID,
-and caused
-.BR epoll_create ()
-to fail with
-.B EMFILE
-on overrun.
-.SH SEE ALSO
-.BR close (2),
-.BR epoll_ctl (2),
-.BR epoll_wait (2),
-.BR epoll (7)
diff --git a/man2/epoll_create1.2 b/man2/epoll_create1.2
deleted file mode 100644
index 69605b6ef..000000000
--- a/man2/epoll_create1.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/epoll_create.2
diff --git a/man2/epoll_ctl.2 b/man2/epoll_ctl.2
deleted file mode 100644
index 6d5bc032e..000000000
--- a/man2/epoll_ctl.2
+++ /dev/null
@@ -1,429 +0,0 @@
-.\" Copyright (C) 2003 Davide Libenzi
-.\" Davide Libenzi <davidel@xmailserver.org>
-.\" and Copyright 2009, 2014, 2016, 2018, 2019 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH epoll_ctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-epoll_ctl \- control interface for an epoll file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/epoll.h>
-.P
-.BI "int epoll_ctl(int " epfd ", int " op ", int " fd ,
-.BI " struct epoll_event *_Nullable " event );
-.fi
-.SH DESCRIPTION
-This system call is used to add, modify, or remove
-entries in the interest list of the
-.BR epoll (7)
-instance
-referred to by the file descriptor
-.IR epfd .
-It requests that the operation
-.I op
-be performed for the target file descriptor,
-.IR fd .
-.P
-Valid values for the
-.I op
-argument are:
-.TP
-.B EPOLL_CTL_ADD
-Add an entry to the interest list of the epoll file descriptor,
-.IR epfd .
-The entry includes the file descriptor,
-.IR fd ,
-a reference to the corresponding open file description (see
-.BR epoll (7)
-and
-.BR open (2)),
-and the settings specified in
-.IR event .
-.TP
-.B EPOLL_CTL_MOD
-Change the settings associated with
-.I fd
-in the interest list to the new settings specified in
-.IR event .
-.TP
-.B EPOLL_CTL_DEL
-Remove (deregister) the target file descriptor
-.I fd
-from the interest list.
-The
-.I event
-argument is ignored and can be NULL (but see BUGS below).
-.P
-The
-.I event
-argument describes the object linked to the file descriptor
-.IR fd .
-The
-.I struct epoll_event
-is described in
-.BR epoll_event (3type).
-.P
-The
-.I data
-member of the
-.I epoll_event
-structure specifies data that the kernel should save and then return (via
-.BR epoll_wait (2))
-when this file descriptor becomes ready.
-.P
-The
-.I events
-member of the
-.I epoll_event
-structure is a bit mask composed by ORing together zero or more event types,
-returned by
-.BR epoll_wait (2),
-and input flags, which affect its behavior, but aren't returned.
-The available event types are:
-.TP
-.B EPOLLIN
-The associated file is available for
-.BR read (2)
-operations.
-.TP
-.B EPOLLOUT
-The associated file is available for
-.BR write (2)
-operations.
-.TP
-.BR EPOLLRDHUP " (since Linux 2.6.17)"
-Stream socket peer closed connection,
-or shut down writing half of connection.
-(This flag is especially useful for writing simple code to detect
-peer shutdown when using edge-triggered monitoring.)
-.TP
-.B EPOLLPRI
-There is an exceptional condition on the file descriptor.
-See the discussion of
-.B POLLPRI
-in
-.BR poll (2).
-.TP
-.B EPOLLERR
-Error condition happened on the associated file descriptor.
-This event is also reported for the write end of a pipe when the read end
-has been closed.
-.IP
-.BR epoll_wait (2)
-will always report for this event; it is not necessary to set it in
-.I events
-when calling
-.BR epoll_ctl ().
-.TP
-.B EPOLLHUP
-Hang up happened on the associated file descriptor.
-.IP
-.BR epoll_wait (2)
-will always wait for this event; it is not necessary to set it in
-.I events
-when calling
-.BR epoll_ctl ().
-.IP
-Note that when reading from a channel such as a pipe or a stream socket,
-this event merely indicates that the peer closed its end of the channel.
-Subsequent reads from the channel will return 0 (end of file)
-only after all outstanding data in the channel has been consumed.
-.P
-And the available input flags are:
-.TP
-.B EPOLLET
-Requests edge-triggered notification for the associated file descriptor.
-The default behavior for
-.B epoll
-is level-triggered.
-See
-.BR epoll (7)
-for more detailed information about edge-triggered and
-level-triggered notification.
-.TP
-.BR EPOLLONESHOT " (since Linux 2.6.2)"
-Requests one-shot notification for the associated file descriptor.
-This means that after an event is notified for the file descriptor by
-.BR epoll_wait (2),
-the file descriptor is disabled in the interest list and no other events
-will be reported by the
-.B epoll
-interface.
-The user must call
-.BR epoll_ctl ()
-with
-.B EPOLL_CTL_MOD
-to rearm the file descriptor with a new event mask.
-.TP
-.BR EPOLLWAKEUP " (since Linux 3.5)"
-.\" commit 4d7e30d98939a0340022ccd49325a3d70f7e0238
-If
-.B EPOLLONESHOT
-and
-.B EPOLLET
-are clear and the process has the
-.B CAP_BLOCK_SUSPEND
-capability,
-ensure that the system does not enter "suspend" or
-"hibernate" while this event is pending or being processed.
-The event is considered as being "processed" from the time
-when it is returned by a call to
-.BR epoll_wait (2)
-until the next call to
-.BR epoll_wait (2)
-on the same
-.BR epoll (7)
-file descriptor,
-the closure of that file descriptor,
-the removal of the event file descriptor with
-.BR EPOLL_CTL_DEL ,
-or the clearing of
-.B EPOLLWAKEUP
-for the event file descriptor with
-.BR EPOLL_CTL_MOD .
-See also BUGS.
-.TP
-.BR EPOLLEXCLUSIVE " (since Linux 4.5)"
-Sets an exclusive wakeup mode for the epoll file descriptor that is being
-attached to the target file descriptor,
-.IR fd .
-When a wakeup event occurs and multiple epoll file descriptors
-are attached to the same target file using
-.BR EPOLLEXCLUSIVE ,
-one or more of the epoll file descriptors will receive an event with
-.BR epoll_wait (2).
-The default in this scenario (when
-.B EPOLLEXCLUSIVE
-is not set) is for all epoll file descriptors to receive an event.
-.B EPOLLEXCLUSIVE
-is thus useful for avoiding thundering herd problems in certain scenarios.
-.IP
-If the same file descriptor is in multiple epoll instances,
-some with the
-.B EPOLLEXCLUSIVE
-flag, and others without, then events will be provided to all epoll
-instances that did not specify
-.BR EPOLLEXCLUSIVE ,
-and at least one of the epoll instances that did specify
-.BR EPOLLEXCLUSIVE .
-.IP
-The following values may be specified in conjunction with
-.BR EPOLLEXCLUSIVE :
-.BR EPOLLIN ,
-.BR EPOLLOUT ,
-.BR EPOLLWAKEUP ,
-and
-.BR EPOLLET .
-.B EPOLLHUP
-and
-.B EPOLLERR
-can also be specified, but this is not required:
-as usual, these events are always reported if they occur,
-regardless of whether they are specified in
-.IR events .
-Attempts to specify other values in
-.I events
-yield the error
-.BR EINVAL .
-.IP
-.B EPOLLEXCLUSIVE
-may be used only in an
-.B EPOLL_CTL_ADD
-operation; attempts to employ it with
-.B EPOLL_CTL_MOD
-yield an error.
-If
-.B EPOLLEXCLUSIVE
-has been set using
-.BR epoll_ctl (),
-then a subsequent
-.B EPOLL_CTL_MOD
-on the same
-.IR epfd ,\~ fd
-pair yields an error.
-A call to
-.BR epoll_ctl ()
-that specifies
-.B EPOLLEXCLUSIVE
-in
-.I events
-and specifies the target file descriptor
-.I fd
-as an epoll instance will likewise fail.
-The error in all of these cases is
-.BR EINVAL .
-.SH RETURN VALUE
-When successful,
-.BR epoll_ctl ()
-returns zero.
-When an error occurs,
-.BR epoll_ctl ()
-returns \-1 and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I epfd
-or
-.I fd
-is not a valid file descriptor.
-.TP
-.B EEXIST
-.I op
-was
-.BR EPOLL_CTL_ADD ,
-and the supplied file descriptor
-.I fd
-is already registered with this epoll instance.
-.TP
-.B EINVAL
-.I epfd
-is not an
-.B epoll
-file descriptor,
-or
-.I fd
-is the same as
-.IR epfd ,
-or the requested operation
-.I op
-is not supported by this interface.
-.TP
-.B EINVAL
-An invalid event type was specified along with
-.B EPOLLEXCLUSIVE
-in
-.IR events .
-.TP
-.B EINVAL
-.I op
-was
-.B EPOLL_CTL_MOD
-and
-.I events
-included
-.BR EPOLLEXCLUSIVE .
-.TP
-.B EINVAL
-.I op
-was
-.B EPOLL_CTL_MOD
-and the
-.B EPOLLEXCLUSIVE
-flag has previously been applied to this
-.IR epfd ,\~ fd
-pair.
-.TP
-.B EINVAL
-.B EPOLLEXCLUSIVE
-was specified in
-.I event
-and
-.I fd
-refers to an epoll instance.
-.TP
-.B ELOOP
-.I fd
-refers to an epoll instance and this
-.B EPOLL_CTL_ADD
-operation would result in a circular loop of epoll instances
-monitoring one another or a nesting depth of epoll instances
-greater than 5.
-.TP
-.B ENOENT
-.I op
-was
-.B EPOLL_CTL_MOD
-or
-.BR EPOLL_CTL_DEL ,
-and
-.I fd
-is not registered with this epoll instance.
-.TP
-.B ENOMEM
-There was insufficient memory to handle the requested
-.I op
-control operation.
-.TP
-.B ENOSPC
-The limit imposed by
-.I /proc/sys/fs/epoll/max_user_watches
-was encountered while trying to register
-.RB ( EPOLL_CTL_ADD )
-a new file descriptor on an epoll instance.
-See
-.BR epoll (7)
-for further details.
-.TP
-.B EPERM
-The target file
-.I fd
-does not support
-.BR epoll .
-This error can occur if
-.I fd
-refers to, for example, a regular file or a directory.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6,
-.\" To be precise: Linux 2.5.44.
-.\" The interface should be finalized by Linux 2.5.66.
-glibc 2.3.2.
-.SH NOTES
-The
-.B epoll
-interface supports all file descriptors that support
-.BR poll (2).
-.SH BUGS
-Before Linux 2.6.9, the
-.B EPOLL_CTL_DEL
-operation required a non-null pointer in
-.IR event ,
-even though this argument is ignored.
-Since Linux 2.6.9,
-.I event
-can be specified as NULL
-when using
-.BR EPOLL_CTL_DEL .
-Applications that need to be portable to kernels before Linux 2.6.9
-should specify a non-null pointer in
-.IR event .
-.P
-If
-.B EPOLLWAKEUP
-is specified in
-.IR events ,
-but the caller does not have the
-.B CAP_BLOCK_SUSPEND
-capability, then the
-.B EPOLLWAKEUP
-flag is
-.IR "silently ignored" .
-This unfortunate behavior is necessary because no validity
-checks were performed on the
-.I events
-field in the original implementation, and the addition of the
-.B EPOLLWAKEUP
-flag with a check that caused the call to fail if the caller did not have the
-.B CAP_BLOCK_SUSPEND
-capability caused a breakage in at least one existing user-space
-application that happened to randomly (and uselessly) specify this bit.
-.\" commit a8159414d7e3af7233e7a5a82d1c5d85379bd75c (behavior change)
-.\" https://lwn.net/Articles/520198/
-A robust application should therefore double check that it has the
-.B CAP_BLOCK_SUSPEND
-capability if attempting to use the
-.B EPOLLWAKEUP
-flag.
-.SH SEE ALSO
-.BR epoll_create (2),
-.BR epoll_wait (2),
-.BR poll (2),
-.BR epoll (7)
diff --git a/man2/epoll_pwait.2 b/man2/epoll_pwait.2
deleted file mode 100644
index 9282a7064..000000000
--- a/man2/epoll_pwait.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/epoll_wait.2
diff --git a/man2/epoll_pwait2.2 b/man2/epoll_pwait2.2
deleted file mode 100644
index 9282a7064..000000000
--- a/man2/epoll_pwait2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/epoll_wait.2
diff --git a/man2/epoll_wait.2 b/man2/epoll_wait.2
deleted file mode 100644
index 20af75075..000000000
--- a/man2/epoll_wait.2
+++ /dev/null
@@ -1,288 +0,0 @@
-.\" Copyright (C) 2003 Davide Libenzi
-.\" Davide Libenzi <davidel@xmailserver.org>
-.\" and Copyright 2007, 2012, 2014, 2018 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 2007-04-30: mtk, Added description of epoll_pwait()
-.\"
-.TH epoll_wait 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-epoll_wait, epoll_pwait, epoll_pwait2 \-
-wait for an I/O event on an epoll file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/epoll.h>
-.P
-.BI "int epoll_wait(int " epfd ", struct epoll_event *" events ,
-.BI " int " maxevents ", int " timeout );
-.BI "int epoll_pwait(int " epfd ", struct epoll_event *" events ,
-.BI " int " maxevents ", int " timeout ,
-.BI " const sigset_t *_Nullable " sigmask );
-.BI "int epoll_pwait2(int " epfd ", struct epoll_event *" events ,
-.BI " int " maxevents ", \
-const struct timespec *_Nullable " timeout ,
-.BI " const sigset_t *_Nullable " sigmask );
-.fi
-.SH DESCRIPTION
-The
-.BR epoll_wait ()
-system call waits for events on the
-.BR epoll (7)
-instance referred to by the file descriptor
-.IR epfd .
-The buffer pointed to by
-.I events
-is used to return information from the ready list
-about file descriptors in the interest list that
-have some events available.
-Up to
-.I maxevents
-events are returned by
-.BR epoll_wait ().
-The
-.I maxevents
-argument must be greater than zero.
-.P
-The
-.I timeout
-argument specifies the number of milliseconds that
-.BR epoll_wait ()
-will block.
-Time is measured against the
-.B CLOCK_MONOTONIC
-clock.
-.P
-A call to
-.BR epoll_wait ()
-will block until either:
-.IP \[bu] 3
-a file descriptor delivers an event;
-.IP \[bu]
-the call is interrupted by a signal handler; or
-.IP \[bu]
-the timeout expires.
-.P
-Note that the
-.I timeout
-interval will be rounded up to the system clock granularity,
-and kernel scheduling delays mean that the blocking interval
-may overrun by a small amount.
-Specifying a
-.I timeout
-of \-1 causes
-.BR epoll_wait ()
-to block indefinitely, while specifying a
-.I timeout
-equal to zero causes
-.BR epoll_wait ()
-to return immediately, even if no events are available.
-.P
-The
-.I struct epoll_event
-is described in
-.BR epoll_event (3type).
-.P
-The
-.I data
-field of each returned
-.I epoll_event
-structure contains the same data as was specified
-in the most recent call to
-.BR epoll_ctl (2)
-.RB ( EPOLL_CTL_ADD ", " EPOLL_CTL_MOD )
-for the corresponding open file descriptor.
-.P
-The
-.I events
-field is a bit mask that indicates the events that have occurred for the
-corresponding open file description.
-See
-.BR epoll_ctl (2)
-for a list of the bits that may appear in this mask.
-.\"
-.SS epoll_pwait()
-The relationship between
-.BR epoll_wait ()
-and
-.BR epoll_pwait ()
-is analogous to the relationship between
-.BR select (2)
-and
-.BR pselect (2):
-like
-.BR pselect (2),
-.BR epoll_pwait ()
-allows an application to safely wait until either a file descriptor
-becomes ready or until a signal is caught.
-.P
-The following
-.BR epoll_pwait ()
-call:
-.P
-.in +4n
-.EX
-ready = epoll_pwait(epfd, &events, maxevents, timeout, &sigmask);
-.EE
-.in
-.P
-is equivalent to
-.I atomically
-executing the following calls:
-.P
-.in +4n
-.EX
-sigset_t origmask;
-\&
-pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
-ready = epoll_wait(epfd, &events, maxevents, timeout);
-pthread_sigmask(SIG_SETMASK, &origmask, NULL);
-.EE
-.in
-.P
-The
-.I sigmask
-argument may be specified as NULL, in which case
-.BR epoll_pwait ()
-is equivalent to
-.BR epoll_wait ().
-.\"
-.SS epoll_pwait2()
-The
-.BR epoll_pwait2 ()
-system call is equivalent to
-.BR epoll_pwait ()
-except for the
-.I timeout
-argument.
-It takes an argument of type
-.I timespec
-to be able to specify a timeout with nanosecond resolution.
-This argument functions the same as in
-.BR pselect (2)
-and
-.BR ppoll (2).
-If
-.I timeout
-is NULL, then
-.BR epoll_pwait2 ()
-can block indefinitely.
-.SH RETURN VALUE
-On success,
-.BR epoll_wait ()
-returns the number of file descriptors ready for the requested I/O operation,
-or zero if no file descriptor became ready during the requested
-.I timeout
-milliseconds.
-On failure,
-.BR epoll_wait ()
-returns \-1 and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I epfd
-is not a valid file descriptor.
-.TP
-.B EFAULT
-The memory area pointed to by
-.I events
-is not accessible with write permissions.
-.TP
-.B EINTR
-The call was interrupted by a signal handler before either (1) any of the
-requested events occurred or (2) the
-.I timeout
-expired; see
-.BR signal (7).
-.TP
-.B EINVAL
-.I epfd
-is not an
-.B epoll
-file descriptor, or
-.I maxevents
-is less than or equal to zero.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR epoll_wait ()
-Linux 2.6,
-.\" To be precise: Linux 2.5.44.
-.\" The interface should be finalized by Linux 2.5.66.
-glibc 2.3.2.
-.TP
-.BR epoll_pwait ()
-Linux 2.6.19,
-glibc 2.6.
-.TP
-.BR epoll_pwait2 ()
-Linux 5.11.
-.SH NOTES
-While one thread is blocked in a call to
-.BR epoll_wait (),
-it is possible for another thread to add a file descriptor to the waited-upon
-.B epoll
-instance.
-If the new file descriptor becomes ready,
-it will cause the
-.BR epoll_wait ()
-call to unblock.
-.P
-If more than
-.I maxevents
-file descriptors are ready when
-.BR epoll_wait ()
-is called, then successive
-.BR epoll_wait ()
-calls will round robin through the set of ready file descriptors.
-This behavior helps avoid starvation scenarios,
-where a process fails to notice that additional file descriptors
-are ready because it focuses on a set of file descriptors that
-are already known to be ready.
-.P
-Note that it is possible to call
-.BR epoll_wait ()
-on an
-.B epoll
-instance whose interest list is currently empty
-(or whose interest list becomes empty because file descriptors are closed
-or removed from the interest list in another thread).
-The call will block until some file descriptor is later added to the
-interest list (in another thread) and that file descriptor becomes ready.
-.SS C library/kernel differences
-The raw
-.BR epoll_pwait ()
-and
-.BR epoll_pwait2 ()
-system calls have a sixth argument,
-.IR "size_t sigsetsize" ,
-which specifies the size in bytes of the
-.I sigmask
-argument.
-The glibc
-.BR epoll_pwait ()
-wrapper function specifies this argument as a fixed value
-(equal to
-.IR sizeof(sigset_t) ).
-.SH BUGS
-Before Linux 2.6.37, a
-.I timeout
-value larger than approximately
-.I LONG_MAX / HZ
-milliseconds is treated as \-1 (i.e., infinity).
-Thus, for example, on a system where
-.I sizeof(long)
-is 4 and the kernel
-.I HZ
-value is 1000,
-this means that timeouts greater than 35.79 minutes are treated as infinity.
-.SH SEE ALSO
-.BR epoll_create (2),
-.BR epoll_ctl (2),
-.BR epoll (7)
diff --git a/man2/eventfd.2 b/man2/eventfd.2
deleted file mode 100644
index 3b67239ac..000000000
--- a/man2/eventfd.2
+++ /dev/null
@@ -1,446 +0,0 @@
-'\" t
-.\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" starting from a version by Davide Libenzi <davidel@xmailserver.org>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 2008-10-10, mtk: describe eventfd2(), and EFD_NONBLOCK and EFD_CLOEXEC
-.\"
-.TH eventfd 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-eventfd \- create a file descriptor for event notification
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/eventfd.h>
-.P
-.BI "int eventfd(unsigned int " initval ", int " flags );
-.fi
-.SH DESCRIPTION
-.BR eventfd ()
-creates an "eventfd object" that can be used as
-an event wait/notify mechanism by user-space applications,
-and by the kernel to notify user-space applications of events.
-The object contains an unsigned 64-bit integer
-.RI ( uint64_t )
-counter that is maintained by the kernel.
-This counter is initialized with the value specified in the argument
-.IR initval .
-.P
-As its return value,
-.BR eventfd ()
-returns a new file descriptor that can be used to refer to the
-eventfd object.
-.P
-The following values may be bitwise ORed in
-.I flags
-to change the behavior of
-.BR eventfd ():
-.TP
-.BR EFD_CLOEXEC " (since Linux 2.6.27)"
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.TP
-.BR EFD_NONBLOCK " (since Linux 2.6.27)"
-Set the
-.B O_NONBLOCK
-file status flag on the open file description (see
-.BR open (2))
-referred to by the new file descriptor.
-Using this flag saves extra calls to
-.BR fcntl (2)
-to achieve the same result.
-.TP
-.BR EFD_SEMAPHORE " (since Linux 2.6.30)"
-Provide semaphore-like semantics for reads from the new file descriptor.
-See below.
-.P
-Up to Linux 2.6.26, the
-.I flags
-argument is unused, and must be specified as zero.
-.P
-The following operations can be performed on the file descriptor returned by
-.BR eventfd ():
-.TP
-.BR read (2)
-Each successful
-.BR read (2)
-returns an 8-byte integer.
-A
-.BR read (2)
-fails with the error
-.B EINVAL
-if the size of the supplied buffer is less than 8 bytes.
-.IP
-The value returned by
-.BR read (2)
-is in host byte order\[em]that is,
-the native byte order for integers on the host machine.
-.IP
-The semantics of
-.BR read (2)
-depend on whether the eventfd counter currently has a nonzero value
-and whether the
-.B EFD_SEMAPHORE
-flag was specified when creating the eventfd file descriptor:
-.RS
-.IP \[bu] 3
-If
-.B EFD_SEMAPHORE
-was not specified and the eventfd counter has a nonzero value, then a
-.BR read (2)
-returns 8 bytes containing that value,
-and the counter's value is reset to zero.
-.IP \[bu]
-If
-.B EFD_SEMAPHORE
-was specified and the eventfd counter has a nonzero value, then a
-.BR read (2)
-returns 8 bytes containing the value 1,
-and the counter's value is decremented by 1.
-.IP \[bu]
-If the eventfd counter is zero at the time of the call to
-.BR read (2),
-then the call either blocks until the counter becomes nonzero
-(at which time, the
-.BR read (2)
-proceeds as described above)
-or fails with the error
-.B EAGAIN
-if the file descriptor has been made nonblocking.
-.RE
-.TP
-.BR write (2)
-A
-.BR write (2)
-call adds the 8-byte integer value supplied in its
-buffer to the counter.
-The maximum value that may be stored in the counter is the largest
-unsigned 64-bit value minus 1 (i.e., 0xfffffffffffffffe).
-If the addition would cause the counter's value to exceed
-the maximum, then the
-.BR write (2)
-either blocks until a
-.BR read (2)
-is performed on the file descriptor,
-or fails with the error
-.B EAGAIN
-if the file descriptor has been made nonblocking.
-.IP
-A
-.BR write (2)
-fails with the error
-.B EINVAL
-if the size of the supplied buffer is less than 8 bytes,
-or if an attempt is made to write the value 0xffffffffffffffff.
-.TP
-.BR poll (2)
-.TQ
-.BR select (2)
-.TQ
-(and similar)
-The returned file descriptor supports
-.BR poll (2)
-(and analogously
-.BR epoll (7))
-and
-.BR select (2),
-as follows:
-.RS
-.IP \[bu] 3
-The file descriptor is readable
-(the
-.BR select (2)
-.I readfds
-argument; the
-.BR poll (2)
-.B POLLIN
-flag)
-if the counter has a value greater than 0.
-.IP \[bu]
-The file descriptor is writable
-(the
-.BR select (2)
-.I writefds
-argument; the
-.BR poll (2)
-.B POLLOUT
-flag)
-if it is possible to write a value of at least "1" without blocking.
-.IP \[bu]
-If an overflow of the counter value was detected,
-then
-.BR select (2)
-indicates the file descriptor as being both readable and writable, and
-.BR poll (2)
-returns a
-.B POLLERR
-event.
-As noted above,
-.BR write (2)
-can never overflow the counter.
-However, an overflow can occur if 2\[ha]64
-eventfd "signal posts" were performed by the KAIO
-subsystem (theoretically possible, but practically unlikely).
-If an overflow has occurred, then
-.BR read (2)
-will return that maximum
-.I uint64_t
-value (i.e., 0xffffffffffffffff).
-.RE
-.IP
-The eventfd file descriptor also supports the other file-descriptor
-multiplexing APIs:
-.BR pselect (2)
-and
-.BR ppoll (2).
-.TP
-.BR close (2)
-When the file descriptor is no longer required it should be closed.
-When all file descriptors associated with the same eventfd object
-have been closed, the resources for the object are freed by the kernel.
-.P
-A copy of the file descriptor created by
-.BR eventfd ()
-is inherited by the child produced by
-.BR fork (2).
-The duplicate file descriptor is associated with the same
-eventfd object.
-File descriptors created by
-.BR eventfd ()
-are preserved across
-.BR execve (2),
-unless the close-on-exec flag has been set.
-.SH RETURN VALUE
-On success,
-.BR eventfd ()
-returns a new eventfd file descriptor.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-An unsupported value was specified in
-.IR flags .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been
-reached.
-.TP
-.B ENODEV
-.\" Note from Davide:
-.\" The ENODEV error is basically never going to happen if
-.\" the kernel boots correctly. That error happens only if, during
-.\" the kernel initialization, some error occurs in the anonymous
-.\" inode source initialization.
-Could not mount (internal) anonymous inode device.
-.TP
-.B ENOMEM
-There was insufficient memory to create a new
-eventfd file descriptor.
-.SH ATTRIBUTES
-For an explanation of the terms used in this section, see
-.BR attributes (7).
-.TS
-allbox;
-lbx lb lb
-l l l.
-Interface Attribute Value
-T{
-.na
-.nh
-.BR eventfd ()
-T} Thread safety MT-Safe
-.TE
-.SH VERSIONS
-.SS C library/kernel differences
-There are two underlying Linux system calls:
-.BR eventfd ()
-and the more recent
-.BR eventfd2 ().
-The former system call does not implement a
-.I flags
-argument.
-The latter system call implements the
-.I flags
-values described above.
-The glibc wrapper function will use
-.BR eventfd2 ()
-where it is available.
-.SS Additional glibc features
-The GNU C library defines an additional type,
-and two functions that attempt to abstract some of the details of
-reading and writing on an eventfd file descriptor:
-.P
-.in +4n
-.EX
-typedef uint64_t eventfd_t;
-\&
-int eventfd_read(int fd, eventfd_t *value);
-int eventfd_write(int fd, eventfd_t value);
-.EE
-.in
-.P
-The functions perform the read and write operations on an
-eventfd file descriptor,
-returning 0 if the correct number of bytes was transferred,
-or \-1 otherwise.
-.SH STANDARDS
-Linux, GNU.
-.SH HISTORY
-.TP
-.BR eventfd ()
-Linux 2.6.22,
-glibc 2.8.
-.\" eventfd() is in glibc 2.7, but reportedly does not build
-.TP
-.BR eventfd2 ()
-Linux 2.6.27 (see VERSIONS).
-Since glibc 2.9, the
-.BR eventfd ()
-wrapper will employ the
-.BR eventfd2 ()
-system call, if it is supported by the kernel.
-.SH NOTES
-Applications can use an eventfd file descriptor instead of a pipe (see
-.BR pipe (2))
-in all cases where a pipe is used simply to signal events.
-The kernel overhead of an eventfd file descriptor
-is much lower than that of a pipe,
-and only one file descriptor is
-required (versus the two required for a pipe).
-.P
-When used in the kernel, an eventfd
-file descriptor can provide a bridge from kernel to user space, allowing,
-for example, functionalities like KAIO (kernel AIO)
-.\" or eventually syslets/threadlets
-to signal to a file descriptor that some operation is complete.
-.P
-A key point about an eventfd file descriptor is that it can be
-monitored just like any other file descriptor using
-.BR select (2),
-.BR poll (2),
-or
-.BR epoll (7).
-This means that an application can simultaneously monitor the
-readiness of "traditional" files and the readiness of other
-kernel mechanisms that support the eventfd interface.
-(Without the
-.BR eventfd ()
-interface, these mechanisms could not be multiplexed via
-.BR select (2),
-.BR poll (2),
-or
-.BR epoll (7).)
-.P
-The current value of an eventfd counter can be viewed
-via the entry for the corresponding file descriptor in the process's
-.IR /proc/ pid /fdinfo
-directory.
-See
-.BR proc (5)
-for further details.
-.SH EXAMPLES
-The following program creates an eventfd file descriptor
-and then forks to create a child process.
-While the parent briefly sleeps,
-the child writes each of the integers supplied in the program's
-command-line arguments to the eventfd file descriptor.
-When the parent has finished sleeping,
-it reads from the eventfd file descriptor.
-.P
-The following shell session shows a sample run of the program:
-.P
-.in +4n
-.EX
-.RB "$" " ./a.out 1 2 4 7 14"
-Child writing 1 to efd
-Child writing 2 to efd
-Child writing 4 to efd
-Child writing 7 to efd
-Child writing 14 to efd
-Child completed write loop
-Parent about to read
-Parent read 28 (0x1c) from efd
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (eventfd.c)
-.EX
-#include <err.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/eventfd.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int efd;
- uint64_t u;
- ssize_t s;
-\&
- if (argc < 2) {
- fprintf(stderr, "Usage: %s <num>...\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- efd = eventfd(0, 0);
- if (efd == \-1)
- err(EXIT_FAILURE, "eventfd");
-\&
- switch (fork()) {
- case 0:
- for (size_t j = 1; j < argc; j++) {
- printf("Child writing %s to efd\en", argv[j]);
- u = strtoull(argv[j], NULL, 0);
- /* strtoull() allows various bases */
- s = write(efd, &u, sizeof(uint64_t));
- if (s != sizeof(uint64_t))
- err(EXIT_FAILURE, "write");
- }
- printf("Child completed write loop\en");
-\&
- exit(EXIT_SUCCESS);
-\&
- default:
- sleep(2);
-\&
- printf("Parent about to read\en");
- s = read(efd, &u, sizeof(uint64_t));
- if (s != sizeof(uint64_t))
- err(EXIT_FAILURE, "read");
- printf("Parent read %"PRIu64" (%#"PRIx64") from efd\en", u, u);
- exit(EXIT_SUCCESS);
-\&
- case \-1:
- err(EXIT_FAILURE, "fork");
- }
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR futex (2),
-.BR pipe (2),
-.BR poll (2),
-.BR read (2),
-.BR select (2),
-.BR signalfd (2),
-.BR timerfd_create (2),
-.BR write (2),
-.BR epoll (7),
-.BR sem_overview (7)
diff --git a/man2/eventfd2.2 b/man2/eventfd2.2
deleted file mode 100644
index eddfaa8a1..000000000
--- a/man2/eventfd2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/eventfd.2
diff --git a/man2/execve.2 b/man2/execve.2
deleted file mode 100644
index 03dfdcdcf..000000000
--- a/man2/execve.2
+++ /dev/null
@@ -1,889 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\" and Copyright (c) 2006 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-21 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1994-08-21 by Michael Chastain <mec@shell.portal.com>:
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1999-11-12 by Urs Thuermann <urs@isnogud.escape.de>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2006-09-04 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added list of process attributes that are not preserved on exec().
-.\" 2007-09-14 Ollie Wild <aaw@google.com>, mtk
-.\" Add text describing limits on command-line arguments + environment
-.\"
-.TH execve 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-execve \- execute program
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int execve(const char *" pathname ", char *const _Nullable " argv [],
-.BI " char *const _Nullable " envp []);
-.fi
-.SH DESCRIPTION
-.BR execve ()
-executes the program referred to by \fIpathname\fP.
-This causes the program that is currently being run by the calling process
-to be replaced with a new program, with newly initialized stack, heap,
-and (initialized and uninitialized) data segments.
-.P
-\fIpathname\fP must be either a binary executable, or a script
-starting with a line of the form:
-.P
-.in +4n
-.EX
-\fB#!\fP\fIinterpreter \fP[optional-arg]
-.EE
-.in
-.P
-For details of the latter case, see "Interpreter scripts" below.
-.P
-.I argv
-is an array of pointers to strings passed to the new program
-as its command-line arguments.
-By convention, the first of these strings (i.e.,
-.IR argv[0] )
-should contain the filename associated with the file being executed.
-The
-.I argv
-array must be terminated by a null pointer.
-(Thus, in the new program,
-.I argv[argc]
-will be a null pointer.)
-.P
-.I envp
-is an array of pointers to strings, conventionally of the form
-.BR key=value ,
-which are passed as the environment of the new program.
-The
-.I envp
-array must be terminated by a null pointer.
-.P
-This manual page describes the Linux system call in detail;
-for an overview of the nomenclature and the many, often preferable,
-standardised variants of this function provided by libc,
-including ones that search the
-.B PATH
-environment variable, see
-.BR exec (3).
-.P
-The argument vector and environment can be accessed by the
-new program's main function, when it is defined as:
-.P
-.in +4n
-.EX
-int main(int argc, char *argv[], char *envp[])
-.EE
-.in
-.P
-Note, however, that the use of a third argument to the main function
-is not specified in POSIX.1;
-according to POSIX.1,
-the environment should be accessed via the external variable
-.BR environ (7).
-.P
-.BR execve ()
-does not return on success, and the text, initialized data,
-uninitialized data (bss), and stack of the calling process are overwritten
-according to the contents of the newly loaded program.
-.P
-If the current program is being ptraced, a \fBSIGTRAP\fP signal is sent to it
-after a successful
-.BR execve ().
-.P
-If the set-user-ID bit is set on the program file referred to by
-\fIpathname\fP,
-then the effective user ID of the calling process is changed
-to that of the owner of the program file.
-Similarly, if the set-group-ID bit is set on the program file,
-then the effective group ID of the calling
-process is set to the group of the program file.
-.P
-The aforementioned transformations of the effective IDs are
-.I not
-performed (i.e., the set-user-ID and set-group-ID bits are ignored)
-if any of the following is true:
-.IP \[bu] 3
-the
-.I no_new_privs
-attribute is set for the calling thread (see
-.BR prctl (2));
-.IP \[bu]
-the underlying filesystem is mounted
-.I nosuid
-(the
-.B MS_NOSUID
-flag for
-.BR mount (2));
-or
-.IP \[bu]
-the calling process is being ptraced.
-.P
-The capabilities of the program file (see
-.BR capabilities (7))
-are also ignored if any of the above are true.
-.P
-The effective user ID of the process is copied to the saved set-user-ID;
-similarly, the effective group ID is copied to the saved set-group-ID.
-This copying takes place after any effective ID changes that occur
-because of the set-user-ID and set-group-ID mode bits.
-.P
-The process's real UID and real GID, as well as its supplementary group IDs,
-are unchanged by a call to
-.BR execve ().
-.P
-If the executable is an a.out dynamically linked
-binary executable containing
-shared-library stubs, the Linux dynamic linker
-.BR ld.so (8)
-is called at the start of execution to bring
-needed shared objects into memory
-and link the executable with them.
-.P
-If the executable is a dynamically linked ELF executable, the
-interpreter named in the PT_INTERP segment is used to load the needed
-shared objects.
-This interpreter is typically
-.I /lib/ld\-linux.so.2
-for binaries linked with glibc (see
-.BR ld\-linux.so (8)).
-.\"
-.SS Effect on process attributes
-All process attributes are preserved during an
-.BR execve (),
-except the following:
-.IP \[bu] 3
-The dispositions of any signals that are being caught are
-reset to the default
-.RB ( signal (7)).
-.IP \[bu]
-Any alternate signal stack is not preserved
-.RB ( sigaltstack (2)).
-.IP \[bu]
-Memory mappings are not preserved
-.RB ( mmap (2)).
-.IP \[bu]
-Attached System\ V shared memory segments are detached
-.RB ( shmat (2)).
-.IP \[bu]
-POSIX shared memory regions are unmapped
-.RB ( shm_open (3)).
-.IP \[bu]
-Open POSIX message queue descriptors are closed
-.RB ( mq_overview (7)).
-.IP \[bu]
-Any open POSIX named semaphores are closed
-.RB ( sem_overview (7)).
-.IP \[bu]
-POSIX timers are not preserved
-.RB ( timer_create (2)).
-.IP \[bu]
-Any open directory streams are closed
-.RB ( opendir (3)).
-.IP \[bu]
-Memory locks are not preserved
-.RB ( mlock (2),
-.BR mlockall (2)).
-.IP \[bu]
-Exit handlers are not preserved
-.RB ( atexit (3),
-.BR on_exit (3)).
-.IP \[bu]
-The floating-point environment is reset to the default (see
-.BR fenv (3)).
-.P
-The process attributes in the preceding list are all specified
-in POSIX.1.
-The following Linux-specific process attributes are also
-not preserved during an
-.BR execve ():
-.IP \[bu] 3
-The process's "dumpable" attribute is set to the value 1,
-unless a set-user-ID program, a set-group-ID program,
-or a program with capabilities is being executed,
-in which case the dumpable flag may instead be reset to the value in
-.IR /proc/sys/fs/suid_dumpable ,
-in the circumstances described under
-.B PR_SET_DUMPABLE
-in
-.BR prctl (2).
-Note that changes to the "dumpable" attribute may cause ownership
-of files in the process's
-.IR /proc/ pid
-directory to change to
-.IR root:root ,
-as described in
-.BR proc (5).
-.IP \[bu]
-The
-.BR prctl (2)
-.B PR_SET_KEEPCAPS
-flag is cleared.
-.IP \[bu]
-(Since Linux 2.4.36 / 2.6.23)
-If a set-user-ID or set-group-ID program is being executed,
-then the parent death signal set by the
-.BR prctl (2)
-.B PR_SET_PDEATHSIG
-operation is cleared.
-.IP \[bu]
-The process name, as set by
-.BR prctl (2)
-.B PR_SET_NAME
-(and displayed by
-.IR "ps\ \-o comm" ),
-is reset to the name of the new executable file.
-.IP \[bu]
-The
-.B SECBIT_KEEP_CAPS
-.I securebits
-flag is cleared.
-See
-.BR capabilities (7).
-.IP \[bu]
-The termination signal is reset to
-.B SIGCHLD
-(see
-.BR clone (2)).
-.IP \[bu]
-The file descriptor table is unshared, undoing the effect of the
-.B CLONE_FILES
-flag of
-.BR clone (2).
-.P
-Note the following further points:
-.IP \[bu] 3
-All threads other than the calling thread are destroyed during an
-.BR execve ().
-Mutexes, condition variables, and other pthreads objects are not preserved.
-.IP \[bu]
-The equivalent of \fIsetlocale(LC_ALL, "C")\fP
-is executed at program start-up.
-.IP \[bu]
-POSIX.1 specifies that the dispositions of any signals that
-are ignored or set to the default are left unchanged.
-POSIX.1 specifies one exception: if
-.B SIGCHLD
-is being ignored,
-then an implementation may leave the disposition unchanged or
-reset it to the default; Linux does the former.
-.IP \[bu]
-Any outstanding asynchronous I/O operations are canceled
-.RB ( aio_read (3),
-.BR aio_write (3)).
-.IP \[bu]
-For the handling of capabilities during
-.BR execve (),
-see
-.BR capabilities (7).
-.IP \[bu]
-By default, file descriptors remain open across an
-.BR execve ().
-File descriptors that are marked close-on-exec are closed;
-see the description of
-.B FD_CLOEXEC
-in
-.BR fcntl (2).
-(If a file descriptor is closed, this will cause the release
-of all record locks obtained on the underlying file by this process.
-See
-.BR fcntl (2)
-for details.)
-POSIX.1 says that if file descriptors 0, 1, and 2 would
-otherwise be closed after a successful
-.BR execve (),
-and the process would gain privilege because the set-user-ID or
-set-group-ID mode bit was set on the executed file,
-then the system may open an unspecified file for each of these
-file descriptors.
-As a general principle, no portable program, whether privileged or not,
-can assume that these three file descriptors will remain
-closed across an
-.BR execve ().
-.\" On Linux it appears that these file descriptors are
-.\" always open after an execve(), and it looks like
-.\" Solaris 8 and FreeBSD 6.1 are the same. -- mtk, 30 Apr 2007
-.SS Interpreter scripts
-An interpreter script is a text file that has execute
-permission enabled and whose first line is of the form:
-.P
-.in +4n
-.EX
-\fB#!\fP\fIinterpreter \fP[optional-arg]
-.EE
-.in
-.P
-The
-.I interpreter
-must be a valid pathname for an executable file.
-.P
-If the
-.I pathname
-argument of
-.BR execve ()
-specifies an interpreter script, then
-.I interpreter
-will be invoked with the following arguments:
-.P
-.in +4n
-.EX
-\fIinterpreter\fP [optional-arg] \fIpathname\fP arg...
-.EE
-.in
-.P
-where
-.I pathname
-is the pathname of the file specified as the first argument of
-.BR execve (),
-and
-.I arg...
-is the series of words pointed to by the
-.I argv
-argument of
-.BR execve (),
-starting at
-.IR argv[1] .
-Note that there is no way to get the
-.I argv[0]
-that was passed to the
-.BR execve ()
-call.
-.\" See the P - preserve-argv[0] option.
-.\" Documentation/admin-guide/binfmt-misc.rst
-.\" https://www.kernel.org/doc/html/latest/admin-guide/binfmt-misc.html
-.P
-For portable use,
-.I optional-arg
-should either be absent, or be specified as a single word (i.e., it
-should not contain white space); see NOTES below.
-.P
-Since Linux 2.6.28,
-.\" commit bf2a9a39639b8b51377905397a5005f444e9a892
-the kernel permits the interpreter of a script to itself be a script.
-This permission is recursive, up to a limit of four recursions,
-so that the interpreter may be a script which is interpreted by a script,
-and so on.
-.SS Limits on size of arguments and environment
-Most UNIX implementations impose some limit on the total size
-of the command-line argument
-.RI ( argv )
-and environment
-.RI ( envp )
-strings that may be passed to a new program.
-POSIX.1 allows an implementation to advertise this limit using the
-.B ARG_MAX
-constant (either defined in
-.I <limits.h>
-or available at run time using the call
-.IR "sysconf(_SC_ARG_MAX)" ).
-.P
-Before Linux 2.6.23, the memory used to store the
-environment and argument strings was limited to 32 pages
-(defined by the kernel constant
-.BR MAX_ARG_PAGES ).
-On architectures with a 4-kB page size,
-this yields a maximum size of 128\ kB.
-.P
-On Linux 2.6.23 and later, most architectures support a size limit
-derived from the soft
-.B RLIMIT_STACK
-resource limit (see
-.BR getrlimit (2))
-that is in force at the time of the
-.BR execve ()
-call.
-(Architectures with no memory management unit are excepted:
-they maintain the limit that was in effect before Linux 2.6.23.)
-This change allows programs to have a much larger
-argument and/or environment list.
-.\" For some background on the changes to ARG_MAX in Linux 2.6.23 and
-.\" Linux 2.6.25, see:
-.\" http://sourceware.org/bugzilla/show_bug.cgi?id=5786
-.\" http://bugzilla.kernel.org/show_bug.cgi?id=10095
-.\" http://thread.gmane.org/gmane.linux.kernel/646709/focus=648101,
-.\" checked into Linux 2.6.25 as commit a64e715fc74b1a7dcc5944f848acc38b2c4d4ee2.
-For these architectures, the total size is limited to 1/4 of the allowed
-stack size.
-(Imposing the 1/4-limit
-ensures that the new program always has some stack space.)
-.\" Ollie: That doesn't include the lists of pointers, though,
-.\" so the actual usage is a bit higher (1 pointer per argument).
-Additionally, the total size is limited to 3/4 of the value
-of the kernel constant
-.B _STK_LIM
-(8 MiB).
-Since Linux 2.6.25,
-the kernel also places a floor of 32 pages on this size limit,
-so that, even when
-.B RLIMIT_STACK
-is set very low,
-applications are guaranteed to have at least as much argument and
-environment space as was provided by Linux 2.6.22 and earlier.
-(This guarantee was not provided in Linux 2.6.23 and 2.6.24.)
-Additionally, the limit per string is 32 pages (the kernel constant
-.BR MAX_ARG_STRLEN ),
-and the maximum number of strings is 0x7FFFFFFF.
-.SH RETURN VALUE
-On success,
-.BR execve ()
-does not return; on error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B E2BIG
-The total number of bytes in the environment
-.RI ( envp )
-and argument list
-.RI ( argv )
-is too large,
-an argument or environment string is too long,
-or the full
-.I pathname
-of the executable is too long.
-The terminating null byte is counted as part of the string length.
-.TP
-.B EACCES
-Search permission is denied on a component of the path prefix of
-.I pathname
-or the name of a script interpreter.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EACCES
-The file or a script interpreter is not a regular file.
-.TP
-.B EACCES
-Execute permission is denied for the file or a script or ELF interpreter.
-.TP
-.B EACCES
-The filesystem is mounted
-.IR noexec .
-.TP
-.BR EAGAIN " (since Linux 3.1)"
-.\" commit 72fa59970f8698023045ab0713d66f3f4f96945c
-Having changed its real UID using one of the
-.BR set*uid ()
-calls, the caller was\[em]and is now still\[em]above its
-.B RLIMIT_NPROC
-resource limit (see
-.BR setrlimit (2)).
-For a more detailed explanation of this error, see NOTES.
-.TP
-.B EFAULT
-.I pathname
-or one of the pointers in the vectors
-.I argv
-or
-.I envp
-points outside your accessible address space.
-.TP
-.B EINVAL
-An ELF executable had more than one PT_INTERP segment (i.e., tried to
-name more than one interpreter).
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B EISDIR
-An ELF interpreter was a directory.
-.TP
-.B ELIBBAD
-An ELF interpreter was not in a recognized format.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.I pathname
-or the name of a script or ELF interpreter.
-.TP
-.B ELOOP
-The maximum recursion limit was reached during recursive script
-interpretation (see "Interpreter scripts", above).
-Before Linux 3.8,
-.\" commit d740269867021faf4ce38a449353d2b986c34a67
-the error produced for this case was
-.BR ENOEXEC .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENAMETOOLONG
-.I pathname
-is too long.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOENT
-The file
-.I pathname
-or a script or ELF interpreter does not exist.
-.TP
-.B ENOEXEC
-An executable is not in a recognized format, is for the wrong
-architecture, or has some other format error that means it cannot be
-executed.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component of the path prefix of
-.I pathname
-or a script or ELF interpreter is not a directory.
-.TP
-.B EPERM
-The filesystem is mounted
-.IR nosuid ,
-the user is not the superuser,
-and the file has the set-user-ID or set-group-ID bit set.
-.TP
-.B EPERM
-The process is being traced, the user is not the superuser and the
-file has the set-user-ID or set-group-ID bit set.
-.TP
-.B EPERM
-A "capability-dumb" application would not obtain the full set of
-permitted capabilities granted by the executable file.
-See
-.BR capabilities (7).
-.TP
-.B ETXTBSY
-The specified executable was open for writing by one or more processes.
-.SH VERSIONS
-POSIX does not document the #! behavior, but it exists
-(with some variations) on other UNIX systems.
-.P
-On Linux,
-.I argv
-and
-.I envp
-can be specified as NULL.
-In both cases, this has the same effect as specifying the argument
-as a pointer to a list containing a single null pointer.
-.B "Do not take advantage of this nonstandard and nonportable misfeature!"
-On many other UNIX systems, specifying
-.I argv
-as NULL will result in an error
-.RB ( EFAULT ).
-.I Some
-other UNIX systems treat the
-.I envp==NULL
-case the same as Linux.
-.\" e.g., EFAULT on Solaris 8 and FreeBSD 6.1; but
-.\" HP-UX 11 is like Linux -- mtk, Apr 2007
-.\" Bug filed 30 Apr 2007: http://bugzilla.kernel.org/show_bug.cgi?id=8408
-.\" Bug rejected (because fix would constitute an ABI change).
-.\"
-.P
-POSIX.1 says that values returned by
-.BR sysconf (3)
-should be invariant over the lifetime of a process.
-However, since Linux 2.6.23, if the
-.B RLIMIT_STACK
-resource limit changes, then the value reported by
-.B _SC_ARG_MAX
-will also change,
-to reflect the fact that the limit on space for holding
-command-line arguments and environment variables has changed.
-.\"
-.SS Interpreter scripts
-The kernel imposes a maximum length on the text that follows the
-"#!" characters at the start of a script;
-characters beyond the limit are ignored.
-Before Linux 5.1, the limit is 127 characters.
-Since Linux 5.1,
-.\" commit 6eb3c3d0a52dca337e327ae8868ca1f44a712e02
-the limit is 255 characters.
-.P
-The semantics of the
-.I optional-arg
-argument of an interpreter script vary across implementations.
-On Linux, the entire string following the
-.I interpreter
-name is passed as a single argument to the interpreter,
-and this string can include white space.
-However, behavior differs on some other systems.
-Some systems
-.\" e.g., Solaris 8
-use the first white space to terminate
-.IR optional-arg .
-On some systems,
-.\" e.g., FreeBSD before 6.0, but not FreeBSD 6.0 onward
-an interpreter script can have multiple arguments,
-and white spaces in
-.I optional-arg
-are used to delimit the arguments.
-.P
-Linux (like most other modern UNIX systems)
-ignores the set-user-ID and set-group-ID bits on scripts.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.\" SVr4 documents additional error
-.\" conditions EAGAIN, EINTR, ELIBACC, ENOLINK, EMULTIHOP; POSIX does not
-.\" document ETXTBSY, EPERM, EFAULT, ELOOP, EIO, ENFILE, EMFILE, EINVAL,
-.\" EISDIR or ELIBBAD error conditions.
-.P
-With UNIX\ V6, the argument list of an
-.BR exec ()
-call was ended by 0,
-while the argument list of
-.I main
-was ended by \-1.
-Thus, this argument list was not directly usable in a further
-.BR exec ()
-call.
-Since UNIX\ V7, both are NULL.
-.SH NOTES
-One sometimes sees
-.BR execve ()
-(and the related functions described in
-.BR exec (3))
-described as "executing a
-.I new
-process" (or similar).
-This is a highly misleading description:
-there is no new process;
-many attributes of the calling process remain unchanged
-(in particular, its PID).
-All that
-.BR execve ()
-does is arrange for an existing process (the calling process)
-to execute a new program.
-.P
-Set-user-ID and set-group-ID processes cannot be
-.BR ptrace (2)d.
-.P
-The result of mounting a filesystem
-.I nosuid
-varies across Linux kernel versions:
-some will refuse execution of set-user-ID and set-group-ID
-executables when this would
-give the user powers they did not have already (and return
-.BR EPERM ),
-some will just ignore the set-user-ID and set-group-ID bits and
-.BR exec ()
-successfully.
-.P
-In most cases where
-.BR execve ()
-fails, control returns to the original executable image,
-and the caller of
-.BR execve ()
-can then handle the error.
-However, in (rare) cases (typically caused by resource exhaustion),
-failure may occur past the point of no return:
-the original executable image has been torn down,
-but the new image could not be completely built.
-In such cases, the kernel kills the process with a
-.\" commit 19d860a140beac48a1377f179e693abe86a9dac9
-.B SIGSEGV
-.RB ( SIGKILL
-until Linux 3.17)
-signal.
-.SS execve() and EAGAIN
-A more detailed explanation of the
-.B EAGAIN
-error that can occur (since Linux 3.1) when calling
-.BR execve ()
-is as follows.
-.P
-The
-.B EAGAIN
-error can occur when a
-.I preceding
-call to
-.BR setuid (2),
-.BR setreuid (2),
-or
-.BR setresuid (2)
-caused the real user ID of the process to change,
-and that change caused the process to exceed its
-.B RLIMIT_NPROC
-resource limit (i.e., the number of processes belonging
-to the new real UID exceeds the resource limit).
-From Linux 2.6.0 to Linux 3.0, this caused the
-.BR set*uid ()
-call to fail.
-(Before Linux 2.6,
-.\" commit 909cc4ae86f3380152a18e2a3c44523893ee11c4
-the resource limit was not imposed on processes that
-changed their user IDs.)
-.P
-Since Linux 3.1, the scenario just described no longer causes the
-.BR set*uid ()
-call to fail,
-because it too often led to security holes where buggy applications
-didn't check the return status and assumed
-that\[em]if the caller had root privileges\[em]the call would always succeed.
-Instead, the
-.BR set*uid ()
-calls now successfully change the real UID,
-but the kernel sets an internal flag, named
-.BR PF_NPROC_EXCEEDED ,
-to note that the
-.B RLIMIT_NPROC
-resource limit has been exceeded.
-If the
-.B PF_NPROC_EXCEEDED
-flag is set and the resource limit is still
-exceeded at the time of a subsequent
-.BR execve ()
-call, that call fails with the error
-.BR EAGAIN .
-This kernel logic ensures that the
-.B RLIMIT_NPROC
-resource limit is still enforced for the
-common privileged daemon workflow\[em]namely,
-.BR fork (2)
-+
-.BR set*uid ()
-+
-.BR execve ().
-.P
-If the resource limit was not still exceeded at the time of the
-.BR execve ()
-call
-(because other processes belonging to this real UID terminated between the
-.BR set*uid ()
-call and the
-.BR execve ()
-call), then the
-.BR execve ()
-call succeeds and the kernel clears the
-.B PF_NPROC_EXCEEDED
-process flag.
-The flag is also cleared if a subsequent call to
-.BR fork (2)
-by this process succeeds.
-.\"
-.\" .SH BUGS
-.\" Some Linux versions have failed to check permissions on ELF
-.\" interpreters. This is a security hole, because it allows users to
-.\" open any file, such as a rewinding tape device, for reading. Some
-.\" Linux versions have also had other security holes in
-.\" .BR execve ()
-.\" that could be exploited for denial of service by a suitably crafted
-.\" ELF binary. There are no known problems with Linux 2.0.34 or Linux 2.2.15.
-.SH EXAMPLES
-The following program is designed to be execed by the second program below.
-It just echoes its command-line arguments, one per line.
-.P
-.in +4n
-.\" SRC BEGIN (myecho.c)
-.EX
-/* myecho.c */
-\&
-#include <stdio.h>
-#include <stdlib.h>
-\&
-int
-main(int argc, char *argv[])
-{
- for (size_t j = 0; j < argc; j++)
- printf("argv[%zu]: %s\en", j, argv[j]);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.in
-.P
-This program can be used to exec the program named in its command-line
-argument:
-.P
-.in +4n
-.\" SRC BEGIN (execve.c)
-.EX
-/* execve.c */
-\&
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- static char *newargv[] = { NULL, "hello", "world", NULL };
- static char *newenviron[] = { NULL };
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <file\-to\-exec>\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- newargv[0] = argv[1];
-\&
- execve(argv[1], newargv, newenviron);
- perror("execve"); /* execve() returns only on error */
- exit(EXIT_FAILURE);
-}
-.EE
-.\" SRC END
-.in
-.P
-We can use the second program to exec the first as follows:
-.P
-.in +4n
-.EX
-.RB "$" " cc myecho.c \-o myecho"
-.RB "$" " cc execve.c \-o execve"
-.RB "$" " ./execve ./myecho"
-argv[0]: ./myecho
-argv[1]: hello
-argv[2]: world
-.EE
-.in
-.P
-We can also use these programs to demonstrate the use of a script
-interpreter.
-To do this we create a script whose "interpreter" is our
-.I myecho
-program:
-.P
-.in +4n
-.EX
-.RB "$" " cat > script"
-.B #!./myecho script\-arg
-.B \[ha]D
-.RB "$" " chmod +x script"
-.EE
-.in
-.P
-We can then use our program to exec the script:
-.P
-.in +4n
-.EX
-.RB "$" " ./execve ./script"
-argv[0]: ./myecho
-argv[1]: script\-arg
-argv[2]: ./script
-argv[3]: hello
-argv[4]: world
-.EE
-.in
-.SH SEE ALSO
-.BR chmod (2),
-.BR execveat (2),
-.BR fork (2),
-.BR get_robust_list (2),
-.BR ptrace (2),
-.BR exec (3),
-.BR fexecve (3),
-.BR getauxval (3),
-.BR getopt (3),
-.BR system (3),
-.BR capabilities (7),
-.BR credentials (7),
-.BR environ (7),
-.BR path_resolution (7),
-.BR ld.so (8)
diff --git a/man2/execveat.2 b/man2/execveat.2
deleted file mode 100644
index ff0eb278f..000000000
--- a/man2/execveat.2
+++ /dev/null
@@ -1,220 +0,0 @@
-.\" Copyright (c) 2014 Google, Inc., written by David Drysdale
-.\" and Copyright (c) 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH execveat 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-execveat \- execute program relative to a directory file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/fcntl.h>" " /* Definition of " AT_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int execveat(int " dirfd ", const char *" pathname ,
-.BI " char *const _Nullable " argv [],
-.BI " char *const _Nullable " envp [],
-.BI " int " flags );
-.fi
-.SH DESCRIPTION
-.\" commit 51f39a1f0cea1cacf8c787f652f26dfee9611874
-The
-.BR execveat ()
-system call executes the program referred to by the combination of
-.I dirfd
-and
-.IR pathname .
-It operates in exactly the same way as
-.BR execve (2),
-except for the differences described in this manual page.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR execve (2)
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR execve (2)).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-If
-.I pathname
-is an empty string and the
-.B AT_EMPTY_PATH
-flag is specified, then the file descriptor
-.I dirfd
-specifies the file to be executed (i.e.,
-.I dirfd
-refers to an executable file, rather than a directory).
-.P
-The
-.I flags
-argument is a bit mask that can include zero or more of the following flags:
-.TP
-.B AT_EMPTY_PATH
-If
-.I pathname
-is an empty string, operate on the file referred to by
-.I dirfd
-(which may have been obtained using the
-.BR open (2)
-.B O_PATH
-flag).
-.TP
-.B AT_SYMLINK_NOFOLLOW
-If the file identified by
-.I dirfd
-and a non-NULL
-.I pathname
-is a symbolic link, then the call fails with the error
-.BR ELOOP .
-.SH RETURN VALUE
-On success,
-.BR execveat ()
-does not return.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-The same errors that occur for
-.BR execve (2)
-can also occur for
-.BR execveat ().
-The following additional errors can occur for
-.BR execveat ():
-.TP
-.B EBADF
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EINVAL
-Invalid flag specified in
-.IR flags .
-.TP
-.B ELOOP
-.I flags
-includes
-.B AT_SYMLINK_NOFOLLOW
-and the file identified by
-.I dirfd
-and a non-NULL
-.I pathname
-is a symbolic link.
-.TP
-.B ENOENT
-The program identified by
-.I dirfd
-and
-.I pathname
-requires the use of an interpreter program
-(such as a script starting with "#!"), but the file descriptor
-.I dirfd
-was opened with the
-.B O_CLOEXEC
-flag, with the result that
-the program file is inaccessible to the launched interpreter.
-See BUGS.
-.TP
-.B ENOTDIR
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.19,
-glibc 2.34.
-.SH NOTES
-In addition to the reasons explained in
-.BR openat (2),
-the
-.BR execveat ()
-system call is also needed to allow
-.BR fexecve (3)
-to be implemented on systems that do not have the
-.I /proc
-filesystem mounted.
-.P
-When asked to execute a script file, the
-.I argv[0]
-that is passed to the script interpreter is a string of the form
-.I /dev/fd/N
-or
-.IR /dev/fd/N/P ,
-where
-.I N
-is the number of the file descriptor passed via the
-.I dirfd
-argument.
-A string of the first form occurs when
-.B AT_EMPTY_PATH
-is employed.
-A string of the second form occurs when the script is specified via both
-.I dirfd
-and
-.IR pathname ;
-in this case,
-.I P
-is the value given in
-.IR pathname .
-.P
-For the same reasons described in
-.BR fexecve (3),
-the natural idiom when using
-.BR execveat ()
-is to set the close-on-exec flag on
-.IR dirfd .
-(But see BUGS.)
-.SH BUGS
-The
-.B ENOENT
-error described above means that it is not possible to set the
-close-on-exec flag on the file descriptor given to a call of the form:
-.P
-.in +4n
-.EX
-execveat(fd, "", argv, envp, AT_EMPTY_PATH);
-.EE
-.in
-.P
-However, the inability to set the close-on-exec flag means that a file
-descriptor referring to the script leaks through to the script itself.
-As well as wasting a file descriptor,
-this leakage can lead to file-descriptor exhaustion in scenarios
-where scripts recursively employ
-.BR execveat ().
-.\" For an example, see Michael Kerrisk's 2015-01-10 reply in this LKML
-.\" thread (http://thread.gmane.org/gmane.linux.kernel/1836105/focus=20229):
-.\"
-.\" Subject: [PATCHv10 man-pages 5/5] execveat.2: initial man page for execveat(2)
-.\" Date: Mon, 24 Nov 2014 11:53:59 +0000
-.SH SEE ALSO
-.BR execve (2),
-.BR openat (2),
-.BR fexecve (3)
diff --git a/man2/exit.2 b/man2/exit.2
deleted file mode 100644
index 9f9d2e763..000000000
--- a/man2/exit.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/_exit.2
diff --git a/man2/exit_group.2 b/man2/exit_group.2
deleted file mode 100644
index ac7cb4243..000000000
--- a/man2/exit_group.2
+++ /dev/null
@@ -1,38 +0,0 @@
-.\" Copyright (C) 2004 Andries Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH exit_group 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-exit_group \- exit all threads in a process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "[[noreturn]] void syscall(SYS_exit_group, int " status );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR exit_group (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-This system call terminates all threads
-in the calling process's thread group.
-.SH RETURN VALUE
-This system call does not return.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.35.
-.SH NOTES
-Since glibc 2.3, this is the system call invoked when the
-.BR _exit (2)
-wrapper function is called.
-.SH SEE ALSO
-.BR _exit (2)
diff --git a/man2/faccessat.2 b/man2/faccessat.2
deleted file mode 100644
index 9d4f76e5b..000000000
--- a/man2/faccessat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/access.2
diff --git a/man2/faccessat2.2 b/man2/faccessat2.2
deleted file mode 100644
index 9d4f76e5b..000000000
--- a/man2/faccessat2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/access.2
diff --git a/man2/fadvise64.2 b/man2/fadvise64.2
deleted file mode 100644
index 53f54a121..000000000
--- a/man2/fadvise64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/posix_fadvise.2
diff --git a/man2/fadvise64_64.2 b/man2/fadvise64_64.2
deleted file mode 100644
index 53f54a121..000000000
--- a/man2/fadvise64_64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/posix_fadvise.2
diff --git a/man2/fallocate.2 b/man2/fallocate.2
deleted file mode 100644
index bb2bb0521..000000000
--- a/man2/fallocate.2
+++ /dev/null
@@ -1,481 +0,0 @@
-.\" Copyright (c) 2007 Silicon Graphics, Inc. All Rights Reserved
-.\" Written by Dave Chinner <dgc@sgi.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-only
-.\"
-.\" 2011-09-19: Added FALLOC_FL_PUNCH_HOLE
-.\" 2011-09-19: Substantial restructuring of the page
-.\"
-.TH fallocate 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-fallocate \- manipulate file space
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <fcntl.h>
-.P
-.BI "int fallocate(int " fd ", int " mode ", off_t " offset \
-", off_t " len ");"
-.fi
-.SH DESCRIPTION
-This is a nonportable, Linux-specific system call.
-For the portable, POSIX.1-specified method of ensuring that space
-is allocated for a file, see
-.BR posix_fallocate (3).
-.P
-.BR fallocate ()
-allows the caller to directly manipulate the allocated disk space
-for the file referred to by
-.I fd
-for the byte range starting at
-.I offset
-and continuing for
-.I len
-bytes.
-.P
-The
-.I mode
-argument determines the operation to be performed on the given range.
-Details of the supported operations are given in the subsections below.
-.SS Allocating disk space
-The default operation (i.e.,
-.I mode
-is zero) of
-.BR fallocate ()
-allocates the disk space within the range specified by
-.I offset
-and
-.IR len .
-The file size (as reported by
-.BR stat (2))
-will be changed if
-.IR offset + len
-is greater than the file size.
-Any subregion within the range specified by
-.I offset
-and
-.I len
-that did not contain data before the call will be initialized to zero.
-This default behavior closely resembles the behavior of the
-.BR posix_fallocate (3)
-library function,
-and is intended as a method of optimally implementing that function.
-.P
-After a successful call, subsequent writes into the range specified by
-.I offset
-and
-.I len
-are guaranteed not to fail because of lack of disk space.
-.P
-If the
-.B FALLOC_FL_KEEP_SIZE
-flag is specified in
-.IR mode ,
-the behavior of the call is similar,
-but the file size will not be changed even if
-.IR offset + len
-is greater than the file size.
-Preallocating zeroed blocks beyond the end of the file in this manner
-is useful for optimizing append workloads.
-.P
-If the
-.B FALLOC_FL_UNSHARE_RANGE
-flag is specified in
-.IR mode ,
-shared file data extents will be made private to the file to guarantee
-that a subsequent write will not fail due to lack of space.
-Typically, this will be done by performing a copy-on-write operation on
-all shared data in the file.
-This flag may not be supported by all filesystems.
-.P
-Because allocation is done in block size chunks,
-.BR fallocate ()
-may allocate a larger range of disk space than was specified.
-.SS Deallocating file space
-Specifying the
-.B FALLOC_FL_PUNCH_HOLE
-flag (available since Linux 2.6.38) in
-.I mode
-deallocates space (i.e., creates a hole)
-in the byte range starting at
-.I offset
-and continuing for
-.I len
-bytes.
-Within the specified range, partial filesystem blocks are zeroed,
-and whole filesystem blocks are removed from the file.
-After a successful call,
-subsequent reads from this range will return zeros.
-.P
-The
-.B FALLOC_FL_PUNCH_HOLE
-flag must be ORed with
-.B FALLOC_FL_KEEP_SIZE
-in
-.IR mode ;
-in other words, even when punching off the end of the file, the file size
-(as reported by
-.BR stat (2))
-does not change.
-.P
-Not all filesystems support
-.BR FALLOC_FL_PUNCH_HOLE ;
-if a filesystem doesn't support the operation, an error is returned.
-The operation is supported on at least the following filesystems:
-.IP \[bu] 3
-XFS (since Linux 2.6.38)
-.IP \[bu]
-ext4 (since Linux 3.0)
-.\" commit a4bb6b64e39abc0e41ca077725f2a72c868e7622
-.IP \[bu]
-Btrfs (since Linux 3.7)
-.IP \[bu]
-.BR tmpfs (5)
-(since Linux 3.5)
-.\" commit 83e4fa9c16e4af7122e31be3eca5d57881d236fe
-.IP \[bu]
-.BR gfs2 (5)
-(since Linux 4.16)
-.\" commit 4e56a6411fbce6f859566e17298114c2434391a4
-.SS Collapsing file space
-.\" commit 00f5e61998dd17f5375d9dfc01331f104b83f841
-Specifying the
-.B FALLOC_FL_COLLAPSE_RANGE
-flag (available since Linux 3.15) in
-.I mode
-removes a byte range from a file, without leaving a hole.
-The byte range to be collapsed starts at
-.I offset
-and continues for
-.I len
-bytes.
-At the completion of the operation,
-the contents of the file starting at the location
-.I offset+len
-will be appended at the location
-.IR offset ,
-and the file will be
-.I len
-bytes smaller.
-.P
-A filesystem may place limitations on the granularity of the operation,
-in order to ensure efficient implementation.
-Typically,
-.I offset
-and
-.I len
-must be a multiple of the filesystem logical block size,
-which varies according to the filesystem type and configuration.
-If a filesystem has such a requirement,
-.BR fallocate ()
-fails with the error
-.B EINVAL
-if this requirement is violated.
-.P
-If the region specified by
-.I offset
-plus
-.I len
-reaches or passes the end of file, an error is returned;
-instead, use
-.BR ftruncate (2)
-to truncate a file.
-.P
-No other flags may be specified in
-.I mode
-in conjunction with
-.BR FALLOC_FL_COLLAPSE_RANGE .
-.P
-As at Linux 3.15,
-.B FALLOC_FL_COLLAPSE_RANGE
-is supported by
-ext4 (only for extent-based files)
-.\" commit 9eb79482a97152930b113b51dff530aba9e28c8e
-and XFS.
-.\" commit e1d8fb88a64c1f8094b9f6c3b6d2d9e6719c970d
-.SS Zeroing file space
-Specifying the
-.B FALLOC_FL_ZERO_RANGE
-flag (available since Linux 3.15)
-.\" commit 409332b65d3ed8cfa7a8030f1e9d52f372219642
-in
-.I mode
-zeros space in the byte range starting at
-.I offset
-and continuing for
-.I len
-bytes.
-Within the specified range, blocks are preallocated for the regions
-that span the holes in the file.
-After a successful call, subsequent
-reads from this range will return zeros.
-.P
-Zeroing is done within the filesystem preferably by converting the range into
-unwritten extents.
-This approach means that the specified range will not be physically zeroed
-out on the device (except for partial blocks at either end of the range),
-and I/O is (otherwise) required only to update metadata.
-.P
-If the
-.B FALLOC_FL_KEEP_SIZE
-flag is additionally specified in
-.IR mode ,
-the behavior of the call is similar,
-but the file size will not be changed even if
-.IR offset + len
-is greater than the file size.
-This behavior is the same as when preallocating space with
-.B FALLOC_FL_KEEP_SIZE
-specified.
-.P
-Not all filesystems support
-.BR FALLOC_FL_ZERO_RANGE ;
-if a filesystem doesn't support the operation, an error is returned.
-The operation is supported on at least the following filesystems:
-.IP \[bu] 3
-XFS (since Linux 3.15)
-.\" commit 376ba313147b4172f3e8cf620b9fb591f3e8cdfa
-.IP \[bu]
-ext4, for extent-based files (since Linux 3.15)
-.\" commit b8a8684502a0fc852afa0056c6bb2a9273f6fcc0
-.IP \[bu]
-SMB3 (since Linux 3.17)
-.\" commit 30175628bf7f521e9ee31ac98fa6d6fe7441a556
-.IP \[bu]
-Btrfs (since Linux 4.16)
-.\" commit f27451f229966874a8793995b8e6b74326d125df
-.SS Increasing file space
-Specifying the
-.B FALLOC_FL_INSERT_RANGE
-flag
-(available since Linux 4.1)
-.\" commit dd46c787788d5bf5b974729d43e4c405814a4c7d
-in
-.I mode
-increases the file space by inserting a hole within the file size without
-overwriting any existing data.
-The hole will start at
-.I offset
-and continue for
-.I len
-bytes.
-When inserting the hole inside a file, the contents of the file starting at
-.I offset
-will be shifted upward (i.e., to a higher file offset) by
-.I len
-bytes.
-Inserting a hole inside a file increases the file size by
-.I len
-bytes.
-.P
-This mode has the same limitations as
-.B FALLOC_FL_COLLAPSE_RANGE
-regarding the granularity of the operation.
-If the granularity requirements are not met,
-.BR fallocate ()
-fails with the error
-.BR EINVAL .
-If the
-.I offset
-is equal to or greater than the end of file, an error is returned.
-For such operations (i.e., inserting a hole at the end of file),
-.BR ftruncate (2)
-should be used.
-.P
-No other flags may be specified in
-.I mode
-in conjunction with
-.BR FALLOC_FL_INSERT_RANGE .
-.P
-.B FALLOC_FL_INSERT_RANGE
-requires filesystem support.
-Filesystems that support this operation include
-XFS (since Linux 4.1)
-.\" commit a904b1ca5751faf5ece8600e18cd3b674afcca1b
-and ext4 (since Linux 4.2).
-.\" commit 331573febb6a224bc50322e3670da326cb7f4cfc
-.\" f2fs also has support since Linux 4.2
-.\" commit f62185d0e283e9d311e3ac1020f159d95f0aab39
-.SH RETURN VALUE
-On success,
-.BR fallocate ()
-returns zero.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor, or is not opened for writing.
-.TP
-.B EFBIG
-.IR offset + len
-exceeds the maximum file size.
-.TP
-.B EFBIG
-.I mode
-is
-.BR FALLOC_FL_INSERT_RANGE ,
-and the current file size+\fIlen\fP exceeds the maximum file size.
-.TP
-.B EINTR
-A signal was caught during execution; see
-.BR signal (7).
-.TP
-.B EINVAL
-.I offset
-was less than 0, or
-.I len
-.\" FIXME . (raise a kernel bug) Probably the len==0 case should be
-.\" a no-op, rather than an error. That would be consistent with
-.\" similar APIs for the len==0 case.
-.\" See "Re: [PATCH] fallocate.2: add FALLOC_FL_PUNCH_HOLE flag definition"
-.\" 21 Sep 2012
-.\" http://thread.gmane.org/gmane.linux.file-systems/48331/focus=1193526
-was less than or equal to 0.
-.TP
-.B EINVAL
-.I mode
-is
-.B FALLOC_FL_COLLAPSE_RANGE
-and the range specified by
-.I offset
-plus
-.I len
-reaches or passes the end of the file.
-.TP
-.B EINVAL
-.I mode
-is
-.B FALLOC_FL_INSERT_RANGE
-and the range specified by
-.I offset
-reaches or passes the end of the file.
-.TP
-.B EINVAL
-.I mode
-is
-.B FALLOC_FL_COLLAPSE_RANGE
-or
-.BR FALLOC_FL_INSERT_RANGE ,
-but either
-.I offset
-or
-.I len
-is not a multiple of the filesystem block size.
-.TP
-.B EINVAL
-.I mode
-contains one of
-.B FALLOC_FL_COLLAPSE_RANGE
-or
-.B FALLOC_FL_INSERT_RANGE
-and also other flags;
-no other flags are permitted with
-.B FALLOC_FL_COLLAPSE_RANGE
-or
-.BR FALLOC_FL_INSERT_RANGE .
-.TP
-.B EINVAL
-.I mode
-is
-.BR FALLOC_FL_COLLAPSE_RANGE ,
-.BR FALLOC_FL_ZERO_RANGE ,
-or
-.BR FALLOC_FL_INSERT_RANGE ,
-but the file referred to by
-.I fd
-is not a regular file.
-.\" There was an inconsistency in Linux 3.15-rc1, that should be resolved so that all
-.\" filesystems use this error for this case. (Tytso says ext4 will change.)
-.\" http://thread.gmane.org/gmane.comp.file-systems.xfs.general/60485/focus=5521
-.\" From: Michael Kerrisk (man-pages <mtk.manpages@...>
-.\" Subject: Re: [PATCH v5 10/10] manpage: update FALLOC_FL_COLLAPSE_RANGE flag in fallocate
-.\" Newsgroups: gmane.linux.man, gmane.linux.file-systems
-.\" Date: 2014-04-17 13:40:05 GMT
-.TP
-.B EIO
-An I/O error occurred while reading from or writing to a filesystem.
-.TP
-.B ENODEV
-.I fd
-does not refer to a regular file or a directory.
-(If
-.I fd
-is a pipe or FIFO, a different error results.)
-.TP
-.B ENOSPC
-There is not enough space left on the device containing the file
-referred to by
-.IR fd .
-.TP
-.B ENOSYS
-This kernel does not implement
-.BR fallocate ().
-.TP
-.B EOPNOTSUPP
-The filesystem containing the file referred to by
-.I fd
-does not support this operation;
-or the
-.I mode
-is not supported by the filesystem containing the file referred to by
-.IR fd .
-.TP
-.B EPERM
-The file referred to by
-.I fd
-is marked immutable (see
-.BR chattr (1)).
-.TP
-.B EPERM
-.I mode
-specifies
-.BR FALLOC_FL_PUNCH_HOLE ,
-.BR FALLOC_FL_COLLAPSE_RANGE ,
-or
-.B FALLOC_FL_INSERT_RANGE
-and
-the file referred to by
-.I fd
-is marked append-only
-(see
-.BR chattr (1)).
-.TP
-.B EPERM
-The operation was prevented by a file seal; see
-.BR fcntl (2).
-.TP
-.B ESPIPE
-.I fd
-refers to a pipe or FIFO.
-.TP
-.B ETXTBSY
-.I mode
-specifies
-.B FALLOC_FL_COLLAPSE_RANGE
-or
-.BR FALLOC_FL_INSERT_RANGE ,
-but the file referred to by
-.I fd
-is currently being executed.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR fallocate ()
-Linux 2.6.23,
-glibc 2.10.
-.TP
-.B FALLOC_FL_*
-glibc 2.18.
-.\" See http://sourceware.org/bugzilla/show_bug.cgi?id=14964
-.SH SEE ALSO
-.BR fallocate (1),
-.BR ftruncate (2),
-.BR posix_fadvise (3),
-.BR posix_fallocate (3)
diff --git a/man2/fanotify_init.2 b/man2/fanotify_init.2
deleted file mode 100644
index e5f9cbf29..000000000
--- a/man2/fanotify_init.2
+++ /dev/null
@@ -1,542 +0,0 @@
-.\" Copyright (C) 2013, Heinrich Schuchardt <xypron.glpk@gmx.de>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.TH fanotify_init 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-fanotify_init \- create and initialize fanotify group
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <fcntl.h>" " /* Definition of " O_* " constants */"
-.B #include <sys/fanotify.h>
-.P
-.BI "int fanotify_init(unsigned int " flags ", unsigned int " event_f_flags );
-.fi
-.SH DESCRIPTION
-For an overview of the fanotify API, see
-.BR fanotify (7).
-.P
-.BR fanotify_init ()
-initializes a new fanotify group and returns a file descriptor for the event
-queue associated with the group.
-.P
-The file descriptor is used in calls to
-.BR fanotify_mark (2)
-to specify the files, directories, mounts, or filesystems for which fanotify
-events shall be created.
-These events are received by reading from the file descriptor.
-Some events are only informative, indicating that a file has been accessed.
-Other events can be used to determine whether
-another application is permitted to access a file or directory.
-Permission to access filesystem objects is granted by writing to the file
-descriptor.
-.P
-Multiple programs may be using the fanotify interface at the same time to
-monitor the same files.
-.P
-The number of fanotify groups per user is limited.
-See
-.BR fanotify (7)
-for details about this limit.
-.P
-The
-.I flags
-argument contains a multi-bit field defining the notification class of the
-listening application and further single bit fields specifying the behavior
-of the file descriptor.
-.P
-If multiple listeners for permission events exist,
-the notification class is used to establish the sequence
-in which the listeners receive the events.
-.P
-Only one of the following notification classes may be specified in
-.IR flags :
-.TP
-.B FAN_CLASS_PRE_CONTENT
-This value allows the receipt of events notifying that a file has been
-accessed and events for permission decisions if a file may be accessed.
-It is intended for event listeners that need to access files before they
-contain their final data.
-This notification class might be used by hierarchical storage managers,
-for example.
-Use of this flag requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.B FAN_CLASS_CONTENT
-This value allows the receipt of events notifying that a file has been
-accessed and events for permission decisions if a file may be accessed.
-It is intended for event listeners that need to access files when they
-already contain their final content.
-This notification class might be used by malware detection programs, for
-example.
-Use of this flag requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.B FAN_CLASS_NOTIF
-This is the default value.
-It does not need to be specified.
-This value only allows the receipt of events notifying that a file has been
-accessed.
-Permission decisions before the file is accessed are not possible.
-.P
-Listeners with different notification classes will receive events in the
-order
-.BR FAN_CLASS_PRE_CONTENT ,
-.BR FAN_CLASS_CONTENT ,
-.BR FAN_CLASS_NOTIF .
-The order of notification for listeners in the same notification class
-is undefined.
-.P
-The following bits can additionally be set in
-.IR flags :
-.TP
-.B FAN_CLOEXEC
-Set the close-on-exec flag
-.RB ( FD_CLOEXEC )
-on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2).
-.TP
-.B FAN_NONBLOCK
-Enable the nonblocking flag
-.RB ( O_NONBLOCK )
-for the file descriptor.
-Reading from the file descriptor will not block.
-Instead, if no data is available,
-.BR read (2)
-fails with the error
-.BR EAGAIN .
-.TP
-.B FAN_UNLIMITED_QUEUE
-Remove the limit on the number of events in the event queue.
-See
-.BR fanotify (7)
-for details about this limit.
-Use of this flag requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.B FAN_UNLIMITED_MARKS
-Remove the limit on the number of fanotify marks per user.
-See
-.BR fanotify (7)
-for details about this limit.
-Use of this flag requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR FAN_REPORT_TID " (since Linux 4.20)"
-.\" commit d0a6a87e40da49cfc7954c491d3065a25a641b29
-Report thread ID (TID) instead of process ID (PID)
-in the
-.I pid
-field of the
-.I "struct fanotify_event_metadata"
-supplied to
-.BR read (2)
-(see
-.BR fanotify (7)).
-Use of this flag requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR FAN_ENABLE_AUDIT " (since Linux 4.15)"
-.\" commit de8cd83e91bc3ee212b3e6ec6e4283af9e4ab269
-Enable generation of audit log records about access mediation performed by
-permission events.
-The permission event response has to be marked with the
-.B FAN_AUDIT
-flag for an audit log record to be generated.
-Use of this flag requires the
-.B CAP_AUDIT_WRITE
-capability.
-.TP
-.BR FAN_REPORT_FID " (since Linux 5.1)"
-.\" commit a8b13aa20afb69161b5123b4f1acc7ea0a03d360
-This value allows the receipt of events which contain additional information
-about the underlying filesystem object correlated to an event.
-An additional record of type
-.B FAN_EVENT_INFO_TYPE_FID
-encapsulates the information about the object and is included alongside the
-generic event metadata structure.
-The file descriptor that is used to represent the object correlated to an
-event is instead substituted with a file handle.
-It is intended for applications that may find the use of a file handle to
-identify an object more suitable than a file descriptor.
-Additionally, it may be used for applications monitoring a directory or a
-filesystem that are interested in the directory entry modification events
-.BR FAN_CREATE ,
-.BR FAN_DELETE ,
-.BR FAN_MOVE ,
-and
-.BR FAN_RENAME ,
-or in events such as
-.BR FAN_ATTRIB ,
-.BR FAN_DELETE_SELF ,
-and
-.BR FAN_MOVE_SELF .
-All the events above require an fanotify group that identifies filesystem
-objects by file handles.
-Note that without the flag
-.BR FAN_REPORT_TARGET_FID ,
-for the directory entry modification events,
-there is an information record that identifies the modified directory
-and not the created/deleted/moved child object.
-The use of
-.B FAN_CLASS_CONTENT
-or
-.B FAN_CLASS_PRE_CONTENT
-is not permitted with this flag and will result in the error
-.BR EINVAL .
-See
-.BR fanotify (7)
-for additional details.
-.TP
-.BR FAN_REPORT_DIR_FID " (since Linux 5.9)"
-.\" commit 83b7a59896dd24015a34b7f00027f0ff3747972f
-Events for fanotify groups initialized with this flag will contain
-(see exceptions below) additional information about a directory object
-correlated to an event.
-An additional record of type
-.B FAN_EVENT_INFO_TYPE_DFID
-encapsulates the information about the directory object and is included
-alongside the generic event metadata structure.
-For events that occur on a non-directory object, the additional structure
-includes a file handle that identifies the parent directory filesystem object.
-Note that there is no guarantee that the directory filesystem object will be
-found at the location described by the file handle information at the time
-the event is received.
-When combined with the flag
-.BR FAN_REPORT_FID ,
-two records may be reported with events that occur on a non-directory object,
-one to identify the non-directory object itself and one to identify the parent
-directory object.
-Note that in some cases, a filesystem object does not have a parent,
-for example, when an event occurs on an unlinked but open file.
-In that case, with the
-.B FAN_REPORT_FID
-flag, the event will be reported with only one record to identify the
-non-directory object itself, because there is no directory associated with
-the event.
-Without the
-.B FAN_REPORT_FID
-flag, no event will be reported.
-See
-.BR fanotify (7)
-for additional details.
-.TP
-.BR FAN_REPORT_NAME " (since Linux 5.9)"
-.\" commit 929943b38daf817f2e6d303ea04401651fc3bc05
-Events for fanotify groups initialized with this flag will contain additional
-information about the name of the directory entry correlated to an event.
-This flag must be provided in conjunction with the flag
-.BR FAN_REPORT_DIR_FID .
-Providing this flag value without
-.B FAN_REPORT_DIR_FID
-will result in the error
-.BR EINVAL .
-This flag may be combined with the flag
-.BR FAN_REPORT_FID .
-An additional record of type
-.BR FAN_EVENT_INFO_TYPE_DFID_NAME ,
-which encapsulates the information about the directory entry, is included
-alongside the generic event metadata structure and substitutes the additional
-information record of type
-.BR FAN_EVENT_INFO_TYPE_DFID .
-The additional record includes a file handle that identifies a directory
-filesystem object followed by a name that identifies an entry in that
-directory.
-For the directory entry modification events
-.BR FAN_CREATE ,
-.BR FAN_DELETE ,
-and
-.BR FAN_MOVE ,
-the reported name is that of the created/deleted/moved directory entry.
-The event
-.B FAN_RENAME
-may contain two information records.
-One of type
-.B FAN_EVENT_INFO_TYPE_OLD_DFID_NAME
-identifying the old directory entry,
-and another of type
-.B FAN_EVENT_INFO_TYPE_NEW_DFID_NAME
-identifying the new directory entry.
-For other events that occur on a directory object, the reported file handle
-is that of the directory object itself and the reported name is '.'.
-For other events that occur on a non-directory object, the reported file handle
-is that of the parent directory object and the reported name is the name of a
-directory entry where the object was located at the time of the event.
-The rationale behind this logic is that the reported directory file handle can
-be passed to
-.BR open_by_handle_at (2)
-to get an open directory file descriptor and that file descriptor along with
-the reported name can be used to call
-.BR fstatat (2).
-The same rule that applies to record type
-.B FAN_EVENT_INFO_TYPE_DFID
-also applies to record type
-.BR FAN_EVENT_INFO_TYPE_DFID_NAME :
-if a non-directory object has no parent, either the event will not be reported
-or it will be reported without the directory entry information.
-Note that there is no guarantee that the filesystem object will be found at the
-location described by the directory entry information at the time the event is
-received.
-See
-.BR fanotify (7)
-for additional details.
-.TP
-.B FAN_REPORT_DFID_NAME
-This is a synonym for
-.RB ( FAN_REPORT_DIR_FID | FAN_REPORT_NAME ).
-.TP
-.BR FAN_REPORT_TARGET_FID " (since Linux 5.17)"
-.\" commit d61fd650e9d206a71fda789f02a1ced4b19944c4
-Events for fanotify groups initialized with this flag
-will contain additional information about the child
-correlated with directory entry modification events.
-This flag must be provided in conjunction with the flags
-.BR FAN_REPORT_FID ,
-.B FAN_REPORT_DIR_FID
-and
-.BR FAN_REPORT_NAME ,
-or else the error
-.B EINVAL
-will be returned.
-For the directory entry modification events
-.BR FAN_CREATE ,
-.BR FAN_DELETE ,
-.BR FAN_MOVE ,
-and
-.BR FAN_RENAME ,
-an additional record of type
-.B FAN_EVENT_INFO_TYPE_FID
-is reported in addition to the information records of type
-.BR FAN_EVENT_INFO_TYPE_DFID ,
-.BR FAN_EVENT_INFO_TYPE_DFID_NAME ,
-.BR FAN_EVENT_INFO_TYPE_OLD_DFID_NAME ,
-and
-.BR FAN_EVENT_INFO_TYPE_NEW_DFID_NAME .
-The additional record includes a file handle
-that identifies the filesystem child object
-that the directory entry is referring to.
-.TP
-.B FAN_REPORT_DFID_NAME_TARGET
-This is a synonym for
-.RB ( FAN_REPORT_DFID_NAME | FAN_REPORT_FID | FAN_REPORT_TARGET_FID ).
-.TP
-.BR FAN_REPORT_PIDFD " (since Linux 5.15)"
-.\" commit af579beb666aefb17e9a335c12c788c92932baf1
-Events for fanotify groups initialized with this flag will contain
-an additional information record alongside the generic
-.I fanotify_event_metadata
-structure.
-This information record will be of type
-.B FAN_EVENT_INFO_TYPE_PIDFD
-and will contain a pidfd for the process that
-was responsible for generating an event.
-A pidfd returned in this information record object is
-no different to the pidfd that is returned when calling
-.BR pidfd_open (2).
-This information record is intended for applications that
-may be interested in reliably determining whether
-the process responsible for generating an event
-has been recycled or terminated.
-The use of the
-.B FAN_REPORT_TID
-flag along with
-.B FAN_REPORT_PIDFD
-is currently not supported and
-attempting to do so will result in the error
-.B EINVAL
-being returned.
-This limitation is currently imposed by the pidfd API
-as it currently only supports
-the creation of pidfds for thread-group leaders.
-Creating pidfds for non-thread-group leaders
-may be supported at some point in the future,
-so this restriction may eventually be lifted.
-For more details on information records,
-see
-.BR fanotify (7).
-.P
-The
-.I event_f_flags
-argument
-defines the file status flags that will be set on the open file descriptions
-that are created for fanotify events.
-For details of these flags, see the description of the
-.I flags
-values in
-.BR open (2).
-.I event_f_flags
-includes a multi-bit field for the access mode.
-This field can take the following values:
-.TP
-.B O_RDONLY
-This value allows only read access.
-.TP
-.B O_WRONLY
-This value allows only write access.
-.TP
-.B O_RDWR
-This value allows read and write access.
-.P
-Additional bits can be set in
-.IR event_f_flags .
-The most useful values are:
-.TP
-.B O_LARGEFILE
-Enable support for files exceeding 2\ GB.
-Failing to set this flag will result in an
-.B EOVERFLOW
-error when trying to open a large file which is monitored by
-an fanotify group on a 32-bit system.
-.TP
-.BR O_CLOEXEC " (since Linux 3.18)"
-.\" commit 0b37e097a648aa71d4db1ad108001e95b69a2da4
-Enable the close-on-exec flag for the file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.P
-The following are also allowable:
-.BR O_APPEND ,
-.BR O_DSYNC ,
-.BR O_NOATIME ,
-.BR O_NONBLOCK ,
-and
-.BR O_SYNC .
-Specifying any other flag in
-.I event_f_flags
-yields the error
-.B EINVAL
-(but see BUGS).
-.SH RETURN VALUE
-On success,
-.BR fanotify_init ()
-returns a new file descriptor.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-An invalid value was passed in
-.I flags
-or
-.IR event_f_flags .
-.B FAN_ALL_INIT_FLAGS
-(deprecated since Linux 4.20)
-.\" commit 23c9deeb3285d34fd243abb3d6b9f07db60c3cf4
-defines all allowable bits for
-.IR flags .
-.TP
-.B EMFILE
-The number of fanotify groups for this user exceeds the limit.
-See
-.BR fanotify (7)
-for details about this limit.
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENOMEM
-The allocation of memory for the notification group failed.
-.TP
-.B ENOSYS
-This kernel does not implement
-.BR fanotify_init ().
-The fanotify API is available only if the kernel was configured with
-.BR CONFIG_FANOTIFY .
-.TP
-.B EPERM
-The operation is not permitted because the caller lacks a required capability.
-.SH VERSIONS
-Prior to Linux 5.13,
-.\" commit 7cea2a3c505e87a9d6afc78be4a7f7be636a73a7
-calling
-.BR fanotify_init ()
-required the
-.B CAP_SYS_ADMIN
-capability.
-Since Linux 5.13,
-.\" commit 7cea2a3c505e87a9d6afc78be4a7f7be636a73a7
-users may call
-.BR fanotify_init ()
-without the
-.B CAP_SYS_ADMIN
-capability to create and initialize
-an fanotify group with limited functionality.
-.P
-The limitations imposed on an event listener created by a user without the
-.B CAP_SYS_ADMIN
-capability are as follows:
-.RS
-.IP \[bu] 3
-The user cannot request an unlimited event queue by using
-.BR FAN_UNLIMITED_QUEUE .
-.IP \[bu]
-The user cannot request an unlimited number of marks by using
-.BR FAN_UNLIMITED_MARKS .
-.IP \[bu]
-The user cannot request to use either of the notification classes
-.B FAN_CLASS_CONTENT
-or
-.BR FAN_CLASS_PRE_CONTENT .
-This means that the user cannot request permission events.
-.IP \[bu]
-The user is required to create a group that identifies filesystem objects by
-file handles, for example, by providing the
-.B FAN_REPORT_FID
-flag.
-.IP \[bu]
-The user is limited to marking only inodes.
-The ability to mark a mount or filesystem via
-.BR fanotify_mark ()
-through the use of
-.B FAN_MARK_MOUNT
-or
-.B FAN_MARK_FILESYSTEM
-is not permitted.
-.IP \[bu]
-The event object in the event queue is limited in terms of the information
-that is made available to the unprivileged user.
-A user will also not receive the pid that generated the event, unless the
-listening process itself generated the event.
-.RE
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.37.
-.\" was introduced in Linux 2.6.36 and enabled in Linux 2.6.37.
-.SH BUGS
-The following bug was present before Linux 3.18:
-.IP \[bu] 3
-.\" Fixed by commit 0b37e097a648aa71d4db1ad108001e95b69a2da4
-The
-.B O_CLOEXEC
-flag is ignored when passed in
-.IR event_f_flags .
-.P
-The following bug was present before Linux 3.14:
-.IP \[bu] 3
-.\" Fixed by commit 48149e9d3a7e924010a0daab30a6197b7d7b6580
-The
-.I event_f_flags
-argument is not checked for invalid flags.
-Flags that are intended only for internal use,
-such as
-.BR FMODE_EXEC ,
-can be set, and will consequently be set for the file descriptors
-returned when reading from the fanotify file descriptor.
-.SH SEE ALSO
-.BR fanotify_mark (2),
-.BR fanotify (7)
diff --git a/man2/fanotify_mark.2 b/man2/fanotify_mark.2
deleted file mode 100644
index f3fce0c4e..000000000
--- a/man2/fanotify_mark.2
+++ /dev/null
@@ -1,850 +0,0 @@
-.\" Copyright (C) 2013, Heinrich Schuchardt <xypron.glpk@gmx.de>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.TH fanotify_mark 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-fanotify_mark \- add, remove, or modify an fanotify mark on a filesystem
-object
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/fanotify.h>
-.P
-.BI "int fanotify_mark(int " fanotify_fd ", unsigned int " flags ,
-.BI " uint64_t " mask ", int " dirfd ,
-.BI " const char *_Nullable " pathname );
-.fi
-.SH DESCRIPTION
-For an overview of the fanotify API, see
-.BR fanotify (7).
-.P
-.BR fanotify_mark ()
-adds, removes, or modifies an fanotify mark on a filesystem object.
-The caller must have read permission on the filesystem object that
-is to be marked.
-.P
-The
-.I fanotify_fd
-argument is a file descriptor returned by
-.BR fanotify_init (2).
-.P
-.I flags
-is a bit mask describing the modification to perform.
-It must include exactly one of the following values:
-.TP
-.B FAN_MARK_ADD
-The events in
-.I mask
-will be added to the mark mask (or to the ignore mask).
-.I mask
-must be nonempty or the error
-.B EINVAL
-will occur.
-.TP
-.B FAN_MARK_REMOVE
-The events in argument
-.I mask
-will be removed from the mark mask (or from the ignore mask).
-.I mask
-must be nonempty or the error
-.B EINVAL
-will occur.
-.TP
-.B FAN_MARK_FLUSH
-Remove either all marks for filesystems, all marks for mounts, or all
-marks for directories and files from the fanotify group.
-If
-.I flags
-contains
-.BR FAN_MARK_MOUNT ,
-all marks for mounts are removed from the group.
-If
-.I flags
-contains
-.BR FAN_MARK_FILESYSTEM ,
-all marks for filesystems are removed from the group.
-Otherwise, all marks for directories and files are removed.
-No flag other than, and at most one of, the flags
-.B FAN_MARK_MOUNT
-or
-.B FAN_MARK_FILESYSTEM
-can be used in conjunction with
-.BR FAN_MARK_FLUSH .
-.I mask
-is ignored.
-.P
-If none of the values above is specified, or more than one is specified,
-the call fails with the error
-.BR EINVAL .
-.P
-In addition,
-zero or more of the following values may be ORed into
-.IR flags :
-.TP
-.B FAN_MARK_DONT_FOLLOW
-If
-.I pathname
-is a symbolic link, mark the link itself, rather than the file to which it
-refers.
-(By default,
-.BR fanotify_mark ()
-dereferences
-.I pathname
-if it is a symbolic link.)
-.TP
-.B FAN_MARK_ONLYDIR
-If the filesystem object to be marked is not a directory, the error
-.B ENOTDIR
-shall be raised.
-.TP
-.B FAN_MARK_MOUNT
-Mark the mount specified by
-.IR pathname .
-If
-.I pathname
-is not itself a mount point, the mount containing
-.I pathname
-will be marked.
-All directories, subdirectories, and the contained files of the mount
-will be monitored.
-The events which require that filesystem objects are identified by file handles,
-such as
-.BR FAN_CREATE ,
-.BR FAN_ATTRIB ,
-.BR FAN_MOVE ,
-and
-.BR FAN_DELETE_SELF ,
-cannot be provided as a
-.I mask
-when
-.I flags
-contains
-.BR FAN_MARK_MOUNT .
-Attempting to do so will result in the error
-.B EINVAL
-being returned.
-Use of this flag requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR FAN_MARK_FILESYSTEM " (since Linux 4.20)"
-.\" commit d54f4fba889b205e9cd8239182ca5d27d0ac3bc2
-Mark the filesystem specified by
-.IR pathname .
-The filesystem containing
-.I pathname
-will be marked.
-All the contained files and directories of the filesystem from any mount
-point will be monitored.
-Use of this flag requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.B FAN_MARK_IGNORED_MASK
-The events in
-.I mask
-shall be added to or removed from the ignore mask.
-Note that the flags
-.B FAN_ONDIR
-and
-.B FAN_EVENT_ON_CHILD
-have no effect when provided with this flag.
-The effect of setting the flags
-.B FAN_ONDIR
-and
-.B FAN_EVENT_ON_CHILD
-in the mark mask
-on the events that are set in the ignore mask
-is undefined and depends on the Linux kernel version.
-Specifically, prior to Linux 5.9,
-.\" commit 497b0c5a7c0688c1b100a9c2e267337f677c198e
-setting a mark mask on a file
-and a mark with ignore mask on its parent directory
-would not result in ignoring events on the file,
-regardless of the
-.B FAN_EVENT_ON_CHILD
-flag in the parent directory's mark mask.
-When the ignore mask is updated with the
-.B FAN_MARK_IGNORED_MASK
-flag
-on a mark that was previously updated with the
-.B FAN_MARK_IGNORE
-flag,
-the update fails with
-.B EEXIST
-error.
-.TP
-.BR FAN_MARK_IGNORE " (since Linux 6.0)"
-.\" commit e252f2ed1c8c6c3884ab5dd34e003ed21f1fe6e0
-This flag has a similar effect as setting the
-.B FAN_MARK_IGNORED_MASK
-flag.
-The events in
-.I mask
-shall be added to or removed from the ignore mask.
-Unlike the
-.B FAN_MARK_IGNORED_MASK
-flag,
-this flag also has the effect that the
-.B FAN_ONDIR
-and
-.B FAN_EVENT_ON_CHILD
-flags take effect on the ignore mask.
-Specifically, unless the
-.B FAN_ONDIR
-flag is set with
-.BR FAN_MARK_IGNORE ,
-events on directories will not be ignored.
-If the flag
-.B FAN_EVENT_ON_CHILD
-is set with
-.BR FAN_MARK_IGNORE ,
-events on children will be ignored.
-For example,
-a mark on a directory with a combination of
-a mask with
-.B FAN_CREATE
-event
-and
-.B FAN_ONDIR
-flag
-and an ignore mask with
-.B FAN_CREATE
-event
-and without
-.B FAN_ONDIR
-flag,
-will result in getting only
-the events for creation of sub-directories.
-When using the
-.B FAN_MARK_IGNORE
-flag to add to an ignore mask
-of a mount,
-filesystem,
-or directory inode mark,
-the
-.B FAN_MARK_IGNORED_SURV_MODIFY
-flag must be specified.
-Failure to do so will result in the
-.B EINVAL
-or
-.B EISDIR
-error.
-.TP
-.B FAN_MARK_IGNORED_SURV_MODIFY
-The ignore mask shall survive modify events.
-If this flag is not set,
-the ignore mask is cleared when a modify event occurs
-on the marked object.
-Omitting this flag is typically used to suppress events
-(e.g.,
-.BR FAN_OPEN )
-for a specific file,
-until that specific file's content has been modified.
-It is far less useful to suppress events
-on an entire filesystem,
-or mount,
-or on all files inside a directory,
-until some file's content has been modified.
-For this reason,
-the
-.B FAN_MARK_IGNORE
-flag requires the
-.B FAN_MARK_IGNORED_SURV_MODIFY
-flag on a mount,
-filesystem,
-or directory inode mark.
-This flag cannot be removed from a mark once set.
-When the ignore mask is updated without this flag
-on a mark that was previously updated with the
-.B FAN_MARK_IGNORE
-and
-.B FAN_MARK_IGNORED_SURV_MODIFY
-flags,
-the update fails with
-.B EEXIST
-error.
-.TP
-.B FAN_MARK_IGNORE_SURV
-This is a synonym for
-.RB ( FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY ).
-.TP
-.BR FAN_MARK_EVICTABLE " (since Linux 5.19)"
-.\" commit 5f9d3bd520261fd7a850818c71809fd580e0f30c
-When an inode mark is created with this flag,
-the inode object will not be pinned to the inode cache,
-therefore,
-allowing the inode object to be evicted from the inode cache
-when the memory pressure on the system is high.
-The eviction of the inode object
-results in the evictable mark also being lost.
-When the mask of an evictable inode mark is updated
-without using the
-.B FAN_MARK_EVICTABLE
-flag,
-the marked inode is pinned to inode cache
-and the mark is no longer evictable.
-When the mask of a non-evictable inode mark is updated
-with the
-.B FAN_MARK_EVICTABLE
-flag,
-the inode mark remains non-evictable
-and the update fails with
-.B EEXIST
-error.
-Mounts and filesystems are not evictable objects,
-therefore,
-an attempt to create a mount mark or a filesystem mark
-with the
-.B FAN_MARK_EVICTABLE
-flag,
-will result in the error
-.BR EINVAL .
-For example,
-inode marks can be used in combination with mount marks
-to reduce the number of events from noninteresting paths.
-The event listener reads events,
-checks if the path reported in the event is of interest,
-and if it is not,
-the listener sets a mark with an ignore mask on the directory.
-Evictable inode marks allow using this method for a large number of directories
-without the concern of pinning all inodes and exhausting the system's memory.
-.P
-.I mask
-defines which events shall be listened for (or which shall be ignored).
-It is a bit mask composed of the following values:
-.TP
-.B FAN_ACCESS
-Create an event when a file or directory (but see BUGS) is accessed (read).
-.TP
-.B FAN_MODIFY
-Create an event when a file is modified (write).
-.TP
-.B FAN_CLOSE_WRITE
-Create an event when a writable file is closed.
-.TP
-.B FAN_CLOSE_NOWRITE
-Create an event when a read-only file or directory is closed.
-.TP
-.B FAN_OPEN
-Create an event when a file or directory is opened.
-.TP
-.BR FAN_OPEN_EXEC " (since Linux 5.0)"
-.\" commit 9b076f1c0f4869b838a1b7aa0edb5664d47ec8aa
-Create an event when a file is opened with the intent to be executed.
-See NOTES for additional details.
-.TP
-.BR FAN_ATTRIB " (since Linux 5.1)"
-.\" commit 235328d1fa4251c6dcb32351219bb553a58838d2
-Create an event when the metadata for a file or directory has changed.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-.TP
-.BR FAN_CREATE " (since Linux 5.1)"
-.\" commit 235328d1fa4251c6dcb32351219bb553a58838d2
-Create an event when a file or directory has been created in a marked
-parent directory.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-.TP
-.BR FAN_DELETE " (since Linux 5.1)"
-.\" commit 235328d1fa4251c6dcb32351219bb553a58838d2
-Create an event when a file or directory has been deleted in a marked
-parent directory.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-.TP
-.BR FAN_DELETE_SELF " (since Linux 5.1)"
-.\" commit 235328d1fa4251c6dcb32351219bb553a58838d2
-Create an event when a marked file or directory itself is deleted.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-.TP
-.BR FAN_FS_ERROR " (since Linux 5.16)"
-.\" commit 9709bd548f11a092d124698118013f66e1740f9b
-Create an event when a filesystem error
-leading to inconsistent filesystem metadata is detected.
-An additional information record of type
-.B FAN_EVENT_INFO_TYPE_ERROR
-is returned for each event in the read buffer.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-.IP
-Events of such type are dependent on support
-from the underlying filesystem.
-At the time of writing,
-only the
-.B ext4
-filesystem reports
-.B FAN_FS_ERROR
-events.
-.IP
-See
-.BR fanotify (7)
-for additional details.
-.TP
-.BR FAN_MOVED_FROM " (since Linux 5.1)"
-.\" commit 235328d1fa4251c6dcb32351219bb553a58838d2
-Create an event when a file or directory has been moved from a marked
-parent directory.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-.TP
-.BR FAN_MOVED_TO " (since Linux 5.1)"
-.\" commit 235328d1fa4251c6dcb32351219bb553a58838d2
-Create an event when a file or directory has been moved to a marked parent
-directory.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-.TP
-.BR FAN_RENAME " (since Linux 5.17)"
-.\" commit 8cc3b1ccd930fe6971e1527f0c4f1bdc8cb56026
-This event contains the same information provided by events
-.B FAN_MOVED_FROM
-and
-.BR FAN_MOVED_TO ,
-but is represented by a single event with up to two information records.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-If the filesystem object to be marked is not a directory, the error
-.B ENOTDIR
-shall be raised.
-.TP
-.BR FAN_MOVE_SELF " (since Linux 5.1)"
-.\" commit 235328d1fa4251c6dcb32351219bb553a58838d2
-Create an event when a marked file or directory itself has been moved.
-An fanotify group that identifies filesystem objects by file handles
-is required.
-.TP
-.B FAN_OPEN_PERM
-Create an event when a permission to open a file or directory is requested.
-An fanotify file descriptor created with
-.B FAN_CLASS_PRE_CONTENT
-or
-.B FAN_CLASS_CONTENT
-is required.
-.TP
-.BR FAN_OPEN_EXEC_PERM " (since Linux 5.0)"
-.\" commit 66917a3130f218dcef9eeab4fd11a71cd00cd7c9
-Create an event when a permission to open a file for execution is
-requested.
-An fanotify file descriptor created with
-.B FAN_CLASS_PRE_CONTENT
-or
-.B FAN_CLASS_CONTENT
-is required.
-See NOTES for additional details.
-.TP
-.B FAN_ACCESS_PERM
-Create an event when a permission to read a file or directory is requested.
-An fanotify file descriptor created with
-.B FAN_CLASS_PRE_CONTENT
-or
-.B FAN_CLASS_CONTENT
-is required.
-.TP
-.B FAN_ONDIR
-Create events for directories\[em]for example, when
-.BR opendir (3),
-.BR readdir (3)
-(but see BUGS), and
-.BR closedir (3)
-are called.
-Without this flag, events are created only for files.
-In the context of directory entry events, such as
-.BR FAN_CREATE ,
-.BR FAN_DELETE ,
-.BR FAN_MOVED_FROM ,
-and
-.BR FAN_MOVED_TO ,
-specifying the flag
-.B FAN_ONDIR
-is required in order to create events when subdirectory entries are
-modified (i.e.,
-.BR mkdir (2)/
-.BR rmdir (2)).
-.TP
-.B FAN_EVENT_ON_CHILD
-Events for the immediate children of marked directories shall be created.
-The flag has no effect when marking mounts and filesystems.
-Note that events are not generated for children of the subdirectories
-of marked directories.
-More specifically, the directory entry modification events
-.BR FAN_CREATE ,
-.BR FAN_DELETE ,
-.BR FAN_MOVED_FROM ,
-and
-.B FAN_MOVED_TO
-are not generated for any entry modifications performed inside subdirectories
-of marked directories.
-Note that the events
-.B FAN_DELETE_SELF
-and
-.B FAN_MOVE_SELF
-are not generated for children of marked directories.
-To monitor complete directory trees it is necessary to mark the relevant
-mount or filesystem.
-.P
-The following composed values are defined:
-.TP
-.B FAN_CLOSE
-A file is closed
-.RB ( FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE ).
-.TP
-.B FAN_MOVE
-A file or directory has been moved
-.RB ( FAN_MOVED_FROM | FAN_MOVED_TO ).
-.P
-The filesystem object to be marked is determined by the file descriptor
-.I dirfd
-and the pathname specified in
-.IR pathname :
-.IP \[bu] 3
-If
-.I pathname
-is NULL,
-.I dirfd
-defines the filesystem object to be marked.
-.IP \[bu]
-If
-.I pathname
-is NULL, and
-.I dirfd
-takes the special value
-.BR AT_FDCWD ,
-the current working directory is to be marked.
-.IP \[bu]
-If
-.I pathname
-is absolute, it defines the filesystem object to be marked, and
-.I dirfd
-is ignored.
-.IP \[bu]
-If
-.I pathname
-is relative, and
-.I dirfd
-does not have the value
-.BR AT_FDCWD ,
-then the filesystem object to be marked is determined by interpreting
-.I pathname
-relative to the directory referred to by
-.IR dirfd .
-.IP \[bu]
-If
-.I pathname
-is relative, and
-.I dirfd
-has the value
-.BR AT_FDCWD ,
-then the filesystem object to be marked is determined by interpreting
-.I pathname
-relative to the current working directory.
-(See
-.BR openat (2)
-for an explanation of why the
-.I dirfd
-argument is useful.)
-.SH RETURN VALUE
-On success,
-.BR fanotify_mark ()
-returns 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-An invalid file descriptor was passed in
-.IR fanotify_fd .
-.TP
-.B EBADF
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EEXIST
-The filesystem object indicated by
-.I dirfd
-and
-.I pathname
-has a mark that was updated without the
-.B FAN_MARK_EVICTABLE
-flag,
-and the user attempted to update the mark with
-.B FAN_MARK_EVICTABLE
-flag.
-.TP
-.B EEXIST
-The filesystem object indicated by
-.I dirfd
-and
-.I pathname
-has a mark that was updated with the
-.B FAN_MARK_IGNORE
-flag,
-and the user attempted to update the mark with
-.B FAN_MARK_IGNORED_MASK
-flag.
-.TP
-.B EEXIST
-The filesystem object indicated by
-.I dirfd
-and
-.I pathname
-has a mark that was updated with the
-.B FAN_MARK_IGNORE
-and
-.B FAN_MARK_IGNORED_SURV_MODIFY
-flags,
-and the user attempted to update the mark only with
-.B FAN_MARK_IGNORE
-flag.
-.TP
-.B EINVAL
-An invalid value was passed in
-.I flags
-or
-.IR mask ,
-or
-.I fanotify_fd
-was not an fanotify file descriptor.
-.TP
-.B EINVAL
-The fanotify file descriptor was opened with
-.B FAN_CLASS_NOTIF
-or the fanotify group identifies filesystem objects by file handles
-and mask contains a flag for permission events
-.RB ( FAN_OPEN_PERM
-or
-.BR FAN_ACCESS_PERM ).
-.TP
-.B EINVAL
-The group was initialized without
-.B FAN_REPORT_FID
-but one or more event types specified in the
-.I mask
-require it.
-.TP
-.B EINVAL
-.I flags
-contains
-.BR FAN_MARK_IGNORE ,
-and either
-.B FAN_MARK_MOUNT
-or
-.BR FAN_MARK_FILESYSTEM ,
-but does not contain
-.BR FAN_MARK_IGNORED_SURV_MODIFY .
-.TP
-.B EISDIR
-.I flags
-contains
-.BR FAN_MARK_IGNORE ,
-but does not contain
-.BR FAN_MARK_IGNORED_SURV_MODIFY ,
-and
-.I dirfd
-and
-.I pathname
-specify a directory.
-.TP
-.B ENODEV
-The filesystem object indicated by
-.I dirfd
-and
-.I pathname
-is not associated with a filesystem that supports
-.I fsid
-(e.g.,
-.BR fuse (4)).
-.BR tmpfs (5)
-did not support
-.I fsid
-prior to Linux 5.13.
-.\" commit 59cda49ecf6c9a32fae4942420701b6e087204f6
-This error can be returned only with an fanotify group that identifies
-filesystem objects by file handles.
-.TP
-.B ENOENT
-The filesystem object indicated by
-.I dirfd
-and
-.I pathname
-does not exist.
-This error also occurs when trying to remove a mark from an object
-which is not marked.
-.TP
-.B ENOMEM
-The necessary memory could not be allocated.
-.TP
-.B ENOSPC
-The number of marks for this user exceeds the limit and the
-.B FAN_UNLIMITED_MARKS
-flag was not specified when the fanotify file descriptor was created with
-.BR fanotify_init (2).
-See
-.BR fanotify (7)
-for details about this limit.
-.TP
-.B ENOSYS
-This kernel does not implement
-.BR fanotify_mark ().
-The fanotify API is available only if the kernel was configured with
-.BR CONFIG_FANOTIFY .
-.TP
-.B ENOTDIR
-.I flags
-contains
-.BR FAN_MARK_ONLYDIR ,
-and
-.I dirfd
-and
-.I pathname
-do not specify a directory.
-.TP
-.B ENOTDIR
-.I mask
-contains
-.BR FAN_RENAME ,
-and
-.I dirfd
-and
-.I pathname
-do not specify a directory.
-.TP
-.B ENOTDIR
-.I flags
-contains
-.BR FAN_MARK_IGNORE ,
-or the fanotify group was initialized with flag
-.BR FAN_REPORT_TARGET_FID ,
-and
-.I mask
-contains directory entry modification events
-(e.g.,
-.BR FAN_CREATE ,
-.BR FAN_DELETE ),
-or directory event flags
-(e.g.,
-.BR FAN_ONDIR ,
-.BR FAN_EVENT_ON_CHILD ),
-and
-.I dirfd
-and
-.I pathname
-do not specify a directory.
-.TP
-.B EOPNOTSUPP
-The object indicated by
-.I pathname
-is associated with a filesystem
-that does not support the encoding of file handles.
-This error can be returned only with an fanotify group that identifies
-filesystem objects by file handles.
-Calling
-.BR name_to_handle_at (2)
-with the flag
-.BR AT_HANDLE_FID " (since Linux 6.5)"
-.\" commit 96b2b072ee62be8ae68c8ecf14854c4d0505a8f8
-can be used as a test
-to check if a filesystem supports reporting events with file handles.
-.TP
-.B EPERM
-The operation is not permitted because the caller lacks a required capability.
-.TP
-.B EXDEV
-The filesystem object indicated by
-.I pathname
-resides within a filesystem subvolume (e.g.,
-.BR btrfs (5))
-which uses a different
-.I fsid
-than its root superblock.
-This error can be returned only with an fanotify group that identifies
-filesystem objects by file handles.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.37.
-.\" was introduced in Linux 2.6.36 and enabled in Linux 2.6.37.
-.SH NOTES
-.SS FAN_OPEN_EXEC and FAN_OPEN_EXEC_PERM
-When using either
-.B FAN_OPEN_EXEC
-or
-.B FAN_OPEN_EXEC_PERM
-within the
-.IR mask ,
-events of these types will be returned only when the direct execution of a
-program occurs.
-More specifically, this means that events of these types will be generated
-for files that are opened using
-.BR execve (2),
-.BR execveat (2),
-or
-.BR uselib (2).
-Events of these types will not be raised in the situation where an
-interpreter is passed (or reads) a file for interpretation.
-.P
-Additionally, if a mark has also been placed on the Linux dynamic
-linker, a user should also expect to receive an event for it when
-an ELF object has been successfully opened using
-.BR execve (2)
-or
-.BR execveat (2).
-.P
-For example, if the following ELF binary were to be invoked and a
-.B FAN_OPEN_EXEC
-mark has been placed on /:
-.P
-.in +4n
-.EX
-$ /bin/echo foo
-.EE
-.in
-.P
-The listening application in this case would receive
-.B FAN_OPEN_EXEC
-events for both the ELF binary and interpreter, respectively:
-.P
-.in +4n
-.EX
-/bin/echo
-/lib64/ld\-linux\-x86\-64.so.2
-.EE
-.in
-.SH BUGS
-The following bugs were present before Linux 3.16:
-.IP \[bu] 3
-.\" Fixed by commit 0a8dd2db579f7a0ac7033d6b857c3d5dbaa77563
-If
-.I flags
-contains
-.BR FAN_MARK_FLUSH ,
-.IR dirfd ,
-and
-.I pathname
-must specify a valid filesystem object, even though this object is not used.
-.IP \[bu]
-.\" Fixed by commit d4c7cf6cffb1bc711a833b5e304ba5bcfe76398b
-.BR readdir (2)
-does not generate a
-.B FAN_ACCESS
-event.
-.IP \[bu]
-.\" Fixed by commit cc299a98eb13a9853675a9cbb90b30b4011e1406
-If
-.BR fanotify_mark ()
-is called with
-.BR FAN_MARK_FLUSH ,
-.I flags
-is not checked for invalid values.
-.SH SEE ALSO
-.BR fanotify_init (2),
-.BR fanotify (7)
diff --git a/man2/fattach.2 b/man2/fattach.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/fattach.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/fchdir.2 b/man2/fchdir.2
deleted file mode 100644
index 60b9685da..000000000
--- a/man2/fchdir.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/chdir.2
diff --git a/man2/fchmod.2 b/man2/fchmod.2
deleted file mode 100644
index 92647d2e3..000000000
--- a/man2/fchmod.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/chmod.2
diff --git a/man2/fchmodat.2 b/man2/fchmodat.2
deleted file mode 100644
index 92647d2e3..000000000
--- a/man2/fchmodat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/chmod.2
diff --git a/man2/fchown.2 b/man2/fchown.2
deleted file mode 100644
index f0a5635ae..000000000
--- a/man2/fchown.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/chown.2
diff --git a/man2/fchown32.2 b/man2/fchown32.2
deleted file mode 100644
index b8b9452b6..000000000
--- a/man2/fchown32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/fchown.2
diff --git a/man2/fchownat.2 b/man2/fchownat.2
deleted file mode 100644
index f0a5635ae..000000000
--- a/man2/fchownat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/chown.2
diff --git a/man2/fcntl.2 b/man2/fcntl.2
deleted file mode 100644
index 1f9b2d430..000000000
--- a/man2/fcntl.2
+++ /dev/null
@@ -1,2113 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson;
-.\" and Copyright (C) 1998 Jamie Lokier;
-.\" and Copyright (C) 2002-2010, 2014 Michael Kerrisk;
-.\" and Copyright (C) 2014 Jeff Layton
-.\" and Copyright (C) 2014 David Herrmann
-.\" and Copyright (C) 2017 Jens Axboe
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-09-26 by Andries Brouwer <aeb@cwi.nl>
-.\" and again on 960413 and 980804 and 981223.
-.\" Modified 1998-12-11 by Jamie Lokier <jamie@imbolc.ucc.ie>
-.\" Applied correction by Christian Ehrhardt - aeb, 990712
-.\" Modified 2002-04-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added note on F_SETFL and O_DIRECT
-.\" Complete rewrite + expansion of material on file locking
-.\" Incorporated description of F_NOTIFY, drawing on
-.\" Stephen Rothwell's notes in Documentation/dnotify.txt.
-.\" Added description of F_SETLEASE and F_GETLEASE
-.\" Corrected and polished, aeb, 020527.
-.\" Modified 2004-03-03 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified description of file leases: fixed some errors of detail
-.\" Replaced the term "lease contestant" by "lease breaker"
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\" Modified 2004-12-08, added O_NOATIME after note from Martin Pool
-.\" 2004-12-10, mtk, noted F_GETOWN bug after suggestion from aeb.
-.\" 2005-04-08 Jamie Lokier <jamie@shareable.org>, mtk
-.\" Described behavior of F_SETOWN/F_SETSIG in
-.\" multithreaded processes, and generally cleaned
-.\" up the discussion of F_SETOWN.
-.\" 2005-05-20, Johannes Nicolai <johannes.nicolai@hpi.uni-potsdam.de>,
-.\" mtk: Noted F_SETOWN bug for socket file descriptor in Linux 2.4
-.\" and earlier. Added text on permissions required to send signal.
-.\" 2009-09-30, Michael Kerrisk
-.\" Note obsolete F_SETOWN behavior with threads.
-.\" Document F_SETOWN_EX and F_GETOWN_EX
-.\" 2010-06-17, Michael Kerrisk
-.\" Document F_SETPIPE_SZ and F_GETPIPE_SZ.
-.\" 2014-07-08, David Herrmann <dh.herrmann@gmail.com>
-.\" Document F_ADD_SEALS and F_GET_SEALS
-.\" 2017-06-26, Jens Axboe <axboe@kernel.dk>
-.\" Document F_{GET,SET}_RW_HINT and F_{GET,SET}_FILE_RW_HINT
-.\"
-.TH fcntl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-fcntl \- manipulate file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <fcntl.h>
-.P
-.BI "int fcntl(int " fd ", int " op ", ... /* " arg " */ );"
-.fi
-.SH DESCRIPTION
-.BR fcntl ()
-performs one of the operations described below on the open file descriptor
-.IR fd .
-The operation is determined by
-.IR op .
-.P
-.BR fcntl ()
-can take an optional third argument.
-Whether or not this argument is required is determined by
-.IR op .
-The required argument type is indicated in parentheses after each
-.I op
-name (in most cases, the required type is
-.IR int ,
-and we identify the argument using the name
-.IR arg ),
-or
-.I void
-is specified if the argument is not required.
-.P
-Certain of the operations below are supported only since a particular
-Linux kernel version.
-The preferred method of checking whether the host kernel supports
-a particular operation is to invoke
-.BR fcntl ()
-with the desired
-.I op
-value and then test whether the call failed with
-.BR EINVAL ,
-indicating that the kernel does not recognize this value.
-.SS Duplicating a file descriptor
-.TP
-.BR F_DUPFD " (\fIint\fP)"
-Duplicate the file descriptor
-.I fd
-using the lowest-numbered available file descriptor greater than or equal to
-.IR arg .
-This is different from
-.BR dup2 (2),
-which uses exactly the file descriptor specified.
-.IP
-On success, the new file descriptor is returned.
-.IP
-See
-.BR dup (2)
-for further details.
-.TP
-.BR F_DUPFD_CLOEXEC " (\fIint\fP; since Linux 2.6.24)"
-As for
-.BR F_DUPFD ,
-but additionally set the
-close-on-exec flag for the duplicate file descriptor.
-Specifying this flag permits a program to avoid an additional
-.BR fcntl ()
-.B F_SETFD
-operation to set the
-.B FD_CLOEXEC
-flag.
-For an explanation of why this flag is useful,
-see the description of
-.B O_CLOEXEC
-in
-.BR open (2).
-.SS File descriptor flags
-The following operations manipulate the flags associated with
-a file descriptor.
-Currently, only one such flag is defined:
-.BR FD_CLOEXEC ,
-the close-on-exec flag.
-If the
-.B FD_CLOEXEC
-bit is set,
-the file descriptor will automatically be closed during a successful
-.BR execve (2).
-(If the
-.BR execve (2)
-fails, the file descriptor is left open.)
-If the
-.B FD_CLOEXEC
-bit is not set, the file descriptor will remain open across an
-.BR execve (2).
-.TP
-.BR F_GETFD " (\fIvoid\fP)"
-Return (as the function result) the file descriptor flags;
-.I arg
-is ignored.
-.TP
-.BR F_SETFD " (\fIint\fP)"
-Set the file descriptor flags to the value specified by
-.IR arg .
-.P
-In multithreaded programs, using
-.BR fcntl ()
-.B F_SETFD
-to set the close-on-exec flag at the same time as another thread performs a
-.BR fork (2)
-plus
-.BR execve (2)
-is vulnerable to a race condition that may unintentionally leak
-the file descriptor to the program executed in the child process.
-See the discussion of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for details and a remedy to the problem.
-.SS File status flags
-Each open file description has certain associated status flags,
-initialized by
-.BR open (2)
-.\" or
-.\" .BR creat (2),
-and possibly modified by
-.BR fcntl ().
-Duplicated file descriptors
-(made with
-.BR dup (2),
-.BR fcntl (F_DUPFD),
-.BR fork (2),
-etc.) refer to the same open file description, and thus
-share the same file status flags.
-.P
-The file status flags and their semantics are described in
-.BR open (2).
-.TP
-.BR F_GETFL " (\fIvoid\fP)"
-Return (as the function result)
-the file access mode and the file status flags;
-.I arg
-is ignored.
-.TP
-.BR F_SETFL " (\fIint\fP)"
-Set the file status flags to the value specified by
-.IR arg .
-File access mode
-.RB ( O_RDONLY ", " O_WRONLY ", " O_RDWR )
-and file creation flags
-(i.e.,
-.BR O_CREAT ", " O_EXCL ", " O_NOCTTY ", " O_TRUNC )
-in
-.I arg
-are ignored.
-On Linux, this operation can change only the
-.BR O_APPEND ,
-.BR O_ASYNC ,
-.BR O_DIRECT ,
-.BR O_NOATIME ,
-and
-.B O_NONBLOCK
-flags.
-It is not possible to change the
-.B O_DSYNC
-and
-.B O_SYNC
-flags; see BUGS, below.
-.SS Advisory record locking
-Linux implements traditional ("process-associated") UNIX record locks,
-as standardized by POSIX.
-For a Linux-specific alternative with better semantics,
-see the discussion of open file description locks below.
-.P
-.BR F_SETLK ,
-.BR F_SETLKW ,
-and
-.B F_GETLK
-are used to acquire, release, and test for the existence of record
-locks (also known as byte-range, file-segment, or file-region locks).
-The third argument,
-.IR lock ,
-is a pointer to a structure that has at least the following fields
-(in unspecified order).
-.P
-.in +4n
-.EX
-struct flock {
- ...
- short l_type; /* Type of lock: F_RDLCK,
- F_WRLCK, F_UNLCK */
- short l_whence; /* How to interpret l_start:
- SEEK_SET, SEEK_CUR, SEEK_END */
- off_t l_start; /* Starting offset for lock */
- off_t l_len; /* Number of bytes to lock */
- pid_t l_pid; /* PID of process blocking our lock
- (set by F_GETLK and F_OFD_GETLK) */
- ...
-};
-.EE
-.in
-.P
-The
-.IR l_whence ", " l_start ", and " l_len
-fields of this structure specify the range of bytes we wish to lock.
-Bytes past the end of the file may be locked,
-but not bytes before the start of the file.
-.P
-.I l_start
-is the starting offset for the lock, and is interpreted
-relative to either:
-the start of the file (if
-.I l_whence
-is
-.BR SEEK_SET );
-the current file offset (if
-.I l_whence
-is
-.BR SEEK_CUR );
-or the end of the file (if
-.I l_whence
-is
-.BR SEEK_END ).
-In the final two cases,
-.I l_start
-can be a negative number provided the
-offset does not lie before the start of the file.
-.P
-.I l_len
-specifies the number of bytes to be locked.
-If
-.I l_len
-is positive, then the range to be locked covers bytes
-.I l_start
-up to and including
-.IR l_start + l_len \-1.
-Specifying 0 for
-.I l_len
-has the special meaning: lock all bytes starting at the
-location specified by
-.IR l_whence " and " l_start
-through to the end of file, no matter how large the file grows.
-.P
-POSIX.1-2001 allows (but does not require)
-an implementation to support a negative
-.I l_len
-value; if
-.I l_len
-is negative, the interval described by
-.I lock
-covers bytes
-.IR l_start + l_len
-up to and including
-.IR l_start \-1.
-This is supported since Linux 2.4.21 and Linux 2.5.49.
-.P
-The
-.I l_type
-field can be used to place a read
-.RB ( F_RDLCK )
-or a write
-.RB ( F_WRLCK )
-lock on a file.
-Any number of processes may hold a read lock (shared lock)
-on a file region, but only one process may hold a write lock
-(exclusive lock).
-An exclusive lock excludes all other locks,
-both shared and exclusive.
-A single process can hold only one type of lock on a file region;
-if a new lock is applied to an already-locked region,
-then the existing lock is converted to the new lock type.
-(Such conversions may involve splitting, shrinking, or coalescing with
-an existing lock if the byte range specified by the new lock does not
-precisely coincide with the range of the existing lock.)
-.TP
-.BR F_SETLK " (\fIstruct flock *\fP)"
-Acquire a lock (when
-.I l_type
-is
-.B F_RDLCK
-or
-.BR F_WRLCK )
-or release a lock (when
-.I l_type
-is
-.BR F_UNLCK )
-on the bytes specified by the
-.IR l_whence ", " l_start ", and " l_len
-fields of
-.IR lock .
-If a conflicting lock is held by another process,
-this call returns \-1 and sets
-.I errno
-to
-.B EACCES
-or
-.BR EAGAIN .
-(The error returned in this case differs across implementations,
-so POSIX requires a portable application to check for both errors.)
-.TP
-.BR F_SETLKW " (\fIstruct flock *\fP)"
-As for
-.BR F_SETLK ,
-but if a conflicting lock is held on the file, then wait for that
-lock to be released.
-If a signal is caught while waiting, then the call is interrupted
-and (after the signal handler has returned)
-returns immediately (with return value \-1 and
-.I errno
-set to
-.BR EINTR ;
-see
-.BR signal (7)).
-.TP
-.BR F_GETLK " (\fIstruct flock *\fP)"
-On input to this call,
-.I lock
-describes a lock we would like to place on the file.
-If the lock could be placed,
-.BR fcntl ()
-does not actually place it, but returns
-.B F_UNLCK
-in the
-.I l_type
-field of
-.I lock
-and leaves the other fields of the structure unchanged.
-.IP
-If one or more incompatible locks would prevent
-this lock being placed, then
-.BR fcntl ()
-returns details about one of those locks in the
-.IR l_type ", " l_whence ", " l_start ", and " l_len
-fields of
-.IR lock .
-If the conflicting lock is a traditional (process-associated) record lock,
-then the
-.I l_pid
-field is set to the PID of the process holding that lock.
-If the conflicting lock is an open file description lock, then
-.I l_pid
-is set to \-1.
-Note that the returned information
-may already be out of date by the time the caller inspects it.
-.P
-In order to place a read lock,
-.I fd
-must be open for reading.
-In order to place a write lock,
-.I fd
-must be open for writing.
-To place both types of lock, open a file read-write.
-.P
-When placing locks with
-.BR F_SETLKW ,
-the kernel detects
-.IR deadlocks ,
-whereby two or more processes have their
-lock requests mutually blocked by locks held by the other processes.
-For example, suppose process A holds a write lock on byte 100 of a file,
-and process B holds a write lock on byte 200.
-If each process then attempts to lock the byte already
-locked by the other process using
-.BR F_SETLKW ,
-then, without deadlock detection,
-both processes would remain blocked indefinitely.
-When the kernel detects such deadlocks,
-it causes one of the blocking lock requests to immediately fail with the error
-.BR EDEADLK ;
-an application that encounters such an error should release
-some of its locks to allow other applications to proceed before
-attempting to regain the locks that it requires.
-Circular deadlocks involving more than two processes are also detected.
-Note, however, that there are limitations to the kernel's
-deadlock-detection algorithm; see BUGS.
-.P
-As well as being removed by an explicit
-.BR F_UNLCK ,
-record locks are automatically released when the process terminates.
-.P
-Record locks are not inherited by a child created via
-.BR fork (2),
-but are preserved across an
-.BR execve (2).
-.P
-Because of the buffering performed by the
-.BR stdio (3)
-library, the use of record locking with routines in that package
-should be avoided; use
-.BR read (2)
-and
-.BR write (2)
-instead.
-.P
-The record locks described above are associated with the process
-(unlike the open file description locks described below).
-This has some unfortunate consequences:
-.IP \[bu] 3
-If a process closes
-.I any
-file descriptor referring to a file,
-then all of the process's locks on that file are released,
-regardless of the file descriptor(s) on which the locks were obtained.
-.\" (Additional file descriptors referring to the same file
-.\" may have been obtained by calls to
-.\" .BR open "(2), " dup "(2), " dup2 "(2), or " fcntl ().)
-This is bad: it means that a process can lose its locks on
-a file such as
-.I /etc/passwd
-or
-.I /etc/mtab
-when for some reason a library function decides to open, read,
-and close the same file.
-.IP \[bu]
-The threads in a process share locks.
-In other words,
-a multithreaded program can't use record locking to ensure
-that threads don't simultaneously access the same region of a file.
-.P
-Open file description locks solve both of these problems.
-.SS Open file description locks (non-POSIX)
-Open file description locks are advisory byte-range locks whose operation is
-in most respects identical to the traditional record locks described above.
-This lock type is Linux-specific,
-and available since Linux 3.15.
-(There is a proposal with the Austin Group
-.\" FIXME . Review progress into POSIX
-.\" http://austingroupbugs.net/view.php?id=768
-to include this lock type in the next revision of POSIX.1.)
-For an explanation of open file descriptions, see
-.BR open (2).
-.P
-The principal difference between the two lock types
-is that whereas traditional record locks
-are associated with a process,
-open file description locks are associated with the
-open file description on which they are acquired,
-much like locks acquired with
-.BR flock (2).
-Consequently (and unlike traditional advisory record locks),
-open file description locks are inherited across
-.BR fork (2)
-(and
-.BR clone (2)
-with
-.BR CLONE_FILES ),
-and are only automatically released on the last close
-of the open file description,
-instead of being released on any close of the file.
-.P
-Conflicting lock combinations
-(i.e., a read lock and a write lock or two write locks)
-where one lock is an open file description lock and the other
-is a traditional record lock conflict
-even when they are acquired by the same process on the same file descriptor.
-.P
-Open file description locks placed via the same open file description
-(i.e., via the same file descriptor,
-or via a duplicate of the file descriptor created by
-.BR fork (2),
-.BR dup (2),
-.BR fcntl ()
-.BR F_DUPFD ,
-and so on) are always compatible:
-if a new lock is placed on an already locked region,
-then the existing lock is converted to the new lock type.
-(Such conversions may result in splitting, shrinking, or coalescing with
-an existing lock as discussed above.)
-.P
-On the other hand, open file description locks may conflict with
-each other when they are acquired via different open file descriptions.
-Thus, the threads in a multithreaded program can use
-open file description locks to synchronize access to a file region
-by having each thread perform its own
-.BR open (2)
-on the file and applying locks via the resulting file descriptor.
-.P
-As with traditional advisory locks, the third argument to
-.BR fcntl (),
-.IR lock ,
-is a pointer to an
-.I flock
-structure.
-By contrast with traditional record locks, the
-.I l_pid
-field of that structure must be set to zero
-when using the operations described below.
-.P
-The operations for working with open file description locks are analogous
-to those used with traditional locks:
-.TP
-.BR F_OFD_SETLK " (\fIstruct flock *\fP)"
-Acquire an open file description lock (when
-.I l_type
-is
-.B F_RDLCK
-or
-.BR F_WRLCK )
-or release an open file description lock (when
-.I l_type
-is
-.BR F_UNLCK )
-on the bytes specified by the
-.IR l_whence ", " l_start ", and " l_len
-fields of
-.IR lock .
-If a conflicting lock is held by another process,
-this call returns \-1 and sets
-.I errno
-to
-.BR EAGAIN .
-.TP
-.BR F_OFD_SETLKW " (\fIstruct flock *\fP)"
-As for
-.BR F_OFD_SETLK ,
-but if a conflicting lock is held on the file, then wait for that lock to be
-released.
-If a signal is caught while waiting, then the call is interrupted
-and (after the signal handler has returned) returns immediately
-(with return value \-1 and
-.I errno
-set to
-.BR EINTR ;
-see
-.BR signal (7)).
-.TP
-.BR F_OFD_GETLK " (\fIstruct flock *\fP)"
-On input to this call,
-.I lock
-describes an open file description lock we would like to place on the file.
-If the lock could be placed,
-.BR fcntl ()
-does not actually place it, but returns
-.B F_UNLCK
-in the
-.I l_type
-field of
-.I lock
-and leaves the other fields of the structure unchanged.
-If one or more incompatible locks would prevent this lock being placed,
-then details about one of these locks are returned via
-.IR lock ,
-as described above for
-.BR F_GETLK .
-.P
-In the current implementation,
-.\" commit 57b65325fe34ec4c917bc4e555144b4a94d9e1f7
-no deadlock detection is performed for open file description locks.
-(This contrasts with process-associated record locks,
-for which the kernel does perform deadlock detection.)
-.\"
-.SS Mandatory locking
-.IR Warning :
-the Linux implementation of mandatory locking is unreliable.
-See BUGS below.
-Because of these bugs,
-and the fact that the feature is believed to be little used,
-since Linux 4.5, mandatory locking has been made an optional feature,
-governed by a configuration option
-.RB ( CONFIG_MANDATORY_FILE_LOCKING ).
-This feature is no longer supported at all in Linux 5.15 and above.
-.P
-By default, both traditional (process-associated) and open file description
-record locks are advisory.
-Advisory locks are not enforced and are useful only between
-cooperating processes.
-.P
-Both lock types can also be mandatory.
-Mandatory locks are enforced for all processes.
-If a process tries to perform an incompatible access (e.g.,
-.BR read (2)
-or
-.BR write (2))
-on a file region that has an incompatible mandatory lock,
-then the result depends upon whether the
-.B O_NONBLOCK
-flag is enabled for its open file description.
-If the
-.B O_NONBLOCK
-flag is not enabled, then
-the system call is blocked until the lock is removed
-or converted to a mode that is compatible with the access.
-If the
-.B O_NONBLOCK
-flag is enabled, then the system call fails with the error
-.BR EAGAIN .
-.P
-To make use of mandatory locks, mandatory locking must be enabled
-both on the filesystem that contains the file to be locked,
-and on the file itself.
-Mandatory locking is enabled on a filesystem
-using the "\-o mand" option to
-.BR mount (8),
-or the
-.B MS_MANDLOCK
-flag for
-.BR mount (2).
-Mandatory locking is enabled on a file by disabling
-group execute permission on the file and enabling the set-group-ID
-permission bit (see
-.BR chmod (1)
-and
-.BR chmod (2)).
-.P
-Mandatory locking is not specified by POSIX.
-Some other systems also support mandatory locking,
-although the details of how to enable it vary across systems.
-.\"
-.SS Lost locks
-When an advisory lock is obtained on a networked filesystem such as
-NFS it is possible that the lock might get lost.
-This may happen due to administrative action on the server, or due to a
-network partition (i.e., loss of network connectivity with the server)
-which lasts long enough for the server to assume
-that the client is no longer functioning.
-.P
-When the filesystem determines that a lock has been lost, future
-.BR read (2)
-or
-.BR write (2)
-requests may fail with the error
-.BR EIO .
-This error will persist until the lock is removed or the file
-descriptor is closed.
-Since Linux 3.12,
-.\" commit ef1820f9be27b6ad158f433ab38002ab8131db4d
-this happens at least for NFSv4 (including all minor versions).
-.P
-Some versions of UNIX send a signal
-.RB ( SIGLOST )
-in this circumstance.
-Linux does not define this signal, and does not provide any
-asynchronous notification of lost locks.
-.\"
-.SS Managing signals
-.BR F_GETOWN ,
-.BR F_SETOWN ,
-.BR F_GETOWN_EX ,
-.BR F_SETOWN_EX ,
-.BR F_GETSIG ,
-and
-.B F_SETSIG
-are used to manage I/O availability signals:
-.TP
-.BR F_GETOWN " (\fIvoid\fP)"
-Return (as the function result)
-the process ID or process group ID currently receiving
-.B SIGIO
-and
-.B SIGURG
-signals for events on file descriptor
-.IR fd .
-Process IDs are returned as positive values;
-process group IDs are returned as negative values (but see BUGS below).
-.I arg
-is ignored.
-.TP
-.BR F_SETOWN " (\fIint\fP)"
-Set the process ID or process group ID that will receive
-.B SIGIO
-and
-.B SIGURG
-signals for events on the file descriptor
-.IR fd .
-The target process or process group ID is specified in
-.IR arg .
-A process ID is specified as a positive value;
-a process group ID is specified as a negative value.
-Most commonly, the calling process specifies itself as the owner
-(that is,
-.I arg
-is specified as
-.BR getpid (2)).
-.IP
-As well as setting the file descriptor owner,
-one must also enable generation of signals on the file descriptor.
-This is done by using the
-.BR fcntl ()
-.B F_SETFL
-operation to set the
-.B O_ASYNC
-file status flag on the file descriptor.
-Subsequently, a
-.B SIGIO
-signal is sent whenever input or output becomes possible
-on the file descriptor.
-The
-.BR fcntl ()
-.B F_SETSIG
-operation can be used to obtain delivery of a signal other than
-.BR SIGIO .
-.IP
-Sending a signal to the owner process (group) specified by
-.B F_SETOWN
-is subject to the same permissions checks as are described for
-.BR kill (2),
-where the sending process is the one that employs
-.B F_SETOWN
-(but see BUGS below).
-If this permission check fails, then the signal is
-silently discarded.
-.IR Note :
-The
-.B F_SETOWN
-operation records the caller's credentials at the time of the
-.BR fcntl ()
-call,
-and it is these saved credentials that are used for the permission checks.
-.IP
-If the file descriptor
-.I fd
-refers to a socket,
-.B F_SETOWN
-also selects
-the recipient of
-.B SIGURG
-signals that are delivered when out-of-band
-data arrives on that socket.
-.RB ( SIGURG
-is sent in any situation where
-.BR select (2)
-would report the socket as having an "exceptional condition".)
-.\" The following appears to be rubbish. It doesn't seem to
-.\" be true according to the kernel source, and I can write
-.\" a program that gets a terminal-generated SIGIO even though
-.\" it is not the foreground process group of the terminal.
-.\" -- MTK, 8 Apr 05
-.\"
-.\" If the file descriptor
-.\" .I fd
-.\" refers to a terminal device, then SIGIO
-.\" signals are sent to the foreground process group of the terminal.
-.IP
-The following was true in Linux 2.6.x up to and including Linux 2.6.11:
-.RS
-.IP
-If a nonzero value is given to
-.B F_SETSIG
-in a multithreaded process running with a threading library
-that supports thread groups (e.g., NPTL),
-then a positive value given to
-.B F_SETOWN
-has a different meaning:
-.\" The relevant place in the (2.6) kernel source is the
-.\" 'switch' in fs/fcntl.c::send_sigio_to_task() -- MTK, Apr 2005
-instead of being a process ID identifying a whole process,
-it is a thread ID identifying a specific thread within a process.
-Consequently, it may be necessary to pass
-.B F_SETOWN
-the result of
-.BR gettid (2)
-instead of
-.BR getpid (2)
-to get sensible results when
-.B F_SETSIG
-is used.
-(In current Linux threading implementations,
-a main thread's thread ID is the same as its process ID.
-This means that a single-threaded program can equally use
-.BR gettid (2)
-or
-.BR getpid (2)
-in this scenario.)
-Note, however, that the statements in this paragraph do not apply
-to the
-.B SIGURG
-signal generated for out-of-band data on a socket:
-this signal is always sent to either a process or a process group,
-depending on the value given to
-.BR F_SETOWN .
-.\" send_sigurg()/send_sigurg_to_task() bypasses
-.\" kill_fasync()/send_sigio()/send_sigio_to_task()
-.\" to directly call send_group_sig_info()
-.\" -- MTK, Apr 2005 (kernel 2.6.11)
-.RE
-.IP
-The above behavior was accidentally dropped in Linux 2.6.12,
-and won't be restored.
-From Linux 2.6.32 onward, use
-.B F_SETOWN_EX
-to target
-.B SIGIO
-and
-.B SIGURG
-signals at a particular thread.
-.TP
-.BR F_GETOWN_EX " (\fIstruct f_owner_ex *\fP) (since Linux 2.6.32)"
-Return the current file descriptor owner settings
-as defined by a previous
-.B F_SETOWN_EX
-operation.
-The information is returned in the structure pointed to by
-.IR arg ,
-which has the following form:
-.IP
-.in +4n
-.EX
-struct f_owner_ex {
- int type;
- pid_t pid;
-};
-.EE
-.in
-.IP
-The
-.I type
-field will have one of the values
-.BR F_OWNER_TID ,
-.BR F_OWNER_PID ,
-or
-.BR F_OWNER_PGRP .
-The
-.I pid
-field is a positive integer representing a thread ID, process ID,
-or process group ID.
-See
-.B F_SETOWN_EX
-for more details.
-.TP
-.BR F_SETOWN_EX " (\fIstruct f_owner_ex *\fP) (since Linux 2.6.32)"
-This operation performs a similar task to
-.BR F_SETOWN .
-It allows the caller to direct I/O availability signals
-to a specific thread, process, or process group.
-The caller specifies the target of signals via
-.IR arg ,
-which is a pointer to an
-.I f_owner_ex
-structure.
-The
-.I type
-field has one of the following values, which define how
-.I pid
-is interpreted:
-.RS
-.TP
-.B F_OWNER_TID
-Send the signal to the thread whose thread ID
-(the value returned by a call to
-.BR clone (2)
-or
-.BR gettid (2))
-is specified in
-.IR pid .
-.TP
-.B F_OWNER_PID
-Send the signal to the process whose ID
-is specified in
-.IR pid .
-.TP
-.B F_OWNER_PGRP
-Send the signal to the process group whose ID
-is specified in
-.IR pid .
-(Note that, unlike with
-.BR F_SETOWN ,
-a process group ID is specified as a positive value here.)
-.RE
-.TP
-.BR F_GETSIG " (\fIvoid\fP)"
-Return (as the function result)
-the signal sent when input or output becomes possible.
-A value of zero means
-.B SIGIO
-is sent.
-Any other value (including
-.BR SIGIO )
-is the
-signal sent instead, and in this case additional info is available to
-the signal handler if installed with
-.BR SA_SIGINFO .
-.I arg
-is ignored.
-.TP
-.BR F_SETSIG " (\fIint\fP)"
-Set the signal sent when input or output becomes possible
-to the value given in
-.IR arg .
-A value of zero means to send the default
-.B SIGIO
-signal.
-Any other value (including
-.BR SIGIO )
-is the signal to send instead, and in this case additional info
-is available to the signal handler if installed with
-.BR SA_SIGINFO .
-.\"
-.\" The following was true only up until Linux 2.6.11:
-.\"
-.\" Additionally, passing a nonzero value to
-.\" .B F_SETSIG
-.\" changes the signal recipient from a whole process to a specific thread
-.\" within a process.
-.\" See the description of
-.\" .B F_SETOWN
-.\" for more details.
-.IP
-By using
-.B F_SETSIG
-with a nonzero value, and setting
-.B SA_SIGINFO
-for the
-signal handler (see
-.BR sigaction (2)),
-extra information about I/O events is passed to
-the handler in a
-.I siginfo_t
-structure.
-If the
-.I si_code
-field indicates the source is
-.BR SI_SIGIO ,
-the
-.I si_fd
-field gives the file descriptor associated with the event.
-Otherwise,
-there is no indication which file descriptors are pending, and you
-should use the usual mechanisms
-.RB ( select (2),
-.BR poll (2),
-.BR read (2)
-with
-.B O_NONBLOCK
-set, etc.) to determine which file descriptors are available for I/O.
-.IP
-Note that the file descriptor provided in
-.I si_fd
-is the one that was specified during the
-.B F_SETSIG
-operation.
-This can lead to an unusual corner case.
-If the file descriptor is duplicated
-.RB ( dup (2)
-or similar), and the original file descriptor is closed,
-then I/O events will continue to be generated, but the
-.I si_fd
-field will contain the number of the now closed file descriptor.
-.IP
-By selecting a real-time signal (value >=
-.BR SIGRTMIN ),
-multiple I/O events may be queued using the same signal numbers.
-(Queuing is dependent on available memory.)
-Extra information is available
-if
-.B SA_SIGINFO
-is set for the signal handler, as above.
-.IP
-Note that Linux imposes a limit on the
-number of real-time signals that may be queued to a
-process (see
-.BR getrlimit (2)
-and
-.BR signal (7))
-and if this limit is reached, then the kernel reverts to
-delivering
-.BR SIGIO ,
-and this signal is delivered to the entire
-process rather than to a specific thread.
-.\" See fs/fcntl.c::send_sigio_to_task() (2.4/2.6) sources -- MTK, Apr 05
-.P
-Using these mechanisms, a program can implement fully asynchronous I/O
-without using
-.BR select (2)
-or
-.BR poll (2)
-most of the time.
-.P
-The use of
-.B O_ASYNC
-is specific to BSD and Linux.
-The only use of
-.B F_GETOWN
-and
-.B F_SETOWN
-specified in POSIX.1 is in conjunction with the use of the
-.B SIGURG
-signal on sockets.
-(POSIX does not specify the
-.B SIGIO
-signal.)
-.BR F_GETOWN_EX ,
-.BR F_SETOWN_EX ,
-.BR F_GETSIG ,
-and
-.B F_SETSIG
-are Linux-specific.
-POSIX has asynchronous I/O and the
-.I aio_sigevent
-structure to achieve similar things; these are also available
-in Linux as part of the GNU C Library (glibc).
-.SS Leases
-.B F_SETLEASE
-and
-.B F_GETLEASE
-(Linux 2.4 onward) are used to establish a new lease,
-and retrieve the current lease, on the open file description
-referred to by the file descriptor
-.IR fd .
-A file lease provides a mechanism whereby the process holding
-the lease (the "lease holder") is notified (via delivery of a signal)
-when a process (the "lease breaker") tries to
-.BR open (2)
-or
-.BR truncate (2)
-the file referred to by that file descriptor.
-.TP
-.BR F_SETLEASE " (\fIint\fP)"
-Set or remove a file lease according to which of the following
-values is specified in the integer
-.IR arg :
-.RS
-.TP
-.B F_RDLCK
-Take out a read lease.
-This will cause the calling process to be notified when
-the file is opened for writing or is truncated.
-.\" The following became true in Linux 2.6.10:
-.\" See the man-pages-2.09 Changelog for further info.
-A read lease can be placed only on a file descriptor that
-is opened read-only.
-.TP
-.B F_WRLCK
-Take out a write lease.
-This will cause the caller to be notified when
-the file is opened for reading or writing or is truncated.
-A write lease may be placed on a file only if there are no
-other open file descriptors for the file.
-.TP
-.B F_UNLCK
-Remove our lease from the file.
-.RE
-.P
-Leases are associated with an open file description (see
-.BR open (2)).
-This means that duplicate file descriptors (created by, for example,
-.BR fork (2)
-or
-.BR dup (2))
-refer to the same lease, and this lease may be modified
-or released using any of these descriptors.
-Furthermore, the lease is released by either an explicit
-.B F_UNLCK
-operation on any of these duplicate file descriptors, or when all
-such file descriptors have been closed.
-.P
-Leases may be taken out only on regular files.
-An unprivileged process may take out a lease only on a file whose
-UID (owner) matches the filesystem UID of the process.
-A process with the
-.B CAP_LEASE
-capability may take out leases on arbitrary files.
-.TP
-.BR F_GETLEASE " (\fIvoid\fP)"
-Indicates what type of lease is associated with the file descriptor
-.I fd
-by returning either
-.BR F_RDLCK ", " F_WRLCK ", or " F_UNLCK ,
-indicating, respectively, a read lease, a write lease, or no lease.
-.I arg
-is ignored.
-.P
-When a process (the "lease breaker") performs an
-.BR open (2)
-or
-.BR truncate (2)
-that conflicts with a lease established via
-.BR F_SETLEASE ,
-the system call is blocked by the kernel and
-the kernel notifies the lease holder by sending it a signal
-.RB ( SIGIO
-by default).
-The lease holder should respond to receipt of this signal by doing
-whatever cleanup is required in preparation for the file to be
-accessed by another process (e.g., flushing cached buffers) and
-then either remove or downgrade its lease.
-A lease is removed by performing an
-.B F_SETLEASE
-operation specifying
-.I arg
-as
-.BR F_UNLCK .
-If the lease holder currently holds a write lease on the file,
-and the lease breaker is opening the file for reading,
-then it is sufficient for the lease holder to downgrade
-the lease to a read lease.
-This is done by performing an
-.B F_SETLEASE
-operation specifying
-.I arg
-as
-.BR F_RDLCK .
-.P
-If the lease holder fails to downgrade or remove the lease within
-the number of seconds specified in
-.IR /proc/sys/fs/lease\-break\-time ,
-then the kernel forcibly removes or downgrades the lease holder's lease.
-.P
-Once a lease break has been initiated,
-.B F_GETLEASE
-returns the target lease type (either
-.B F_RDLCK
-or
-.BR F_UNLCK ,
-depending on what would be compatible with the lease breaker)
-until the lease holder voluntarily downgrades or removes the lease or
-the kernel forcibly does so after the lease break timer expires.
-.P
-Once the lease has been voluntarily or forcibly removed or downgraded,
-and assuming the lease breaker has not unblocked its system call,
-the kernel permits the lease breaker's system call to proceed.
-.P
-If the lease breaker's blocked
-.BR open (2)
-or
-.BR truncate (2)
-is interrupted by a signal handler,
-then the system call fails with the error
-.BR EINTR ,
-but the other steps still occur as described above.
-If the lease breaker is killed by a signal while blocked in
-.BR open (2)
-or
-.BR truncate (2),
-then the other steps still occur as described above.
-If the lease breaker specifies the
-.B O_NONBLOCK
-flag when calling
-.BR open (2),
-then the call immediately fails with the error
-.BR EWOULDBLOCK ,
-but the other steps still occur as described above.
-.P
-The default signal used to notify the lease holder is
-.BR SIGIO ,
-but this can be changed using the
-.B F_SETSIG
-operation to
-.BR fcntl ().
-If an
-.B F_SETSIG
-operation is performed (even one specifying
-.BR SIGIO ),
-and the signal
-handler is established using
-.BR SA_SIGINFO ,
-then the handler will receive a
-.I siginfo_t
-structure as its second argument, and the
-.I si_fd
-field of this argument will hold the file descriptor of the leased file
-that has been accessed by another process.
-(This is useful if the caller holds leases against multiple files.)
-.SS File and directory change notification (dnotify)
-.TP
-.BR F_NOTIFY " (\fIint\fP)"
-(Linux 2.4 onward)
-Provide notification when the directory referred to by
-.I fd
-or any of the files that it contains is changed.
-The events to be notified are specified in
-.IR arg ,
-which is a bit mask specified by ORing together zero or more of
-the following bits:
-.P
-.RS
-.PD 0
-.TP
-.B DN_ACCESS
-A file was accessed
-.RB ( read (2),
-.BR pread (2),
-.BR readv (2),
-and similar).
-.TP
-.B DN_MODIFY
-A file was modified
-.RB ( write (2),
-.BR pwrite (2),
-.BR writev (2),
-.BR truncate (2),
-.BR ftruncate (2),
-and similar).
-.TP
-.B DN_CREATE
-A file was created
-.RB ( open (2),
-.BR creat (2),
-.BR mknod (2),
-.BR mkdir (2),
-.BR link (2),
-.BR symlink (2),
-.BR rename (2)
-into this directory).
-.TP
-.B DN_DELETE
-A file was unlinked
-.RB ( unlink (2),
-.BR rename (2)
-to another directory,
-.BR rmdir (2)).
-.TP
-.B DN_RENAME
-A file was renamed within this directory
-.RB ( rename (2)).
-.TP
-.B DN_ATTRIB
-The attributes of a file were changed
-.RB ( chown (2),
-.BR chmod (2),
-.BR utime (2),
-.BR utimensat (2),
-and similar).
-.PD
-.RE
-.IP
-(In order to obtain these definitions, the
-.B _GNU_SOURCE
-feature test macro must be defined before including
-.I any
-header files.)
-.IP
-Directory notifications are normally "one-shot", and the application
-must reregister to receive further notifications.
-Alternatively, if
-.B DN_MULTISHOT
-is included in
-.IR arg ,
-then notification will remain in effect until explicitly removed.
-.IP
-.\" The following does seem a poor API-design choice...
-A series of
-.B F_NOTIFY
-requests is cumulative, with the events in
-.I arg
-being added to the set already monitored.
-To disable notification of all events, make an
-.B F_NOTIFY
-call specifying
-.I arg
-as 0.
-.IP
-Notification occurs via delivery of a signal.
-The default signal is
-.BR SIGIO ,
-but this can be changed using the
-.B F_SETSIG
-operation to
-.BR fcntl ().
-(Note that
-.B SIGIO
-is one of the nonqueuing standard signals;
-switching to the use of a real-time signal means that
-multiple notifications can be queued to the process.)
-In the latter case, the signal handler receives a
-.I siginfo_t
-structure as its second argument (if the handler was
-established using
-.BR SA_SIGINFO )
-and the
-.I si_fd
-field of this structure contains the file descriptor which
-generated the notification (useful when establishing notification
-on multiple directories).
-.IP
-Especially when using
-.BR DN_MULTISHOT ,
-a real-time signal should be used for notification,
-so that multiple notifications can be queued.
-.IP
-.B NOTE:
-New applications should use the
-.I inotify
-interface (available since Linux 2.6.13),
-which provides a much superior interface for obtaining notifications of
-filesystem events.
-See
-.BR inotify (7).
-.SS Changing the capacity of a pipe
-.TP
-.BR F_SETPIPE_SZ " (\fIint\fP; since Linux 2.6.35)"
-Change the capacity of the pipe referred to by
-.I fd
-to be at least
-.I arg
-bytes.
-An unprivileged process can adjust the pipe capacity to any value
-between the system page size and the limit defined in
-.I /proc/sys/fs/pipe\-max\-size
-(see
-.BR proc (5)).
-Attempts to set the pipe capacity below the page size are silently
-rounded up to the page size.
-Attempts by an unprivileged process to set the pipe capacity above the limit in
-.I /proc/sys/fs/pipe\-max\-size
-yield the error
-.BR EPERM ;
-a privileged process
-.RB ( CAP_SYS_RESOURCE )
-can override the limit.
-.IP
-When allocating the buffer for the pipe,
-the kernel may use a capacity larger than
-.IR arg ,
-if that is convenient for the implementation.
-(In the current implementation,
-the allocation is the next higher power-of-two page-size multiple
-of the requested size.)
-The actual capacity (in bytes) that is set is returned as the function result.
-.IP
-Attempting to set the pipe capacity smaller than the amount
-of buffer space currently used to store data produces the error
-.BR EBUSY .
-.IP
-Note that because of the way the pages of the pipe buffer
-are employed when data is written to the pipe,
-the number of bytes that can be written may be less than the nominal size,
-depending on the size of the writes.
-.TP
-.BR F_GETPIPE_SZ " (\fIvoid\fP; since Linux 2.6.35)"
-Return (as the function result) the capacity of the pipe referred to by
-.IR fd .
-.\"
-.SS File Sealing
-File seals limit the set of allowed operations on a given file.
-For each seal that is set on a file,
-a specific set of operations will fail with
-.B EPERM
-on this file from now on.
-The file is said to be sealed.
-The default set of seals depends on the type of the underlying
-file and filesystem.
-For an overview of file sealing, a discussion of its purpose,
-and some code examples, see
-.BR memfd_create (2).
-.P
-Currently,
-file seals can be applied only to a file descriptor returned by
-.BR memfd_create (2)
-(if the
-.B MFD_ALLOW_SEALING
-flag was employed).
-On other filesystems, all
-.BR fcntl ()
-operations that operate on seals will return
-.BR EINVAL .
-.P
-Seals are a property of an inode.
-Thus, all open file descriptors referring to the same inode share
-the same set of seals.
-Furthermore, seals can never be removed, only added.
-.TP
-.BR F_ADD_SEALS " (\fIint\fP; since Linux 3.17)"
-Add the seals given in the bit-mask argument
-.I arg
-to the set of seals of the inode referred to by the file descriptor
-.IR fd .
-Seals cannot be removed again.
-Once this call succeeds, the seals are enforced by the kernel immediately.
-If the current set of seals includes
-.B F_SEAL_SEAL
-(see below), then this call will be rejected with
-.BR EPERM .
-Adding a seal that is already set is a no-op, provided that
-.B F_SEAL_SEAL
-is not set already.
-In order to place a seal, the file descriptor
-.I fd
-must be writable.
-.TP
-.BR F_GET_SEALS " (\fIvoid\fP; since Linux 3.17)"
-Return (as the function result) the current set of seals
-of the inode referred to by
-.IR fd .
-If no seals are set, 0 is returned.
-If the file does not support sealing, \-1 is returned and
-.I errno
-is set to
-.BR EINVAL .
-.P
-The following seals are available:
-.TP
-.B F_SEAL_SEAL
-If this seal is set, any further call to
-.BR fcntl ()
-with
-.B F_ADD_SEALS
-fails with the error
-.BR EPERM .
-Therefore, this seal prevents any modifications to the set of seals itself.
-If the initial set of seals of a file includes
-.BR F_SEAL_SEAL ,
-then this effectively causes the set of seals to be constant and locked.
-.TP
-.B F_SEAL_SHRINK
-If this seal is set, the file in question cannot be reduced in size.
-This affects
-.BR open (2)
-with the
-.B O_TRUNC
-flag as well as
-.BR truncate (2)
-and
-.BR ftruncate (2).
-Those calls fail with
-.B EPERM
-if you try to shrink the file in question.
-Increasing the file size is still possible.
-.TP
-.B F_SEAL_GROW
-If this seal is set, the size of the file in question cannot be increased.
-This affects
-.BR write (2)
-beyond the end of the file,
-.BR truncate (2),
-.BR ftruncate (2),
-and
-.BR fallocate (2).
-These calls fail with
-.B EPERM
-if you use them to increase the file size.
-If you keep the size or shrink it, those calls still work as expected.
-.TP
-.B F_SEAL_WRITE
-If this seal is set, you cannot modify the contents of the file.
-Note that shrinking or growing the size of the file is
-still possible and allowed.
-.\" One or more other seals are typically used with F_SEAL_WRITE
-.\" because, given a file with the F_SEAL_WRITE seal set, then,
-.\" while it would no longer be possible to (say) write zeros into
-.\" the last 100 bytes of a file, it would still be possible
-.\" to (say) shrink the file by 100 bytes using ftruncate(), and
-.\" then increase the file size by 100 bytes, which would have
-.\" the effect of replacing the last hundred bytes by zeros.
-.\"
-Thus, this seal is normally used in combination with one of the other seals.
-This seal affects
-.BR write (2)
-and
-.BR fallocate (2)
-(only in combination with the
-.B FALLOC_FL_PUNCH_HOLE
-flag).
-Those calls fail with
-.B EPERM
-if this seal is set.
-Furthermore, trying to create new shared, writable memory-mappings via
-.BR mmap (2)
-will also fail with
-.BR EPERM .
-.IP
-Using the
-.B F_ADD_SEALS
-operation to set the
-.B F_SEAL_WRITE
-seal fails with
-.B EBUSY
-if any writable, shared mapping exists.
-Such mappings must be unmapped before you can add this seal.
-Furthermore, if there are any asynchronous I/O operations
-.RB ( io_submit (2))
-pending on the file,
-all outstanding writes will be discarded.
-.TP
-.BR F_SEAL_FUTURE_WRITE " (since Linux 5.1)"
-The effect of this seal is similar to
-.BR F_SEAL_WRITE ,
-but the contents of the file can still be modified via
-shared writable mappings that were created prior to the seal being set.
-Any attempt to create a new writable mapping on the file via
-.BR mmap (2)
-will fail with
-.BR EPERM .
-Likewise, an attempt to write to the file via
-.BR write (2)
-will fail with
-.BR EPERM .
-.IP
-Using this seal,
-one process can create a memory buffer that it can continue to modify
-while sharing that buffer on a "read-only" basis with other processes.
-.\"
-.SS File read/write hints
-Write lifetime hints can be used to inform the kernel about the relative
-expected lifetime of writes on a given inode or
-via a particular open file description.
-(See
-.BR open (2)
-for an explanation of open file descriptions.)
-In this context, the term "write lifetime" means
-the expected time the data will live on media, before
-being overwritten or erased.
-.P
-An application may use the different hint values specified below to
-separate writes into different write classes,
-so that multiple users or applications running on a single storage back-end
-can aggregate their I/O patterns in a consistent manner.
-However, there are no functional semantics implied by these flags,
-and different I/O classes can use the write lifetime hints
-in arbitrary ways, so long as the hints are used consistently.
-.P
-The following operations can be applied to the file descriptor,
-.IR fd :
-.TP
-.BR F_GET_RW_HINT " (\fIuint64_t *\fP; since Linux 4.13)"
-Returns the value of the read/write hint associated with the underlying inode
-referred to by
-.IR fd .
-.TP
-.BR F_SET_RW_HINT " (\fIuint64_t *\fP; since Linux 4.13)"
-Sets the read/write hint value associated with the
-underlying inode referred to by
-.IR fd .
-This hint persists until either it is explicitly modified or
-the underlying filesystem is unmounted.
-.TP
-.BR F_GET_FILE_RW_HINT " (\fIuint64_t *\fP; since Linux 4.13)"
-Returns the value of the read/write hint associated with
-the open file description referred to by
-.IR fd .
-.TP
-.BR F_SET_FILE_RW_HINT " (\fIuint64_t *\fP; since Linux 4.13)"
-Sets the read/write hint value associated with the open file description
-referred to by
-.IR fd .
-.P
-If an open file description has not been assigned a read/write hint,
-then it shall use the value assigned to the inode, if any.
-.P
-The following read/write
-hints are valid since Linux 4.13:
-.TP
-.B RWH_WRITE_LIFE_NOT_SET
-No specific hint has been set.
-This is the default value.
-.TP
-.B RWH_WRITE_LIFE_NONE
-No specific write lifetime is associated with this file or inode.
-.TP
-.B RWH_WRITE_LIFE_SHORT
-Data written to this inode or via this open file description
-is expected to have a short lifetime.
-.TP
-.B RWH_WRITE_LIFE_MEDIUM
-Data written to this inode or via this open file description
-is expected to have a lifetime longer than
-data written with
-.BR RWH_WRITE_LIFE_SHORT .
-.TP
-.B RWH_WRITE_LIFE_LONG
-Data written to this inode or via this open file description
-is expected to have a lifetime longer than
-data written with
-.BR RWH_WRITE_LIFE_MEDIUM .
-.TP
-.B RWH_WRITE_LIFE_EXTREME
-Data written to this inode or via this open file description
-is expected to have a lifetime longer than
-data written with
-.BR RWH_WRITE_LIFE_LONG .
-.P
-All the write-specific hints are relative to each other,
-and no individual absolute meaning should be attributed to them.
-.SH RETURN VALUE
-For a successful call, the return value depends on the operation:
-.TP
-.B F_DUPFD
-The new file descriptor.
-.TP
-.B F_GETFD
-Value of file descriptor flags.
-.TP
-.B F_GETFL
-Value of file status flags.
-.TP
-.B F_GETLEASE
-Type of lease held on file descriptor.
-.TP
-.B F_GETOWN
-Value of file descriptor owner.
-.TP
-.B F_GETSIG
-Value of signal sent when read or write becomes possible, or zero
-for traditional
-.B SIGIO
-behavior.
-.TP
-.B F_GETPIPE_SZ
-.TQ
-.B F_SETPIPE_SZ
-The pipe capacity.
-.TP
-.B F_GET_SEALS
-A bit mask identifying the seals that have been set
-for the inode referred to by
-.IR fd .
-.TP
-All other operations
-Zero.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.BR EACCES " or " EAGAIN
-Operation is prohibited by locks held by other processes.
-.TP
-.B EAGAIN
-The operation is prohibited because the file has been memory-mapped by
-another process.
-.TP
-.B EBADF
-.I fd
-is not an open file descriptor.
-.TP
-.B EBADF
-.I op
-is
-.B F_SETLK
-or
-.B F_SETLKW
-and the file descriptor open mode doesn't match with the
-type of lock requested.
-.TP
-.B EBUSY
-.I op
-is
-.B F_SETPIPE_SZ
-and the new pipe capacity specified in
-.I arg
-is smaller than the amount of buffer space currently
-used to store data in the pipe.
-.TP
-.B EBUSY
-.I op
-is
-.BR F_ADD_SEALS ,
-.I arg
-includes
-.BR F_SEAL_WRITE ,
-and there exists a writable, shared mapping on the file referred to by
-.IR fd .
-.TP
-.B EDEADLK
-It was detected that the specified
-.B F_SETLKW
-operation would cause a deadlock.
-.TP
-.B EFAULT
-.I lock
-is outside your accessible address space.
-.TP
-.B EINTR
-.I op
-is
-.B F_SETLKW
-or
-.B F_OFD_SETLKW
-and the operation was interrupted by a signal; see
-.BR signal (7).
-.TP
-.B EINTR
-.I op
-is
-.BR F_GETLK ,
-.BR F_SETLK ,
-.BR F_OFD_GETLK ,
-or
-.BR F_OFD_SETLK ,
-and the operation was interrupted by a signal before the lock was checked or
-acquired.
-This is most likely when locking a remote file (e.g., locking over
-NFS), but it can sometimes happen locally.
-.TP
-.B EINVAL
-The value specified in
-.I op
-is not recognized by this kernel.
-.TP
-.B EINVAL
-.I op
-is
-.B F_ADD_SEALS
-and
-.I arg
-includes an unrecognized sealing bit.
-.TP
-.B EINVAL
-.I op
-is
-.B F_ADD_SEALS
-or
-.B F_GET_SEALS
-and the filesystem containing the inode referred to by
-.I fd
-does not support sealing.
-.TP
-.B EINVAL
-.I op
-is
-.B F_DUPFD
-and
-.I arg
-is negative or is greater than the maximum allowable value
-(see the discussion of
-.B RLIMIT_NOFILE
-in
-.BR getrlimit (2)).
-.TP
-.B EINVAL
-.I op
-is
-.B F_SETSIG
-and
-.I arg
-is not an allowable signal number.
-.TP
-.B EINVAL
-.I op
-is
-.BR F_OFD_SETLK ,
-.BR F_OFD_SETLKW ,
-or
-.BR F_OFD_GETLK ,
-and
-.I l_pid
-was not specified as zero.
-.TP
-.B EMFILE
-.I op
-is
-.B F_DUPFD
-and the per-process limit on the number of open file descriptors
-has been reached.
-.TP
-.B ENOLCK
-Too many segment locks open, lock table is full, or a remote locking
-protocol failed (e.g., locking over NFS).
-.TP
-.B ENOTDIR
-.B F_NOTIFY
-was specified in
-.IR op ,
-but
-.I fd
-does not refer to a directory.
-.TP
-.B EPERM
-.I op
-is
-.B F_SETPIPE_SZ
-and the soft or hard user pipe limit has been reached; see
-.BR pipe (7).
-.TP
-.B EPERM
-Attempted to clear the
-.B O_APPEND
-flag on a file that has the append-only attribute set.
-.TP
-.B EPERM
-.I op
-was
-.BR F_ADD_SEALS ,
-but
-.I fd
-was not open for writing
-or the current set of seals on the file already includes
-.BR F_SEAL_SEAL .
-.SH STANDARDS
-POSIX.1-2008.
-.P
-.BR F_GETOWN_EX ,
-.BR F_SETOWN_EX ,
-.BR F_SETPIPE_SZ ,
-.BR F_GETPIPE_SZ ,
-.BR F_GETSIG ,
-.BR F_SETSIG ,
-.BR F_NOTIFY ,
-.BR F_GETLEASE ,
-and
-.B F_SETLEASE
-are Linux-specific.
-(Define the
-.B _GNU_SOURCE
-macro to obtain these definitions.)
-.\" .P
-.\" SVr4 documents additional EIO, ENOLINK and EOVERFLOW error conditions.
-.P
-.BR F_OFD_SETLK ,
-.BR F_OFD_SETLKW ,
-and
-.B F_OFD_GETLK
-are Linux-specific (and one must define
-.B _GNU_SOURCE
-to obtain their definitions),
-but work is being done to have them included in the next version of POSIX.1.
-.P
-.B F_ADD_SEALS
-and
-.B F_GET_SEALS
-are Linux-specific.
-.\" FIXME . Once glibc adds support, add a note about FTM requirements
-.SH HISTORY
-SVr4, 4.3BSD, POSIX.1-2001.
-.P
-Only the operations
-.BR F_DUPFD ,
-.BR F_GETFD ,
-.BR F_SETFD ,
-.BR F_GETFL ,
-.BR F_SETFL ,
-.BR F_GETLK ,
-.BR F_SETLK ,
-and
-.B F_SETLKW
-are specified in POSIX.1-2001.
-.P
-.B F_GETOWN
-and
-.B F_SETOWN
-are specified in POSIX.1-2001.
-(To get their definitions, define either
-.\" .BR _BSD_SOURCE ,
-.\" or
-.B _XOPEN_SOURCE
-with the value 500 or greater, or
-.B _POSIX_C_SOURCE
-with the value 200809L or greater.)
-.P
-.B F_DUPFD_CLOEXEC
-is specified in POSIX.1-2008.
-(To get this definition, define
-.B _POSIX_C_SOURCE
-with the value 200809L or greater, or
-.B _XOPEN_SOURCE
-with the value 700 or greater.)
-.SH NOTES
-The errors returned by
-.BR dup2 (2)
-are different from those returned by
-.BR F_DUPFD .
-.\"
-.SS File locking
-The original Linux
-.BR fcntl ()
-system call was not designed to handle large file offsets
-(in the
-.I flock
-structure).
-Consequently, an
-.BR fcntl64 ()
-system call was added in Linux 2.4.
-The newer system call employs a different structure for file locking,
-.IR flock64 ,
-and corresponding operations,
-.BR F_GETLK64 ,
-.BR F_SETLK64 ,
-and
-.BR F_SETLKW64 .
-However, these details can be ignored by applications using glibc, whose
-.BR fcntl ()
-wrapper function transparently employs the more recent system call
-where it is available.
-.\"
-.SS Record locks
-Since Linux 2.0, there is no interaction between the types of lock
-placed by
-.BR flock (2)
-and
-.BR fcntl ().
-.P
-Several systems have more fields in
-.I "struct flock"
-such as, for example,
-.I l_sysid
-(to identify the machine where the lock is held).
-.\" e.g., Solaris 8 documents this field in fcntl(2), and Irix 6.5
-.\" documents it in fcntl(5). mtk, May 2007
-.\" Also, FreeBSD documents it (Apr 2014).
-Clearly,
-.I l_pid
-alone is not going to be very useful if the process holding the lock
-may live on a different machine;
-on Linux, while present on some architectures (such as MIPS32),
-this field is not used.
-.P
-The original Linux
-.BR fcntl ()
-system call was not designed to handle large file offsets
-(in the
-.I flock
-structure).
-Consequently, an
-.BR fcntl64 ()
-system call was added in Linux 2.4.
-The newer system call employs a different structure for file locking,
-.IR flock64 ,
-and corresponding operations,
-.BR F_GETLK64 ,
-.BR F_SETLK64 ,
-and
-.BR F_SETLKW64 .
-However, these details can be ignored by applications using glibc, whose
-.BR fcntl ()
-wrapper function transparently employs the more recent system call
-where it is available.
-.SS Record locking and NFS
-Before Linux 3.12, if an NFSv4 client
-loses contact with the server for a period of time
-(defined as more than 90 seconds with no communication),
-.\"
-.\" Neil Brown: With NFSv3 the failure mode is the reverse. If
-.\" the server loses contact with a client then any lock stays in place
-.\" indefinitely ("why can't I read my mail"... I remember it well).
-.\"
-it might lose and regain a lock without ever being aware of the fact.
-(The period of time after which contact is assumed lost is known as
-the NFSv4 leasetime.
-On a Linux NFS server, this can be determined by looking at
-.IR /proc/fs/nfsd/nfsv4leasetime ,
-which expresses the period in seconds.
-The default value for this file is 90.)
-.\"
-.\" Jeff Layton:
-.\" Note that this is not a firm timeout. The server runs a job
-.\" periodically to clean out expired stateful objects, and it's likely
-.\" that there is some time (maybe even up to another whole lease period)
-.\" between when the timeout expires and the job actually runs. If the
-.\" client gets a RENEW in there within that window, its lease will be
-.\" renewed and its state preserved.
-.\"
-This scenario potentially risks data corruption,
-since another process might acquire a lock in the intervening period
-and perform file I/O.
-.P
-Since Linux 3.12,
-.\" commit ef1820f9be27b6ad158f433ab38002ab8131db4d
-if an NFSv4 client loses contact with the server,
-any I/O to the file by a process which "thinks" it holds
-a lock will fail until that process closes and reopens the file.
-A kernel parameter,
-.IR nfs.recover_lost_locks ,
-can be set to 1 to obtain the pre-3.12 behavior,
-whereby the client will attempt to recover lost locks
-when contact is reestablished with the server.
-Because of the attendant risk of data corruption,
-.\" commit f6de7a39c181dfb8a2c534661a53c73afb3081cd
-this parameter defaults to 0 (disabled).
-.SH BUGS
-.SS F_SETFL
-It is not possible to use
-.B F_SETFL
-to change the state of the
-.B O_DSYNC
-and
-.B O_SYNC
-flags.
-.\" FIXME . According to POSIX.1-2001, O_SYNC should also be modifiable
-.\" via fcntl(2), but currently Linux does not permit this
-.\" See http://bugzilla.kernel.org/show_bug.cgi?id=5994
-Attempts to change the state of these flags are silently ignored.
-.SS F_GETOWN
-A limitation of the Linux system call conventions on some
-architectures (notably i386) means that if a (negative)
-process group ID to be returned by
-.B F_GETOWN
-falls in the range \-1 to \-4095, then the return value is wrongly
-interpreted by glibc as an error in the system call;
-.\" glibc source: sysdeps/unix/sysv/linux/i386/sysdep.h
-that is, the return value of
-.BR fcntl ()
-will be \-1, and
-.I errno
-will contain the (positive) process group ID.
-The Linux-specific
-.B F_GETOWN_EX
-operation avoids this problem.
-.\" mtk, Dec 04: some limited testing on alpha and ia64 seems to
-.\" indicate that ANY negative PGID value will cause F_GETOWN
-.\" to misinterpret the return as an error. Some other architectures
-.\" seem to have the same range check as i386.
-Since glibc 2.11, glibc makes the kernel
-.B F_GETOWN
-problem invisible by implementing
-.B F_GETOWN
-using
-.BR F_GETOWN_EX .
-.SS F_SETOWN
-In Linux 2.4 and earlier, there is a bug that can occur
-when an unprivileged process uses
-.B F_SETOWN
-to specify the owner
-of a socket file descriptor
-as a process (group) other than the caller.
-In this case,
-.BR fcntl ()
-can return \-1 with
-.I errno
-set to
-.BR EPERM ,
-even when the owner process (group) is one that the caller
-has permission to send signals to.
-Despite this error return, the file descriptor owner is set,
-and signals will be sent to the owner.
-.\"
-.SS Deadlock detection
-The deadlock-detection algorithm employed by the kernel when dealing with
-.B F_SETLKW
-requests can yield both
-false negatives (failures to detect deadlocks,
-leaving a set of deadlocked processes blocked indefinitely)
-and false positives
-.RB ( EDEADLK
-errors when there is no deadlock).
-For example,
-the kernel limits the lock depth of its dependency search to 10 steps,
-meaning that circular deadlock chains that exceed
-that size will not be detected.
-In addition, the kernel may falsely indicate a deadlock
-when two or more processes created using the
-.BR clone (2)
-.B CLONE_FILES
-flag place locks that appear (to the kernel) to conflict.
-.\"
-.SS Mandatory locking
-The Linux implementation of mandatory locking
-is subject to race conditions which render it unreliable:
-.\" http://marc.info/?l=linux-kernel&m=119013491707153&w=2
-.\"
-.\" Reconfirmed by Jeff Layton
-.\" From: Jeff Layton <jlayton <at> redhat.com>
-.\" Subject: Re: Status of fcntl() mandatory locking
-.\" Newsgroups: gmane.linux.file-systems
-.\" Date: 2014-04-28 10:07:57 GMT
-.\" http://thread.gmane.org/gmane.linux.file-systems/84481/focus=84518
-a
-.BR write (2)
-call that overlaps with a lock may modify data after the mandatory lock is
-acquired;
-a
-.BR read (2)
-call that overlaps with a lock may detect changes to data that were made
-only after a write lock was acquired.
-Similar races exist between mandatory locks and
-.BR mmap (2).
-It is therefore inadvisable to rely on mandatory locking.
-.SH SEE ALSO
-.BR dup2 (2),
-.BR flock (2),
-.BR open (2),
-.BR socket (2),
-.BR lockf (3),
-.BR capabilities (7),
-.BR feature_test_macros (7),
-.BR lslocks (8)
-.P
-.IR locks.txt ,
-.IR mandatory\-locking.txt ,
-and
-.I dnotify.txt
-in the Linux kernel source directory
-.I Documentation/filesystems/
-(on older kernels, these files are directly under the
-.I Documentation/
-directory, and
-.I mandatory\-locking.txt
-is called
-.IR mandatory.txt )
diff --git a/man2/fcntl64.2 b/man2/fcntl64.2
deleted file mode 100644
index fc8ddc17d..000000000
--- a/man2/fcntl64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/fcntl.2
diff --git a/man2/fdatasync.2 b/man2/fdatasync.2
deleted file mode 100644
index 3c7494f37..000000000
--- a/man2/fdatasync.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/fsync.2
diff --git a/man2/fdetach.2 b/man2/fdetach.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/fdetach.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/fgetxattr.2 b/man2/fgetxattr.2
deleted file mode 100644
index d9e5d9037..000000000
--- a/man2/fgetxattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getxattr.2
diff --git a/man2/finit_module.2 b/man2/finit_module.2
deleted file mode 100644
index 20c5c51f8..000000000
--- a/man2/finit_module.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/init_module.2
diff --git a/man2/flistxattr.2 b/man2/flistxattr.2
deleted file mode 100644
index 117bd2b53..000000000
--- a/man2/flistxattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/listxattr.2
diff --git a/man2/flock.2 b/man2/flock.2
deleted file mode 100644
index 9c8f5b3b7..000000000
--- a/man2/flock.2
+++ /dev/null
@@ -1,267 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu) and
-.\" and Copyright 2002 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Fri Jan 31 16:26:07 1997 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Fri Dec 11 17:57:27 1998 by Jamie Lokier <jamie@imbolc.ucc.ie>
-.\" Modified 24 Apr 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Substantial rewrites and additions
-.\" 2005-05-10 mtk, noted that lock conversions are not atomic.
-.\"
-.\" FIXME Maybe document LOCK_MAND, LOCK_RW, LOCK_READ, LOCK_WRITE
-.\" which only have effect for SAMBA.
-.\"
-.TH flock 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-flock \- apply or remove an advisory lock on an open file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/file.h>
-.P
-.BI "int flock(int " fd ", int " op );
-.fi
-.SH DESCRIPTION
-Apply or remove an advisory lock on the open file specified by
-.IR fd .
-The argument
-.I op
-is one of the following:
-.RS 4
-.TP 9
-.B LOCK_SH
-Place a shared lock.
-More than one process may hold a shared lock for a given file
-at a given time.
-.TP
-.B LOCK_EX
-Place an exclusive lock.
-Only one process may hold an exclusive lock for a given
-file at a given time.
-.TP
-.B LOCK_UN
-Remove an existing lock held by this process.
-.RE
-.P
-A call to
-.BR flock ()
-may block if an incompatible lock is held by another process.
-To make a nonblocking request, include
-.B LOCK_NB
-(by ORing)
-with any of the above operations.
-.P
-A single file may not simultaneously have both shared and exclusive locks.
-.P
-Locks created by
-.BR flock ()
-are associated with an open file description (see
-.BR open (2)).
-This means that duplicate file descriptors (created by, for example,
-.BR fork (2)
-or
-.BR dup (2))
-refer to the same lock, and this lock may be modified
-or released using any of these file descriptors.
-Furthermore, the lock is released either by an explicit
-.B LOCK_UN
-operation on any of these duplicate file descriptors, or when all
-such file descriptors have been closed.
-.P
-If a process uses
-.BR open (2)
-(or similar) to obtain more than one file descriptor for the same file,
-these file descriptors are treated independently by
-.BR flock ().
-An attempt to lock the file using one of these file descriptors
-may be denied by a lock that the calling process has
-already placed via another file descriptor.
-.P
-A process may hold only one type of lock (shared or exclusive)
-on a file.
-Subsequent
-.BR flock ()
-calls on an already locked file will convert an existing lock to the new
-lock mode.
-.P
-Locks created by
-.BR flock ()
-are preserved across an
-.BR execve (2).
-.P
-A shared or exclusive lock can be placed on a file regardless of the
-mode in which the file was opened.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not an open file descriptor.
-.TP
-.B EINTR
-While waiting to acquire a lock, the call was interrupted by
-delivery of a signal caught by a handler; see
-.BR signal (7).
-.TP
-.B EINVAL
-.I op
-is invalid.
-.TP
-.B ENOLCK
-The kernel ran out of memory for allocating lock records.
-.TP
-.B EWOULDBLOCK
-The file is locked and the
-.B LOCK_NB
-flag was selected.
-.SH VERSIONS
-Since Linux 2.0,
-.BR flock ()
-is implemented as a system call in its own right rather
-than being emulated in the GNU C library as a call to
-.BR fcntl (2).
-With this implementation,
-there is no interaction between the types of lock
-placed by
-.BR flock ()
-and
-.BR fcntl (2),
-and
-.BR flock ()
-does not detect deadlock.
-(Note, however, that on some systems, such as the modern BSDs,
-.\" E.g., according to the flock(2) man page, FreeBSD since at least 5.3
-.BR flock ()
-and
-.BR fcntl (2)
-locks
-.I do
-interact with one another.)
-.SS CIFS details
-Up to Linux 5.4,
-.BR flock ()
-is not propagated over SMB.
-A file with such locks will not appear locked for remote clients.
-.P
-Since Linux 5.5,
-.BR flock ()
-locks are emulated with SMB byte-range locks on the entire file.
-Similarly to NFS, this means that
-.BR fcntl (2)
-and
-.BR flock ()
-locks interact with one another.
-Another important side-effect is that the locks are not advisory anymore:
-any I/O on a locked file will always fail with
-.B EACCES
-when done from a separate file descriptor.
-This difference originates from the design of locks in the SMB protocol,
-which provides mandatory locking semantics.
-.P
-Remote and mandatory locking semantics may vary with
-SMB protocol, mount options and server type.
-See
-.BR mount.cifs (8)
-for additional information.
-.SH STANDARDS
-BSD.
-.SH HISTORY
-4.4BSD (the
-.BR flock ()
-call first appeared in 4.2BSD).
-A version of
-.BR flock (),
-possibly implemented in terms of
-.BR fcntl (2),
-appears on most UNIX systems.
-.SS NFS details
-Up to Linux 2.6.11,
-.BR flock ()
-did not lock files over NFS
-(i.e., the scope of locks was limited to the local system).
-Instead, one could use
-.BR fcntl (2)
-byte-range locking, which does work over NFS,
-given a sufficiently recent version of
-Linux and a server which supports locking.
-.P
-Since Linux 2.6.12, NFS clients support
-.BR flock ()
-locks by emulating them as
-.BR fcntl (2)
-byte-range locks on the entire file.
-This means that
-.BR fcntl (2)
-and
-.BR flock ()
-locks
-.I do
-interact with one another over NFS.
-It also means that in order to place an exclusive lock,
-the file must be opened for writing.
-.P
-Since Linux 2.6.37,
-.\" commit 5eebde23223aeb0ad2d9e3be6590ff8bbfab0fc2
-the kernel supports a compatibility mode that allows
-.BR flock ()
-locks (and also
-.BR fcntl (2)
-byte region locks) to be treated as local;
-see the discussion of the
-.I "local_lock"
-option in
-.BR nfs (5).
-.SH NOTES
-.BR flock ()
-places advisory locks only; given suitable permissions on a file,
-a process is free to ignore the use of
-.BR flock ()
-and perform I/O on the file.
-.P
-.BR flock ()
-and
-.BR fcntl (2)
-locks have different semantics with respect to forked processes and
-.BR dup (2).
-On systems that implement
-.BR flock ()
-using
-.BR fcntl (2),
-the semantics of
-.BR flock ()
-will be different from those described in this manual page.
-.P
-Converting a lock
-(shared to exclusive, or vice versa) is not guaranteed to be atomic:
-the existing lock is first removed, and then a new lock is established.
-Between these two steps,
-a pending lock request by another process may be granted,
-with the result that the conversion either blocks, or fails if
-.B LOCK_NB
-was specified.
-(This is the original BSD behavior,
-and occurs on many other implementations.)
-.\" Kernel 2.5.21 changed things a little: during lock conversion
-.\" it is now the highest priority process that will get the lock -- mtk
-.SH SEE ALSO
-.BR flock (1),
-.BR close (2),
-.BR dup (2),
-.BR execve (2),
-.BR fcntl (2),
-.BR fork (2),
-.BR open (2),
-.BR lockf (3),
-.BR lslocks (8)
-.P
-.I Documentation/filesystems/locks.txt
-in the Linux kernel source tree
-.RI ( Documentation/locks.txt
-in older kernels)
diff --git a/man2/fork.2 b/man2/fork.2
deleted file mode 100644
index b5a7816a0..000000000
--- a/man2/fork.2
+++ /dev/null
@@ -1,348 +0,0 @@
-.\" Copyright (C) 2006 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" A few fragments remain from an earlier (1992) page by
-.\" Drew Eckhardt (drew@cs.colorado.edu),
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt (michael@moria.de)
-.\" Modified Sat Jul 24 13:22:07 1993 by Rik Faith (faith@cs.unc.edu)
-.\" Modified 21 Aug 1994 by Michael Chastain (mec@shell.portal.com):
-.\" Referenced 'clone(2)'.
-.\" Modified 1995-06-10, 1996-04-18, 1999-11-01, 2000-12-24
-.\" by Andries Brouwer (aeb@cwi.nl)
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\" 2006-09-04, Michael Kerrisk
-.\" Greatly expanded, to describe all attributes that differ
-.\" between parent and child.
-.\"
-.TH fork 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-fork \- create a child process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B pid_t fork(void);
-.fi
-.SH DESCRIPTION
-.BR fork ()
-creates a new process by duplicating the calling process.
-The new process is referred to as the
-.I child
-process.
-The calling process is referred to as the
-.I parent
-process.
-.P
-The child process and the parent process run in separate memory spaces.
-At the time of
-.BR fork ()
-both memory spaces have the same content.
-Memory writes, file mappings
-.RB ( mmap (2)),
-and unmappings
-.RB ( munmap (2))
-performed by one of the processes do not affect the other.
-.P
-The child process is an exact duplicate of the parent
-process except for the following points:
-.IP \[bu] 3
-The child has its own unique process ID,
-and this PID does not match the ID of any existing process group
-.RB ( setpgid (2))
-or session.
-.IP \[bu]
-The child's parent process ID is the same as the parent's process ID.
-.IP \[bu]
-The child does not inherit its parent's memory locks
-.RB ( mlock (2),
-.BR mlockall (2)).
-.IP \[bu]
-Process resource utilizations
-.RB ( getrusage (2))
-and CPU time counters
-.RB ( times (2))
-are reset to zero in the child.
-.IP \[bu]
-The child's set of pending signals is initially empty
-.RB ( sigpending (2)).
-.IP \[bu]
-The child does not inherit semaphore adjustments from its parent
-.RB ( semop (2)).
-.IP \[bu]
-The child does not inherit process-associated record locks from its parent
-.RB ( fcntl (2)).
-(On the other hand, it does inherit
-.BR fcntl (2)
-open file description locks and
-.BR flock (2)
-locks from its parent.)
-.IP \[bu]
-The child does not inherit timers from its parent
-.RB ( setitimer (2),
-.BR alarm (2),
-.BR timer_create (2)).
-.IP \[bu]
-The child does not inherit outstanding asynchronous I/O operations
-from its parent
-.RB ( aio_read (3),
-.BR aio_write (3)),
-nor does it inherit any asynchronous I/O contexts from its parent (see
-.BR io_setup (2)).
-.P
-The process attributes in the preceding list are all specified
-in POSIX.1.
-The parent and child also differ with respect to the following
-Linux-specific process attributes:
-.IP \[bu] 3
-The child does not inherit directory change notifications (dnotify)
-from its parent
-(see the description of
-.B F_NOTIFY
-in
-.BR fcntl (2)).
-.IP \[bu]
-The
-.BR prctl (2)
-.B PR_SET_PDEATHSIG
-setting is reset so that the child does not receive a signal
-when its parent terminates.
-.IP \[bu]
-The default timer slack value is set to the parent's
-current timer slack value.
-See the description of
-.B PR_SET_TIMERSLACK
-in
-.BR prctl (2).
-.IP \[bu]
-Memory mappings that have been marked with the
-.BR madvise (2)
-.B MADV_DONTFORK
-flag are not inherited across a
-.BR fork ().
-.IP \[bu]
-Memory in address ranges that have been marked with the
-.BR madvise (2)
-.B MADV_WIPEONFORK
-flag is zeroed in the child after a
-.BR fork ().
-(The
-.B MADV_WIPEONFORK
-setting remains in place for those address ranges in the child.)
-.IP \[bu]
-The termination signal of the child is always
-.B SIGCHLD
-(see
-.BR clone (2)).
-.IP \[bu]
-The port access permission bits set by
-.BR ioperm (2)
-are not inherited by the child;
-the child must turn on any bits that it requires using
-.BR ioperm (2).
-.P
-Note the following further points:
-.IP \[bu] 3
-The child process is created with a single thread\[em]the
-one that called
-.BR fork ().
-The entire virtual address space of the parent is replicated in the child,
-including the states of mutexes, condition variables,
-and other pthreads objects; the use of
-.BR pthread_atfork (3)
-may be helpful for dealing with problems that this can cause.
-.IP \[bu]
-After a
-.BR fork ()
-in a multithreaded program,
-the child can safely call only async-signal-safe functions (see
-.BR signal\-safety (7))
-until such time as it calls
-.BR execve (2).
-.IP \[bu]
-The child inherits copies of the parent's set of open file descriptors.
-Each file descriptor in the child refers to the same
-open file description (see
-.BR open (2))
-as the corresponding file descriptor in the parent.
-This means that the two file descriptors share open file status flags,
-file offset,
-and signal-driven I/O attributes (see the description of
-.B F_SETOWN
-and
-.B F_SETSIG
-in
-.BR fcntl (2)).
-.IP \[bu]
-The child inherits copies of the parent's set of open message
-queue descriptors (see
-.BR mq_overview (7)).
-Each file descriptor in the child refers to the same
-open message queue description
-as the corresponding file descriptor in the parent.
-This means that the two file descriptors share the same flags
-.RI ( mq_flags ).
-.IP \[bu]
-The child inherits copies of the parent's set of open directory streams (see
-.BR opendir (3)).
-POSIX.1 says that the corresponding directory streams
-in the parent and child
-.I may
-share the directory stream positioning;
-on Linux/glibc they do not.
-.SH RETURN VALUE
-On success, the PID of the child process is returned in the parent,
-and 0 is returned in the child.
-On failure, \-1 is returned in the parent,
-no child process is created, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-.\" NOTE! The following should match the description in pthread_create(3)
-A system-imposed limit on the number of threads was encountered.
-There are a number of limits that may trigger this error:
-.RS
-.IP \[bu] 3
-the
-.B RLIMIT_NPROC
-soft resource limit (set via
-.BR setrlimit (2)),
-which limits the number of processes and threads for a real user ID,
-was reached;
-.IP \[bu]
-the kernel's system-wide limit on the number of processes and threads,
-.IR /proc/sys/kernel/threads\-max ,
-was reached (see
-.BR proc (5));
-.IP \[bu]
-the maximum number of PIDs,
-.IR /proc/sys/kernel/pid_max ,
-was reached (see
-.BR proc (5));
-or
-.IP \[bu]
-the PID limit
-.RI ( pids.max )
-imposed by the cgroup "process number" (PIDs) controller was reached.
-.RE
-.TP
-.B EAGAIN
-The caller is operating under the
-.B SCHED_DEADLINE
-scheduling policy and does not have the reset-on-fork flag set.
-See
-.BR sched (7).
-.TP
-.B ENOMEM
-.BR fork ()
-failed to allocate the necessary kernel structures because memory is tight.
-.TP
-.B ENOMEM
-An attempt was made to create a child process in a PID namespace
-whose "init" process has terminated.
-See
-.BR pid_namespaces (7).
-.TP
-.B ENOSYS
-.BR fork ()
-is not supported on this platform (for example,
-.\" e.g., arm (optionally), blackfin, c6x, frv, h8300, microblaze, xtensa
-hardware without a Memory-Management Unit).
-.TP
-.BR ERESTARTNOINTR " (since Linux 2.6.17)"
-.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
-System call was interrupted by a signal and will be restarted.
-(This can be seen only during a trace.)
-.SH VERSIONS
-.SS C library/kernel differences
-Since glibc 2.3.3,
-.\" nptl/sysdeps/unix/sysv/linux/fork.c
-rather than invoking the kernel's
-.BR fork ()
-system call,
-the glibc
-.BR fork ()
-wrapper that is provided as part of the
-NPTL threading implementation invokes
-.BR clone (2)
-with flags that provide the same effect as the traditional system call.
-(A call to
-.BR fork ()
-is equivalent to a call to
-.BR clone (2)
-specifying
-.I flags
-as just
-.BR SIGCHLD .)
-The glibc wrapper invokes any fork handlers that have been
-established using
-.BR pthread_atfork (3).
-.\" and does some magic to ensure that getpid(2) returns the right value.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.SH NOTES
-Under Linux,
-.BR fork ()
-is implemented using copy-on-write pages, so the only penalty that it incurs
-is the time and memory required to duplicate the parent's page tables,
-and to create a unique task structure for the child.
-.SH EXAMPLES
-See
-.BR pipe (2)
-and
-.BR wait (2)
-for more examples.
-.P
-.\" SRC BEGIN (fork.c)
-.EX
-#include <signal.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-int
-main(void)
-{
- pid_t pid;
-\&
- if (signal(SIGCHLD, SIG_IGN) == SIG_ERR) {
- perror("signal");
- exit(EXIT_FAILURE);
- }
- pid = fork();
- switch (pid) {
- case \-1:
- perror("fork");
- exit(EXIT_FAILURE);
- case 0:
- puts("Child exiting.");
- exit(EXIT_SUCCESS);
- default:
- printf("Child is PID %jd\en", (intmax_t) pid);
- puts("Parent exiting.");
- exit(EXIT_SUCCESS);
- }
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR clone (2),
-.BR execve (2),
-.BR exit (2),
-.BR setrlimit (2),
-.BR unshare (2),
-.BR vfork (2),
-.BR wait (2),
-.BR daemon (3),
-.BR pthread_atfork (3),
-.BR capabilities (7),
-.BR credentials (7)
diff --git a/man2/free_hugepages.2 b/man2/free_hugepages.2
deleted file mode 100644
index d4b906a97..000000000
--- a/man2/free_hugepages.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/alloc_hugepages.2
diff --git a/man2/fremovexattr.2 b/man2/fremovexattr.2
deleted file mode 100644
index 38d01ccde..000000000
--- a/man2/fremovexattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/removexattr.2
diff --git a/man2/fsetxattr.2 b/man2/fsetxattr.2
deleted file mode 100644
index dc0780751..000000000
--- a/man2/fsetxattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setxattr.2
diff --git a/man2/fstat.2 b/man2/fstat.2
deleted file mode 100644
index b1a86c195..000000000
--- a/man2/fstat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
diff --git a/man2/fstat64.2 b/man2/fstat64.2
deleted file mode 100644
index 2b9971d2e..000000000
--- a/man2/fstat64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/fstat.2
diff --git a/man2/fstatat.2 b/man2/fstatat.2
deleted file mode 100644
index b1a86c195..000000000
--- a/man2/fstatat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
diff --git a/man2/fstatat64.2 b/man2/fstatat64.2
deleted file mode 100644
index 7791269e6..000000000
--- a/man2/fstatat64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/fstatat.2
diff --git a/man2/fstatfs.2 b/man2/fstatfs.2
deleted file mode 100644
index 923d3c0cc..000000000
--- a/man2/fstatfs.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/statfs.2
diff --git a/man2/fstatfs64.2 b/man2/fstatfs64.2
deleted file mode 100644
index fde2b22f5..000000000
--- a/man2/fstatfs64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/fstatfs.2
diff --git a/man2/fsync.2 b/man2/fsync.2
deleted file mode 100644
index e8d19bcc8..000000000
--- a/man2/fsync.2
+++ /dev/null
@@ -1,200 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu) and
-.\" and Copyright 2006 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
-.\" Removed note about old libc (pre-4.5.26) translating to 'sync'.
-.\" Modified 15 Apr 1995 by Michael Chastain <mec@shell.portal.com>:
-.\" Added `see also' section.
-.\" Modified 13 Apr 1996 by Markus Kuhn <mskuhn@cip.informatik.uni-erlangen.de>
-.\" Added remarks about fdatasync.
-.\" Modified 31 Jan 1997 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 18 Apr 2001 by Andi Kleen
-.\" Fix description to describe what it really does; add a few caveats.
-.\" 2006-04-28, mtk, substantial rewrite of various parts.
-.\" 2012-02-27 Various changes by Christoph Hellwig <hch@lst.de>
-.\"
-.TH fsync 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-fsync, fdatasync \- synchronize a file's in-core state with storage device
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int fsync(int " fd );
-.P
-.BI "int fdatasync(int " fd );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.nf
-.BR fsync ():
- glibc 2.16 and later:
- No feature test macros need be defined
- glibc up to and including 2.15:
- _BSD_SOURCE || _XOPEN_SOURCE
- || /* Since glibc 2.8: */ _POSIX_C_SOURCE >= 200112L
-.fi
-.P
-.BR fdatasync ():
-.nf
- _POSIX_C_SOURCE >= 199309L || _XOPEN_SOURCE >= 500
-.fi
-.SH DESCRIPTION
-.BR fsync ()
-transfers ("flushes") all modified in-core data of
-(i.e., modified buffer cache pages for) the
-file referred to by the file descriptor
-.I fd
-to the disk device (or other permanent storage device) so that all
-changed information can be retrieved even if the system crashes or
-is rebooted.
-This includes writing through or flushing a disk cache if present.
-The call blocks until the device reports that the transfer has completed.
-.P
-As well as flushing the file data,
-.BR fsync ()
-also flushes the metadata information associated with the file (see
-.BR inode (7)).
-.P
-Calling
-.BR fsync ()
-does not necessarily ensure
-that the entry in the directory containing the file has also reached disk.
-For that, an explicit
-.BR fsync ()
-on a file descriptor for the directory is also needed.
-.P
-.BR fdatasync ()
-is similar to
-.BR fsync (),
-but does not flush modified metadata unless that metadata
-is needed in order to allow a subsequent data retrieval to be
-correctly handled.
-For example, changes to
-.I st_atime
-or
-.I st_mtime
-(respectively, time of last access and
-time of last modification; see
-.BR inode (7))
-do not require flushing because they are not necessary for
-a subsequent data read to be handled correctly.
-On the other hand, a change to the file size
-.RI ( st_size ,
-as made by, say,
-.BR ftruncate (2)),
-would require a metadata flush.
-.P
-The aim of
-.BR fdatasync ()
-is to reduce disk activity for applications that do not
-require all metadata to be synchronized with the disk.
-.SH RETURN VALUE
-On success, these system calls return zero.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not a valid open file descriptor.
-.TP
-.B EINTR
-The function was interrupted by a signal; see
-.BR signal (7).
-.TP
-.B EIO
-An error occurred during synchronization.
-This error may relate to data written to some other file descriptor
-on the same file.
-Since Linux 4.13,
-.\" commit 088737f44bbf6378745f5b57b035e57ee3dc4750
-errors from write-back will be reported to
-all file descriptors that might have written the data which triggered
-the error.
-Some filesystems (e.g., NFS) keep close track of which data
-came through which file descriptor, and give more precise reporting.
-Other filesystems (e.g., most local filesystems) will report errors to
-all file descriptors that were open on the file when the error was recorded.
-.TP
-.B ENOSPC
-Disk space was exhausted while synchronizing.
-.TP
-.B EROFS
-.TQ
-.B EINVAL
-.I fd
-is bound to a special file (e.g., a pipe, FIFO, or socket)
-which does not support synchronization.
-.TP
-.B ENOSPC
-.TQ
-.B EDQUOT
-.I fd
-is bound to a file on NFS or another filesystem which does not allocate
-space at the time of a
-.BR write (2)
-system call, and some previous write failed due to insufficient
-storage space.
-.SH VERSIONS
-On POSIX systems on which
-.BR fdatasync ()
-is available,
-.B _POSIX_SYNCHRONIZED_IO
-is defined in
-.I <unistd.h>
-to a value greater than 0.
-(See also
-.BR sysconf (3).)
-.\" POSIX.1-2001: It shall be defined to -1 or 0 or 200112L.
-.\" -1: unavailable, 0: ask using sysconf().
-.\" glibc defines them to 1.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.2BSD.
-.P
-In Linux 2.2 and earlier,
-.BR fdatasync ()
-is equivalent to
-.BR fsync (),
-and so has no performance advantage.
-.P
-The
-.BR fsync ()
-implementations in older kernels and lesser-used filesystems
-do not know how to flush disk caches.
-In these cases disk caches need to be disabled using
-.BR hdparm (8)
-or
-.BR sdparm (8)
-to guarantee safe operation.
-.P
-Under AT&T UNIX System V Release 4,
-.I fd
-needs to be opened for writing.
-This is by itself incompatible with the original BSD interface
-and forbidden by POSIX,
-but nevertheless survives in HP-UX and AIX.
-.SH SEE ALSO
-.BR sync (1),
-.BR bdflush (2),
-.BR open (2),
-.BR posix_fadvise (2),
-.BR pwritev (2),
-.BR sync (2),
-.BR sync_file_range (2),
-.BR fflush (3),
-.BR fileno (3),
-.BR hdparm (8),
-.BR mount (8)
diff --git a/man2/ftruncate.2 b/man2/ftruncate.2
deleted file mode 100644
index 2ed34f1ed..000000000
--- a/man2/ftruncate.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/truncate.2
diff --git a/man2/ftruncate64.2 b/man2/ftruncate64.2
deleted file mode 100644
index a8862d3ea..000000000
--- a/man2/ftruncate64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/ftruncate.2
diff --git a/man2/futex.2 b/man2/futex.2
deleted file mode 100644
index 3eed74412..000000000
--- a/man2/futex.2
+++ /dev/null
@@ -1,1976 +0,0 @@
-.\" Page by b.hubert
-.\" and Copyright (C) 2015, Thomas Gleixner <tglx@linutronix.de>
-.\" and Copyright (C) 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" %%%LICENSE_START(FREELY_REDISTRIBUTABLE)
-.\" may be freely modified and distributed
-.\" %%%LICENSE_END
-.\"
-.\" Niki A. Rahimi (LTC Security Development, narahimi@us.ibm.com)
-.\" added ERRORS section.
-.\"
-.\" Modified 2004-06-17 mtk
-.\" Modified 2004-10-07 aeb, added FUTEX_REQUEUE, FUTEX_CMP_REQUEUE
-.\"
-.\" FIXME Still to integrate are some points from Torvald Riegel's mail of
-.\" 2015-01-23:
-.\" http://thread.gmane.org/gmane.linux.kernel/1703405/focus=7977
-.\"
-.\" FIXME Do we need to add some text regarding Torvald Riegel's 2015-01-24 mail
-.\" http://thread.gmane.org/gmane.linux.kernel/1703405/focus=1873242
-.\"
-.TH futex 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-futex \- fast user-space locking
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.P
-.BR "#include <linux/futex.h>" " /* Definition of " FUTEX_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "long syscall(SYS_futex, uint32_t *" uaddr ", int " futex_op \
-", uint32_t " val ,
-.BI " const struct timespec *" timeout , \
-" \fR /* or: \fBuint32_t \fIval2\fP */"
-.BI " uint32_t *" uaddr2 ", uint32_t " val3 );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR futex (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR futex ()
-system call provides a method for waiting until a certain condition becomes
-true.
-It is typically used as a blocking construct in the context of
-shared-memory synchronization.
-When using futexes, the majority of
-the synchronization operations are performed in user space.
-A user-space program employs the
-.BR futex ()
-system call only when it is likely that the program has to block for
-a longer time until the condition becomes true.
-Other
-.BR futex ()
-operations can be used to wake any processes or threads waiting
-for a particular condition.
-.P
-A futex is a 32-bit value\[em]referred to below as a
-.IR "futex word" \[em]whose
-address is supplied to the
-.BR futex ()
-system call.
-(Futexes are 32 bits in size on all platforms, including 64-bit systems.)
-All futex operations are governed by this value.
-In order to share a futex between processes,
-the futex is placed in a region of shared memory,
-created using (for example)
-.BR mmap (2)
-or
-.BR shmat (2).
-(Thus, the futex word may have different
-virtual addresses in different processes,
-but these addresses all refer to the same location in physical memory.)
-In a multithreaded program, it is sufficient to place the futex word
-in a global variable shared by all threads.
-.P
-When executing a futex operation that requests to block a thread,
-the kernel will block only if the futex word has the value that the
-calling thread supplied (as one of the arguments of the
-.BR futex ()
-call) as the expected value of the futex word.
-The loading of the futex word's value,
-the comparison of that value with the expected value,
-and the actual blocking will happen atomically and will be totally ordered
-with respect to concurrent operations performed by other threads
-on the same futex word.
-.\" Notes from Darren Hart (Dec 2015):
-.\" Totally ordered with respect to futex operations refers to semantics
-.\" of the ACQUIRE/RELEASE operations and how they impact ordering of
-.\" memory reads and writes. The kernel futex operations are protected
-.\" by spinlocks, which ensure that all operations are serialized
-.\" with respect to one another.
-.\"
-.\" This is a lot to attempt to define in this document. Perhaps a
-.\" reference to linux/Documentation/memory-barriers.txt as a footnote
-.\" would be sufficient? Or perhaps for this manual, "serialized" would
-.\" be sufficient, with a footnote regarding "totally ordered" and a
-.\" pointer to the memory-barrier documentation?
-Thus, the futex word is used to connect the synchronization in user space
-with the implementation of blocking by the kernel.
-Analogously to an atomic
-compare-and-exchange operation that potentially changes shared memory,
-blocking via a futex is an atomic compare-and-block operation.
-.\" FIXME(Torvald Riegel):
-.\" Eventually we want to have some text in NOTES to satisfy
-.\" the reference in the following sentence
-.\" See NOTES for a detailed specification of
-.\" the synchronization semantics.
-.P
-One use of futexes is for implementing locks.
-The state of the lock (i.e., acquired or not acquired)
-can be represented as an atomically accessed flag in shared memory.
-In the uncontended case,
-a thread can access or modify the lock state with atomic instructions,
-for example atomically changing it from not acquired to acquired
-using an atomic compare-and-exchange instruction.
-(Such instructions are performed entirely in user mode,
-and the kernel maintains no information about the lock state.)
-On the other hand, a thread may be unable to acquire a lock because
-it is already acquired by another thread.
-It then may pass the lock's flag as a futex word and the value
-representing the acquired state as the expected value to a
-.BR futex ()
-wait operation.
-This
-.BR futex ()
-operation will block if and only if the lock is still acquired
-(i.e., the value in the futex word still matches the "acquired state").
-When releasing the lock, a thread has to first reset the
-lock state to not acquired and then execute a futex
-operation that wakes threads blocked on the lock flag used as a futex word
-(this can be further optimized to avoid unnecessary wake-ups).
-See
-.BR futex (7)
-for more detail on how to use futexes.
-.P
-Besides the basic wait and wake-up futex functionality, there are further
-futex operations aimed at supporting more complex use cases.
-.P
-Note that
-no explicit initialization or destruction is necessary to use futexes;
-the kernel maintains a futex
-(i.e., the kernel-internal implementation artifact)
-only while operations such as
-.BR FUTEX_WAIT ,
-described below, are being performed on a particular futex word.
-.\"
-.SS Arguments
-The
-.I uaddr
-argument points to the futex word.
-On all platforms, futexes are four-byte
-integers that must be aligned on a four-byte boundary.
-The operation to perform on the futex is specified in the
-.I futex_op
-argument;
-.I val
-is a value whose meaning and purpose depend on
-.IR futex_op .
-.P
-The remaining arguments
-.RI ( timeout ,
-.IR uaddr2 ,
-and
-.IR val3 )
-are required only for certain of the futex operations described below.
-Where one of these arguments is not required, it is ignored.
-.P
-For several blocking operations, the
-.I timeout
-argument is a pointer to a
-.I timespec
-structure that specifies a timeout for the operation.
-However, notwithstanding the prototype shown above, for some operations,
-the least significant four bytes of this argument are instead
-used as an integer whose meaning is determined by the operation.
-For these operations, the kernel casts the
-.I timeout
-value first to
-.IR "unsigned long" ,
-then to
-.IR uint32_t ,
-and in the remainder of this page, this argument is referred to as
-.I val2
-when interpreted in this fashion.
-.P
-Where it is required, the
-.I uaddr2
-argument is a pointer to a second futex word that is employed
-by the operation.
-.P
-The interpretation of the final integer argument,
-.IR val3 ,
-depends on the operation.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.SS Futex operations
-The
-.I futex_op
-argument consists of two parts:
-a command that specifies the operation to be performed,
-bitwise ORed with zero or more options that
-modify the behaviour of the operation.
-The options that may be included in
-.I futex_op
-are as follows:
-.TP
-.BR FUTEX_PRIVATE_FLAG " (since Linux 2.6.22)"
-.\" commit 34f01cc1f512fa783302982776895c73714ebbc2
-This option bit can be employed with all futex operations.
-It tells the kernel that the futex is process-private and not shared
-with another process (i.e., it is being used for synchronization
-only between threads of the same process).
-This allows the kernel to make some additional performance optimizations.
-.\" I.e., it allows the kernel to choose the fast path for validating
-.\" the user-space address and avoids expensive VMA lookups,
-.\" taking reference counts on file backing store, and so on.
-.IP
-As a convenience,
-.I <linux/futex.h>
-defines a set of constants with the suffix
-.B _PRIVATE
-that are equivalents of all of the operations listed below,
-.\" except the obsolete FUTEX_FD, for which the "private" flag was
-.\" meaningless
-but with the
-.B FUTEX_PRIVATE_FLAG
-ORed into the constant value.
-Thus, there are
-.BR FUTEX_WAIT_PRIVATE ,
-.BR FUTEX_WAKE_PRIVATE ,
-and so on.
-.TP
-.BR FUTEX_CLOCK_REALTIME " (since Linux 2.6.28)"
-.\" commit 1acdac104668a0834cfa267de9946fac7764d486
-This option bit can be employed only with the
-.BR FUTEX_WAIT_BITSET ,
-.BR FUTEX_WAIT_REQUEUE_PI ,
-(since Linux 4.5)
-.\" commit 337f13046ff03717a9e99675284a817527440a49
-.BR FUTEX_WAIT ,
-and
-(since Linux 5.14)
-.\" commit bf22a6976897977b0a3f1aeba6823c959fc4fdae
-.B FUTEX_LOCK_PI2
-operations.
-.IP
-If this option is set, the kernel measures the
-.I timeout
-against the
-.B CLOCK_REALTIME
-clock.
-.IP
-If this option is not set, the kernel measures the
-.I timeout
-against the
-.B CLOCK_MONOTONIC
-clock.
-.P
-The operation specified in
-.I futex_op
-is one of the following:
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_WAIT " (since Linux 2.6.0)"
-.\" Strictly speaking, since some time in Linux 2.5.x
-This operation tests that the value at the
-futex word pointed to by the address
-.I uaddr
-still contains the expected value
-.IR val ,
-and if so, then sleeps waiting for a
-.B FUTEX_WAKE
-operation on the futex word.
-The load of the value of the futex word is an atomic memory
-access (i.e., using atomic machine instructions of the respective
-architecture).
-This load, the comparison with the expected value, and
-starting to sleep are performed atomically
-.\" FIXME: Torvald, I think we may need to add some explanation of
-.\" "totally ordered" here.
-and totally ordered
-with respect to other futex operations on the same futex word.
-If the thread starts to sleep,
-it is considered a waiter on this futex word.
-If the futex value does not match
-.IR val ,
-then the call fails immediately with the error
-.BR EAGAIN .
-.IP
-The purpose of the comparison with the expected value is to prevent lost
-wake-ups.
-If another thread changed the value of the futex word after the
-calling thread decided to block based on the prior value,
-and if the other thread executed a
-.B FUTEX_WAKE
-operation (or similar wake-up) after the value change and before this
-.B FUTEX_WAIT
-operation, then the calling thread will observe the
-value change and will not start to sleep.
-.IP
-If the
-.I timeout
-is not NULL, the structure it points to specifies a
-timeout for the wait.
-(This interval will be rounded up to the system clock granularity,
-and is guaranteed not to expire early.)
-The timeout is by default measured according to the
-.B CLOCK_MONOTONIC
-clock, but, since Linux 4.5, the
-.B CLOCK_REALTIME
-clock can be selected by specifying
-.B FUTEX_CLOCK_REALTIME
-in
-.IR futex_op .
-If
-.I timeout
-is NULL, the call blocks indefinitely.
-.IP
-.IR Note :
-for
-.BR FUTEX_WAIT ,
-.I timeout
-is interpreted as a
-.I relative
-value.
-This differs from other futex operations, where
-.I timeout
-is interpreted as an absolute value.
-To obtain the equivalent of
-.B FUTEX_WAIT
-with an absolute timeout, employ
-.B FUTEX_WAIT_BITSET
-with
-.I val3
-specified as
-.BR FUTEX_BITSET_MATCH_ANY .
-.IP
-The arguments
-.I uaddr2
-and
-.I val3
-are ignored.
-.\" FIXME . (Torvald) I think we should remove this. Or maybe adapt to a
-.\" different example.
-.\"
-.\" For
-.\" .BR futex (7),
-.\" this call is executed if decrementing the count gave a negative value
-.\" (indicating contention),
-.\" and will sleep until another process or thread releases
-.\" the futex and executes the
-.\" .B FUTEX_WAKE
-.\" operation.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_WAKE " (since Linux 2.6.0)"
-.\" Strictly speaking, since Linux 2.5.x
-This operation wakes at most
-.I val
-of the waiters that are waiting (e.g., inside
-.BR FUTEX_WAIT )
-on the futex word at the address
-.IR uaddr .
-Most commonly,
-.I val
-is specified as either 1 (wake up a single waiter) or
-.B INT_MAX
-(wake up all waiters).
-No guarantee is provided about which waiters are awoken
-(e.g., a waiter with a higher scheduling priority is not guaranteed
-to be awoken in preference to a waiter with a lower priority).
-.IP
-The arguments
-.IR timeout ,
-.IR uaddr2 ,
-and
-.I val3
-are ignored.
-.\" FIXME . (Torvald) I think we should remove this. Or maybe adapt to
-.\" a different example.
-.\"
-.\" For
-.\" .BR futex (7),
-.\" this is executed if incrementing the count showed that
-.\" there were waiters,
-.\" once the futex value has been set to 1
-.\" (indicating that it is available).
-.\"
-.\" How does "incrementing the count show that there were waiters"?
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_FD " (from Linux 2.6.0 up to and including Linux 2.6.25)"
-.\" Strictly speaking, from Linux 2.5.x to Linux 2.6.25
-This operation creates a file descriptor that is associated with
-the futex at
-.IR uaddr .
-The caller must close the returned file descriptor after use.
-When another process or thread performs a
-.B FUTEX_WAKE
-on the futex word, the file descriptor is reported as readable by
-.BR select (2),
-.BR poll (2),
-and
-.BR epoll (7).
-.IP
-The file descriptor can be used to obtain asynchronous notifications: if
-.I val
-is nonzero, then, when another process or thread executes a
-.BR FUTEX_WAKE ,
-the caller will receive the signal number that was passed in
-.IR val .
-.IP
-The arguments
-.IR timeout ,
-.IR uaddr2 ,
-and
-.I val3
-are ignored.
-.IP
-Because it was inherently racy,
-.B FUTEX_FD
-has been removed
-.\" commit 82af7aca56c67061420d618cc5a30f0fd4106b80
-from Linux 2.6.26 onward.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_REQUEUE " (since Linux 2.6.0)"
-This operation performs the same task as
-.B FUTEX_CMP_REQUEUE
-(see below), except that no check is made using the value in
-.IR val3 .
-(The argument
-.I val3
-is ignored.)
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_CMP_REQUEUE " (since Linux 2.6.7)"
-This operation first checks whether the location
-.I uaddr
-still contains the value
-.IR val3 .
-If not, the operation fails with the error
-.BR EAGAIN .
-Otherwise, the operation wakes up a maximum of
-.I val
-waiters that are waiting on the futex at
-.IR uaddr .
-If there are more than
-.I val
-waiters, then the remaining waiters are removed
-from the wait queue of the source futex at
-.I uaddr
-and added to the wait queue of the target futex at
-.IR uaddr2 .
-The
-.I val2
-argument specifies an upper limit on the number of waiters
-that are requeued to the futex at
-.IR uaddr2 .
-.IP
-.\" FIXME(Torvald) Is the following correct? Or is just the decision
-.\" which threads to wake or requeue part of the atomic operation?
-The load from
-.I uaddr
-is an atomic memory access (i.e., using atomic machine instructions of
-the respective architecture).
-This load, the comparison with
-.IR val3 ,
-and the requeueing of any waiters are performed atomically and totally
-ordered with respect to other operations on the same futex word.
-.\" Notes from a f2f conversation with Thomas Gleixner (Aug 2015): ###
-.\" The operation is serialized with respect to operations on both
-.\" source and target futex. No other waiter can enqueue itself
-.\" for waiting and no other waiter can dequeue itself because of
-.\" a timeout or signal.
-.IP
-Typical values to specify for
-.I val
-are 0 or 1.
-(Specifying
-.B INT_MAX
-is not useful, because it would make the
-.B FUTEX_CMP_REQUEUE
-operation equivalent to
-.BR FUTEX_WAKE .)
-The limit value specified via
-.I val2
-is typically either 1 or
-.BR INT_MAX .
-(Specifying the argument as 0 is not useful, because it would make the
-.B FUTEX_CMP_REQUEUE
-operation equivalent to
-.BR FUTEX_WAKE .)
-.IP
-The
-.B FUTEX_CMP_REQUEUE
-operation was added as a replacement for the earlier
-.BR FUTEX_REQUEUE .
-The difference is that the check of the value at
-.I uaddr
-can be used to ensure that requeueing happens only under certain
-conditions, which allows race conditions to be avoided in certain use cases.
-.\" But, as Rich Felker points out, there remain valid use cases for
-.\" FUTEX_REQUEUE, for example, when the calling thread is requeuing
-.\" the target(s) to a lock that the calling thread owns
-.\" From: Rich Felker <dalias@libc.org>
-.\" Date: Wed, 29 Oct 2014 22:43:17 -0400
-.\" To: Darren Hart <dvhart@infradead.org>
-.\" CC: libc-alpha@sourceware.org, ...
-.\" Subject: Re: Add futex wrapper to glibc?
-.IP
-Both
-.B FUTEX_REQUEUE
-and
-.B FUTEX_CMP_REQUEUE
-can be used to avoid "thundering herd" wake-ups that could occur when using
-.B FUTEX_WAKE
-in cases where all of the waiters that are woken need to acquire
-another futex.
-Consider the following scenario,
-where multiple waiter threads are waiting on B,
-a wait queue implemented using a futex:
-.IP
-.in +4n
-.EX
-lock(A);
-while (!check_value(V)) {
- unlock(A);
- block_on(B);
- lock(A);
-};
-unlock(A);
-.EE
-.in
-.IP
-If a waker thread used
-.BR FUTEX_WAKE ,
-then all waiters waiting on B would be woken up,
-and they would all try to acquire lock A.
-However, waking all of the threads in this manner would be pointless because
-all except one of the threads would immediately block on lock A again.
-By contrast, a requeue operation wakes just one waiter and moves
-the other waiters to lock A,
-and when the woken waiter unlocks A then the next waiter can proceed.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_WAKE_OP " (since Linux 2.6.14)"
-.\" commit 4732efbeb997189d9f9b04708dc26bf8613ed721
-.\" Author: Jakub Jelinek <jakub@redhat.com>
-.\" Date: Tue Sep 6 15:16:25 2005 -0700
-.\" FIXME. (Torvald) The glibc condvar implementation is currently being
-.\" revised (e.g., to not use an internal lock anymore).
-.\" It is probably more future-proof to remove this paragraph.
-.\" [Torvald, do you have an update here?]
-This operation was added to support some user-space use cases
-where more than one futex must be handled at the same time.
-The most notable example is the implementation of
-.BR pthread_cond_signal (3),
-which requires operations on two futexes,
-the one used to implement the mutex and the one used in the implementation
-of the wait queue associated with the condition variable.
-.B FUTEX_WAKE_OP
-allows such cases to be implemented without leading to
-high rates of contention and context switching.
-.IP
-The
-.B FUTEX_WAKE_OP
-operation is equivalent to executing the following code atomically
-and totally ordered with respect to other futex operations on
-either of the two supplied futex words:
-.IP
-.in +4n
-.EX
-uint32_t oldval = *(uint32_t *) uaddr2;
-*(uint32_t *) uaddr2 = oldval \fIop\fP \fIoparg\fP;
-futex(uaddr, FUTEX_WAKE, val, 0, 0, 0);
-if (oldval \fIcmp\fP \fIcmparg\fP)
- futex(uaddr2, FUTEX_WAKE, val2, 0, 0, 0);
-.EE
-.in
-.IP
-In other words,
-.B FUTEX_WAKE_OP
-does the following:
-.RS
-.IP \[bu] 3
-saves the original value of the futex word at
-.I uaddr2
-and performs an operation to modify the value of the futex at
-.IR uaddr2 ;
-this is an atomic read-modify-write memory access (i.e., using atomic
-machine instructions of the respective architecture)
-.IP \[bu]
-wakes up a maximum of
-.I val
-waiters on the futex for the futex word at
-.IR uaddr ;
-and
-.IP \[bu]
-dependent on the results of a test of the original value of the
-futex word at
-.IR uaddr2 ,
-wakes up a maximum of
-.I val2
-waiters on the futex for the futex word at
-.IR uaddr2 .
-.RE
-.IP
-The operation and comparison that are to be performed are encoded
-in the bits of the argument
-.IR val3 .
-Pictorially, the encoding is:
-.IP
-.in +4n
-.EX
-+---+---+-----------+-----------+
-|op |cmp|   oparg   |  cmparg   |
-+---+---+-----------+-----------+
-  4   4       12          12    <== # of bits
-.EE
-.in
-.IP
-Expressed in code, the encoding is:
-.IP
-.in +4n
-.EX
-#define FUTEX_OP(op, oparg, cmp, cmparg) \e
- (((op & 0xf) << 28) | \e
- ((cmp & 0xf) << 24) | \e
- ((oparg & 0xfff) << 12) | \e
- (cmparg & 0xfff))
-.EE
-.in
-.IP
-In the above,
-.I op
-and
-.I cmp
-are each one of the codes listed below.
-The
-.I oparg
-and
-.I cmparg
-components are literal numeric values, except as noted below.
-.IP
-The
-.I op
-component has one of the following values:
-.IP
-.in +4n
-.EX
-FUTEX_OP_SET 0 /* uaddr2 = oparg; */
-FUTEX_OP_ADD 1 /* uaddr2 += oparg; */
-FUTEX_OP_OR 2 /* uaddr2 |= oparg; */
-FUTEX_OP_ANDN 3 /* uaddr2 &= \[ti]oparg; */
-FUTEX_OP_XOR 4 /* uaddr2 \[ha]= oparg; */
-.EE
-.in
-.IP
-In addition, bitwise ORing the following value into
-.I op
-causes
-.I (1\~<<\~oparg)
-to be used as the operand:
-.IP
-.in +4n
-.EX
-FUTEX_OP_ARG_SHIFT 8 /* Use (1 << oparg) as operand */
-.EE
-.in
-.IP
-The
-.I cmp
-field is one of the following:
-.IP
-.in +4n
-.EX
-FUTEX_OP_CMP_EQ 0 /* if (oldval == cmparg) wake */
-FUTEX_OP_CMP_NE 1 /* if (oldval != cmparg) wake */
-FUTEX_OP_CMP_LT 2 /* if (oldval < cmparg) wake */
-FUTEX_OP_CMP_LE 3 /* if (oldval <= cmparg) wake */
-FUTEX_OP_CMP_GT 4 /* if (oldval > cmparg) wake */
-FUTEX_OP_CMP_GE 5 /* if (oldval >= cmparg) wake */
-.EE
-.in
-.IP
-The return value of
-.B FUTEX_WAKE_OP
-is the sum of the number of waiters woken on the futex
-.I uaddr
-plus the number of waiters woken on the futex
-.IR uaddr2 .
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_WAIT_BITSET " (since Linux 2.6.25)"
-.\" commit cd689985cf49f6ff5c8eddc48d98b9d581d9475d
-This operation is like
-.B FUTEX_WAIT
-except that
-.I val3
-is used to provide a 32-bit bit mask to the kernel.
-This bit mask, in which at least one bit must be set,
-is stored in the kernel-internal state of the waiter.
-See the description of
-.B FUTEX_WAKE_BITSET
-for further details.
-.IP
-If
-.I timeout
-is not NULL, the structure it points to specifies
-an absolute timeout for the wait operation.
-If
-.I timeout
-is NULL, the operation can block indefinitely.
-.IP
-The
-.I uaddr2
-argument is ignored.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_WAKE_BITSET " (since Linux 2.6.25)"
-.\" commit cd689985cf49f6ff5c8eddc48d98b9d581d9475d
-This operation is the same as
-.B FUTEX_WAKE
-except that the
-.I val3
-argument is used to provide a 32-bit bit mask to the kernel.
-This bit mask, in which at least one bit must be set,
-is used to select which waiters should be woken up.
-The selection is done by a bitwise AND of the "wake" bit mask
-(i.e., the value in
-.IR val3 )
-and the bit mask which is stored in the kernel-internal
-state of the waiter (the "wait" bit mask that is set using
-.BR FUTEX_WAIT_BITSET ).
-All of the waiters for which the result of the AND is nonzero are woken up;
-the remaining waiters are left sleeping.
-.IP
-The effect of
-.B FUTEX_WAIT_BITSET
-and
-.B FUTEX_WAKE_BITSET
-is to allow selective wake-ups among multiple waiters that are blocked
-on the same futex.
-However, note that, depending on the use case,
-employing this bit-mask multiplexing feature on a
-futex can be less efficient than simply using multiple futexes,
-because employing bit-mask multiplexing requires the kernel
-to check all waiters on a futex,
-including those that are not interested in being woken up
-(i.e., they do not have the relevant bit set in their "wait" bit mask).
-.\" According to http://locklessinc.com/articles/futex_cheat_sheet/:
-.\"
-.\" "The original reason for the addition of these extensions
-.\" was to improve the performance of pthread read-write locks
-.\" in glibc. However, the pthreads library no longer uses the
-.\" same locking algorithm, and these extensions are not used
-.\" without the bitset parameter being all ones."
-.\"
-.\" The page goes on to note that the FUTEX_WAIT_BITSET operation
-.\" is nevertheless used (with a bit mask of all ones) in order to
-.\" obtain the absolute timeout functionality that is useful
-.\" for efficiently implementing Pthreads APIs (which use absolute
-.\" timeouts); FUTEX_WAIT provides only relative timeouts.
-.IP
-The constant
-.BR FUTEX_BITSET_MATCH_ANY ,
-which corresponds to all 32 bits set in the bit mask, can be used as the
-.I val3
-argument for
-.B FUTEX_WAIT_BITSET
-and
-.BR FUTEX_WAKE_BITSET .
-Other than differences in the handling of the
-.I timeout
-argument, the
-.B FUTEX_WAIT
-operation is equivalent to
-.B FUTEX_WAIT_BITSET
-with
-.I val3
-specified as
-.BR FUTEX_BITSET_MATCH_ANY ;
-that is, allow a wake-up by any waker.
-The
-.B FUTEX_WAKE
-operation is equivalent to
-.B FUTEX_WAKE_BITSET
-with
-.I val3
-specified as
-.BR FUTEX_BITSET_MATCH_ANY ;
-that is, wake up any waiter(s).
-.IP
-The
-.I uaddr2
-and
-.I timeout
-arguments are ignored.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.SS Priority-inheritance futexes
-Linux supports priority-inheritance (PI) futexes in order to handle
-priority-inversion problems that can be encountered with
-normal futex locks.
-Priority inversion is the problem that occurs when a high-priority
-task is blocked waiting to acquire a lock held by a low-priority task,
-while tasks at an intermediate priority continuously preempt
-the low-priority task from the CPU.
-Consequently, the low-priority task makes no progress toward
-releasing the lock, and the high-priority task remains blocked.
-.P
-Priority inheritance is a mechanism for dealing with
-the priority-inversion problem.
-With this mechanism, when a high-priority task becomes blocked
-by a lock held by a low-priority task,
-the priority of the low-priority task is temporarily raised
-to that of the high-priority task,
-so that it is not preempted by any intermediate-level tasks,
-and can thus make progress toward releasing the lock.
-To be effective, priority inheritance must be transitive,
-meaning that if a high-priority task blocks on a lock
-held by a lower-priority task that is itself blocked by a lock
-held by another intermediate-priority task
-(and so on, for chains of arbitrary length),
-then both of those tasks
-(or more generally, all of the tasks in a lock chain)
-have their priorities raised to be the same as the high-priority task.
-.P
-From a user-space perspective,
-what makes a futex PI-aware is a policy agreement (described below)
-between user space and the kernel about the value of the futex word,
-coupled with the use of the PI-futex operations described below.
-(Unlike the other futex operations described above,
-the PI-futex operations are designed
-for the implementation of very specific IPC mechanisms.)
-.\"
-.\" Quoting Darren Hart:
-.\" These opcodes paired with the PI futex value policy (described below)
-.\" defines a "futex" as PI aware. These were created very specifically
-.\" in support of PI pthread_mutexes, so it makes a lot more sense to
-.\" talk about a PI aware pthread_mutex, than a PI aware futex, since
-.\" there is a lot of policy and scaffolding that has to be built up
-.\" around it to use it properly (this is what a PI pthread_mutex is).
-.P
-.\" mtk: The following text is drawn from the Hart/Guniguntala paper
-.\" (listed in SEE ALSO), but I have reworded some pieces
-.\" significantly.
-.\"
-The PI-futex operations described below differ from the other
-futex operations in that they impose policy on the use of the value of the
-futex word:
-.IP \[bu] 3
-If the lock is not acquired, the futex word's value shall be 0.
-.IP \[bu]
-If the lock is acquired, the futex word's value shall
-be the thread ID (TID;
-see
-.BR gettid (2))
-of the owning thread.
-.IP \[bu]
-If the lock is owned and there are threads contending for the lock,
-then the
-.B FUTEX_WAITERS
-bit shall be set in the futex word's value; in other words, this value is:
-.IP
-.in +4n
-.EX
-FUTEX_WAITERS | TID
-.EE
-.in
-.IP
-(Note that it is invalid for a PI futex word to have no owner and
-.B FUTEX_WAITERS
-set.)
-.P
-With this policy in place,
-a user-space application can acquire an unacquired
-lock or release a lock using atomic instructions executed in user mode
-(e.g., a compare-and-swap operation such as
-.I cmpxchg
-on the x86 architecture).
-Acquiring a lock simply consists of using compare-and-swap to atomically
-set the futex word's value to the caller's TID if its previous value was 0.
-Releasing a lock requires using compare-and-swap to set the futex word's
-value to 0 if the previous value was the expected TID.
-.P
-If a futex is already acquired (i.e., has a nonzero value),
-waiters must employ the
-.B FUTEX_LOCK_PI
-or
-.B FUTEX_LOCK_PI2
-operations to acquire the lock.
-If other threads are waiting for the lock, then the
-.B FUTEX_WAITERS
-bit is set in the futex value;
-in this case, the lock owner must employ the
-.B FUTEX_UNLOCK_PI
-operation to release the lock.
-.P
-In the cases where callers are forced into the kernel
-(i.e., required to perform a
-.BR futex ()
-call),
-they then deal directly with a so-called RT-mutex,
-a kernel locking mechanism which implements the required
-priority-inheritance semantics.
-After the RT-mutex is acquired, the futex value is updated accordingly,
-before the calling thread returns to user space.
-.P
-It is important to note
-.\" tglx (July 2015):
-.\" If there are multiple waiters on a pi futex then a wake pi operation
-.\" will wake the first waiter and hand over the lock to this waiter. This
-.\" includes handing over the rtmutex which represents the futex in the
-.\" kernel. The strict requirement is that the futex owner and the rtmutex
-.\" owner must be the same, except for the update period which is
-.\" serialized by the futex internal locking. That means the kernel must
-.\" update the user-space value prior to returning to user space
-that the kernel will update the futex word's value prior
-to returning to user space.
-(This prevents the possibility of the futex word's value ending
-up in an invalid state, such as having an owner but the value being 0,
-or having waiters but not having the
-.B FUTEX_WAITERS
-bit set.)
-.P
-If a futex has an associated RT-mutex in the kernel
-(i.e., there are blocked waiters)
-and the owner of the futex/RT-mutex dies unexpectedly,
-then the kernel cleans up the RT-mutex and hands it over to the next waiter.
-This in turn requires that the user-space value is updated accordingly.
-To indicate that this is required, the kernel sets the
-.B FUTEX_OWNER_DIED
-bit in the futex word along with the thread ID of the new owner.
-User space can detect this situation via the presence of the
-.B FUTEX_OWNER_DIED
-bit and is then responsible for cleaning up the stale state left over by
-the dead owner.
-.\" tglx (July 2015):
-.\" The FUTEX_OWNER_DIED bit can also be set on uncontended futexes, where
-.\" the kernel has no state associated. This happens via the robust futex
-.\" mechanism. In that case the futex value will be set to
-.\" FUTEX_OWNER_DIED. The robust futex mechanism is also available for non
-.\" PI futexes.
-.P
-PI futexes are operated on by specifying one of the values listed below in
-.IR futex_op .
-Note that the PI futex operations must be used as paired operations
-and are subject to some additional requirements:
-.IP \[bu] 3
-.BR FUTEX_LOCK_PI ,
-.BR FUTEX_LOCK_PI2 ,
-and
-.B FUTEX_TRYLOCK_PI
-pair with
-.BR FUTEX_UNLOCK_PI .
-.B FUTEX_UNLOCK_PI
-must be called only on a futex owned by the calling thread,
-as defined by the value policy, otherwise the error
-.B EPERM
-results.
-.IP \[bu]
-.B FUTEX_WAIT_REQUEUE_PI
-pairs with
-.BR FUTEX_CMP_REQUEUE_PI .
-This must be performed from a non-PI futex to a distinct PI futex
-(or the error
-.B EINVAL
-results).
-Additionally,
-.I val
-(the number of waiters to be woken) must be 1
-(or the error
-.B EINVAL
-results).
-.P
-The PI futex operations are as follows:
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_LOCK_PI " (since Linux 2.6.18)"
-.\" commit c87e2837be82df479a6bae9f155c43516d2feebc
-This operation is used after an attempt to acquire
-the lock via an atomic user-mode instruction failed
-because the futex word has a nonzero value\[em]specifically,
-because it contained the (PID-namespace-specific) TID of the lock owner.
-.IP
-The operation checks the value of the futex word at the address
-.IR uaddr .
-If the value is 0, then the kernel tries to atomically set
-the futex value to the caller's TID.
-If the futex word's value is nonzero,
-the kernel atomically sets the
-.B FUTEX_WAITERS
-bit, which signals the futex owner that it cannot unlock the futex in
-user space atomically by setting the futex value to 0.
-.\" tglx (July 2015):
-.\" The operation here is similar to the FUTEX_WAIT logic. When the user
-.\" space atomic acquire does not succeed because the futex value was non
-.\" zero, then the waiter goes into the kernel, takes the kernel internal
-.\" lock and retries the acquisition under the lock. If the acquisition
-.\" does not succeed either, then it sets the FUTEX_WAITERS bit, to signal
-.\" the lock owner that it needs to go into the kernel. Here is the pseudo
-.\" code:
-.\"
-.\" lock(kernel_lock);
-.\" retry:
-.\"
-.\" /*
-.\" * Owner might have unlocked in user space before we
-.\" * were able to set the waiter bit.
-.\" */
-.\" if (atomic_acquire(futex) == SUCCESS) {
-.\" unlock(kernel_lock);
-.\" return 0;
-.\" }
-.\"
-.\" /*
-.\" * Owner might have unlocked after the above atomic_acquire()
-.\" * attempt.
-.\" */
-.\" if (atomic_set_waiters_bit(futex) != SUCCESS)
-.\" goto retry;
-.\"
-.\" queue_waiter();
-.\" unlock(kernel_lock);
-.\" block();
-.\"
-After that, the kernel:
-.RS
-.IP (1) 5
-Tries to find the thread which is associated with the owner TID.
-.IP (2)
-Creates or reuses kernel state on behalf of the owner.
-(If this is the first waiter, there is no kernel state for this
-futex, so kernel state is created by locking the RT-mutex
-and the futex owner is made the owner of the RT-mutex.
-If there are existing waiters, then the existing state is reused.)
-.IP (3)
-Attaches the waiter to the futex
-(i.e., the waiter is enqueued on the RT-mutex waiter list).
-.RE
-.IP
-If more than one waiter exists,
-the enqueueing of the waiter is in descending priority order.
-(For information on priority ordering, see the discussion of the
-.BR SCHED_DEADLINE ,
-.BR SCHED_FIFO ,
-and
-.B SCHED_RR
-scheduling policies in
-.BR sched (7).)
-The owner inherits either the waiter's CPU bandwidth
-(if the waiter is scheduled under the
-.B SCHED_DEADLINE
-policy) or the waiter's priority (if the waiter is scheduled under the
-.B SCHED_RR
-or
-.B SCHED_FIFO
-policy).
-.\" August 2015:
-.\" mtk: If the realm is restricted purely to SCHED_OTHER (SCHED_NORMAL)
-.\" processes, does the nice value come into play also?
-.\"
-.\" tglx: No. SCHED_OTHER/NORMAL tasks are handled in FIFO order
-This inheritance follows the lock chain in the case of nested locking
-.\" (i.e., task 1 blocks on lock A, held by task 2,
-.\" while task 2 blocks on lock B, held by task 3)
-and performs deadlock detection.
-.IP
-The
-.I timeout
-argument provides a timeout for the lock attempt.
-If
-.I timeout
-is not NULL, the structure it points to specifies
-an absolute timeout, measured against the
-.B CLOCK_REALTIME
-clock.
-.\" 2016-07-07 response from Thomas Gleixner on LKML:
-.\" From: Thomas Gleixner <tglx@linutronix.de>
-.\" Date: 6 July 2016 at 20:57
-.\" Subject: Re: futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op
-.\"
-.\" On Thu, 23 Jun 2016, Michael Kerrisk (man-pages) wrote:
-.\" > On 06/23/2016 08:28 PM, Darren Hart wrote:
-.\" > > And as a follow-on, what is the reason for FUTEX_LOCK_PI only using
-.\" > > CLOCK_REALTIME? It seems reasonable to me that a user may want to wait a
-.\" > > specific amount of time, regardless of wall time.
-.\" >
-.\" > Yes, that's another weird inconsistency.
-.\"
-.\" The reason is that pthread_mutex_timedlock() uses absolute timeouts based on
-.\" CLOCK_REALTIME. glibc folks asked to make that the default behaviour back
-.\" then when we added LOCK_PI.
-If
-.I timeout
-is NULL, the operation will block indefinitely.
-.IP
-The
-.IR uaddr2 ,
-.IR val ,
-and
-.I val3
-arguments are ignored.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_LOCK_PI2 " (since Linux 5.14)"
-.\" commit bf22a6976897977b0a3f1aeba6823c959fc4fdae
-This operation is the same as
-.BR FUTEX_LOCK_PI ,
-except that the clock against which
-.I timeout
-is measured is selectable.
-By default, the (absolute) timeout specified in
-.I timeout
-is measured against the
-.B CLOCK_MONOTONIC
-clock, but if the
-.B FUTEX_CLOCK_REALTIME
-flag is specified in
-.IR futex_op ,
-then the timeout is measured against the
-.B CLOCK_REALTIME
-clock.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_TRYLOCK_PI " (since Linux 2.6.18)"
-.\" commit c87e2837be82df479a6bae9f155c43516d2feebc
-This operation tries to acquire the lock at
-.IR uaddr .
-It is invoked when a user-space atomic acquire did not
-succeed because the futex word was not 0.
-.IP
-Because the kernel has access to more state information than user space,
-acquisition of the lock might succeed if performed by the
-kernel in cases where the futex word
-(i.e., the state information accessible to user space) contains stale state
-.RB ( FUTEX_WAITERS
-and/or
-.BR FUTEX_OWNER_DIED ).
-This can happen when the owner of the futex died.
-User space cannot handle this condition in a race-free manner,
-but the kernel can fix this up and acquire the futex.
-.\" Paraphrasing a f2f conversation with Thomas Gleixner about the
-.\" above point (Aug 2015): ###
-.\" There is a rare possibility of a race condition involving an
-.\" uncontended futex with no owner, but with waiters. The
-.\" kernel-user-space contract is that if a futex is nonzero, you must
-.\" go into kernel. The futex was owned by a task, and that task dies
-.\" but there are no waiters, so the futex value is non zero.
-.\" Therefore, the next locker has to go into the kernel,
-.\" so that the kernel has a chance to clean up. (CMPXCHG on zero
-.\" in user space would fail, so kernel has to clean up.)
-.\" Darren Hart (Oct 2015):
-.\" The trylock in the kernel has more state, so it can independently
-.\" verify the flags that user space must trust implicitly.
-.IP
-The
-.IR uaddr2 ,
-.IR val ,
-.IR timeout ,
-and
-.I val3
-arguments are ignored.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_UNLOCK_PI " (since Linux 2.6.18)"
-.\" commit c87e2837be82df479a6bae9f155c43516d2feebc
-This operation wakes the top priority waiter that is waiting in
-.B FUTEX_LOCK_PI
-or
-.B FUTEX_LOCK_PI2
-on the futex address provided by the
-.I uaddr
-argument.
-.IP
-This is called when the user-space value at
-.I uaddr
-cannot be changed atomically from a TID (of the owner) to 0.
-.IP
-The
-.IR uaddr2 ,
-.IR val ,
-.IR timeout ,
-and
-.I val3
-arguments are ignored.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_CMP_REQUEUE_PI " (since Linux 2.6.31)"
-.\" commit 52400ba946759af28442dee6265c5c0180ac7122
-This operation is a PI-aware variant of
-.BR FUTEX_CMP_REQUEUE .
-It requeues waiters that are blocked via
-.B FUTEX_WAIT_REQUEUE_PI
-on
-.I uaddr
-from a non-PI source futex
-.RI ( uaddr )
-to a PI target futex
-.RI ( uaddr2 ).
-.IP
-As with
-.BR FUTEX_CMP_REQUEUE ,
-this operation wakes up a maximum of
-.I val
-waiters that are waiting on the futex at
-.IR uaddr .
-However, for
-.BR FUTEX_CMP_REQUEUE_PI ,
-.I val
-is required to be 1
-(since the main point is to avoid a thundering herd).
-The remaining waiters are removed from the wait queue of the source futex at
-.I uaddr
-and added to the wait queue of the target futex at
-.IR uaddr2 .
-.IP
-The
-.I val2
-.\" val2 is the cap on the number of requeued waiters.
-.\" In the glibc pthread_cond_broadcast() implementation, this argument
-.\" is specified as INT_MAX, and for pthread_cond_signal() it is 0.
-and
-.I val3
-arguments serve the same purposes as for
-.BR FUTEX_CMP_REQUEUE .
-.\"
-.\" The page at http://locklessinc.com/articles/futex_cheat_sheet/
-.\" notes that "priority-inheritance Futex to priority-inheritance
-.\" Futex requeues are currently unsupported". However, probably
-.\" the page does not need to say anything about this, since
-.\" Thomas Gleixner commented (July 2015): "they never will be
-.\" supported because they make no sense at all"
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.TP
-.BR FUTEX_WAIT_REQUEUE_PI " (since Linux 2.6.31)"
-.\" commit 52400ba946759af28442dee6265c5c0180ac7122
-.\"
-Wait on a non-PI futex at
-.I uaddr
-and potentially be requeued (via a
-.B FUTEX_CMP_REQUEUE_PI
-operation in another task) onto a PI futex at
-.IR uaddr2 .
-The wait operation on
-.I uaddr
-is the same as for
-.BR FUTEX_WAIT .
-.IP
-The waiter can be removed from the wait on
-.I uaddr
-without requeueing on
-.I uaddr2
-via a
-.B FUTEX_WAKE
-operation in another task.
-In this case, the
-.B FUTEX_WAIT_REQUEUE_PI
-operation fails with the error
-.BR EAGAIN .
-.IP
-If
-.I timeout
-is not NULL, the structure it points to specifies
-an absolute timeout for the wait operation.
-If
-.I timeout
-is NULL, the operation can block indefinitely.
-.IP
-The
-.I val3
-argument is ignored.
-.IP
-The
-.B FUTEX_WAIT_REQUEUE_PI
-and
-.B FUTEX_CMP_REQUEUE_PI
-operations were added to support a fairly specific use case:
-support for priority-inheritance-aware POSIX threads condition variables.
-The idea is that these operations should always be paired,
-in order to ensure that user space and the kernel remain in sync.
-Thus, in the
-.B FUTEX_WAIT_REQUEUE_PI
-operation, the user-space application pre-specifies the target
-of the requeue that takes place in the
-.B FUTEX_CMP_REQUEUE_PI
-operation.
-.\"
-.\" Darren Hart notes that a patch to allow glibc to fully support
-.\" PI-aware pthreads condition variables has not yet been accepted into
-.\" glibc. The story is complex, and can be found at
-.\" https://sourceware.org/bugzilla/show_bug.cgi?id=11588
-.\" Darren notes that in the meantime, the patch is shipped with various
-.\" PREEMPT_RT-enabled Linux systems.
-.\"
-.\" Related to the preceding, Darren proposed that somewhere, man-pages
-.\" should document the following point:
-.\"
-.\" While the Linux kernel, since Linux 2.6.31, supports requeueing of
-.\" priority-inheritance (PI) aware mutexes via the
-.\" FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI futex operations,
-.\" the glibc implementation does not yet take full advantage of this.
-.\" Specifically, the condvar internal data lock remains a non-PI aware
-.\" mutex, regardless of the type of the pthread_mutex associated with
-.\" the condvar. This can lead to an unbounded priority inversion on
-.\" the internal data lock even when associating a PI aware
-.\" pthread_mutex with a condvar during a pthread_cond*_wait
-.\" operation. For this reason, it is not recommended to rely on
-.\" priority inheritance when using pthread condition variables.
-.\"
-.\" The problem is that the obvious location for this text is
-.\" the pthread_cond*wait(3) man page. However, such a man page
-.\" does not currently exist.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.SH RETURN VALUE
-In the event of an error (and assuming that
-.BR futex ()
-was invoked via
-.BR syscall (2)),
-all operations return \-1 and set
-.I errno
-to indicate the error.
-.P
-The return value on success depends on the operation,
-as described in the following list:
-.TP
-.B FUTEX_WAIT
-Returns 0 if the caller was woken up.
-Note that a wake-up can also be caused by common futex usage patterns
-in unrelated code that happened to have previously used the futex word's
-memory location (e.g., typical futex-based implementations of
-Pthreads mutexes can cause this under some conditions).
-Therefore, callers should always conservatively assume that a return
-value of 0 can mean a spurious wake-up, and use the futex word's value
-(i.e., the user-space synchronization scheme)
-to decide whether to continue to block or not.
-.TP
-.B FUTEX_WAKE
-Returns the number of waiters that were woken up.
-.TP
-.B FUTEX_FD
-Returns the new file descriptor associated with the futex.
-.TP
-.B FUTEX_REQUEUE
-Returns the number of waiters that were woken up.
-.TP
-.B FUTEX_CMP_REQUEUE
-Returns the total number of waiters that were woken up or
-requeued to the futex for the futex word at
-.IR uaddr2 .
-If this value is greater than
-.IR val ,
-then the difference is the number of waiters requeued to the futex for the
-futex word at
-.IR uaddr2 .
-.TP
-.B FUTEX_WAKE_OP
-Returns the total number of waiters that were woken up.
-This is the sum of the woken waiters on the two futexes for
-the futex words at
-.I uaddr
-and
-.IR uaddr2 .
-.TP
-.B FUTEX_WAIT_BITSET
-Returns 0 if the caller was woken up.
-See
-.B FUTEX_WAIT
-for how to interpret this correctly in practice.
-.TP
-.B FUTEX_WAKE_BITSET
-Returns the number of waiters that were woken up.
-.TP
-.B FUTEX_LOCK_PI
-Returns 0 if the futex was successfully locked.
-.TP
-.B FUTEX_LOCK_PI2
-Returns 0 if the futex was successfully locked.
-.TP
-.B FUTEX_TRYLOCK_PI
-Returns 0 if the futex was successfully locked.
-.TP
-.B FUTEX_UNLOCK_PI
-Returns 0 if the futex was successfully unlocked.
-.TP
-.B FUTEX_CMP_REQUEUE_PI
-Returns the total number of waiters that were woken up or
-requeued to the futex for the futex word at
-.IR uaddr2 .
-If this value is greater than
-.IR val ,
-then the difference is the number of waiters requeued to the futex for
-the futex word at
-.IR uaddr2 .
-.TP
-.B FUTEX_WAIT_REQUEUE_PI
-Returns 0 if the caller was successfully requeued to the futex for
-the futex word at
-.IR uaddr2 .
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.SH ERRORS
-.TP
-.B EACCES
-No read access to the memory of a futex word.
-.TP
-.B EAGAIN
-.RB ( FUTEX_WAIT ,
-.BR FUTEX_WAIT_BITSET ,
-.BR FUTEX_WAIT_REQUEUE_PI )
-The value pointed to by
-.I uaddr
-was not equal to the expected value
-.I val
-at the time of the call.
-.IP
-.BR Note :
-on Linux, the symbolic names
-.B EAGAIN
-and
-.B EWOULDBLOCK
-(both of which appear in different parts of the kernel futex code)
-have the same value.
-.TP
-.B EAGAIN
-.RB ( FUTEX_CMP_REQUEUE ,
-.BR FUTEX_CMP_REQUEUE_PI )
-The value pointed to by
-.I uaddr
-is not equal to the expected value
-.IR val3 .
-.TP
-.B EAGAIN
-.RB ( FUTEX_LOCK_PI ,
-.BR FUTEX_LOCK_PI2 ,
-.BR FUTEX_TRYLOCK_PI ,
-.BR FUTEX_CMP_REQUEUE_PI )
-The futex owner thread ID of
-.I uaddr
-(for
-.BR FUTEX_CMP_REQUEUE_PI :
-.IR uaddr2 )
-is about to exit,
-but has not yet handled the internal state cleanup.
-Try again.
-.TP
-.B EDEADLK
-.RB ( FUTEX_LOCK_PI ,
-.BR FUTEX_LOCK_PI2 ,
-.BR FUTEX_TRYLOCK_PI ,
-.BR FUTEX_CMP_REQUEUE_PI )
-The futex word at
-.I uaddr
-is already locked by the caller.
-.TP
-.B EDEADLK
-.\" FIXME . I see that kernel/locking/rtmutex.c uses EDEADLK in some
-.\" places, and EDEADLOCK in others. On almost all architectures
-.\" these constants are synonymous. Is there a reason that both
-.\" names are used?
-.\"
-.\" tglx (July 2015): "No. We should probably fix that."
-.\"
-.RB ( FUTEX_CMP_REQUEUE_PI )
-While requeueing a waiter to the PI futex for the futex word at
-.IR uaddr2 ,
-the kernel detected a deadlock.
-.TP
-.B EFAULT
-A required pointer argument (i.e.,
-.IR uaddr ,
-.IR uaddr2 ,
-or
-.IR timeout )
-did not point to a valid user-space address.
-.TP
-.B EINTR
-A
-.B FUTEX_WAIT
-or
-.B FUTEX_WAIT_BITSET
-operation was interrupted by a signal (see
-.BR signal (7)).
-Before Linux 2.6.22, this error could also be returned for
-a spurious wakeup; since Linux 2.6.22, this no longer happens.
-.TP
-.B EINVAL
-The operation in
-.I futex_op
-is one of those that employs a timeout, but the supplied
-.I timeout
-argument was invalid
-.RI ( tv_sec
-was less than zero, or
-.I tv_nsec
-was not less than 1,000,000,000).
-.TP
-.B EINVAL
-The operation specified in
-.I futex_op
-employs one or both of the pointers
-.I uaddr
-and
-.IR uaddr2 ,
-but one of these does not point to a valid object\[em]that is,
-the address is not four-byte-aligned.
-.TP
-.B EINVAL
-.RB ( FUTEX_WAIT_BITSET ,
-.BR FUTEX_WAKE_BITSET )
-The bit mask supplied in
-.I val3
-is zero.
-.TP
-.B EINVAL
-.RB ( FUTEX_CMP_REQUEUE_PI )
-.I uaddr
-equals
-.I uaddr2
-(i.e., an attempt was made to requeue to the same futex).
-.TP
-.B EINVAL
-.RB ( FUTEX_FD )
-The signal number supplied in
-.I val
-is invalid.
-.TP
-.B EINVAL
-.RB ( FUTEX_WAKE ,
-.BR FUTEX_WAKE_OP ,
-.BR FUTEX_WAKE_BITSET ,
-.BR FUTEX_REQUEUE ,
-.BR FUTEX_CMP_REQUEUE )
-The kernel detected an inconsistency between the user-space state at
-.I uaddr
-and the kernel state\[em]that is, it detected a waiter which waits in
-.B FUTEX_LOCK_PI
-or
-.B FUTEX_LOCK_PI2
-on
-.IR uaddr .
-.TP
-.B EINVAL
-.RB ( FUTEX_LOCK_PI ,
-.BR FUTEX_LOCK_PI2 ,
-.BR FUTEX_TRYLOCK_PI ,
-.BR FUTEX_UNLOCK_PI )
-The kernel detected an inconsistency between the user-space state at
-.I uaddr
-and the kernel state.
-This indicates either state corruption
-or that the kernel found a waiter on
-.I uaddr
-which is waiting via
-.B FUTEX_WAIT
-or
-.BR FUTEX_WAIT_BITSET .
-.TP
-.B EINVAL
-.RB ( FUTEX_CMP_REQUEUE_PI )
-The kernel detected an inconsistency between the user-space state at
-.I uaddr2
-and the kernel state;
-.\" From a conversation with Thomas Gleixner (Aug 2015): ###
-.\" The kernel sees: I have non PI state for a futex you tried to
-.\" tell me was PI
-that is, the kernel detected a waiter which waits via
-.B FUTEX_WAIT
-or
-.B FUTEX_WAIT_BITSET
-on
-.IR uaddr2 .
-.TP
-.B EINVAL
-.RB ( FUTEX_CMP_REQUEUE_PI )
-The kernel detected an inconsistency between the user-space state at
-.I uaddr
-and the kernel state;
-that is, the kernel detected a waiter which waits via
-.B FUTEX_WAIT
-or
-.B FUTEX_WAIT_BITSET
-on
-.IR uaddr .
-.TP
-.B EINVAL
-.RB ( FUTEX_CMP_REQUEUE_PI )
-The kernel detected an inconsistency between the user-space state at
-.I uaddr
-and the kernel state;
-that is, the kernel detected a waiter which waits on
-.I uaddr
-via
-.B FUTEX_LOCK_PI
-or
-.B FUTEX_LOCK_PI2
-(instead of
-.BR FUTEX_WAIT_REQUEUE_PI ).
-.TP
-.B EINVAL
-.RB ( FUTEX_CMP_REQUEUE_PI )
-.\" This deals with the case:
-.\" wait_requeue_pi(A, B);
-.\" requeue_pi(A, C);
-An attempt was made to requeue a waiter to a futex other than that
-specified by the matching
-.B FUTEX_WAIT_REQUEUE_PI
-call for that waiter.
-.TP
-.B EINVAL
-.RB ( FUTEX_CMP_REQUEUE_PI )
-The
-.I val
-argument is not 1.
-.TP
-.B EINVAL
-Invalid argument.
-.TP
-.B ENFILE
-.RB ( FUTEX_FD )
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOMEM
-.RB ( FUTEX_LOCK_PI ,
-.BR FUTEX_LOCK_PI2 ,
-.BR FUTEX_TRYLOCK_PI ,
-.BR FUTEX_CMP_REQUEUE_PI )
-The kernel could not allocate memory to hold state information.
-.TP
-.B ENOSYS
-Invalid operation specified in
-.IR futex_op .
-.TP
-.B ENOSYS
-The
-.B FUTEX_CLOCK_REALTIME
-option was specified in
-.IR futex_op ,
-but the accompanying operation was neither
-.BR FUTEX_WAIT ,
-.BR FUTEX_WAIT_BITSET ,
-.BR FUTEX_WAIT_REQUEUE_PI ,
-nor
-.BR FUTEX_LOCK_PI2 .
-.TP
-.B ENOSYS
-.RB ( FUTEX_LOCK_PI ,
-.BR FUTEX_LOCK_PI2 ,
-.BR FUTEX_TRYLOCK_PI ,
-.BR FUTEX_UNLOCK_PI ,
-.BR FUTEX_CMP_REQUEUE_PI ,
-.BR FUTEX_WAIT_REQUEUE_PI )
-A run-time check determined that the operation is not available.
-The PI-futex operations are not implemented on all architectures and
-are not supported on some CPU variants.
-.TP
-.B EPERM
-.RB ( FUTEX_LOCK_PI ,
-.BR FUTEX_LOCK_PI2 ,
-.BR FUTEX_TRYLOCK_PI ,
-.BR FUTEX_CMP_REQUEUE_PI )
-The caller is not allowed to attach itself to the futex at
-.I uaddr
-(for
-.BR FUTEX_CMP_REQUEUE_PI :
-the futex at
-.IR uaddr2 ).
-(This may be caused by a state corruption in user space.)
-.TP
-.B EPERM
-.RB ( FUTEX_UNLOCK_PI )
-The caller does not own the lock represented by the futex word.
-.TP
-.B ESRCH
-.RB ( FUTEX_LOCK_PI ,
-.BR FUTEX_LOCK_PI2 ,
-.BR FUTEX_TRYLOCK_PI ,
-.BR FUTEX_CMP_REQUEUE_PI )
-The thread ID in the futex word at
-.I uaddr
-does not exist.
-.TP
-.B ESRCH
-.RB ( FUTEX_CMP_REQUEUE_PI )
-The thread ID in the futex word at
-.I uaddr2
-does not exist.
-.TP
-.B ETIMEDOUT
-The operation in
-.I futex_op
-employed the timeout specified in
-.IR timeout ,
-and the timeout expired before the operation completed.
-.\"
-.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.0.
-.P
-Initial futex support was merged in Linux 2.5.7 but with different
-semantics from what was described above.
-A four-argument system call with the semantics
-described in this page was introduced in Linux 2.5.40.
-A fifth argument was added in Linux 2.5.70,
-and a sixth argument was added in Linux 2.6.7.
-.SH EXAMPLES
-The program below demonstrates use of futexes in a program where a parent
-process and a child process use a pair of futexes located inside a
-shared anonymous mapping to synchronize access to a shared resource:
-the terminal.
-The two processes each write
-.I nloops
-(a command-line argument that defaults to 5 if omitted)
-messages to the terminal and employ a synchronization protocol
-that ensures that they alternate in writing messages.
-Upon running this program we see output such as the following:
-.P
-.in +4n
-.EX
-$ \fB./futex_demo\fP
-Parent (18534) 0
-Child (18535) 0
-Parent (18534) 1
-Child (18535) 1
-Parent (18534) 2
-Child (18535) 2
-Parent (18534) 3
-Child (18535) 3
-Parent (18534) 4
-Child (18535) 4
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (futex.c)
-.EX
-/* futex_demo.c
-\&
- Usage: futex_demo [nloops]
- (Default: 5)
-\&
- Demonstrate the use of futexes in a program where parent and child
- use a pair of futexes located inside a shared anonymous mapping to
- synchronize access to a shared resource: the terminal. The two
- processes each write \[aq]nloops\[aq] messages to the terminal and employ
- a synchronization protocol that ensures that they alternate in
- writing messages.
-*/
-#define _GNU_SOURCE
-#include <err.h>
-#include <errno.h>
-#include <linux/futex.h>
-#include <stdatomic.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include <unistd.h>
-\&
-static uint32_t *futex1, *futex2, *iaddr;
-\&
-static int
-futex(uint32_t *uaddr, int futex_op, uint32_t val,
- const struct timespec *timeout, uint32_t *uaddr2, uint32_t val3)
-{
- return syscall(SYS_futex, uaddr, futex_op, val,
- timeout, uaddr2, val3);
-}
-\&
-/* Acquire the futex pointed to by \[aq]futexp\[aq]: wait for its value to
- become 1, and then set the value to 0. */
-\&
-static void
-fwait(uint32_t *futexp)
-{
- long s;
- const uint32_t one = 1;
-\&
- /* atomic_compare_exchange_strong(ptr, oldval, newval)
- atomically performs the equivalent of:
-\&
- if (*ptr == *oldval)
- *ptr = newval;
-\&
- It returns true if the test yielded true and *ptr was updated. */
-\&
- while (1) {
-\&
- /* Is the futex available? */
- if (atomic_compare_exchange_strong(futexp, &one, 0))
- break; /* Yes */
-\&
- /* Futex is not available; wait. */
-\&
- s = futex(futexp, FUTEX_WAIT, 0, NULL, NULL, 0);
- if (s == \-1 && errno != EAGAIN)
- err(EXIT_FAILURE, "futex\-FUTEX_WAIT");
- }
-}
-\&
-/* Release the futex pointed to by \[aq]futexp\[aq]: if the futex currently
- has the value 0, set its value to 1 and then wake any futex waiters,
- so that if the peer is blocked in fwait(), it can proceed. */
-\&
-static void
-fpost(uint32_t *futexp)
-{
- long s;
- const uint32_t zero = 0;
-\&
- /* atomic_compare_exchange_strong() was described
- in comments above. */
-\&
- if (atomic_compare_exchange_strong(futexp, &zero, 1)) {
- s = futex(futexp, FUTEX_WAKE, 1, NULL, NULL, 0);
- if (s == \-1)
- err(EXIT_FAILURE, "futex\-FUTEX_WAKE");
- }
-}
-\&
-int
-main(int argc, char *argv[])
-{
- pid_t childPid;
- unsigned int nloops;
-\&
- setbuf(stdout, NULL);
-\&
- nloops = (argc > 1) ? atoi(argv[1]) : 5;
-\&
- /* Create a shared anonymous mapping that will hold the futexes.
- Since the futexes are being shared between processes, we
- subsequently use the "shared" futex operations (i.e., not the
- ones suffixed "_PRIVATE"). */
-\&
- iaddr = mmap(NULL, sizeof(*iaddr) * 2, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_SHARED, \-1, 0);
- if (iaddr == MAP_FAILED)
- err(EXIT_FAILURE, "mmap");
-\&
- futex1 = &iaddr[0];
- futex2 = &iaddr[1];
-\&
- *futex1 = 0; /* State: unavailable */
- *futex2 = 1; /* State: available */
-\&
- /* Create a child process that inherits the shared anonymous
- mapping. */
-\&
- childPid = fork();
- if (childPid == \-1)
- err(EXIT_FAILURE, "fork");
-\&
- if (childPid == 0) { /* Child */
- for (unsigned int j = 0; j < nloops; j++) {
- fwait(futex1);
- printf("Child (%jd) %u\en", (intmax_t) getpid(), j);
- fpost(futex2);
- }
-\&
- exit(EXIT_SUCCESS);
- }
-\&
- /* Parent falls through to here. */
-\&
- for (unsigned int j = 0; j < nloops; j++) {
- fwait(futex2);
- printf("Parent (%jd) %u\en", (intmax_t) getpid(), j);
- fpost(futex1);
- }
-\&
- wait(NULL);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.ad l
-.BR get_robust_list (2),
-.BR restart_syscall (2),
-.BR pthread_mutexattr_getprotocol (3),
-.BR futex (7),
-.BR sched (7)
-.P
-The following kernel source files:
-.IP \[bu] 3
-.I Documentation/pi\-futex.txt
-.IP \[bu]
-.I Documentation/futex\-requeue\-pi.txt
-.IP \[bu]
-.I Documentation/locking/rt\-mutex.txt
-.IP \[bu]
-.I Documentation/locking/rt\-mutex\-design.txt
-.IP \[bu]
-.I Documentation/robust\-futex\-ABI.txt
-.P
-Franke, H., Russell, R., and Kirkwood, M., 2002.
-\fIFuss, Futexes and Furwocks: Fast Userlevel Locking in Linux\fP
-(from proceedings of the Ottawa Linux Symposium 2002),
-.br
-.UR http://kernel.org\:/doc\:/ols\:/2002\:/ols2002\-pages\-479\-495.pdf
-.UE
-.P
-Hart, D., 2009. \fIA futex overview and update\fP,
-.UR http://lwn.net/Articles/360699/
-.UE
-.P
-Hart, D.\& and Guniguntala, D., 2009.
-\fIRequeue-PI: Making glibc Condvars PI-Aware\fP
-(from proceedings of the 2009 Real-Time Linux Workshop),
-.UR http://lwn.net/images/conf/rtlws11/papers/proc/p10.pdf
-.UE
-.P
-Drepper, U., 2011. \fIFutexes Are Tricky\fP,
-.UR http://www.akkadia.org/drepper/futex.pdf
-.UE
-.P
-Futex example library, futex\-*.tar.bz2 at
-.br
-.UR https://mirrors.kernel.org\:/pub\:/linux\:/kernel\:/people\:/rusty/
-.UE
-.\"
-.\" FIXME(Torvald) We should probably refer to the glibc code here, in
-.\" particular the glibc-internal futex wrapper functions that are
-.\" WIP, and the generic pthread_mutex_t and perhaps condvar
-.\" implementations.
diff --git a/man2/futimesat.2 b/man2/futimesat.2
deleted file mode 100644
index 78b5431ed..000000000
--- a/man2/futimesat.2
+++ /dev/null
@@ -1,128 +0,0 @@
-.\" This manpage is Copyright (C) 2006, Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH futimesat 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-futimesat \- change timestamps of a file relative to a \
-directory file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <fcntl.h>" " /* Definition of " AT_* " constants */"
-.B #include <sys/time.h>
-.P
-.BI "[[deprecated]] int futimesat(int " dirfd ", const char *" pathname ,
-.BI " const struct timeval " times [2]);
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR futimesat ():
-.nf
- _GNU_SOURCE
-.fi
-.SH DESCRIPTION
-This system call is obsolete.
-Use
-.BR utimensat (2)
-instead.
-.P
-The
-.BR futimesat ()
-system call operates in exactly the same way as
-.BR utimes (2),
-except for the differences described in this manual page.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR utimes (2)
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR utimes (2)).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-(See
-.BR openat (2)
-for an explanation of why the
-.I dirfd
-argument is useful.)
-.SH RETURN VALUE
-On success,
-.BR futimesat ()
-returns 0.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-The same errors that occur for
-.BR utimes (2)
-can also occur for
-.BR futimesat ().
-The following additional errors can occur for
-.BR futimesat ():
-.TP
-.B EBADF
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B ENOTDIR
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.SH VERSIONS
-.SS glibc
-If
-.I pathname
-is NULL, then the glibc
-.BR futimesat ()
-wrapper function updates the times for the file referred to by
-.IR dirfd .
-.\" The Solaris futimesat() also has this strangeness.
-.SH STANDARDS
-None.
-.SH HISTORY
-Linux 2.6.16,
-glibc 2.4.
-.P
-It was implemented from a specification that was proposed for POSIX.1,
-but that specification was replaced by the one for
-.BR utimensat (2).
-.P
-A similar system call exists on Solaris.
-.SH NOTES
-.SH SEE ALSO
-.BR stat (2),
-.BR utimensat (2),
-.BR utimes (2),
-.BR futimes (3),
-.BR path_resolution (7)
diff --git a/man2/get_kernel_syms.2 b/man2/get_kernel_syms.2
deleted file mode 100644
index f032705e7..000000000
--- a/man2/get_kernel_syms.2
+++ /dev/null
@@ -1,88 +0,0 @@
-.\" Copyright (C) 1996 Free Software Foundation, Inc.
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.\" 2006-02-09, some reformatting by Luc Van Oostenryck; some
-.\" reformatting and rewordings by mtk
-.\"
-.TH get_kernel_syms 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-get_kernel_syms \- retrieve exported kernel and module symbols
-.SH SYNOPSIS
-.nf
-.B #include <linux/module.h>
-.P
-.BI "[[deprecated]] int get_kernel_syms(struct kernel_sym *" table );
-.fi
-.SH DESCRIPTION
-.BR Note :
-This system call is present only before Linux 2.6.
-.P
-If
-.I table
-is NULL,
-.BR get_kernel_syms ()
-returns the number of symbols available for query.
-Otherwise, it fills in a table of structures:
-.P
-.in +4n
-.EX
-struct kernel_sym {
- unsigned long value;
- char name[60];
-};
-.EE
-.in
-.P
-The symbols are interspersed with magic symbols of the form
-.BI # module-name
-with the kernel having an empty name.
-The value associated with a symbol of this form is the address at
-which the module is loaded.
-.P
-The symbols exported from each module follow their magic module tag
-and the modules are returned in the reverse of the
-order in which they were loaded.
-.SH RETURN VALUE
-On success, returns the number of symbols copied to
-.IR table .
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-There is only one possible error return:
-.TP
-.B ENOSYS
-.BR get_kernel_syms ()
-is not supported in this version of the kernel.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Removed in Linux 2.6.
-.\" Removed in Linux 2.5.48
-.P
-This obsolete system call is not supported by glibc.
-No declaration is provided in glibc headers, but, through a quirk of history,
-glibc versions before glibc 2.23 did export an ABI for this system call.
-Therefore, in order to employ this system call,
-it was sufficient to manually declare the interface in your code;
-alternatively, you could invoke the system call using
-.BR syscall (2).
-.SH BUGS
-There is no way to indicate the size of the buffer allocated for
-.IR table .
-If symbols have been added to the kernel since the
-program queried for the symbol table size, memory will be corrupted.
-.P
-The length of exported symbol names is limited to 59 characters.
-.P
-Because of these limitations, this system call is deprecated in
-favor of
-.BR query_module (2)
-(which is itself nowadays deprecated
-in favor of other interfaces described on its manual page).
-.SH SEE ALSO
-.BR create_module (2),
-.BR delete_module (2),
-.BR init_module (2),
-.BR query_module (2)
diff --git a/man2/get_mempolicy.2 b/man2/get_mempolicy.2
deleted file mode 100644
index f6f80ab3e..000000000
--- a/man2/get_mempolicy.2
+++ /dev/null
@@ -1,239 +0,0 @@
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft-var
-.\"
-.\" Copyright 2003,2004 Andi Kleen, SuSE Labs.
-.\" and Copyright 2007 Lee Schermerhorn, Hewlett Packard
-.\"
-.\" 2006-02-03, mtk, substantial wording changes and other improvements
-.\" 2007-08-27, Lee Schermerhorn <Lee.Schermerhorn@hp.com>
-.\" more precise specification of behavior.
-.\"
-.TH get_mempolicy 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-get_mempolicy \- retrieve NUMA memory policy for a thread
-.SH LIBRARY
-NUMA (Non-Uniform Memory Access) policy library
-.RI ( libnuma ", " \-lnuma )
-.SH SYNOPSIS
-.B "#include <numaif.h>"
-.nf
-.P
-.BI "long get_mempolicy(int *" mode ,
-.BI " unsigned long " nodemask [(. maxnode " + ULONG_WIDTH - 1)"
-.B " / ULONG_WIDTH],"
-.BI " unsigned long " maxnode ", void *" addr ,
-.BI " unsigned long " flags );
-.fi
-.SH DESCRIPTION
-.BR get_mempolicy ()
-retrieves the NUMA policy of the calling thread or of a memory address,
-depending on the setting of
-.IR flags .
-.P
-A NUMA machine has different
-memory controllers with different distances to specific CPUs.
-The memory policy defines from which node memory is allocated for
-the thread.
-.P
-If
-.I flags
-is specified as 0,
-then information about the calling thread's default policy
-(as set by
-.BR set_mempolicy (2))
-is returned, in the buffers pointed to by
-.I mode
-and
-.IR nodemask .
-The value returned in these arguments
-may be used to restore the thread's policy to its state at
-the time of the call to
-.BR get_mempolicy ()
-using
-.BR set_mempolicy (2).
-When
-.I flags
-is 0,
-.I addr
-must be specified as NULL.
-.P
-If
-.I flags
-specifies
-.B MPOL_F_MEMS_ALLOWED
-(available since Linux 2.6.24), the
-.I mode
-argument is ignored and the set of nodes (memories) that the
-thread is allowed to specify in subsequent calls to
-.BR mbind (2)
-or
-.BR set_mempolicy (2)
-(in the absence of any
-.IR "mode flags" )
-is returned in
-.IR nodemask .
-It is not permitted to combine
-.B MPOL_F_MEMS_ALLOWED
-with either
-.B MPOL_F_ADDR
-or
-.BR MPOL_F_NODE .
-.P
-If
-.I flags
-specifies
-.BR MPOL_F_ADDR ,
-then information is returned about the policy governing the memory
-address given in
-.IR addr .
-This policy may be different from the thread's default policy if
-.BR mbind (2)
-or one of the helper functions described in
-.BR numa (3)
-has been used to establish a policy for the memory range containing
-.IR addr .
-.P
-If the
-.I mode
-argument is not NULL, then
-.BR get_mempolicy ()
-will store the policy mode and any optional
-.I "mode flags"
-of the requested NUMA policy in the location pointed to by this argument.
-If
-.I nodemask
-is not NULL, then the nodemask associated with the policy will be stored
-in the location pointed to by this argument.
-.I maxnode
-specifies the number of node IDs
-that can be stored into
-.IR nodemask \[em]that
-is, the maximum node ID plus one.
-The value specified by
-.I maxnode
-is always rounded to a multiple of
-.IR "sizeof(unsigned\ long)*8" .
-.P
-If
-.I flags
-specifies both
-.B MPOL_F_NODE
-and
-.BR MPOL_F_ADDR ,
-.BR get_mempolicy ()
-will return the node ID of the node on which the address
-.I addr
-is allocated into the location pointed to by
-.IR mode .
-If no page has yet been allocated for the specified address,
-.BR get_mempolicy ()
-will allocate a page as if the thread had performed a read
-(load) access to that address, and return the ID of the node
-where that page was allocated.
-.P
-If
-.I flags
-specifies
-.BR MPOL_F_NODE ,
-but not
-.BR MPOL_F_ADDR ,
-and the thread's current policy is
-.B MPOL_INTERLEAVE
-or
-.BR MPOL_WEIGHTED_INTERLEAVE ,
-then
-.BR get_mempolicy ()
-will return in the location pointed to by a non-NULL
-.I mode
-argument,
-the node ID of the next node that will be used for
-interleaving of internal kernel pages allocated on behalf of the thread.
-.\" Note: code returns next interleave node via 'mode' argument -Lee Schermerhorn
-These allocations include pages for memory-mapped files in
-process memory ranges mapped using the
-.BR mmap (2)
-call with the
-.B MAP_PRIVATE
-flag for read accesses, and in memory ranges mapped with the
-.B MAP_SHARED
-flag for all accesses.
-.P
-Other flag values are reserved.
-.P
-For an overview of the possible policies see
-.BR set_mempolicy (2).
-.SH RETURN VALUE
-On success,
-.BR get_mempolicy ()
-returns 0;
-on error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Part or all of the memory range specified by
-.I nodemask
-and
-.I maxnode
-points outside your accessible address space.
-.TP
-.B EINVAL
-The value specified by
-.I maxnode
-is less than the number of node IDs supported by the system.
-Or
-.I flags
-specified values other than
-.B MPOL_F_NODE
-or
-.BR MPOL_F_ADDR ;
-or
-.I flags
-specified
-.B MPOL_F_ADDR
-and
-.I addr
-is NULL,
-or
-.I flags
-did not specify
-.B MPOL_F_ADDR
-and
-.I addr
-is not NULL.
-Or,
-.I flags
-specified
-.B MPOL_F_NODE
-but not
-.B MPOL_F_ADDR
-and the current thread policy is neither
-.B MPOL_INTERLEAVE
-nor
-.BR MPOL_WEIGHTED_INTERLEAVE .
-Or,
-.I flags
-specified
-.B MPOL_F_MEMS_ALLOWED
-with either
-.B MPOL_F_ADDR
-or
-.BR MPOL_F_NODE .
-(And there are other
-.B EINVAL
-cases.)
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.7.
-.SH NOTES
-For information on library support, see
-.BR numa (7).
-.SH SEE ALSO
-.BR getcpu (2),
-.BR mbind (2),
-.BR mmap (2),
-.BR set_mempolicy (2),
-.BR numa (3),
-.BR numa (7),
-.BR numactl (8)
diff --git a/man2/get_robust_list.2 b/man2/get_robust_list.2
deleted file mode 100644
index 9fc260022..000000000
--- a/man2/get_robust_list.2
+++ /dev/null
@@ -1,156 +0,0 @@
-.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
-.\" Written by Ivana Varekova <varekova@redhat.com>
-.\" and Copyright (c) 2017, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" FIXME Something could be added to this page (or exit(2))
-.\" about exit_robust_list processing
-.\"
-.TH get_robust_list 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-get_robust_list, set_robust_list \- get/set list of robust futexes
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/futex.h>" \
-" /* Definition of " "struct robust_list_head" " */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "long syscall(SYS_get_robust_list, int " pid ,
-.BI " struct robust_list_head **" head_ptr ", size_t *" len_ptr );
-.B long syscall(SYS_set_robust_list,
-.BI " struct robust_list_head *" head ", size_t " len );
-.fi
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-These system calls deal with per-thread robust futex lists.
-These lists are managed in user space:
-the kernel knows only about the location of the head of the list.
-A thread can inform the kernel of the location of its robust futex list using
-.BR set_robust_list ().
-The address of a thread's robust futex list can be obtained using
-.BR get_robust_list ().
-.P
-The purpose of the robust futex list is to ensure that if a thread
-accidentally fails to unlock a futex before terminating or calling
-.BR execve (2),
-another thread that is waiting on that futex is notified that
-the former owner of the futex has died.
-This notification consists of two pieces: the
-.B FUTEX_OWNER_DIED
-bit is set in the futex word, and the kernel performs a
-.BR futex (2)
-.B FUTEX_WAKE
-operation on one of the threads waiting on the futex.
-.P
-The
-.BR get_robust_list ()
-system call returns the head of the robust futex list of the thread
-whose thread ID is specified in
-.IR pid .
-If
-.I pid
-is 0,
-the head of the list for the calling thread is returned.
-The list head is stored in the location pointed to by
-.IR head_ptr .
-The size of the object pointed to by
-.I **head_ptr
-is stored in
-.IR len_ptr .
-.P
-Permission to employ
-.BR get_robust_list ()
-is governed by a ptrace access mode
-.B PTRACE_MODE_READ_REALCREDS
-check; see
-.BR ptrace (2).
-.P
-The
-.BR set_robust_list ()
-system call requests the kernel to record the head of the list of
-robust futexes owned by the calling thread.
-The
-.I head
-argument is the list head to record.
-The
-.I len
-argument should be
-.IR sizeof(*head) .
-.SH RETURN VALUE
-The
-.BR set_robust_list ()
-and
-.BR get_robust_list ()
-system calls return zero when the operation is successful,
-an error code otherwise.
-.SH ERRORS
-The
-.BR set_robust_list ()
-system call can fail with the following error:
-.TP
-.B EINVAL
-.I len
-does not equal
-.IR "sizeof(struct\ robust_list_head)" .
-.P
-The
-.BR get_robust_list ()
-system call can fail with the following errors:
-.TP
-.B EFAULT
-The head of the robust futex list can't be stored at the location
-.IR head_ptr .
-.TP
-.B EPERM
-The calling process does not have permission to see the robust futex list of
-the thread with the thread ID
-.IR pid ,
-and does not have the
-.B CAP_SYS_PTRACE
-capability.
-.TP
-.B ESRCH
-No thread with the thread ID
-.I pid
-could be found.
-.SH VERSIONS
-These system calls were added in Linux 2.6.17.
-.SH NOTES
-These system calls are not needed by normal applications.
-.P
-A thread can have only one robust futex list;
-therefore applications that wish
-to use this functionality should use the robust mutexes provided by glibc.
-.P
-In the initial implementation,
-a thread waiting on a futex was notified that the owner had died
-only if the owner terminated.
-Starting with Linux 2.6.28,
-.\" commit 8141c7f3e7aee618312fa1c15109e1219de784a7
-notification was extended to include the case where the owner performs an
-.BR execve (2).
-.P
-The thread IDs mentioned in the main text are
-.I kernel
-thread IDs of the kind returned by
-.BR clone (2)
-and
-.BR gettid (2).
-.SH SEE ALSO
-.BR futex (2),
-.BR pthread_mutexattr_setrobust (3)
-.P
-.I Documentation/robust\-futexes.txt
-and
-.I Documentation/robust\-futex\-ABI.txt
-in the Linux kernel source tree
-.\" http://lwn.net/Articles/172149/
diff --git a/man2/get_thread_area.2 b/man2/get_thread_area.2
deleted file mode 100644
index a03fe54fb..000000000
--- a/man2/get_thread_area.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/set_thread_area.2
diff --git a/man2/getcpu.2 b/man2/getcpu.2
deleted file mode 100644
index 72c500f97..000000000
--- a/man2/getcpu.2
+++ /dev/null
@@ -1,147 +0,0 @@
-.\" SPDX-License-Identifier: Linux-man-pages-1-para
-.\"
-.\" This man page is Copyright (C) 2006 Andi Kleen <ak@muc.de>.
-.\"
-.\" 2008, mtk, various edits
-.\"
-.TH getcpu 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getcpu \- determine CPU and NUMA node on which the calling thread is running
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sched.h>
-.P
-.BI "int getcpu(unsigned int *_Nullable " cpu ", \
-unsigned int *_Nullable " node );
-.fi
-.SH DESCRIPTION
-The
-.BR getcpu ()
-system call identifies the processor and node on which the calling
-thread or process is currently running and writes them into the
-integers pointed to by the
-.I cpu
-and
-.I node
-arguments.
-The processor is a unique small integer identifying a CPU.
-The node is a unique small identifier identifying a NUMA node.
-When either
-.I cpu
-or
-.I node
-is NULL, nothing is written to the respective pointer.
-.P
-The information placed in
-.I cpu
-is guaranteed to be current only at the time of the call:
-unless the CPU affinity has been fixed using
-.BR sched_setaffinity (2),
-the kernel might change the CPU at any time.
-(Normally this does not happen
-because the scheduler tries to minimize movements between CPUs to
-keep caches hot, but it is possible.)
-The caller must allow for the possibility that the information returned in
-.I cpu
-and
-.I node
-is no longer current by the time the call returns.
-.SH RETURN VALUE
-On success, 0 is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Arguments point outside the calling process's address space.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.19 (x86-64 and i386),
-glibc 2.29.
-.\"
-.SS C library/kernel differences
-The kernel system call has a third argument:
-.P
-.in +4n
-.nf
-.BI "int getcpu(unsigned int *" cpu ", unsigned int *" node ,
-.BI " struct getcpu_cache *" tcache );
-.fi
-.in
-.P
-The
-.I tcache
-argument is unused since Linux 2.6.24,
-and (when invoking the system call directly)
-should be specified as NULL,
-unless portability to Linux 2.6.23 or earlier is required.
-.P
-.\" commit 4307d1e5ada595c87f9a4d16db16ba5edb70dcb1
-.\" Author: Ingo Molnar <mingo@elte.hu>
-.\" Date: Wed Nov 7 18:37:48 2007 +0100
-.\" x86: ignore the sys_getcpu() tcache parameter
-In Linux 2.6.23 and earlier, if the
-.I tcache
-argument was non-NULL,
-then it specified a pointer to a caller-allocated buffer in thread-local
-storage that was used to provide a caching mechanism for
-.BR getcpu ().
-Use of the cache could speed
-.BR getcpu ()
-calls, at the cost that there was a very small chance that
-the returned information would be out of date.
-The caching mechanism was considered to cause problems when
-migrating threads between CPUs, and so the argument is now ignored.
-.\"
-.\" ===== Before Linux 2.6.24: =====
-.\" .I tcache
-.\" is a pointer to a
-.\" .IR "struct getcpu_cache"
-.\" that is used as a cache by
-.\" .BR getcpu ().
-.\" The caller should put the cache into a thread-local variable
-.\" if the process is multithreaded,
-.\" because the cache cannot be shared between different threads.
-.\" .I tcache
-.\" can be NULL.
-.\" If it is not NULL
-.\" .BR getcpu ()
-.\" will use it to speed up operation.
-.\" The information inside the cache is private to the system call
-.\" and should not be accessed by the user program.
-.\" The information placed in the cache can change between Linux releases.
-.\"
-.\" When no cache is specified
-.\" .BR getcpu ()
-.\" will be slower,
-.\" but always retrieve the current CPU and node information.
-.\" With a cache
-.\" .BR getcpu ()
-.\" is faster.
-.\" However, the cached information is updated only once per jiffy (see
-.\" .BR time (7)).
-.\" This means that the information could theoretically be out of date,
-.\" although in practice the scheduler's attempt to maintain
-.\" soft CPU affinity means that the information is unlikely to change
-.\" over the course of the caching interval.
-.SH NOTES
-Linux makes a best effort to make this call as fast as possible.
-(On some architectures, this is done via an implementation in the
-.BR vdso (7).)
-The intention of
-.BR getcpu ()
-is to allow programs to make optimizations with per-CPU data
-or for NUMA optimization.
-.SH SEE ALSO
-.BR mbind (2),
-.BR sched_setaffinity (2),
-.BR set_mempolicy (2),
-.BR sched_getcpu (3),
-.BR cpuset (7),
-.BR vdso (7)
diff --git a/man2/getcwd.2 b/man2/getcwd.2
deleted file mode 100644
index f080be0fc..000000000
--- a/man2/getcwd.2
+++ /dev/null
@@ -1,2 +0,0 @@
-.so man3/getcwd.3
-.\" Because getcwd(3) is layered on a system call of the same name
diff --git a/man2/getdents.2 b/man2/getdents.2
deleted file mode 100644
index 1e630f9e2..000000000
--- a/man2/getdents.2
+++ /dev/null
@@ -1,323 +0,0 @@
-.\" Copyright (C) 1995 Andries Brouwer (aeb@cwi.nl)
-.\" and Copyright 2008, 2015 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Written 11 June 1995 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 22 July 1995 by Michael Chastain <mec@duracef.shout.net>:
-.\" Derived from 'readdir.2'.
-.\" Modified Tue Oct 22 08:11:14 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\"
-.TH getdents 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getdents, getdents64 \- get directory entries
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "long syscall(SYS_getdents, unsigned int " fd \
-", struct linux_dirent *" dirp ,
-.BI " unsigned int " count );
-.P
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <dirent.h>
-.P
-.BI "ssize_t getdents64(int " fd ", void " dirp [. count "], size_t " count );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR getdents (),
-necessitating the use of
-.BR syscall (2).
-.P
-.IR Note :
-There is no definition of
-.I struct linux_dirent
-in glibc; see NOTES.
-.SH DESCRIPTION
-These are not the interfaces you are interested in.
-Look at
-.BR readdir (3)
-for the POSIX-conforming C library interface.
-This page documents the bare kernel system call interfaces.
-.SS getdents()
-The system call
-.BR getdents ()
-reads several
-.I linux_dirent
-structures from the directory
-referred to by the open file descriptor
-.I fd
-into the buffer pointed to by
-.IR dirp .
-The argument
-.I count
-specifies the size of that buffer.
-.P
-The
-.I linux_dirent
-structure is declared as follows:
-.P
-.in +4n
-.EX
-struct linux_dirent {
- unsigned long d_ino; /* Inode number */
- unsigned long d_off; /* Not an offset; see below */
- unsigned short d_reclen; /* Length of this \fIlinux_dirent\fP */
- char d_name[]; /* Filename (null\-terminated) */
- /* length is actually (d_reclen \- 2 \-
- offsetof(struct linux_dirent, d_name)) */
- /*
- char pad; // Zero padding byte
- char d_type; // File type (only since Linux
- // 2.6.4); offset is (d_reclen \- 1)
- */
-};
-.EE
-.in
-.P
-.I d_ino
-is an inode number.
-.I d_off
-is a filesystem-specific value with no specific meaning to user space,
-though on older filesystems it used to be
-the distance from the start of the directory to the start of the next
-.IR linux_dirent ;
-see
-.BR readdir (3).
-.I d_reclen
-is the size of this entire
-.IR linux_dirent .
-.I d_name
-is a null-terminated filename.
-.P
-.I d_type
-is a byte at the end of the structure that indicates the file type.
-It contains one of the following values (defined in
-.IR <dirent.h> ):
-.TP 12
-.B DT_BLK
-This is a block device.
-.TP
-.B DT_CHR
-This is a character device.
-.TP
-.B DT_DIR
-This is a directory.
-.TP
-.B DT_FIFO
-This is a named pipe (FIFO).
-.TP
-.B DT_LNK
-This is a symbolic link.
-.TP
-.B DT_REG
-This is a regular file.
-.TP
-.B DT_SOCK
-This is a UNIX domain socket.
-.TP
-.B DT_UNKNOWN
-The file type is unknown.
-.P
-The
-.I d_type
-field is implemented since Linux 2.6.4.
-It occupies a space that was previously a zero-filled padding byte in the
-.I linux_dirent
-structure.
-Thus, on kernels up to and including Linux 2.6.3,
-attempting to access this field always provides the value 0
-.RB ( DT_UNKNOWN ).
-.P
-Currently,
-.\" kernel 2.6.27
-.\" The same sentence is in readdir.2
-only some filesystems (among them: Btrfs, ext2, ext3, and ext4)
-have full support for returning the file type in
-.IR d_type .
-All applications must properly handle a return of
-.BR DT_UNKNOWN .
-.SS getdents64()
-The original Linux
-.BR getdents ()
-system call did not handle large filesystems and large file offsets.
-Consequently, Linux 2.4 added
-.BR getdents64 (),
-with wider types for the
-.I d_ino
-and
-.I d_off
-fields.
-In addition,
-.BR getdents64 ()
-supports an explicit
-.I d_type
-field.
-.P
-The
-.BR getdents64 ()
-system call is like
-.BR getdents (),
-except that its second argument is a pointer to a buffer containing
-structures of the following type:
-.P
-.in +4n
-.EX
-struct linux_dirent64 {
- ino64_t d_ino; /* 64\-bit inode number */
- off64_t d_off; /* Not an offset; see getdents() */
- unsigned short d_reclen; /* Size of this dirent */
- unsigned char d_type; /* File type */
- char d_name[]; /* Filename (null\-terminated) */
-};
-.EE
-.in
-.SH RETURN VALUE
-On success, the number of bytes read is returned.
-On end of directory, 0 is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-Invalid file descriptor
-.IR fd .
-.TP
-.B EFAULT
-Argument points outside the calling process's address space.
-.TP
-.B EINVAL
-Result buffer is too small.
-.TP
-.B ENOENT
-No such directory.
-.TP
-.B ENOTDIR
-File descriptor does not refer to a directory.
-.SH STANDARDS
-None.
-.SH HISTORY
-SVr4.
-.\" SVr4 documents additional ENOLINK, EIO error conditions.
-.TP
-.BR getdents64 ()
-glibc 2.30.
-.SH NOTES
-glibc does not provide a wrapper for
-.BR getdents ();
-call
-.BR getdents ()
-using
-.BR syscall (2).
-In that case you will need to define the
-.I linux_dirent
-or
-.I linux_dirent64
-structure yourself.
-.P
-Probably, you want to use
-.BR readdir (3)
-instead of these system calls.
-.P
-These calls supersede
-.BR readdir (2).
-.SH EXAMPLES
-.\" FIXME The example program needs to be revised, since it uses the older
-.\" getdents() system call and the structure with smaller field widths.
-The program below demonstrates the use of
-.BR getdents ().
-The following output shows an example of what we see when running this
-program on an ext2 directory:
-.P
-.in +4n
-.EX
-.RB "$" " ./a.out /testfs/"
--\-\-\-\-\-\-\-\-\-\-\-\-\-\- nread=120 \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
-inode# file type d_reclen d_off d_name
- 2 directory 16 12 .
- 2 directory 16 24 ..
- 11 directory 24 44 lost+found
- 12 regular 16 56 a
- 228929 directory 16 68 sub
- 16353 directory 16 80 sub2
- 130817 directory 16 4096 sub3
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (getdents.c)
-.EX
-#define _GNU_SOURCE
-#include <dirent.h> /* Defines DT_* constants */
-#include <err.h>
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-struct linux_dirent {
- unsigned long d_ino;
- off_t d_off;
- unsigned short d_reclen;
- char d_name[];
-};
-\&
-#define BUF_SIZE 1024
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- char d_type;
- char buf[BUF_SIZE];
- long nread;
- struct linux_dirent *d;
-\&
- fd = open(argc > 1 ? argv[1] : ".", O_RDONLY | O_DIRECTORY);
- if (fd == \-1)
- err(EXIT_FAILURE, "open");
-\&
- for (;;) {
- nread = syscall(SYS_getdents, fd, buf, BUF_SIZE);
- if (nread == \-1)
- err(EXIT_FAILURE, "getdents");
-\&
- if (nread == 0)
- break;
-\&
- printf("\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- nread=%ld \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\en", nread);
- printf("inode# file type d_reclen d_off d_name\en");
- for (size_t bpos = 0; bpos < nread;) {
- d = (struct linux_dirent *) (buf + bpos);
- printf("%8lu ", d\->d_ino);
- d_type = *(buf + bpos + d\->d_reclen \- 1);
- printf("%\-10s ", (d_type == DT_REG) ? "regular" :
- (d_type == DT_DIR) ? "directory" :
- (d_type == DT_FIFO) ? "FIFO" :
- (d_type == DT_SOCK) ? "socket" :
- (d_type == DT_LNK) ? "symlink" :
- (d_type == DT_BLK) ? "block dev" :
- (d_type == DT_CHR) ? "char dev" : "???");
- printf("%4d %10jd %s\en", d\->d_reclen,
- (intmax_t) d\->d_off, d\->d_name);
- bpos += d\->d_reclen;
- }
- }
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR readdir (2),
-.BR readdir (3),
-.BR inode (7)
diff --git a/man2/getdents64.2 b/man2/getdents64.2
deleted file mode 100644
index f3674bac6..000000000
--- a/man2/getdents64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getdents.2
diff --git a/man2/getdomainname.2 b/man2/getdomainname.2
deleted file mode 100644
index 3b74bcd7a..000000000
--- a/man2/getdomainname.2
+++ /dev/null
@@ -1,122 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1997-08-25 by Nicolás Lichtmaier <nick@debian.org>
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2008-11-27 by mtk
-.\"
-.TH getdomainname 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getdomainname, setdomainname \- get/set NIS domain name
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int getdomainname(char *" name ", size_t " len );
-.BI "int setdomainname(const char *" name ", size_t " len );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR getdomainname (),
-.BR setdomainname ():
-.nf
- Since glibc 2.21:
-.\" commit 266865c0e7b79d4196e2cc393693463f03c90bd8
- _DEFAULT_SOURCE
- In glibc 2.19 and 2.20:
- _DEFAULT_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
- Up to and including glibc 2.19:
- _BSD_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
-.fi
-.SH DESCRIPTION
-These functions are used to access or to change the NIS domain name of the
-host system.
-More precisely, they operate on the NIS domain name associated with the calling
-process's UTS namespace.
-.P
-.BR setdomainname ()
-sets the domain name to the value given in the character array
-.IR name .
-The
-.I len
-argument specifies the number of bytes in
-.IR name .
-(Thus,
-.I name
-does not require a terminating null byte.)
-.P
-.BR getdomainname ()
-returns the null-terminated domain name in the character array
-.IR name ,
-which has a length of
-.I len
-bytes.
-If the null-terminated domain name requires more than \fIlen\fP bytes,
-.BR getdomainname ()
-returns the first \fIlen\fP bytes (glibc) or gives an error (libc).
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.BR setdomainname ()
-can fail with the following errors:
-.TP
-.B EFAULT
-.I name
-pointed outside of user address space.
-.TP
-.B EINVAL
-.I len
-was negative or too large.
-.TP
-.B EPERM
-The caller did not have the
-.B CAP_SYS_ADMIN
-capability in the user namespace associated with its UTS namespace (see
-.BR namespaces (7)).
-.P
-.BR getdomainname ()
-can fail with the following errors:
-.TP
-.B EINVAL
-For
-.BR getdomainname ()
-under libc:
-.I name
-is NULL or
-.I name
-is longer than
-.I len
-bytes.
-.SH VERSIONS
-On most Linux architectures (including x86),
-there is no
-.BR getdomainname ()
-system call; instead, glibc implements
-.BR getdomainname ()
-as a library function that returns a copy of the
-.I domainname
-field returned from a call to
-.BR uname (2).
-.SH STANDARDS
-None.
-.\" But they appear on most systems...
-.SH HISTORY
-Since Linux 1.0, the limit on the length of a domain name,
-including the terminating null byte, is 64 bytes.
-In older kernels, it was 8 bytes.
-.SH SEE ALSO
-.BR gethostname (2),
-.BR sethostname (2),
-.BR uname (2),
-.BR uts_namespaces (7)
diff --git a/man2/getegid.2 b/man2/getegid.2
deleted file mode 100644
index d9b10e73f..000000000
--- a/man2/getegid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getgid.2
diff --git a/man2/getegid32.2 b/man2/getegid32.2
deleted file mode 100644
index d7da70863..000000000
--- a/man2/getegid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getegid.2
diff --git a/man2/geteuid.2 b/man2/geteuid.2
deleted file mode 100644
index 165cfe1d0..000000000
--- a/man2/geteuid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getuid.2
diff --git a/man2/geteuid32.2 b/man2/geteuid32.2
deleted file mode 100644
index 8e60b7798..000000000
--- a/man2/geteuid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/geteuid.2
diff --git a/man2/getgid.2 b/man2/getgid.2
deleted file mode 100644
index a02515509..000000000
--- a/man2/getgid.2
+++ /dev/null
@@ -1,70 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH getgid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getgid, getegid \- get group identity
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B gid_t getgid(void);
-.B gid_t getegid(void);
-.fi
-.SH DESCRIPTION
-.BR getgid ()
-returns the real group ID of the calling process.
-.P
-.BR getegid ()
-returns the effective group ID of the calling process.
-.SH ERRORS
-These functions are always successful
-and never modify
-.\" https://www.austingroupbugs.net/view.php?id=511
-.\" 0000511: getuid and friends should not modify errno
-.IR errno .
-.SH VERSIONS
-On Alpha, instead of a pair of
-.BR getgid ()
-and
-.BR getegid ()
-system calls, a single
-.BR getxgid ()
-system call is provided, which returns a pair of real and effective GIDs.
-The glibc
-.BR getgid ()
-and
-.BR getegid ()
-wrapper functions transparently deal with this.
-See
-.BR syscall (2)
-for details regarding register mapping.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.3BSD.
-.P
-The original Linux
-.BR getgid ()
-and
-.BR getegid ()
-system calls supported only 16-bit group IDs.
-Subsequently, Linux 2.4 added
-.BR getgid32 ()
-and
-.BR getegid32 (),
-supporting 32-bit IDs.
-The glibc
-.BR getgid ()
-and
-.BR getegid ()
-wrapper functions transparently deal with the variations across kernel versions.
-.SH SEE ALSO
-.BR getresgid (2),
-.BR setgid (2),
-.BR setregid (2),
-.BR credentials (7)
diff --git a/man2/getgid32.2 b/man2/getgid32.2
deleted file mode 100644
index d9b10e73f..000000000
--- a/man2/getgid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getgid.2
diff --git a/man2/getgroups.2 b/man2/getgroups.2
deleted file mode 100644
index 3d9be4b98..000000000
--- a/man2/getgroups.2
+++ /dev/null
@@ -1,219 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\" and Copyright (C) 2008, 2010, 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Thu Oct 31 12:04:29 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\" 2008-05-03, mtk, expanded and rewrote parts of DESCRIPTION and RETURN
-.\" VALUE, made style of page more consistent with man-pages style.
-.\"
-.TH getgroups 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getgroups, setgroups \- get/set list of supplementary group IDs
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int getgroups(int " size ", gid_t " list []);
-.P
-.B #include <grp.h>
-.P
-.BI "int setgroups(size_t " size ", const gid_t *_Nullable " list );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR setgroups ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- glibc 2.19 and earlier:
- _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-.BR getgroups ()
-returns the supplementary group IDs of the calling process in
-.IR list .
-The argument
-.I size
-should be set to the maximum number of items that can be stored in the
-buffer pointed to by
-.IR list .
-If the calling process is a member of more than
-.I size
-supplementary groups, then an error results.
-.P
-It is unspecified whether the effective group ID of the calling process
-is included in the returned list.
-(Thus, an application should also call
-.BR getegid (2)
-and add or remove the resulting value.)
-.P
-If
-.I size
-is zero,
-.I list
-is not modified, but the total number of supplementary group IDs for the
-process is returned.
-This allows the caller to determine the size of a dynamically allocated
-.I list
-to be used in a further call to
-.BR getgroups ().
-.P
-.BR setgroups ()
-sets the supplementary group IDs for the calling process.
-Appropriate privileges are required (see the description of the
-.B EPERM
-error, below).
-The
-.I size
-argument specifies the number of supplementary group IDs
-in the buffer pointed to by
-.IR list .
-A process can drop all of its supplementary groups with the call:
-.P
-.in +4n
-.EX
-setgroups(0, NULL);
-.EE
-.in
-.SH RETURN VALUE
-On success,
-.BR getgroups ()
-returns the number of supplementary group IDs.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-On success,
-.BR setgroups ()
-returns 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I list
-has an invalid address.
-.P
-.BR getgroups ()
-can additionally fail with the following error:
-.TP
-.B EINVAL
-.I size
-is less than the number of supplementary group IDs, but is not zero.
-.P
-.BR setgroups ()
-can additionally fail with the following errors:
-.TP
-.B EINVAL
-.I size
-is greater than
-.B NGROUPS_MAX
-(32 before Linux 2.6.4; 65536 since Linux 2.6.4).
-.TP
-.B ENOMEM
-Out of memory.
-.TP
-.B EPERM
-The calling process has insufficient privilege
-(the caller does not have the
-.B CAP_SETGID
-capability in the user namespace in which it resides).
-.TP
-.BR EPERM " (since Linux 3.19)"
-The use of
-.BR setgroups ()
-is denied in this user namespace.
-See the description of
-.IR /proc/ pid /setgroups
-in
-.BR user_namespaces (7).
-.SH VERSIONS
-.SS C library/kernel differences
-At the kernel level, user IDs and group IDs are a per-thread attribute.
-However, POSIX requires that all threads in a process
-share the same credentials.
-The NPTL threading implementation handles the POSIX requirements by
-providing wrapper functions for
-the various system calls that change process UIDs and GIDs.
-These wrapper functions (including the one for
-.BR setgroups ())
-employ a signal-based technique to ensure
-that when one thread changes credentials,
-all of the other threads in the process also change their credentials.
-For details, see
-.BR nptl (7).
-.SH STANDARDS
-.TP
-.BR getgroups ()
-POSIX.1-2008.
-.TP
-.BR setgroups ()
-None.
-.SH HISTORY
-.TP
-.BR getgroups ()
-SVr4, 4.3BSD, POSIX.1-2001.
-.TP
-.BR setgroups ()
-SVr4, 4.3BSD.
-Since
-.BR setgroups ()
-requires privilege, it is not covered by POSIX.1.
-.P
-The original Linux
-.BR getgroups ()
-system call supported only 16-bit group IDs.
-Subsequently, Linux 2.4 added
-.BR getgroups32 (),
-supporting 32-bit IDs.
-The glibc
-.BR getgroups ()
-wrapper function transparently deals with the variation across kernel versions.
-.SH NOTES
-A process can have up to
-.B NGROUPS_MAX
-supplementary group IDs
-in addition to the effective group ID.
-The constant
-.B NGROUPS_MAX
-is defined in
-.IR <limits.h> .
-The set of supplementary group IDs
-is inherited from the parent process, and preserved across an
-.BR execve (2).
-.P
-The maximum number of supplementary group IDs can be found at run time using
-.BR sysconf (3):
-.P
-.in +4n
-.EX
-long ngroups_max;
-ngroups_max = sysconf(_SC_NGROUPS_MAX);
-.EE
-.in
-.P
-The maximum return value of
-.BR getgroups ()
-cannot be larger than one more than this value.
-Since Linux 2.6.4, the maximum number of supplementary group IDs is also
-exposed via the Linux-specific read-only file,
-.IR /proc/sys/kernel/ngroups_max .
-.SH SEE ALSO
-.BR getgid (2),
-.BR setgid (2),
-.BR getgrouplist (3),
-.BR group_member (3),
-.BR initgroups (3),
-.BR capabilities (7),
-.BR credentials (7)
diff --git a/man2/getgroups32.2 b/man2/getgroups32.2
deleted file mode 100644
index 0ae4cc0b1..000000000
--- a/man2/getgroups32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getgroups.2
diff --git a/man2/gethostname.2 b/man2/gethostname.2
deleted file mode 100644
index 1a9fc0d2f..000000000
--- a/man2/gethostname.2
+++ /dev/null
@@ -1,176 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1995-07-22 by Michael Chastain <mec@duracef.shout.net>:
-.\" 'gethostname' is real system call on Linux/Alpha.
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2000-06-04, 2001-12-15 by aeb
-.\" Modified 2004-06-17 by mtk
-.\" Modified 2008-11-27 by mtk
-.\"
-.TH gethostname 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-gethostname, sethostname \- get/set hostname
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int gethostname(char *" name ", size_t " len );
-.BI "int sethostname(const char *" name ", size_t " len );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR gethostname ():
-.nf
- _XOPEN_SOURCE >= 500 || _POSIX_C_SOURCE >= 200112L
- || /* glibc 2.19 and earlier */ _BSD_SOURCE
-.\" The above is something of a simplification
-.\" also before glibc 2.3 there was a bit churn
-.fi
-.P
-.BR sethostname ():
-.nf
- Since glibc 2.21:
-.\" commit 266865c0e7b79d4196e2cc393693463f03c90bd8
- _DEFAULT_SOURCE
- In glibc 2.19 and 2.20:
- _DEFAULT_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
- Up to and including glibc 2.19:
- _BSD_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
-.fi
-.SH DESCRIPTION
-These system calls are used to access or to change the system hostname.
-More precisely, they operate on the hostname associated with the calling
-process's UTS namespace.
-.P
-.BR sethostname ()
-sets the hostname to the value given in the character array
-.IR name .
-The
-.I len
-argument specifies the number of bytes in
-.IR name .
-(Thus,
-.I name
-does not require a terminating null byte.)
-.P
-.BR gethostname ()
-returns the null-terminated hostname in the character array
-.IR name ,
-which has a length of
-.I len
-bytes.
-If the null-terminated hostname is too large to fit,
-then the name is truncated, and no error is returned (but see NOTES below).
-POSIX.1 says that if such truncation occurs,
-then it is unspecified whether the returned buffer
-includes a terminating null byte.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I name
-is an invalid address.
-.TP
-.B EINVAL
-.I len
-is negative
-.\" Can't occur for gethostname() wrapper, since 'len' has an
-.\" unsigned type; can occur for the underlying system call.
-or, for
-.BR sethostname (),
-.I len
-is larger than the maximum allowed size.
-.TP
-.B ENAMETOOLONG
-.RB "(glibc " gethostname ())
-.I len
-is smaller than the actual size.
-(Before glibc 2.1, glibc uses
-.B EINVAL
-for this case.)
-.TP
-.B EPERM
-For
-.BR sethostname (),
-the caller did not have the
-.B CAP_SYS_ADMIN
-capability in the user namespace associated with its UTS namespace (see
-.BR namespaces (7)).
-.SH VERSIONS
-SUSv2 guarantees that "Host names are limited to 255 bytes".
-POSIX.1 guarantees that "Host names (not including
-the terminating null byte) are limited to
-.B HOST_NAME_MAX
-bytes".
-On Linux,
-.B HOST_NAME_MAX
-is defined with the value 64, which has been the limit since Linux 1.0
-(earlier kernels imposed a limit of 8 bytes).
-.SS C library/kernel differences
-The GNU C library does not employ the
-.BR gethostname ()
-system call; instead, it implements
-.BR gethostname ()
-as a library function that calls
-.BR uname (2)
-and copies up to
-.I len
-bytes from the returned
-.I nodename
-field into
-.IR name .
-Having performed the copy, the function then checks if the length of the
-.I nodename
-was greater than or equal to
-.IR len ,
-and if it is, then the function returns \-1 with
-.I errno
-set to
-.BR ENAMETOOLONG ;
-in this case, a terminating null byte is not included in the returned
-.IR name .
-.SH STANDARDS
-.TP
-.BR gethostname ()
-POSIX.1-2008.
-.TP
-.BR sethostname ()
-None.
-.SH HISTORY
-SVr4, 4.4BSD (these interfaces first appeared in 4.2BSD).
-POSIX.1-2001 and POSIX.1-2008 specify
-.BR gethostname ()
-but not
-.BR sethostname ().
-.P
-Versions of glibc before glibc 2.2
-.\" At least glibc 2.0 and glibc 2.1, older versions not checked
-handle the case where the length of the
-.I nodename
-was greater than or equal to
-.I len
-differently: nothing is copied into
-.I name
-and the function returns \-1 with
-.I errno
-set to
-.BR ENAMETOOLONG .
-.SH SEE ALSO
-.BR hostname (1),
-.BR getdomainname (2),
-.BR setdomainname (2),
-.BR uname (2),
-.BR uts_namespaces (7)
diff --git a/man2/getitimer.2 b/man2/getitimer.2
deleted file mode 100644
index b0a97c68f..000000000
--- a/man2/getitimer.2
+++ /dev/null
@@ -1,278 +0,0 @@
-.\" Copyright 7/93 by Darren Senn <sinster@scintilla.santa-clara.ca.us>
-.\" and Copyright (C) 2016, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Based on a similar page Copyright 1992 by Rick Faith
-.\"
-.\" %%%LICENSE_START(FREELY_REDISTRIBUTABLE)
-.\" May be freely distributed and modified
-.\" %%%LICENSE_END
-.\"
-.\" Modified Tue Oct 22 00:22:35 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" 2005-04-06 mtk, Matthias Lang <matthias@corelatus.se>
-.\" Noted MAX_SEC_IN_JIFFIES ceiling
-.\"
-.TH getitimer 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getitimer, setitimer \- get or set value of an interval timer
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/time.h>
-.P
-.BI "int getitimer(int " which ", struct itimerval *" curr_value );
-.BI "int setitimer(int " which ", const struct itimerval *restrict " new_value ,
-.BI " struct itimerval *_Nullable restrict " old_value );
-.fi
-.SH DESCRIPTION
-These system calls provide access to interval timers, that is,
-timers that initially expire at some point in the future,
-and (optionally) at regular intervals after that.
-When a timer expires, a signal is generated for the calling process,
-and the timer is reset to the specified interval
-(if the interval is nonzero).
-.P
-Three types of timers\[em]specified via the
-.I which
-argument\[em]are provided,
-each of which counts against a different clock and
-generates a different signal on timer expiration:
-.TP
-.B ITIMER_REAL
-This timer counts down in real (i.e., wall clock) time.
-At each expiration, a
-.B SIGALRM
-signal is generated.
-.TP
-.B ITIMER_VIRTUAL
-This timer counts down against the user-mode CPU time consumed by the process.
-(The measurement includes CPU time consumed by all threads in the process.)
-At each expiration, a
-.B SIGVTALRM
-signal is generated.
-.TP
-.B ITIMER_PROF
-This timer counts down against the total (i.e., both user and system)
-CPU time consumed by the process.
-(The measurement includes CPU time consumed by all threads in the process.)
-At each expiration, a
-.B SIGPROF
-signal is generated.
-.IP
-In conjunction with
-.BR ITIMER_VIRTUAL ,
-this timer can be used to profile user and system CPU time
-consumed by the process.
-.P
-A process has only one of each of the three types of timers.
-.P
-Timer values are defined by the following structures:
-.P
-.in +4n
-.EX
-struct itimerval {
- struct timeval it_interval; /* Interval for periodic timer */
- struct timeval it_value; /* Time until next expiration */
-};
-\&
-struct timeval {
- time_t tv_sec; /* seconds */
- suseconds_t tv_usec; /* microseconds */
-};
-.EE
-.in
-.\"
-.SS getitimer()
-The function
-.BR getitimer ()
-places the current value of the timer specified by
-.I which
-in the buffer pointed to by
-.IR curr_value .
-.P
-The
-.I it_value
-substructure is populated with the amount of time remaining until
-the next expiration of the specified timer.
-This value changes as the timer counts down, and will be reset to
-.I it_interval
-when the timer expires.
-If both fields of
-.I it_value
-are zero, then this timer is currently disarmed (inactive).
-.P
-The
-.I it_interval
-substructure is populated with the timer interval.
-If both fields of
-.I it_interval
-are zero, then this is a single-shot timer (i.e., it expires just once).
-.SS setitimer()
-The function
-.BR setitimer ()
-arms or disarms the timer specified by
-.IR which ,
-by setting the timer to the value specified by
-.IR new_value .
-If
-.I old_value
-is non-NULL,
-the buffer it points to is used to return the previous value of the timer
-(i.e., the same information that is returned by
-.BR getitimer ()).
-.P
-If either field in
-.I new_value.it_value
-is nonzero,
-then the timer is armed to initially expire at the specified time.
-If both fields in
-.I new_value.it_value
-are zero, then the timer is disarmed.
-.P
-The
-.I new_value.it_interval
-field specifies the new interval for the timer;
-if both of its subfields are zero, the timer is single-shot.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.IR new_value ,
-.IR old_value ,
-or
-.I curr_value
-is not a valid pointer.
-.TP
-.B EINVAL
-.I which
-is not one of
-.BR ITIMER_REAL ,
-.BR ITIMER_VIRTUAL ,
-or
-.BR ITIMER_PROF ;
-or (since Linux 2.6.22) one of the
-.I tv_usec
-fields in the structure pointed to by
-.I new_value
-contains a value outside the range [0, 999999].
-.SH VERSIONS
-The standards are silent on the meaning of the call:
-.P
-.in +4n
-.EX
-setitimer(which, NULL, &old_value);
-.EE
-.in
-.P
-Many systems (Solaris, the BSDs, and perhaps others)
-treat this as equivalent to:
-.P
-.in +4n
-.EX
-getitimer(which, &old_value);
-.EE
-.in
-.P
-In Linux, this is treated as being equivalent to a call in which the
-.I new_value
-fields are zero; that is, the timer is disabled.
-.IR "Don't use this Linux misfeature" :
-it is nonportable and unnecessary.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.4BSD (this call first appeared in 4.2BSD).
-POSIX.1-2008 marks
-.BR getitimer ()
-and
-.BR setitimer ()
-obsolete, recommending the use of the POSIX timers API
-.RB ( timer_gettime (2),
-.BR timer_settime (2),
-etc.) instead.
-.SH NOTES
-Timers will never expire before the requested time,
-but may expire some (short) time afterward, which depends
-on the system timer resolution and on the system load; see
-.BR time (7).
-(But see BUGS below.)
-If the timer expires while the process is active (always true for
-.BR ITIMER_VIRTUAL ),
-the signal will be delivered immediately when generated.
-.P
-A child created via
-.BR fork (2)
-does not inherit its parent's interval timers.
-Interval timers are preserved across an
-.BR execve (2).
-.P
-POSIX.1 leaves the
-interaction between
-.BR setitimer ()
-and the three interfaces
-.BR alarm (2),
-.BR sleep (3),
-and
-.BR usleep (3)
-unspecified.
-.SH BUGS
-The generation and delivery of a signal are distinct, and
-only one instance of each of the signals listed above may be pending
-for a process.
-Under very heavy loading, an
-.B ITIMER_REAL
-timer may expire before the signal from a previous expiration
-has been delivered.
-The second signal in such an event will be lost.
-.P
-Before Linux 2.6.16, timer values are represented in jiffies.
-If a request is made to set a timer with a value whose jiffies
-representation exceeds
-.B MAX_SEC_IN_JIFFIES
-(defined in
-.IR include/linux/jiffies.h ),
-then the timer is silently truncated to this ceiling value.
-On Linux/i386 (where, since Linux 2.6.13,
-the default jiffy is 0.004 seconds),
-this means that the ceiling value for a timer is
-approximately 99.42 days.
-Since Linux 2.6.16,
-the kernel uses a different internal representation for times,
-and this ceiling is removed.
-.P
-On certain systems (including i386),
-Linux kernels before Linux 2.6.12 have a bug which will produce
-premature timer expirations of up to one jiffy under some circumstances.
-This bug is fixed in Linux 2.6.12.
-.\" 4 Jul 2005: It looks like this bug may remain in Linux 2.4.x.
-.\" http://lkml.org/lkml/2005/7/1/165
-.P
-POSIX.1-2001 says that
-.BR setitimer ()
-should fail if a
-.I tv_usec
-value is specified that is outside of the range [0, 999999].
-However, up to and including Linux 2.6.21,
-Linux does not give an error, but instead silently
-adjusts the corresponding seconds value for the timer.
-From Linux 2.6.22 onward,
-this nonconformance has been repaired:
-an improper
-.I tv_usec
-value results in an
-.B EINVAL
-error.
-.\" Bugzilla report 25 Apr 2006:
-.\" http://bugzilla.kernel.org/show_bug.cgi?id=6443
-.\" "setitimer() should reject noncanonical arguments"
-.SH SEE ALSO
-.BR gettimeofday (2),
-.BR sigaction (2),
-.BR signal (2),
-.BR timer_create (2),
-.BR timerfd_create (2),
-.BR time (7)
diff --git a/man2/getmsg.2 b/man2/getmsg.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/getmsg.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/getpagesize.2 b/man2/getpagesize.2
deleted file mode 100644
index fcdac938a..000000000
--- a/man2/getpagesize.2
+++ /dev/null
@@ -1,67 +0,0 @@
-.\" Copyright (C) 2001 Andries Brouwer <aeb@cwi.nl>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH getpagesize 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getpagesize \- get memory page size
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B int getpagesize(void);
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR getpagesize ():
-.nf
- Since glibc 2.20:
- _DEFAULT_SOURCE || ! (_POSIX_C_SOURCE >= 200112L)
- glibc 2.12 to glibc 2.19:
- _BSD_SOURCE || ! (_POSIX_C_SOURCE >= 200112L)
- Before glibc 2.12:
- _BSD_SOURCE || _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
-.fi
-.SH DESCRIPTION
-The function
-.BR getpagesize ()
-returns the number of bytes in a memory page,
-where "page" is a fixed-length block,
-the unit for memory allocation and file mapping performed by
-.BR mmap (2).
-.SH VERSIONS
-A user program should not hard-code a page size,
-neither as a literal nor using the
-.B PAGE_SIZE
-macro,
-because some architectures support multiple page sizes.
-.P
-This manual page is in section 2 because
-Alpha, SPARC, and SPARC64
-all have a Linux system call
-.BR getpagesize ()
-though other architectures do not,
-and use the ELF auxiliary vector instead.
-.SH STANDARDS
-None.
-.SH HISTORY
-This call first appeared in 4.2BSD.
-SVr4, 4.4BSD, SUSv2.
-In SUSv2 the
-.BR getpagesize ()
-call was labeled LEGACY,
-and it was removed in POSIX.1-2001.
-.P
-glibc 2.0 returned a constant
-even on architectures with multiple page sizes.
-.SH SEE ALSO
-.BR mmap (2),
-.BR sysconf (3)
diff --git a/man2/getpeername.2 b/man2/getpeername.2
deleted file mode 100644
index dc216c0f6..000000000
--- a/man2/getpeername.2
+++ /dev/null
@@ -1,116 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)getpeername.2 6.5 (Berkeley) 3/10/91
-.\"
-.\" Modified Sat Jul 24 16:37:50 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Thu Jul 30 14:37:50 1993 by Martin Schulze <joey@debian.org>
-.\" Modified Sun Mar 28 21:26:46 1999 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 17 Jul 2002, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added 'socket' to NAME, so that "man -k socket" will show this page.
-.\"
-.TH getpeername 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getpeername \- get name of connected peer socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int getpeername(int " sockfd ", struct sockaddr *restrict " addr ,
-.BI " socklen_t *restrict " addrlen );
-.fi
-.SH DESCRIPTION
-.BR getpeername ()
-returns the address of the peer connected to the socket
-.IR sockfd ,
-in the buffer pointed to by
-.IR addr .
-The
-.I addrlen
-argument should be initialized to indicate the amount of space pointed to
-by
-.IR addr .
-On return it contains the actual size of the name returned (in bytes).
-The name is truncated if the buffer provided is too small.
-.P
-The returned address is truncated if the buffer provided is too small;
-in this case,
-.I addrlen
-will return a value greater than was supplied to the call.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-The argument
-.I sockfd
-is not a valid file descriptor.
-.TP
-.B EFAULT
-The
-.I addr
-argument points to memory not in a valid part of the
-process address space.
-.TP
-.B EINVAL
-.I addrlen
-is invalid (e.g., is negative).
-.TP
-.B ENOBUFS
-Insufficient resources were available in the system
-to perform the operation.
-.TP
-.B ENOTCONN
-The socket is not connected.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.4BSD
-(first appeared in 4.2BSD).
-.SH NOTES
-For stream sockets, once a
-.BR connect (2)
-has been performed, either socket can call
-.BR getpeername ()
-to obtain the address of the peer socket.
-On the other hand, datagram sockets are connectionless.
-Calling
-.BR connect (2)
-on a datagram socket merely sets the peer address for outgoing
-datagrams sent with
-.BR write (2)
-or
-.BR send (2).
-The caller of
-.BR connect (2)
-can use
-.BR getpeername ()
-to obtain the peer address that it earlier set for the socket.
-However, the peer socket is unaware of this information, and calling
-.BR getpeername ()
-on the peer socket will return no useful information (unless a
-.BR connect (2)
-call was also executed on the peer).
-Note also that the receiver of a datagram can obtain
-the address of the sender when using
-.BR recvfrom (2).
-.SH SEE ALSO
-.BR accept (2),
-.BR bind (2),
-.BR getsockname (2),
-.BR ip (7),
-.BR socket (7),
-.BR unix (7)
diff --git a/man2/getpgid.2 b/man2/getpgid.2
deleted file mode 100644
index d6b107a1c..000000000
--- a/man2/getpgid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setpgid.2
diff --git a/man2/getpgrp.2 b/man2/getpgrp.2
deleted file mode 100644
index d6b107a1c..000000000
--- a/man2/getpgrp.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setpgid.2
diff --git a/man2/getpid.2 b/man2/getpid.2
deleted file mode 100644
index 811270f05..000000000
--- a/man2/getpid.2
+++ /dev/null
@@ -1,150 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH getpid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getpid, getppid \- get process identification
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B pid_t getpid(void);
-.B pid_t getppid(void);
-.fi
-.SH DESCRIPTION
-.BR getpid ()
-returns the process ID (PID) of the calling process.
-(This is often used by
-routines that generate unique temporary filenames.)
-.P
-.BR getppid ()
-returns the process ID of the parent of the calling process.
-This will be either the ID of the process that created this process using
-.BR fork (2),
-or, if that process has already terminated,
-the ID of the process to which this process has been reparented (either
-.BR init (1)
-or a "subreaper" process defined via the
-.BR prctl (2)
-.B PR_SET_CHILD_SUBREAPER
-operation).
-.SH ERRORS
-These functions are always successful.
-.SH VERSIONS
-On Alpha, instead of a pair of
-.BR getpid ()
-and
-.BR getppid ()
-system calls, a single
-.BR getxpid ()
-system call is provided, which returns a pair of PID and parent PID.
-The glibc
-.BR getpid ()
-and
-.BR getppid ()
-wrapper functions transparently deal with this.
-See
-.BR syscall (2)
-for details regarding register mapping.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.3BSD, SVr4.
-.SS C library/kernel differences
-From glibc 2.3.4 up to and including glibc 2.24,
-the glibc wrapper function for
-.BR getpid ()
-cached PIDs,
-with the goal of avoiding additional system calls when a process calls
-.BR getpid ()
-repeatedly.
-Normally this caching was invisible,
-but its correct operation relied on support in the wrapper functions for
-.BR fork (2),
-.BR vfork (2),
-and
-.BR clone (2):
-if an application bypassed the glibc wrappers for these system calls by using
-.BR syscall (2),
-then a call to
-.BR getpid ()
-in the child would return the wrong value
-(to be precise: it would return the PID of the parent process).
-.\" The following program demonstrates this "feature":
-.\"
-.\" #define _GNU_SOURCE
-.\" #include <sys/syscall.h>
-.\" #include <sys/wait.h>
-.\" #include <stdint.h>
-.\" #include <stdio.h>
-.\" #include <stdlib.h>
-.\" #include <unistd.h>
-.\"
-.\" int
-.\" main(int argc, char *argv[])
-.\" {
-.\" /* The following statement fills the getpid() cache */
-.\"
-.\" printf("parent PID = %ld\n", (intmax_t) getpid());
-.\"
-.\" if (syscall(SYS_fork) == 0) {
-.\" if (getpid() != syscall(SYS_getpid))
-.\" printf("child getpid() mismatch: getpid()=%jd; "
-.\" "syscall(SYS_getpid)=%ld\n",
-.\" (intmax_t) getpid(), (long) syscall(SYS_getpid));
-.\" exit(EXIT_SUCCESS);
-.\" }
-.\" wait(NULL);
-.\"}
-In addition, there were cases where
-.BR getpid ()
-could return the wrong value even when invoking
-.BR clone (2)
-via the glibc wrapper function.
-(For a discussion of one such case, see BUGS in
-.BR clone (2).)
-Furthermore, the complexity of the caching code had been
-the source of a few bugs within glibc over the years.
-.P
-Because of the aforementioned problems,
-since glibc 2.25, the PID cache is removed:
-.\" commit c579f48edba88380635ab98cb612030e3ed8691e
-.\" https://sourceware.org/glibc/wiki/Release/2.25#pid_cache_removal
-calls to
-.BR getpid ()
-always invoke the actual system call, rather than returning a cached value.
-.\" FIXME .
-.\" Review progress of https://bugzilla.redhat.com/show_bug.cgi?id=1469757
-.SH NOTES
-If the caller's parent is in a different PID namespace (see
-.BR pid_namespaces (7)),
-.BR getppid ()
-returns 0.
-.P
-From a kernel perspective,
-the PID (which is shared by all of the threads in a multithreaded process)
-is sometimes also known as the thread group ID (TGID).
-This contrasts with the kernel thread ID (TID),
-which is unique for each thread.
-For further details, see
-.BR gettid (2)
-and the discussion of the
-.B CLONE_THREAD
-flag in
-.BR clone (2).
-.SH SEE ALSO
-.BR clone (2),
-.BR fork (2),
-.BR gettid (2),
-.BR kill (2),
-.BR exec (3),
-.BR mkstemp (3),
-.BR tempnam (3),
-.BR tmpfile (3),
-.BR tmpnam (3),
-.BR credentials (7),
-.BR pid_namespaces (7)
diff --git a/man2/getpmsg.2 b/man2/getpmsg.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/getpmsg.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/getppid.2 b/man2/getppid.2
deleted file mode 100644
index fca885ee4..000000000
--- a/man2/getppid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getpid.2
diff --git a/man2/getpriority.2 b/man2/getpriority.2
deleted file mode 100644
index 111b591b5..000000000
--- a/man2/getpriority.2
+++ /dev/null
@@ -1,209 +0,0 @@
-.\" Copyright (c) 1980, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)getpriority.2 6.9 (Berkeley) 3/10/91
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-07-01 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 1996-11-06 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-10-21 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Corrected statement under EPERM to clarify privileges required
-.\" Modified 2002-06-21 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Clarified meaning of 0 value for 'who' argument
-.\" Modified 2004-05-27 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH getpriority 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getpriority, setpriority \- get/set program scheduling priority
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/resource.h>
-.P
-.BI "int getpriority(int " which ", id_t " who );
-.BI "int setpriority(int " which ", id_t " who ", int " prio );
-.fi
-.SH DESCRIPTION
-The scheduling priority of the process, process group, or user, as
-indicated by
-.I which
-and
-.I who
-is obtained with the
-.BR getpriority ()
-call and set with the
-.BR setpriority ()
-call.
-The process attribute dealt with by these system calls is
-the same attribute (also known as the "nice" value) that is dealt with by
-.BR nice (2).
-.P
-The value
-.I which
-is one of
-.BR PRIO_PROCESS ,
-.BR PRIO_PGRP ,
-or
-.BR PRIO_USER ,
-and
-.I who
-is interpreted relative to
-.I which
-(a process identifier for
-.BR PRIO_PROCESS ,
-process group
-identifier for
-.BR PRIO_PGRP ,
-and a user ID for
-.BR PRIO_USER ).
-A zero value for
-.I who
-denotes (respectively) the calling process, the process group of the
-calling process, or the real user ID of the calling process.
-.P
-The
-.I prio
-argument is a value in the range \-20 to 19 (but see NOTES below),
-with \-20 being the highest priority and 19 being the lowest priority.
-Attempts to set a priority outside this range
-are silently clamped to the range.
-The default priority is 0;
-lower values give a process a higher scheduling priority.
-.P
-The
-.BR getpriority ()
-call returns the highest priority (lowest numerical value)
-enjoyed by any of the specified processes.
-The
-.BR setpriority ()
-call sets the priorities of all of the specified processes
-to the specified value.
-.P
-Traditionally, only a privileged process could lower the nice value
-(i.e., set a higher priority).
-However, since Linux 2.6.12, an unprivileged process can decrease
-the nice value of a target process that has a suitable
-.B RLIMIT_NICE
-soft limit; see
-.BR getrlimit (2)
-for details.
-.SH RETURN VALUE
-On success,
-.BR getpriority ()
-returns the calling thread's nice value, which may be a negative number.
-On error, it returns \-1 and sets
-.I errno
-to indicate the error.
-.P
-Since a successful call to
-.BR getpriority ()
-can legitimately return the value \-1, it is necessary
-to clear
-.I errno
-prior to the
-call, then check
-.I errno
-afterward to determine
-if \-1 is an error or a legitimate value.
-.P
-.BR setpriority ()
-returns 0 on success.
-On failure, it returns \-1 and sets
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The caller attempted to set a lower nice value
-(i.e., a higher process priority), but did not
-have the required privilege (on Linux: did not have the
-.B CAP_SYS_NICE
-capability).
-.TP
-.B EINVAL
-.I which
-was not one of
-.BR PRIO_PROCESS ,
-.BR PRIO_PGRP ,
-or
-.BR PRIO_USER .
-.TP
-.B EPERM
-A process was located, but its effective user ID did not match
-either the effective or the real user ID of the caller,
-and was not privileged (on Linux: did not have the
-.B CAP_SYS_NICE
-capability).
-But see NOTES below.
-.TP
-.B ESRCH
-No process was located using the
-.I which
-and
-.I who
-values specified.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001,
-SVr4, 4.4BSD (these interfaces first appeared in 4.2BSD).
-.SH NOTES
-For further details on the nice value, see
-.BR sched (7).
-.P
-.IR Note :
-the addition of the "autogroup" feature in Linux 2.6.38 means that
-the nice value no longer has its traditional effect in many circumstances.
-For details, see
-.BR sched (7).
-.P
-A child created by
-.BR fork (2)
-inherits its parent's nice value.
-The nice value is preserved across
-.BR execve (2).
-.P
-The details on the condition for
-.B EPERM
-depend on the system.
-The above description is what POSIX.1-2001 says, and seems to be followed on
-all System\ V-like systems.
-Linux kernels before Linux 2.6.12 required the real or
-effective user ID of the caller to match
-the real user ID of the process \fIwho\fP (instead of its effective user ID).
-Linux 2.6.12 and later require
-the effective user ID of the caller to match
-the real or effective user ID of the process \fIwho\fP.
-All BSD-like systems (SunOS 4.1.3, Ultrix 4.2,
-4.3BSD, FreeBSD 4.3, OpenBSD-2.5, ...) behave in the same
-manner as Linux 2.6.12 and later.
-.\"
-.SS C library/kernel differences
-The getpriority system call returns nice values translated to the range 40..1,
-since a negative return value would be interpreted as an error.
-The glibc wrapper function for
-.BR getpriority ()
-translates the value back according to the formula
-.I unice\~=\~20\~\-\~knice
-(thus, the 40..1 range returned by the kernel
-corresponds to the range \-20..19 as seen by user space).
-.SH BUGS
-According to POSIX, the nice value is a per-process setting.
-However, under the current Linux/NPTL implementation of POSIX threads,
-the nice value is a per-thread attribute:
-different threads in the same process can have different nice values.
-Portable applications should avoid relying on the Linux behavior,
-which may be made standards conformant in the future.
-.SH SEE ALSO
-.BR nice (1),
-.BR renice (1),
-.BR fork (2),
-.BR capabilities (7),
-.BR sched (7)
-.P
-.I Documentation/scheduler/sched\-nice\-design.txt
-in the Linux kernel source tree (since Linux 2.6.23)
diff --git a/man2/getrandom.2 b/man2/getrandom.2
deleted file mode 100644
index a6489a494..000000000
--- a/man2/getrandom.2
+++ /dev/null
@@ -1,295 +0,0 @@
-.\" Copyright (C) 2014, Theodore Ts'o <tytso@mit.edu>
-.\" Copyright (C) 2014,2015 Heinrich Schuchardt <xypron.glpk@gmx.de>
-.\" Copyright (C) 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH getrandom 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getrandom \- obtain a series of random bytes
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/random.h>
-.P
-.BI "ssize_t getrandom(void " buf [. buflen "], size_t " buflen ", \
-unsigned int " flags );
-.fi
-.SH DESCRIPTION
-The
-.BR getrandom ()
-system call fills the buffer pointed to by
-.I buf
-with up to
-.I buflen
-random bytes.
-These bytes can be used to seed user-space random number generators
-or for cryptographic purposes.
-.P
-By default,
-.BR getrandom ()
-draws entropy from the
-.I urandom
-source (i.e., the same source as the
-.I /dev/urandom
-device).
-This behavior can be changed via the
-.I flags
-argument.
-.P
-If the
-.I urandom
-source has been initialized,
-reads of up to 256 bytes will always return as many bytes as
-requested and will not be interrupted by signals.
-No such guarantees apply for larger buffer sizes.
-For example, if the call is interrupted by a signal handler,
-it may return a partially filled buffer, or fail with the error
-.BR EINTR .
-.P
-If the
-.I urandom
-source has not yet been initialized, then
-.BR getrandom ()
-will block, unless
-.B GRND_NONBLOCK
-is specified in
-.IR flags .
-.P
-The
-.I flags
-argument is a bit mask that can contain zero or more of the following values
-ORed together:
-.TP
-.B GRND_RANDOM
-If this bit is set, then random bytes are drawn from the
-.I random
-source
-(i.e., the same source as the
-.I /dev/random
-device)
-instead of the
-.I urandom
-source.
-The
-.I random
-source is limited based on the entropy that can be obtained from environmental
-noise.
-If the number of available bytes in the
-.I random
-source is less than requested in
-.IR buflen ,
-the call returns just the available random bytes.
-If no random bytes are available, the behavior depends on the presence of
-.B GRND_NONBLOCK
-in the
-.I flags
-argument.
-.TP
-.B GRND_NONBLOCK
-By default, when reading from the
-.I random
-source,
-.BR getrandom ()
-blocks if no random bytes are available,
-and when reading from the
-.I urandom
-source, it blocks if the entropy pool has not yet been initialized.
-If the
-.B GRND_NONBLOCK
-flag is set, then
-.BR getrandom ()
-does not block in these cases, but instead immediately returns \-1 with
-.I errno
-set to
-.BR EAGAIN .
-.SH RETURN VALUE
-On success,
-.BR getrandom ()
-returns the number of bytes that were copied to the buffer
-.IR buf .
-This may be less than the number of bytes requested via
-.I buflen
-if either
-.B GRND_RANDOM
-was specified in
-.I flags
-and insufficient entropy was present in the
-.I random
-source or the system call was interrupted by a signal.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-The requested entropy was not available, and
-.BR getrandom ()
-would have blocked if the
-.B GRND_NONBLOCK
-flag was not set.
-.TP
-.B EFAULT
-The address referred to by
-.I buf
-is outside the accessible address space.
-.TP
-.B EINTR
-The call was interrupted by a signal
-handler; see the description of how interrupted
-.BR read (2)
-calls on "slow" devices are handled with and without the
-.B SA_RESTART
-flag in the
-.BR signal (7)
-man page.
-.TP
-.B EINVAL
-An invalid flag was specified in
-.IR flags .
-.TP
-.B ENOSYS
-The glibc wrapper function for
-.BR getrandom ()
-determined that the underlying kernel does not implement this system call.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.17,
-glibc 2.25.
-.SH NOTES
-For an overview and comparison of the various interfaces that
-can be used to obtain randomness, see
-.BR random (7).
-.P
-Unlike
-.I /dev/random
-and
-.IR /dev/urandom ,
-.BR getrandom ()
-does not involve the use of pathnames or file descriptors.
-Thus,
-.BR getrandom ()
-can be useful in cases where
-.BR chroot (2)
-makes
-.I /dev
-pathnames invisible,
-and where an application (e.g., a daemon during start-up)
-closes a file descriptor for one of these files
-that was opened by a library.
-.\"
-.SS Maximum number of bytes returned
-As of Linux 3.19 the following limits apply:
-.IP \[bu] 3
-When reading from the
-.I urandom
-source, a maximum of 32Mi-1 bytes is returned by a single call to
-.BR getrandom ()
-on systems where
-.I int
-has a size of 32 bits.
-.IP \[bu]
-When reading from the
-.I random
-source, a maximum of 512 bytes is returned.
-.SS Interruption by a signal handler
-When reading from the
-.I urandom
-source
-.RB ( GRND_RANDOM
-is not set),
-.BR getrandom ()
-will block until the entropy pool has been initialized
-(unless the
-.B GRND_NONBLOCK
-flag was specified).
-If a request is made to read a large number of bytes (more than 256),
-.BR getrandom ()
-will block until those bytes have been generated and transferred
-from kernel memory to
-.IR buf .
-When reading from the
-.I random
-source
-.RB ( GRND_RANDOM
-is set),
-.BR getrandom ()
-will block until some random bytes become available
-(unless the
-.B GRND_NONBLOCK
-flag was specified).
-.P
-The behavior when a call to
-.BR getrandom ()
-that is blocked while reading from the
-.I urandom
-source is interrupted by a signal handler
-depends on the initialization state of the entropy pool
-and on the request size,
-.IR buflen .
-If the entropy pool is not yet initialized, then the call fails with the
-.B EINTR
-error.
-If the entropy pool has been initialized
-and the request size is large
-.RI ( buflen "\ >\ 256),"
-the call either succeeds, returning a partially filled buffer,
-or fails with the error
-.BR EINTR .
-If the entropy pool has been initialized and the request size is small
-.RI ( buflen "\ <=\ 256),"
-then
-.BR getrandom ()
-will not fail with
-.BR EINTR .
-Instead, it will return all of the bytes that have been requested.
-.P
-When reading from the
-.I random
-source, blocking requests of any size can be interrupted by a signal handler
-(the call fails with the error
-.BR EINTR ).
-.P
-Using
-.BR getrandom ()
-to read small buffers (<=\ 256 bytes) from the
-.I urandom
-source is the preferred mode of usage.
-.P
-The special treatment of small values of
-.I buflen
-was designed for compatibility with
-OpenBSD's
-.BR getentropy (3),
-which is nowadays supported by glibc.
-.P
-The user of
-.BR getrandom ()
-.I must
-always check the return value,
-to determine whether either an error occurred
-or fewer bytes than requested were returned.
-In the case where
-.B GRND_RANDOM
-is not specified and
-.I buflen
-is less than or equal to 256,
-a return of fewer bytes than requested should never happen,
-but the careful programmer will check for this anyway!
-.SH BUGS
-As of Linux 3.19, the following bug exists:
-.\" FIXME patch proposed https://lkml.org/lkml/2014/11/29/16
-.IP \[bu] 3
-Depending on CPU load,
-.BR getrandom ()
-does not react to interrupts before reading all bytes requested.
-.SH SEE ALSO
-.BR getentropy (3),
-.BR random (4),
-.BR urandom (4),
-.BR random (7),
-.BR signal (7)
diff --git a/man2/getresgid.2 b/man2/getresgid.2
deleted file mode 100644
index ac4fb7cb8..000000000
--- a/man2/getresgid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getresuid.2
diff --git a/man2/getresgid32.2 b/man2/getresgid32.2
deleted file mode 100644
index 2b3240fa4..000000000
--- a/man2/getresgid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getresgid.2
diff --git a/man2/getresuid.2 b/man2/getresuid.2
deleted file mode 100644
index b39bf5b7f..000000000
--- a/man2/getresuid.2
+++ /dev/null
@@ -1,70 +0,0 @@
-.\" Copyright (C) 1997 Andries Brouwer (aeb@cwi.nl)
-.\" and Copyright (c) 2007, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified, 2003-05-26, Michael Kerrisk, <mtk.manpages@gmail.com>
-.\"
-.TH getresuid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getresuid, getresgid \- get real, effective, and saved user/group IDs
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <unistd.h>
-.P
-.BI "int getresuid(uid_t *" ruid ", uid_t *" euid ", uid_t *" suid );
-.BI "int getresgid(gid_t *" rgid ", gid_t *" egid ", gid_t *" sgid );
-.fi
-.SH DESCRIPTION
-.BR getresuid ()
-returns the real UID, the effective UID, and the saved set-user-ID
-of the calling process, in the arguments
-.IR ruid ,
-.IR euid ,
-and
-.IR suid ,
-respectively.
-.BR getresgid ()
-performs the analogous task for the process's group IDs.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-One of the arguments specified an address outside the calling program's
-address space.
-.SH STANDARDS
-None.
-These calls also appear on HP-UX and some of the BSDs.
-.SH HISTORY
-Linux 2.1.44,
-glibc 2.3.2.
-.P
-The original Linux
-.BR getresuid ()
-and
-.BR getresgid ()
-system calls supported only 16-bit user and group IDs.
-Subsequently, Linux 2.4 added
-.BR getresuid32 ()
-and
-.BR getresgid32 (),
-supporting 32-bit IDs.
-The glibc
-.BR getresuid ()
-and
-.BR getresgid ()
-wrapper functions transparently deal with the variations across kernel versions.
-.SH SEE ALSO
-.BR getuid (2),
-.BR setresuid (2),
-.BR setreuid (2),
-.BR setuid (2),
-.BR credentials (7)
diff --git a/man2/getresuid32.2 b/man2/getresuid32.2
deleted file mode 100644
index ac4fb7cb8..000000000
--- a/man2/getresuid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getresuid.2
diff --git a/man2/getrlimit.2 b/man2/getrlimit.2
deleted file mode 100644
index b1254f618..000000000
--- a/man2/getrlimit.2
+++ /dev/null
@@ -1,853 +0,0 @@
-'\" t
-.\" Copyright (c) 1992 Drew Eckhardt, March 28, 1992
-.\" and Copyright (c) 2002, 2004, 2005, 2008, 2010 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-23 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-01-13 by Arnt Gulbrandsen <agulbra@troll.no>
-.\" Modified 1996-01-22 by aeb, following a remark by
-.\" Tigran Aivazian <tigran@sco.com>
-.\" Modified 1996-04-14 by aeb, following a remark by
-.\" Robert Bihlmeyer <robbe@orcus.ping.at>
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-05-04 by aeb, following a remark by
-.\" Håvard Lygre <hklygre@online.no>
-.\" Modified 2001-04-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2002-06-13 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added note on nonstandard behavior when SIGCHLD is ignored.
-.\" Modified 2002-07-09 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Enhanced descriptions of 'resource' values
-.\" Modified 2003-11-28 by aeb, added RLIMIT_CORE
-.\" Modified 2004-03-26 by aeb, added RLIMIT_AS
-.\" Modified 2004-06-16 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on CAP_SYS_RESOURCE
-.\"
-.\" 2004-11-16 -- mtk: the getrlimit.2 page, which formerly included
-.\" coverage of getrusage(2), has been split, so that the latter
-.\" is now covered in its own getrusage.2.
-.\"
-.\" Modified 2004-11-16, mtk: A few other minor changes
-.\" Modified 2004-11-23, mtk
-.\" Added notes on RLIMIT_MEMLOCK, RLIMIT_NPROC, and RLIMIT_RSS
-.\" to "CONFORMING TO"
-.\" Modified 2004-11-25, mtk
-.\" Rewrote discussion on RLIMIT_MEMLOCK to incorporate kernel
-.\" 2.6.9 changes.
-.\" Added note on RLIMIT_CPU error in older kernels
-.\" 2004-11-03, mtk, Added RLIMIT_SIGPENDING
-.\" 2005-07-13, mtk, documented RLIMIT_MSGQUEUE limit.
-.\" 2005-07-28, mtk, Added descriptions of RLIMIT_NICE and RLIMIT_RTPRIO
-.\" 2008-05-07, mtk / Peter Zijlstra, Added description of RLIMIT_RTTIME
-.\" 2010-11-06, mtk: Added documentation of prlimit()
-.\"
-.TH getrlimit 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getrlimit, setrlimit, prlimit \- get/set resource limits
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/resource.h>
-.P
-.BI "int getrlimit(int " resource ", struct rlimit *" rlim );
-.BI "int setrlimit(int " resource ", const struct rlimit *" rlim );
-.P
-.BI "int prlimit(pid_t " pid ", int " resource ,
-.BI " const struct rlimit *_Nullable " new_limit ,
-.BI " struct rlimit *_Nullable " old_limit );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR prlimit ():
-.nf
- _GNU_SOURCE
-.fi
-.SH DESCRIPTION
-The
-.BR getrlimit ()
-and
-.BR setrlimit ()
-system calls get and set resource limits.
-Each resource has an associated soft and hard limit, as defined by the
-.I rlimit
-structure:
-.P
-.in +4n
-.EX
-struct rlimit {
- rlim_t rlim_cur; /* Soft limit */
- rlim_t rlim_max; /* Hard limit (ceiling for rlim_cur) */
-};
-.EE
-.in
-.P
-The soft limit is the value that the kernel enforces for the
-corresponding resource.
-The hard limit acts as a ceiling for the soft limit:
-an unprivileged process may set only its soft limit to a value in the
-range from 0 up to the hard limit, and (irreversibly) lower its hard limit.
-A privileged process (under Linux: one with the
-.B CAP_SYS_RESOURCE
-capability in the initial user namespace)
-may make arbitrary changes to either limit value.
-.P
-The value
-.B RLIM_INFINITY
-denotes no limit on a resource (both in the structure returned by
-.BR getrlimit ()
-and in the structure passed to
-.BR setrlimit ()).
-.P
-The
-.I resource
-argument must be one of:
-.TP
-.B RLIMIT_AS
-This is the maximum size of the process's virtual memory
-(address space).
-The limit is specified in bytes, and is rounded down to the system page size.
-.\" since Linux 2.0.27 / Linux 2.1.12
-This limit affects calls to
-.BR brk (2),
-.BR mmap (2),
-and
-.BR mremap (2),
-which fail with the error
-.B ENOMEM
-upon exceeding this limit.
-In addition, automatic stack expansion fails
-(and generates a
-.B SIGSEGV
-that kills the process if no alternate stack
-has been made available via
-.BR sigaltstack (2)).
-Since the value is a \fIlong\fP, on machines with a 32-bit \fIlong\fP
-either this limit is at most 2\ GiB, or this resource is unlimited.
-.TP
-.B RLIMIT_CORE
-This is the maximum size of a
-.I core
-file (see
-.BR core (5))
-in bytes that the process may dump.
-When 0, no core dump files are created.
-When nonzero, larger dumps are truncated to this size.
-.TP
-.B RLIMIT_CPU
-This is a limit, in seconds,
-on the amount of CPU time that the process can consume.
-When the process reaches the soft limit, it is sent a
-.B SIGXCPU
-signal.
-The default action for this signal is to terminate the process.
-However, the signal can be caught, and the handler can return control to
-the main program.
-If the process continues to consume CPU time, it will be sent
-.B SIGXCPU
-once per second until the hard limit is reached, at which time
-it is sent
-.BR SIGKILL .
-(This latter point describes Linux behavior.
-Implementations vary in how they treat processes which continue to
-consume CPU time after reaching the soft limit.
-Portable applications that need to catch this signal should
-perform an orderly termination upon first receipt of
-.BR SIGXCPU .)
-.TP
-.B RLIMIT_DATA
-This is the maximum size
-of the process's data segment (initialized data,
-uninitialized data, and heap).
-The limit is specified in bytes, and is rounded down to the system page size.
-This limit affects calls to
-.BR brk (2),
-.BR sbrk (2),
-and (since Linux 4.7)
-.BR mmap (2),
-.\" commits 84638335900f1995495838fe1bd4870c43ec1f67
-.\" ("mm: rework virtual memory accounting"),
-.\" f4fcd55841fc9e46daac553b39361572453c2b88
-.\" (mm: enable RLIMIT_DATA by default with workaround for valgrind).
-which fail with the error
-.B ENOMEM
-upon encountering the soft limit of this resource.
-.TP
-.B RLIMIT_FSIZE
-This is the maximum size in bytes of files that the process may create.
-Attempts to extend a file beyond this limit result in delivery of a
-.B SIGXFSZ
-signal.
-By default, this signal terminates a process, but a process can
-catch this signal instead, in which case the relevant system call (e.g.,
-.BR write (2),
-.BR truncate (2))
-fails with the error
-.BR EFBIG .
-.TP
-.BR RLIMIT_LOCKS " (Linux 2.4.0 to Linux 2.4.24)"
-.\" to be precise: Linux 2.4.0-test9; no longer in Linux 2.4.25 / Linux 2.5.65
-This is a limit on the combined number of
-.BR flock (2)
-locks and
-.BR fcntl (2)
-leases that this process may establish.
-.TP
-.B RLIMIT_MEMLOCK
-This is the maximum number of bytes of memory that may be locked
-into RAM.
-This limit is in effect rounded down to the nearest multiple
-of the system page size.
-This limit affects
-.BR mlock (2),
-.BR mlockall (2),
-and the
-.BR mmap (2)
-.B MAP_LOCKED
-operation.
-Since Linux 2.6.9, it also affects the
-.BR shmctl (2)
-.B SHM_LOCK
-operation, where it sets a maximum on the total bytes in
-shared memory segments (see
-.BR shmget (2))
-that may be locked by the real user ID of the calling process.
-The
-.BR shmctl (2)
-.B SHM_LOCK
-locks are accounted for separately from the per-process memory
-locks established by
-.BR mlock (2),
-.BR mlockall (2),
-and
-.BR mmap (2)
-.BR MAP_LOCKED ;
-a process can lock bytes up to this limit in each of these
-two categories.
-.IP
-Before Linux 2.6.9, this limit controlled the amount of
-memory that could be locked by a privileged process.
-Since Linux 2.6.9, no limits are placed on the amount of memory
-that a privileged process may lock, and this limit instead governs
-the amount of memory that an unprivileged process may lock.
-.TP
-.BR RLIMIT_MSGQUEUE " (since Linux 2.6.8)"
-This is a limit on the number of bytes that can be allocated
-for POSIX message queues for the real user ID of the calling process.
-This limit is enforced for
-.BR mq_open (3).
-Each message queue that the user creates counts (until it is removed)
-against this limit according to the formula:
-.RS 4
-.IP
-Since Linux 3.5:
-.IP
-.in +4n
-.EX
-bytes = attr.mq_maxmsg * sizeof(struct msg_msg) +
- MIN(attr.mq_maxmsg, MQ_PRIO_MAX) *
- sizeof(struct posix_msg_tree_node)+
- /* For overhead */
- attr.mq_maxmsg * attr.mq_msgsize;
- /* For message data */
-.EE
-.in
-.IP
-Linux 3.4 and earlier:
-.IP
-.in +4n
-.EX
-bytes = attr.mq_maxmsg * sizeof(struct msg_msg *) +
- /* For overhead */
- attr.mq_maxmsg * attr.mq_msgsize;
- /* For message data */
-.EE
-.in
-.RE
-.IP
-where
-.I attr
-is the
-.I mq_attr
-structure specified as the fourth argument to
-.BR mq_open (3),
-and the
-.I msg_msg
-and
-.I posix_msg_tree_node
-structures are kernel-internal structures.
-.IP
-The "overhead" addend in the formula accounts for overhead
-bytes required by the implementation
-and ensures that the user cannot
-create an unlimited number of zero-length messages (such messages
-nevertheless each consume some system memory for bookkeeping overhead).
-.TP
-.BR RLIMIT_NICE " (since Linux 2.6.12, but see BUGS below)"
-This specifies a ceiling to which the process's nice value can be raised using
-.BR setpriority (2)
-or
-.BR nice (2).
-The actual ceiling for the nice value is calculated as
-.IR "20\ \-\ rlim_cur" .
-The useful range for this limit is thus from 1
-(corresponding to a nice value of 19) to 40
-(corresponding to a nice value of \-20).
-This unusual choice of range was necessary
-because negative numbers cannot be specified
-as resource limit values, since they typically have special meanings.
-For example,
-.B RLIM_INFINITY
-typically is the same as \-1.
-For more detail on the nice value, see
-.BR sched (7).
-.TP
-.B RLIMIT_NOFILE
-This specifies a value one greater than the maximum file descriptor number
-that can be opened by this process.
-Attempts
-.RB ( open (2),
-.BR pipe (2),
-.BR dup (2),
-etc.)
-to exceed this limit yield the error
-.BR EMFILE .
-(Historically, this limit was named
-.B RLIMIT_OFILE
-on BSD.)
-.IP
-Since Linux 4.5,
-this limit also defines the maximum number of file descriptors that
-an unprivileged process (one without the
-.B CAP_SYS_RESOURCE
-capability) may have "in flight" to other processes,
-by being passed across UNIX domain sockets.
-This limit applies to the
-.BR sendmsg (2)
-system call.
-For further details, see
-.BR unix (7).
-.TP
-.B RLIMIT_NPROC
-This is a limit on the number of extant processes
-(or, more precisely on Linux, threads)
-for the real user ID of the calling process.
-So long as the current number of processes belonging to this
-process's real user ID is greater than or equal to this limit,
-.BR fork (2)
-fails with the error
-.BR EAGAIN .
-.IP
-The
-.B RLIMIT_NPROC
-limit is not enforced for processes that have either the
-.B CAP_SYS_ADMIN
-or the
-.B CAP_SYS_RESOURCE
-capability,
-or run with real user ID 0.
-.TP
-.B RLIMIT_RSS
-This is a limit (in bytes) on the process's resident set
-(the number of virtual pages resident in RAM).
-This limit has effect only in Linux 2.4.x, x < 30, and there
-affects only calls to
-.BR madvise (2)
-specifying
-.BR MADV_WILLNEED .
-.\" As at Linux 2.6.12, this limit still does nothing in Linux 2.6 though
-.\" talk of making it do something has surfaced from time to time in LKML
-.\" -- MTK, Jul 05
-.TP
-.BR RLIMIT_RTPRIO " (since Linux 2.6.12, but see BUGS)"
-This specifies a ceiling on the real-time priority that may be set for
-this process using
-.BR sched_setscheduler (2)
-and
-.BR sched_setparam (2).
-.IP
-For further details on real-time scheduling policies, see
-.BR sched (7).
-.TP
-.BR RLIMIT_RTTIME " (since Linux 2.6.25)"
-This is a limit (in microseconds)
-on the amount of CPU time that a process scheduled
-under a real-time scheduling policy may consume without making a blocking
-system call.
-For the purpose of this limit,
-each time a process makes a blocking system call,
-the count of its consumed CPU time is reset to zero.
-The CPU time count is not reset if the process continues trying to
-use the CPU but is preempted, its time slice expires, or it calls
-.BR sched_yield (2).
-.IP
-Upon reaching the soft limit, the process is sent a
-.B SIGXCPU
-signal.
-If the process catches or ignores this signal and
-continues consuming CPU time, then
-.B SIGXCPU
-will be generated once each second until the hard limit is reached,
-at which point the process is sent a
-.B SIGKILL
-signal.
-.IP
-The intended use of this limit is to stop a runaway
-real-time process from locking up the system.
-.IP
-For further details on real-time scheduling policies, see
-.BR sched (7).
-.TP
-.BR RLIMIT_SIGPENDING " (since Linux 2.6.8)"
-This is a limit on the number of signals
-that may be queued for the real user ID of the calling process.
-Both standard and real-time signals are counted for the purpose of
-checking this limit.
-However, the limit is enforced only for
-.BR sigqueue (3);
-it is always possible to use
-.BR kill (2)
-to queue one instance of any of the signals that are not already
-queued to the process.
-.\" This replaces the /proc/sys/kernel/rtsig-max system-wide limit
-.\" that was present in Linux <= 2.6.7. MTK Dec 04
-.TP
-.B RLIMIT_STACK
-This is the maximum size of the process stack, in bytes.
-Upon reaching this limit, a
-.B SIGSEGV
-signal is generated.
-To handle this signal, a process must employ an alternate signal stack
-.RB ( sigaltstack (2)).
-.IP
-Since Linux 2.6.23,
-this limit also determines the amount of space used for the process's
-command-line arguments and environment variables; for details, see
-.BR execve (2).
-.SS prlimit()
-.\" commit c022a0acad534fd5f5d5f17280f6d4d135e74e81
-.\" Author: Jiri Slaby <jslaby@suse.cz>
-.\" Date: Tue May 4 18:03:50 2010 +0200
-.\"
-.\" rlimits: implement prlimit64 syscall
-.\"
-.\" commit 6a1d5e2c85d06da35cdfd93f1a27675bfdc3ad8c
-.\" Author: Jiri Slaby <jslaby@suse.cz>
-.\" Date: Wed Mar 24 17:06:58 2010 +0100
-.\"
-.\" rlimits: add rlimit64 structure
-.\"
-The Linux-specific
-.BR prlimit ()
-system call combines and extends the functionality of
-.BR setrlimit ()
-and
-.BR getrlimit ().
-It can be used to both set and get the resource limits of an arbitrary process.
-.P
-The
-.I resource
-argument has the same meaning as for
-.BR setrlimit ()
-and
-.BR getrlimit ().
-.P
-If the
-.I new_limit
-argument is not NULL, then the
-.I rlimit
-structure to which it points is used to set new values for
-the soft and hard limits for
-.IR resource .
-If the
-.I old_limit
-argument is not NULL, then a successful call to
-.BR prlimit ()
-places the previous soft and hard limits for
-.I resource
-in the
-.I rlimit
-structure pointed to by
-.IR old_limit .
-.P
-The
-.I pid
-argument specifies the ID of the process on which the call is to operate.
-If
-.I pid
-is 0, then the call applies to the calling process.
-To set or get the resources of a process other than itself,
-the caller must have the
-.B CAP_SYS_RESOURCE
-capability in the user namespace of the process
-whose resource limits are being changed, or the
-real, effective, and saved set user IDs of the target process
-must match the real user ID of the caller
-.I and
-the real, effective, and saved set group IDs of the target process
-must match the real group ID of the caller.
-.\" FIXME . this permission check is strange
-.\" Asked about this on LKML, 7 Nov 2010
-.\" "Inconsistent credential checking in prlimit() syscall"
-.SH RETURN VALUE
-On success, these system calls return 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-A pointer argument points to a location
-outside the accessible address space.
-.TP
-.B EINVAL
-The value specified in
-.I resource
-is not valid;
-or, for
-.BR setrlimit ()
-or
-.BR prlimit ():
-.I rlim\->rlim_cur
-was greater than
-.IR rlim\->rlim_max .
-.TP
-.B EPERM
-An unprivileged process tried to raise the hard limit; the
-.B CAP_SYS_RESOURCE
-capability is required to do this.
-.TP
-.B EPERM
-The caller tried to increase the hard
-.B RLIMIT_NOFILE
-limit above the maximum defined by
-.I /proc/sys/fs/nr_open
-(see
-.BR proc (5)).
-.TP
-.B EPERM
-.RB ( prlimit ())
-The calling process did not have permission to set limits
-for the process specified by
-.IR pid .
-.TP
-.B ESRCH
-Could not find a process with the ID specified in
-.IR pid .
-.SH ATTRIBUTES
-For an explanation of the terms used in this section, see
-.BR attributes (7).
-.TS
-allbox;
-lbx lb lb
-l l l.
-Interface Attribute Value
-T{
-.na
-.nh
-.BR getrlimit (),
-.BR setrlimit (),
-.BR prlimit ()
-T} Thread safety MT-Safe
-.TE
-.SH STANDARDS
-.TP
-.BR getrlimit ()
-.TQ
-.BR setrlimit ()
-POSIX.1-2008.
-.TP
-.BR prlimit ()
-Linux.
-.P
-.B RLIMIT_MEMLOCK
-and
-.B RLIMIT_NPROC
-derive from BSD and are not specified in POSIX.1;
-they are present on the BSDs and Linux, but on few other implementations.
-.B RLIMIT_RSS
-derives from BSD and is not specified in POSIX.1;
-it is nevertheless present on most implementations.
-.BR \%RLIMIT_MSGQUEUE ,
-.BR RLIMIT_NICE ,
-.BR RLIMIT_RTPRIO ,
-.BR RLIMIT_RTTIME ,
-and
-.B \%RLIMIT_SIGPENDING
-are Linux-specific.
-.SH HISTORY
-.TP
-.BR getrlimit ()
-.TQ
-.BR setrlimit ()
-POSIX.1-2001, SVr4, 4.3BSD.
-.TP
-.BR prlimit ()
-Linux 2.6.36,
-glibc 2.13.
-.SH NOTES
-A child process created via
-.BR fork (2)
-inherits its parent's resource limits.
-Resource limits are preserved across
-.BR execve (2).
-.P
-Resource limits are per-process attributes that are shared
-by all of the threads in a process.
-.P
-Lowering the soft limit for a resource below the process's
-current consumption of that resource will succeed
-(but will prevent the process from further increasing
-its consumption of the resource).
-.P
-One can set the resource limits of the shell using the built-in
-.I ulimit
-command
-.RI ( limit
-in
-.BR csh (1)).
-The shell's resource limits are inherited by the processes that
-it creates to execute commands.
-.P
-Since Linux 2.6.24, the resource limits of any process can be inspected via
-.IR /proc/ pid /limits ;
-see
-.BR proc (5).
-.P
-Ancient systems provided a
-.BR vlimit ()
-function with a similar purpose to
-.BR setrlimit ().
-For backward compatibility, glibc also provides
-.BR vlimit ().
-All new applications should be written using
-.BR setrlimit ().
-.SS C library/kernel ABI differences
-Since glibc 2.13, the glibc
-.BR getrlimit ()
-and
-.BR setrlimit ()
-wrapper functions no longer invoke the corresponding system calls,
-but instead employ
-.BR prlimit (),
-for the reasons described in BUGS.
-.P
-The name of the glibc wrapper function is
-.BR prlimit ();
-the underlying system call is
-.BR prlimit64 ().
-.SH BUGS
-In older Linux kernels, the
-.B SIGXCPU
-and
-.B SIGKILL
-signals delivered when a process encountered the soft and hard
-.B RLIMIT_CPU
-limits were delivered one (CPU) second later than they should have been.
-This was fixed in Linux 2.6.8.
-.P
-In Linux 2.6.x kernels before Linux 2.6.17, a
-.B RLIMIT_CPU
-limit of 0 is wrongly treated as "no limit" (like
-.BR RLIM_INFINITY ).
-Since Linux 2.6.17, setting a limit of 0 does have an effect,
-but is actually treated as a limit of 1 second.
-.\" see http://marc.theaimsgroup.com/?l=linux-kernel&m=114008066530167&w=2
-.P
-A kernel bug means that
-.\" See https://lwn.net/Articles/145008/
-.B RLIMIT_RTPRIO
-does not work in Linux 2.6.12; the problem is fixed in Linux 2.6.13.
-.P
-In Linux 2.6.12, there was an off-by-one mismatch
-between the priority ranges returned by
-.BR getpriority (2)
-and
-.BR RLIMIT_NICE .
-This had the effect that the actual ceiling for the nice value
-was calculated as
-.IR "19\ \-\ rlim_cur" .
-This was fixed in Linux 2.6.13.
-.\" see http://marc.theaimsgroup.com/?l=linux-kernel&m=112256338703880&w=2
-.P
-Since Linux 2.6.12,
-.\" The relevant patch, sent to LKML, seems to be
-.\" http://thread.gmane.org/gmane.linux.kernel/273462
-.\" From: Roland McGrath <roland <at> redhat.com>
-.\" Subject: [PATCH 7/7] make RLIMIT_CPU/SIGXCPU per-process
-.\" Date: 2005-01-23 23:27:46 GMT
-if a process reaches its soft
-.B RLIMIT_CPU
-limit and has a handler installed for
-.BR SIGXCPU ,
-then, in addition to invoking the signal handler,
-the kernel increases the soft limit by one second.
-This behavior repeats if the process continues to consume CPU time,
-until the hard limit is reached,
-at which point the process is killed.
-Other implementations
-.\" Tested Solaris 10, FreeBSD 9, OpenBSD 5.0
-do not change the
-.B RLIMIT_CPU
-soft limit in this manner,
-and the Linux behavior is probably not standards conformant;
-portable applications should avoid relying on this Linux-specific behavior.
-.\" FIXME . https://bugzilla.kernel.org/show_bug.cgi?id=50951
-The Linux-specific
-.B RLIMIT_RTTIME
-limit exhibits the same behavior when the soft limit is encountered.
-.P
-Kernels before Linux 2.4.22 did not diagnose the error
-.B EINVAL
-for
-.BR setrlimit ()
-when
-.I rlim\->rlim_cur
-was greater than
-.IR rlim\->rlim_max .
-.\" d3561f78fd379a7110e46c87964ba7aa4120235c
-.P
-Linux doesn't return an error when an attempt to set
-.B RLIMIT_CPU
-has failed, for compatibility reasons.
-.\"
-.SS Representation of \[dq]large\[dq] resource limit values on 32-bit platforms
-The glibc
-.BR getrlimit ()
-and
-.BR setrlimit ()
-wrapper functions use a 64-bit
-.I rlim_t
-data type, even on 32-bit platforms.
-However, the
-.I rlim_t
-data type used in the
-.BR getrlimit ()
-and
-.BR setrlimit ()
-system calls is a (32-bit)
-.IR "unsigned long" .
-.\" Linux still uses long for limits internally:
-.\" c022a0acad534fd5f5d5f17280f6d4d135e74e81
-.\" kernel/sys.c:do_prlimit() still uses struct rlimit which
-.\" uses kernel_ulong_t for its members, i.e. 32-bit on 32-bit kernel.
-Furthermore, in Linux,
-the kernel represents resource limits on 32-bit platforms as
-.IR "unsigned long" .
-However, a 32-bit data type is not wide enough.
-.\" https://bugzilla.kernel.org/show_bug.cgi?id=5042
-.\" https://www.sourceware.org/bugzilla/show_bug.cgi?id=12201
-The most pertinent limit here is
-.BR \%RLIMIT_FSIZE ,
-which specifies the maximum size to which a file can grow:
-to be useful, this limit must be represented using a type
-that is as wide as the type used to
-represent file offsets\[em]that is, as wide as a 64-bit
-.B off_t
-(assuming a program compiled with
-.IR _FILE_OFFSET_BITS=64 ).
-.P
-To work around this kernel limitation,
-if a program tried to set a resource limit to a value larger than
-can be represented in a 32-bit
-.IR "unsigned long" ,
-then the glibc
-.BR setrlimit ()
-wrapper function silently converted the limit value to
-.BR RLIM_INFINITY .
-In other words, the requested resource limit setting was silently ignored.
-.P
-Since glibc 2.13,
-.\" https://www.sourceware.org/bugzilla/show_bug.cgi?id=12201
-glibc works around the limitations of the
-.BR \%getrlimit ()
-and
-.BR setrlimit ()
-system calls by implementing
-.BR setrlimit ()
-and
-.BR \%getrlimit ()
-as wrapper functions that call
-.BR prlimit ().
-.SH EXAMPLES
-The program below demonstrates the use of
-.BR prlimit ().
-.P
-.\" SRC BEGIN (getrlimit.c)
-.EX
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
-#include <err.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/resource.h>
-#include <time.h>
-\&
-int
-main(int argc, char *argv[])
-{
- pid_t pid;
- struct rlimit old, new;
- struct rlimit *newp;
-\&
- if (!(argc == 2 || argc == 4)) {
- fprintf(stderr, "Usage: %s <pid> [<new\-soft\-limit> "
- "<new\-hard\-limit>]\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- pid = atoi(argv[1]); /* PID of target process */
-\&
- newp = NULL;
- if (argc == 4) {
- new.rlim_cur = atoi(argv[2]);
- new.rlim_max = atoi(argv[3]);
- newp = &new;
- }
-\&
- /* Set CPU time limit of target process; retrieve and display
- previous limit */
-\&
- if (prlimit(pid, RLIMIT_CPU, newp, &old) == \-1)
- err(EXIT_FAILURE, "prlimit\-1");
- printf("Previous limits: soft=%jd; hard=%jd\en",
- (intmax_t) old.rlim_cur, (intmax_t) old.rlim_max);
-\&
- /* Retrieve and display new CPU time limit */
-\&
- if (prlimit(pid, RLIMIT_CPU, NULL, &old) == \-1)
- err(EXIT_FAILURE, "prlimit\-2");
- printf("New limits: soft=%jd; hard=%jd\en",
- (intmax_t) old.rlim_cur, (intmax_t) old.rlim_max);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR prlimit (1),
-.BR dup (2),
-.BR fcntl (2),
-.BR fork (2),
-.BR getrusage (2),
-.BR mlock (2),
-.BR mmap (2),
-.BR open (2),
-.BR quotactl (2),
-.BR sbrk (2),
-.BR shmctl (2),
-.BR malloc (3),
-.BR sigqueue (3),
-.BR ulimit (3),
-.BR core (5),
-.BR capabilities (7),
-.BR cgroups (7),
-.BR credentials (7),
-.BR signal (7)
diff --git a/man2/getrusage.2 b/man2/getrusage.2
deleted file mode 100644
index 2ae9dafa4..000000000
--- a/man2/getrusage.2
+++ /dev/null
@@ -1,250 +0,0 @@
-'\" t
-.\" Copyright (c) 1992 Drew Eckhardt, March 28, 1992
-.\" and Copyright (c) 2002 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2004-11-16 -- mtk: the getrlimit.2 page, which formerly included
-.\" coverage of getrusage(2), has been split, so that the latter is
-.\" now covered in its own getrusage.2. For older details of change
-.\" history, etc., see getrlimit.2
-.\"
-.\" Modified 2004-11-16, mtk, Noted that the nonconformance
-.\" when SIGCHLD is being ignored is fixed in Linux 2.6.9.
-.\" 2008-02-22, Sripathi Kodi <sripathik@in.ibm.com>: Document RUSAGE_THREAD
-.\" 2008-05-25, mtk, clarify RUSAGE_CHILDREN + other clean-ups.
-.\" 2010-05-24, Mark Hills <mark@pogo.org.uk>: Description of fields,
-.\" document ru_maxrss
-.\" 2010-05-24, mtk, enhanced description of various fields
-.\"
-.TH getrusage 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getrusage \- get resource usage
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/resource.h>
-.P
-.BI "int getrusage(int " who ", struct rusage *" usage );
-.fi
-.SH DESCRIPTION
-.BR getrusage ()
-returns resource usage measures for
-.IR who ,
-which can be one of the following:
-.TP
-.B RUSAGE_SELF
-Return resource usage statistics for the calling process,
-which is the sum of resources used by all threads in the process.
-.TP
-.B RUSAGE_CHILDREN
-Return resource usage statistics for all children of the
-calling process that have terminated and been waited for.
-These statistics will include the resources used by grandchildren,
-and further removed descendants,
-if all of the intervening descendants waited on their terminated children.
-.TP
-.BR RUSAGE_THREAD " (since Linux 2.6.26)"
-Return resource usage statistics for the calling thread.
-The
-.B _GNU_SOURCE
-feature test macro must be defined (before including
-.I any
-header file)
-in order to obtain the definition of this constant from
-.IR <sys/resource.h> .
-.P
-The resource usages are returned in the structure pointed to by
-.IR usage ,
-which has the following form:
-.P
-.in +4n
-.EX
-struct rusage {
- struct timeval ru_utime; /* user CPU time used */
- struct timeval ru_stime; /* system CPU time used */
- long ru_maxrss; /* maximum resident set size */
- long ru_ixrss; /* integral shared memory size */
- long ru_idrss; /* integral unshared data size */
- long ru_isrss; /* integral unshared stack size */
- long ru_minflt; /* page reclaims (soft page faults) */
- long ru_majflt; /* page faults (hard page faults) */
- long ru_nswap; /* swaps */
- long ru_inblock; /* block input operations */
- long ru_oublock; /* block output operations */
- long ru_msgsnd; /* IPC messages sent */
- long ru_msgrcv; /* IPC messages received */
- long ru_nsignals; /* signals received */
- long ru_nvcsw; /* voluntary context switches */
- long ru_nivcsw; /* involuntary context switches */
-};
-.EE
-.in
-.P
-Not all fields are completed;
-unmaintained fields are set to zero by the kernel.
-(The unmaintained fields are provided for compatibility with other systems,
-and because they may one day be supported on Linux.)
-The fields are interpreted as follows:
-.TP
-.I ru_utime
-This is the total amount of time spent executing in user mode,
-expressed in a
-.I timeval
-structure (seconds plus microseconds).
-.TP
-.I ru_stime
-This is the total amount of time spent executing in kernel mode,
-expressed in a
-.I timeval
-structure (seconds plus microseconds).
-.TP
-.IR ru_maxrss " (since Linux 2.6.32)"
-This is the maximum resident set size used (in kilobytes).
-For
-.BR RUSAGE_CHILDREN ,
-this is the resident set size of the largest child, not the maximum
-resident set size of the process tree.
-.TP
-.IR ru_ixrss " (unmaintained)"
-This field is currently unused on Linux.
-.\" On some systems,
-.\" this is the integral of the text segment memory consumption,
-.\" expressed in kilobyte-seconds.
-.TP
-.IR ru_idrss " (unmaintained)"
-This field is currently unused on Linux.
-.\" On some systems, this is the integral of the data segment memory consumption,
-.\" expressed in kilobyte-seconds.
-.TP
-.IR ru_isrss " (unmaintained)"
-This field is currently unused on Linux.
-.\" On some systems, this is the integral of the stack memory consumption,
-.\" expressed in kilobyte-seconds.
-.TP
-.I ru_minflt
-The number of page faults serviced without any I/O activity; here
-I/O activity is avoided by \*(lqreclaiming\*(rq a page frame from
-the list of pages awaiting reallocation.
-.TP
-.I ru_majflt
-The number of page faults serviced that required I/O activity.
-.TP
-.IR ru_nswap " (unmaintained)"
-This field is currently unused on Linux.
-.\" On some systems, this is the number of swaps out of physical memory.
-.TP
-.IR ru_inblock " (since Linux 2.6.22)"
-The number of times the filesystem had to perform input.
-.TP
-.IR ru_oublock " (since Linux 2.6.22)"
-The number of times the filesystem had to perform output.
-.TP
-.IR ru_msgsnd " (unmaintained)"
-This field is currently unused on Linux.
-.\" On FreeBSD 6.2, this appears to measure messages sent over sockets
-.\" On some systems,
-.\" this field records the number of messages sent over sockets.
-.TP
-.IR ru_msgrcv " (unmaintained)"
-This field is currently unused on Linux.
-.\" On FreeBSD 6.2, this appears to measure messages received over sockets
-.\" On some systems,
-.\" this field records the number of messages received over sockets.
-.TP
-.IR ru_nsignals " (unmaintained)"
-This field is currently unused on Linux.
-.\" On some systems, this field records the number of signals received.
-.TP
-.IR ru_nvcsw " (since Linux 2.6)"
-The number of times a context switch resulted due to a process
-voluntarily giving up the processor before its time slice was
-completed (usually to await availability of a resource).
-.TP
-.IR ru_nivcsw " (since Linux 2.6)"
-The number of times a context switch resulted due to a higher
-priority process becoming runnable or because the current process
-exceeded its time slice.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I usage
-points outside the accessible address space.
-.TP
-.B EINVAL
-.I who
-is invalid.
-.SH ATTRIBUTES
-For an explanation of the terms used in this section, see
-.BR attributes (7).
-.TS
-allbox;
-lbx lb lb
-l l l.
-Interface Attribute Value
-T{
-.na
-.nh
-.BR getrusage ()
-T} Thread safety MT-Safe
-.TE
-.SH STANDARDS
-POSIX.1-2008.
-.P
-POSIX.1 specifies
-.BR getrusage (),
-but specifies only the fields
-.I ru_utime
-and
-.IR ru_stime .
-.P
-.B RUSAGE_THREAD
-is Linux-specific.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.P
-Before Linux 2.6.9, if the disposition of
-.B SIGCHLD
-is set to
-.B SIG_IGN
-then the resource usages of child processes
-are automatically included in the value returned by
-.BR RUSAGE_CHILDREN ,
-although POSIX.1-2001 explicitly prohibits this.
-This nonconformance is rectified in Linux 2.6.9 and later.
-.\" See the description of getrusage() in XSH.
-.\" A similar statement was also in SUSv2.
-.P
-The structure definition shown at the start of this page
-was taken from 4.3BSD Reno.
-.P
-Ancient systems provided a
-.BR vtimes ()
-function with a similar purpose to
-.BR getrusage ().
-For backward compatibility, glibc (up until version 2.32) also provides
-.BR vtimes ().
-All new applications should be written using
-.BR getrusage ().
-(Since version 2.33, glibc no longer provides a
-.BR vtimes ()
-implementation.)
-.SH NOTES
-Resource usage metrics are preserved across an
-.BR execve (2).
-.SH SEE ALSO
-.BR clock_gettime (2),
-.BR getrlimit (2),
-.BR times (2),
-.BR wait (2),
-.BR wait4 (2),
-.BR clock (3),
-.BR proc_pid_stat (5),
-.BR proc_pid_io (5)
diff --git a/man2/getsid.2 b/man2/getsid.2
deleted file mode 100644
index c296a92d2..000000000
--- a/man2/getsid.2
+++ /dev/null
@@ -1,75 +0,0 @@
-.\" Copyright (C) 1996 Andries Brouwer (aeb@cwi.nl)
-.\" and Copyright (C) 2016 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified Thu Oct 31 14:18:40 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-12-17, aeb
-.TH getsid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getsid \- get session ID
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "pid_t getsid(pid_t" " pid" );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR getsid ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.12: */ _POSIX_C_SOURCE >= 200809L
-.fi
-.SH DESCRIPTION
-.BR getsid ()
-returns the session ID of the process with process ID
-.IR pid .
-If
-.I pid
-is 0,
-.BR getsid ()
-returns the session ID of the calling process.
-.SH RETURN VALUE
-On success, a session ID is returned.
-On error, \fI(pid_t)\ \-1\fP is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EPERM
-A process with process ID
-.I pid
-exists, but it is not in the same session as the calling process,
-and the implementation considers this an error.
-.TP
-.B ESRCH
-No process with process ID
-.I pid
-was found.
-.SH VERSIONS
-Linux does not return
-.BR EPERM .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-Linux 2.0.
-.\" Linux has this system call since Linux 1.3.44.
-.\" There is libc support since libc 5.2.19.
-.SH NOTES
-See
-.BR credentials (7)
-for a description of sessions and session IDs.
-.SH SEE ALSO
-.BR getpgid (2),
-.BR setsid (2),
-.BR credentials (7)
diff --git a/man2/getsockname.2 b/man2/getsockname.2
deleted file mode 100644
index 27a0b7e61..000000000
--- a/man2/getsockname.2
+++ /dev/null
@@ -1,85 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)getsockname.2 6.4 (Berkeley) 3/10/91
-.\"
-.\" Modified Sat Jul 24 16:30:29 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Tue Oct 22 00:22:35 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Sun Mar 28 21:26:46 1999 by Andries Brouwer <aeb@cwi.nl>
-.\"
-.TH getsockname 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getsockname \- get socket name
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int getsockname(int " sockfd ", struct sockaddr *restrict " addr ,
-.BI " socklen_t *restrict " addrlen );
-.fi
-.SH DESCRIPTION
-.BR getsockname ()
-returns the current address to which the socket
-.I sockfd
-is bound, in the buffer pointed to by
-.IR addr .
-The
-.I addrlen
-argument should be initialized to indicate
-the amount of space (in bytes) pointed to by
-.IR addr .
-On return it contains the actual size of the socket address.
-.P
-The returned address is truncated if the buffer provided is too small;
-in this case,
-.I addrlen
-will return a value greater than was supplied to the call.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-The argument
-.I sockfd
-is not a valid file descriptor.
-.TP
-.B EFAULT
-The
-.I addr
-argument points to memory not in a valid part of the
-process address space.
-.TP
-.B EINVAL
-.I addrlen
-is invalid (e.g., is negative).
-.TP
-.B ENOBUFS
-Insufficient resources were available in the system
-to perform the operation.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.4BSD
-(first appeared in 4.2BSD).
-.\" SVr4 documents additional ENOMEM
-.\" and ENOSR error codes.
-.SH SEE ALSO
-.BR bind (2),
-.BR socket (2),
-.BR getifaddrs (3),
-.BR ip (7),
-.BR socket (7),
-.BR unix (7)
diff --git a/man2/getsockopt.2 b/man2/getsockopt.2
deleted file mode 100644
index d0a24db76..000000000
--- a/man2/getsockopt.2
+++ /dev/null
@@ -1,172 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" $Id: getsockopt.2,v 1.1 1999/05/24 14:57:04 freitag Exp $
-.\"
-.\" Modified Sat Jul 24 16:19:32 1993 by Rik Faith (faith@cs.unc.edu)
-.\" Modified Mon Apr 22 02:29:06 1996 by Martin Schulze (joey@infodrom.north.de)
-.\" Modified Tue Aug 27 10:52:51 1996 by Andries Brouwer (aeb@cwi.nl)
-.\" Modified Thu Jan 23 13:29:34 1997 by Andries Brouwer (aeb@cwi.nl)
-.\" Modified Sun Mar 28 21:26:46 1999 by Andries Brouwer (aeb@cwi.nl)
-.\" Modified 1999 by Andi Kleen <ak@muc.de>.
-.\" Removed most stuff because it is in socket.7 now.
-.\"
-.TH getsockopt 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getsockopt, setsockopt \- get and set options on sockets
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int getsockopt(int " sockfd ", int " level ", int " optname ,
-.BI " void " optval "[restrict *." optlen ],
-.BI " socklen_t *restrict " optlen );
-.BI "int setsockopt(int " sockfd ", int " level ", int " optname ,
-.BI " const void " optval [. optlen ],
-.BI " socklen_t " optlen );
-.fi
-.SH DESCRIPTION
-.BR getsockopt ()
-and
-.BR setsockopt ()
-manipulate options for the socket referred to by the file descriptor
-.IR sockfd .
-Options may exist at multiple
-protocol levels; they are always present at the uppermost
-socket level.
-.P
-When manipulating socket options, the level at which the
-option resides and the name of the option must be specified.
-To manipulate options at the sockets API level,
-.I level
-is specified as
-.BR SOL_SOCKET .
-To manipulate options at any
-other level the protocol number of the appropriate protocol
-controlling the option is supplied.
-For example,
-to indicate that an option is to be interpreted by the
-.B TCP
-protocol,
-.I level
-should be set to the protocol number of
-.BR TCP ;
-see
-.BR getprotoent (3).
-.P
-The arguments
-.I optval
-and
-.I optlen
-are used to access option values for
-.BR setsockopt ().
-For
-.BR getsockopt ()
-they identify a buffer in which the value for the
-requested option(s) is to be returned.
-For
-.BR getsockopt (),
-.I optlen
-is a value-result argument, initially containing the
-size of the buffer pointed to by
-.IR optval ,
-and modified on return to indicate the actual size of
-the value returned.
-If no option value is to be supplied or returned,
-.I optval
-may be NULL.
-.P
-.I Optname
-and any specified options are passed uninterpreted to the appropriate
-protocol module for interpretation.
-The include file
-.I <sys/socket.h>
-contains definitions for socket level options, described below.
-Options at
-other protocol levels vary in format and name; consult the appropriate
-entries in section 7 of the manual.
-.P
-Most socket-level options utilize an
-.I int
-argument for
-.IR optval .
-For
-.BR setsockopt (),
-the argument should be nonzero to enable a boolean option, or zero if the
-option is to be disabled.
-.P
-For a description of the available socket options see
-.BR socket (7)
-and the appropriate protocol man pages.
-.SH RETURN VALUE
-On success, zero is returned for the standard options.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-Netfilter allows the programmer
-to define custom socket options with associated handlers; for such
-options, the return value on success is the value returned by the handler.
-.SH ERRORS
-.TP
-.B EBADF
-The argument
-.I sockfd
-is not a valid file descriptor.
-.TP
-.B EFAULT
-The address pointed to by
-.I optval
-is not in a valid part of the process address space.
-For
-.BR getsockopt (),
-this error may also be returned if
-.I optlen
-is not in a valid part of the process address space.
-.TP
-.B EINVAL
-.I optlen
-is invalid in
-.BR setsockopt ().
-In some cases this error can also occur for an invalid value in
-.I optval
-(e.g., for the
-.B IP_ADD_MEMBERSHIP
-option described in
-.BR ip (7)).
-.TP
-.B ENOPROTOOPT
-The option is unknown at the level indicated.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001,
-SVr4, 4.4BSD (first appeared in 4.2BSD).
-.\" SVr4 documents additional ENOMEM and ENOSR error codes, but does
-.\" not document the
-.\" .BR SO_SNDLOWAT ", " SO_RCVLOWAT ", " SO_SNDTIMEO ", " SO_RCVTIMEO
-.\" options
-.SH BUGS
-Several of the socket options should be handled at lower levels of the
-system.
-.SH SEE ALSO
-.BR ioctl (2),
-.BR socket (2),
-.BR getprotoent (3),
-.BR protocols (5),
-.BR ip (7),
-.BR packet (7),
-.BR socket (7),
-.BR tcp (7),
-.BR udp (7),
-.BR unix (7)
diff --git a/man2/gettid.2 b/man2/gettid.2
deleted file mode 100644
index 2a8f932f6..000000000
--- a/man2/gettid.2
+++ /dev/null
@@ -1,74 +0,0 @@
-.\" Copyright 2003 Abhijit Menon-Sen <ams@wiw.org>
-.\" and Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH gettid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-gettid \- get thread identification
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #define _GNU_SOURCE
-.B #include <unistd.h>
-.P
-.B pid_t gettid(void);
-.fi
-.SH DESCRIPTION
-.BR gettid ()
-returns the caller's thread ID (TID).
-In a single-threaded process, the thread ID
-is equal to the process ID (PID, as returned by
-.BR getpid (2)).
-In a multithreaded process, all threads
-have the same PID, but each one has a unique TID.
-For further details, see the discussion of
-.B CLONE_THREAD
-in
-.BR clone (2).
-.SH RETURN VALUE
-On success, returns the thread ID of the calling thread.
-.SH ERRORS
-This call is always successful.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.4.11,
-glibc 2.30.
-.SH NOTES
-The thread ID returned by this call is not the same thing as a
-POSIX thread ID (i.e., the opaque value returned by
-.BR pthread_self (3)).
-.P
-In a new thread group created by a
-.BR clone (2)
-call that does not specify the
-.B CLONE_THREAD
-flag (or, equivalently, a new process created by
-.BR fork (2)),
-the new process is a thread group leader,
-and its thread group ID (the value returned by
-.BR getpid (2))
-is the same as its thread ID (the value returned by
-.BR gettid ()).
-.SH SEE ALSO
-.BR capget (2),
-.BR clone (2),
-.BR fcntl (2),
-.BR fork (2),
-.BR get_robust_list (2),
-.BR getpid (2),
-.\" .BR kcmp (2),
-.BR ioprio_set (2),
-.\" .BR move_pages (2),
-.\" .BR migrate_pages (2),
-.BR perf_event_open (2),
-.\" .BR process_vm_readv (2),
-.\" .BR ptrace (2),
-.BR sched_setaffinity (2),
-.BR sched_setparam (2),
-.BR sched_setscheduler (2),
-.BR tgkill (2),
-.BR timer_create (2)
diff --git a/man2/gettimeofday.2 b/man2/gettimeofday.2
deleted file mode 100644
index e0231e230..000000000
--- a/man2/gettimeofday.2
+++ /dev/null
@@ -1,296 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt (michael@moria.de)
-.\" Modified 1993-07-23 by Rik Faith (faith@cs.unc.edu)
-.\" Modified 1994-08-21 by Michael Chastain (mec@shell.portal.com):
-.\" Fixed necessary '#include' lines.
-.\" Modified 1995-04-15 by Michael Chastain (mec@shell.portal.com):
-.\" Added reference to adjtimex.
-.\" Removed some nonsense lines pointed out by Urs Thuermann,
-.\" (urs@isnogud.escape.de), aeb, 950722.
-.\" Modified 1997-01-14 by Austin Donnelly (and1000@debian.org):
-.\" Added return values section, and bit on EFAULT
-.\" Added clarification on timezone, aeb, 971210.
-.\" Removed "#include <unistd.h>", aeb, 010316.
-.\" Modified, 2004-05-27 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirement.
-.\"
-.TH gettimeofday 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-gettimeofday, settimeofday \- get / set time
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/time.h>
-.P
-.BI "int gettimeofday(struct timeval *restrict " tv ,
-.BI " struct timezone *_Nullable restrict " tz );
-.BI "int settimeofday(const struct timeval *" tv ,
-.BI " const struct timezone *_Nullable " tz );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR settimeofday ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- glibc 2.19 and earlier:
- _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-The functions
-.BR gettimeofday ()
-and
-.BR settimeofday ()
-can get and set the time as well as a timezone.
-.P
-The
-.I tv
-argument is a
-.I struct timeval
-(as specified in
-.IR <sys/time.h> ):
-.P
-.in +4n
-.EX
-struct timeval {
- time_t tv_sec; /* seconds */
- suseconds_t tv_usec; /* microseconds */
-};
-.EE
-.in
-.P
-and gives the number of seconds and microseconds since the Epoch (see
-.BR time (2)).
-.P
-The
-.I tz
-argument is a
-.IR "struct timezone" :
-.P
-.in +4n
-.EX
-struct timezone {
- int tz_minuteswest; /* minutes west of Greenwich */
- int tz_dsttime; /* type of DST correction */
-};
-.EE
-.in
-.P
-If either
-.I tv
-or
-.I tz
-is NULL, the corresponding structure is not set or returned.
-.\" FIXME . The compilation warning looks to be going away in glibc 2.17
-.\" see glibc commit 4b7634a5e03b0da6f8875de9d3f74c1cf6f2a6e8
-(However, compilation warnings will result if
-.I tv
-is NULL.)
-.\" The following is covered under EPERM below:
-.\" .P
-.\" Only the superuser may use
-.\" .BR settimeofday ().
-.P
-The use of the
-.I timezone
-structure is obsolete; the
-.I tz
-argument should normally be specified as NULL.
-(See NOTES below.)
-.P
-Under Linux, there are some peculiar "warp clock" semantics associated
-with the
-.BR settimeofday ()
-system call if on the very first call (after booting)
-that has a non-NULL
-.I tz
-argument, the
-.I tv
-argument is NULL and the
-.I tz_minuteswest
-field is nonzero.
-(The
-.I tz_dsttime
-field should be zero for this case.)
-In such a case it is assumed that the CMOS clock
-is on local time, and that it has to be incremented by this amount
-to get UTC system time.
-No doubt it is a bad idea to use this feature.
-.SH RETURN VALUE
-.BR gettimeofday ()
-and
-.BR settimeofday ()
-return 0 for success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-One of
-.I tv
-or
-.I tz
-pointed outside the accessible address space.
-.TP
-.B EINVAL
-.RB ( settimeofday ()):
-.I timezone
-is invalid.
-.TP
-.B EINVAL
-.RB ( settimeofday ()):
-.I tv.tv_sec
-is negative or
-.I tv.tv_usec
-is outside the range [0, 999,999].
-.TP
-.BR EINVAL " (since Linux 4.3)"
-.\" commit e1d7ba8735551ed79c7a0463a042353574b96da3
-.RB ( settimeofday ()):
-An attempt was made to set the time to a value less than
-the current value of the
-.B CLOCK_MONOTONIC
-clock (see
-.BR clock_gettime (2)).
-.TP
-.B EPERM
-The calling process has insufficient privilege to call
-.BR settimeofday ();
-under Linux the
-.B CAP_SYS_TIME
-capability is required.
-.SH VERSIONS
-.SS C library/kernel differences
-On some architectures, an implementation of
-.BR gettimeofday ()
-is provided in the
-.BR vdso (7).
-.P
-The kernel accepts NULL for both
-.I tv
-and
-.IR tz .
-The timezone argument is ignored by glibc and musl,
-and not passed to/from the kernel.
-Android's bionic passes the timezone argument to/from the kernel,
-but Android does not update the kernel timezone
-based on the device timezone in Settings,
-so the kernel's timezone is typically UTC.
-.SH STANDARDS
-.TP
-.BR gettimeofday ()
-POSIX.1-2008 (obsolete).
-.TP
-.BR settimeofday ()
-None.
-.SH HISTORY
-SVr4, 4.3BSD.
-POSIX.1-2001 describes
-.BR gettimeofday ()
-but not
-.BR settimeofday ().
-POSIX.1-2008 marks
-.BR gettimeofday ()
-as obsolete, recommending the use of
-.BR clock_gettime (2)
-instead.
-.P
-Traditionally, the fields of
-.I struct timeval
-were of type
-.IR long .
-.\"
-.SS The tz_dsttime field
-On a non-Linux kernel, with glibc, the
-.I tz_dsttime
-field of
-.I struct timezone
-will be set to a nonzero value by
-.BR gettimeofday ()
-if the current timezone has ever had or will have a daylight saving
-rule applied.
-In this sense it exactly mirrors the meaning of
-.BR daylight (3)
-for the current zone.
-On Linux, with glibc, the setting of the
-.I tz_dsttime
-field of
-.I struct timezone
-has never been used by
-.BR settimeofday ()
-or
-.BR gettimeofday ().
-.\" it has not
-.\" been and will not be supported by libc or glibc.
-.\" Each and every occurrence of this field in the kernel source
-.\" (other than the declaration) is a bug.
-Thus, the following is purely of historical interest.
-.P
-On old systems, the field
-.I tz_dsttime
-contains a symbolic constant (values are given below)
-that indicates in which part of the year Daylight Saving Time
-is in force.
-(Note: this value is constant throughout the year:
-it does not indicate that DST is in force, it just selects an
-algorithm.)
-The daylight saving time algorithms defined are as follows:
-.P
-.in +4n
-.EX
-\fBDST_NONE\fP /* not on DST */
-\fBDST_USA\fP /* USA style DST */
-\fBDST_AUST\fP /* Australian style DST */
-\fBDST_WET\fP /* Western European DST */
-\fBDST_MET\fP /* Middle European DST */
-\fBDST_EET\fP /* Eastern European DST */
-\fBDST_CAN\fP /* Canada */
-\fBDST_GB\fP /* Great Britain and Eire */
-\fBDST_RUM\fP /* Romania */
-\fBDST_TUR\fP /* Turkey */
-\fBDST_AUSTALT\fP /* Australian style with shift in 1986 */
-.EE
-.in
-.P
-Of course it turned out that the period in which
-Daylight Saving Time is in force cannot be given
-by a simple algorithm, one per country; indeed,
-this period is determined by unpredictable political
-decisions.
-So this method of representing timezones
-has been abandoned.
-.SH NOTES
-The time returned by
-.BR gettimeofday ()
-.I is
-affected by discontinuous jumps in the system time
-(e.g., if the system administrator manually changes the system time).
-If you need a monotonically increasing clock, see
-.BR clock_gettime (2).
-.P
-Macros for operating on
-.I timeval
-structures are described in
-.BR timeradd (3).
-.SH SEE ALSO
-.BR date (1),
-.BR adjtimex (2),
-.BR clock_gettime (2),
-.BR time (2),
-.BR ctime (3),
-.BR ftime (3),
-.BR timeradd (3),
-.BR capabilities (7),
-.BR time (7),
-.BR vdso (7),
-.BR hwclock (8)
diff --git a/man2/getuid.2 b/man2/getuid.2
deleted file mode 100644
index 700571729..000000000
--- a/man2/getuid.2
+++ /dev/null
@@ -1,80 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Historical remark, aeb, 2004-06-05
-.TH getuid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getuid, geteuid \- get user identity
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B uid_t getuid(void);
-.B uid_t geteuid(void);
-.fi
-.SH DESCRIPTION
-.BR getuid ()
-returns the real user ID of the calling process.
-.P
-.BR geteuid ()
-returns the effective user ID of the calling process.
-.SH ERRORS
-These functions are always successful
-and never modify
-.\" https://www.austingroupbugs.net/view.php?id=511
-.\" 0000511: getuid and friends should not modify errno
-.IR errno .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.3BSD.
-.P
-In UNIX\ V6 the
-.BR getuid ()
-call returned
-.IR "(euid << 8) + uid" .
-UNIX\ V7 introduced separate calls
-.BR getuid ()
-and
-.BR geteuid ().
-.P
-The original Linux
-.BR getuid ()
-and
-.BR geteuid ()
-system calls supported only 16-bit user IDs.
-Subsequently, Linux 2.4 added
-.BR getuid32 ()
-and
-.BR geteuid32 (),
-supporting 32-bit IDs.
-The glibc
-.BR getuid ()
-and
-.BR geteuid ()
-wrapper functions transparently deal with the variations across kernel versions.
-.P
-On Alpha, instead of a pair of
-.BR getuid ()
-and
-.BR geteuid ()
-system calls, a single
-.BR getxuid ()
-system call is provided, which returns a pair of real and effective UIDs.
-The glibc
-.BR getuid ()
-and
-.BR geteuid ()
-wrapper functions transparently deal with this.
-See
-.BR syscall (2)
-for details regarding register mapping.
-.SH SEE ALSO
-.BR getresuid (2),
-.BR setreuid (2),
-.BR setuid (2),
-.BR credentials (7)
diff --git a/man2/getuid32.2 b/man2/getuid32.2
deleted file mode 100644
index 165cfe1d0..000000000
--- a/man2/getuid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getuid.2
diff --git a/man2/getunwind.2 b/man2/getunwind.2
deleted file mode 100644
index 4abecc108..000000000
--- a/man2/getunwind.2
+++ /dev/null
@@ -1,87 +0,0 @@
-.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
-.\" Written by Marcela Maslanova <mmaslano@redhat.com>
-.\" and Copyright 2013, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH getunwind 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getunwind \- copy the unwind data to caller's buffer
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <linux/unwind.h>
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "[[deprecated]] long syscall(SYS_getunwind, void " buf [. buf_size ],
-.BI " size_t " buf_size );
-.fi
-.SH DESCRIPTION
-.I Note: this system call is obsolete.
-.P
-The
-IA-64-specific
-.BR getunwind ()
-system call copies the kernel's call frame
-unwind data into the buffer pointed to by
-.I buf
-and returns the size of the unwind data;
-this data describes the gate page (kernel code that
-is mapped into user space).
-.P
-The size of the buffer
-.I buf
-is specified in
-.IR buf_size .
-The data is copied only if
-.I buf_size
-is greater than or equal to the size of the unwind data and
-.I buf
-is not NULL;
-otherwise, no data is copied, and the call succeeds,
-returning the size that would be needed to store the unwind data.
-.P
-The first part of the unwind data contains an unwind table.
-The rest contains the associated unwind information, in no particular order.
-The unwind table contains entries of the following form:
-.P
-.in +4n
-.EX
-u64 start; (64\-bit address of start of function)
-u64 end; (64\-bit address of end of function)
-u64 info; (BUF\-relative offset to unwind info)
-.EE
-.in
-.P
-An entry whose
-.I start
-value is zero indicates the end of the table.
-For more information about the format, see the
-.I IA-64 Software Conventions and Runtime Architecture
-manual.
-.SH RETURN VALUE
-On success,
-.BR getunwind ()
-returns the size of the unwind data.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.BR getunwind ()
-fails with the error
-.B EFAULT
-if the unwind info can't be stored in the space specified by
-.IR buf .
-.SH STANDARDS
-Linux on IA-64.
-.SH HISTORY
-Linux 2.4.
-.P
-This system call has been deprecated.
-The modern way to obtain the kernel's unwind data is via the
-.BR vdso (7).
-.SH SEE ALSO
-.BR getauxval (3)
diff --git a/man2/getxattr.2 b/man2/getxattr.2
deleted file mode 100644
index f0b872f8c..000000000
--- a/man2/getxattr.2
+++ /dev/null
@@ -1,143 +0,0 @@
-.\" Copyright (C) Andreas Gruenbacher, February 2001
-.\" Copyright (C) Silicon Graphics Inc, September 2001
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH getxattr 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-getxattr, lgetxattr, fgetxattr \- retrieve an extended attribute value
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/xattr.h>
-.P
-.BI "ssize_t getxattr(const char *" path ", const char *" name ,
-.BI " void " value [. size "], size_t " size );
-.BI "ssize_t lgetxattr(const char *" path ", const char *" name ,
-.BI " void " value [. size "], size_t " size );
-.BI "ssize_t fgetxattr(int " fd ", const char *" name ,
-.BI " void " value [. size "], size_t " size );
-.fi
-.SH DESCRIPTION
-Extended attributes are
-.IR name : value
-pairs associated with inodes (files, directories, symbolic links, etc.).
-They are extensions to the normal attributes which are associated
-with all inodes in the system (i.e., the
-.BR stat (2)
-data).
-A complete overview of extended attributes concepts can be found in
-.BR xattr (7).
-.P
-.BR getxattr ()
-retrieves the value of the extended attribute identified by
-.I name
-and associated with the given
-.I path
-in the filesystem.
-The attribute value is placed in the buffer pointed to by
-.IR value ;
-.I size
-specifies the size of that buffer.
-The return value of the call is the number of bytes placed in
-.IR value .
-.P
-.BR lgetxattr ()
-is identical to
-.BR getxattr (),
-except in the case of a symbolic link, where the link itself is
-interrogated, not the file that it refers to.
-.P
-.BR fgetxattr ()
-is identical to
-.BR getxattr (),
-only the open file referred to by
-.I fd
-(as returned by
-.BR open (2))
-is interrogated in place of
-.IR path .
-.P
-An extended attribute
-.I name
-is a null-terminated string.
-The name includes a namespace prefix; there may be several, disjoint
-namespaces associated with an individual inode.
-The value of an extended attribute is a chunk of arbitrary textual or
-binary data that was assigned using
-.BR setxattr (2).
-.P
-If
-.I size
-is specified as zero, these calls return the current size of the
-named extended attribute (and leave
-.I value
-unchanged).
-This can be used to determine the size of the buffer that
-should be supplied in a subsequent call.
-(But, bear in mind that there is a possibility that the
-attribute value may change between the two calls,
-so that it is still necessary to check the return status
-from the second call.)
-.SH RETURN VALUE
-On success, these calls return a nonnegative value which is
-the size (in bytes) of the extended attribute value.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B E2BIG
-The size of the attribute value is larger than the maximum size allowed; the
-attribute cannot be retrieved.
-This can happen on filesystems that support
-very large attribute values such as NFSv4, for example.
-.TP
-.B ENODATA
-The named attribute does not exist, or the process has no access to
-this attribute.
-.\" .RB ( ENOATTR
-.\" is defined to be a synonym for
-.\" .BR ENODATA
-.\" in
-.\" .IR <attr/attributes.h> .)
-.TP
-.B ENOTSUP
-Extended attributes are not supported by the filesystem, or are disabled.
-.TP
-.B ERANGE
-The
-.I size
-of the
-.I value
-buffer is too small to hold the result.
-.P
-In addition, the errors documented in
-.BR stat (2)
-can also occur.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.4,
-glibc 2.3.
-.\" .SH AUTHORS
-.\" Andreas Gruenbacher,
-.\" .RI < a.gruenbacher@computer.org >
-.\" and the SGI XFS development team,
-.\" .RI < linux-xfs@oss.sgi.com >.
-.\" Please send any bug reports or comments to these addresses.
-.SH EXAMPLES
-See
-.BR listxattr (2).
-.SH SEE ALSO
-.BR getfattr (1),
-.BR setfattr (1),
-.BR listxattr (2),
-.BR open (2),
-.BR removexattr (2),
-.BR setxattr (2),
-.BR stat (2),
-.BR symlink (7),
-.BR xattr (7)
diff --git a/man2/gtty.2 b/man2/gtty.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/gtty.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/idle.2 b/man2/idle.2
deleted file mode 100644
index a63c17ce1..000000000
--- a/man2/idle.2
+++ /dev/null
@@ -1,44 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\" Portions extracted from linux/mm/swap.c:
-.\" Copyright (C) 1991, 1992 Linus Torvalds
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
-.\" Added text about calling restriction (new in Linux 1.1.20 I believe).
-.\" N.B. calling "idle" from user process used to hang process!
-.\" Modified Thu Oct 31 14:41:15 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" "
-.TH idle 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-idle \- make process 0 idle
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B [[deprecated]] int idle(void);
-.fi
-.SH DESCRIPTION
-.BR idle ()
-is an internal system call used during bootstrap.
-It marks the process's pages as swappable, lowers its priority,
-and enters the main scheduling loop.
-.BR idle ()
-never returns.
-.P
-Only process 0 may call
-.BR idle ().
-Any user process, even a process with superuser permission,
-will receive
-.BR EPERM .
-.SH RETURN VALUE
-.BR idle ()
-never returns for process 0, and always returns \-1 for a user process.
-.SH ERRORS
-.TP
-.B EPERM
-Always, for a user process.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Removed in Linux 2.3.13.
diff --git a/man2/inb.2 b/man2/inb.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/inb.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/inb_p.2 b/man2/inb_p.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/inb_p.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/init_module.2 b/man2/init_module.2
deleted file mode 100644
index 5055f2546..000000000
--- a/man2/init_module.2
+++ /dev/null
@@ -1,393 +0,0 @@
-.\" Copyright (C) 2012 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" A few fragments remain from a version
-.\" Copyright (C) 1996 Free Software Foundation, Inc.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH init_module 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-init_module, finit_module \- load a kernel module
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/module.h>" " /* Definition of " MODULE_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_init_module, void " module_image [. len "], \
-unsigned long " len ,
-.BI " const char *" param_values );
-.BI "int syscall(SYS_finit_module, int " fd ,
-.BI " const char *" param_values ", int " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR init_module ()
-loads an ELF image into kernel space,
-performs any necessary symbol relocations,
-initializes module parameters to values provided by the caller,
-and then runs the module's
-.I init
-function.
-This system call requires privilege.
-.P
-The
-.I module_image
-argument points to a buffer containing the binary image
-to be loaded;
-.I len
-specifies the size of that buffer.
-The module image should be a valid ELF image, built for the running kernel.
-.P
-The
-.I param_values
-argument is a string containing space-delimited specifications of the
-values for module parameters (defined inside the module using
-.BR module_param ()
-and
-.BR module_param_array ()).
-The kernel parses this string and initializes the specified
-parameters.
-Each of the parameter specifications has the form:
-.P
-.RI " " name [\c
-.BI = value\c
-.RB [ ,\c
-.IR value ...]]
-.P
-The parameter
-.I name
-is one of those defined within the module using
-.BR module_param ()
-(see the Linux kernel source file
-.IR include/linux/moduleparam.h ).
-The parameter
-.I value
-is optional in the case of
-.I bool
-and
-.I invbool
-parameters.
-Values for array parameters are specified as a comma-separated list.
-.SS finit_module()
-The
-.BR finit_module ()
-.\" commit 34e1169d996ab148490c01b65b4ee371cf8ffba2
-.\" https://lwn.net/Articles/519010/
-system call is like
-.BR init_module (),
-but reads the module to be loaded from the file descriptor
-.IR fd .
-It is useful when the authenticity of a kernel module
-can be determined from its location in the filesystem;
-in cases where that is possible,
-the overhead of using cryptographically signed modules to
-determine the authenticity of a module can be avoided.
-The
-.I param_values
-argument is as for
-.BR init_module ().
-.P
-The
-.I flags
-argument modifies the operation of
-.BR finit_module ().
-It is a bit mask value created by ORing
-together zero or more of the following flags:
-.\" commit 2f3238aebedb243804f58d62d57244edec4149b2
-.TP
-.B MODULE_INIT_IGNORE_MODVERSIONS
-Ignore symbol version hashes.
-.TP
-.B MODULE_INIT_IGNORE_VERMAGIC
-Ignore kernel version magic.
-.TP
-.BR MODULE_INIT_COMPRESSED_FILE " (since Linux 5.17)"
-.\" commit b1ae6dc41eaaa98bb75671e0f3665bfda248c3e7
-Use in-kernel module decompression.
-.P
-There are some safety checks built into a module to ensure that
-it matches the kernel against which it is loaded.
-.\" http://www.tldp.org/HOWTO/Module-HOWTO/basekerncompat.html
-.\" is dated, but informative
-These checks are recorded when the module is built and
-verified when the module is loaded.
-First, the module records a "vermagic" string containing
-the kernel version number and prominent features (such as the CPU type).
-Second, if the module was built with the
-.B CONFIG_MODVERSIONS
-configuration option enabled,
-a version hash is recorded for each symbol the module uses.
-This hash is based on the types of the arguments and return value
-for the function named by the symbol.
-In this case, the kernel version number within the
-"vermagic" string is ignored,
-as the symbol version hashes are assumed to be sufficiently reliable.
-.P
-Using the
-.B MODULE_INIT_IGNORE_VERMAGIC
-flag indicates that the "vermagic" string is to be ignored, and the
-.B MODULE_INIT_IGNORE_MODVERSIONS
-flag indicates that the symbol version hashes are to be ignored.
-If the kernel is built to permit forced loading (i.e., configured with
-.BR CONFIG_MODULE_FORCE_LOAD ),
-then loading continues, otherwise it fails with the error
-.B ENOEXEC
-as expected for malformed modules.
-.P
-If the kernel was built with
-.BR CONFIG_MODULE_DECOMPRESS ,
-the in-kernel decompression feature can be used.
-User-space code can check if the kernel supports decompression
-by reading the
-.I /sys/module/compression
-attribute.
-If the kernel supports decompression,
-the compressed file can directly be passed to
-.BR finit_module ()
-using the
-.B MODULE_INIT_COMPRESSED_FILE
-flag.
-The in-kernel module decompressor supports the following compression algorithms:
-.P
-.RS 4
-.PD 0
-.IP \[bu] 3
-.I gzip
-(since Linux 5.17)
-.IP \[bu]
-.I xz
-(since Linux 5.17)
-.IP \[bu]
-.I zstd
-.\" commit 169a58ad824d896b9e291a27193342616e651b82
-(since Linux 6.2)
-.PD
-.RE
-.P
-The kernel only implements a single decompression method.
-This is selected during module generation according to the compression method
-chosen in the kernel configuration.
-.SH RETURN VALUE
-On success, these system calls return 0.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.BR EBADMSG " (since Linux 3.7)"
-Module signature is misformatted.
-.TP
-.B EBUSY
-Timeout while trying to resolve a symbol reference by this module.
-.TP
-.B EFAULT
-An address argument referred to a location that
-is outside the process's accessible address space.
-.TP
-.BR ENOKEY " (since Linux 3.7)"
-.\" commit 48ba2462ace6072741fd8d0058207d630ce93bf1
-.\" commit 1d0059f3a468825b5fc5405c636a2f6e02707ffa
-.\" commit 106a4ee258d14818467829bf0e12aeae14c16cd7
-Module signature is invalid or
-the kernel does not have a key for this module.
-This error is returned only if the kernel was configured with
-.BR CONFIG_MODULE_SIG_FORCE ;
-if the kernel was not configured with this option,
-then an invalid or unsigned module simply taints the kernel.
-.TP
-.B ENOMEM
-Out of memory.
-.TP
-.B EPERM
-The caller was not privileged
-(did not have the
-.B CAP_SYS_MODULE
-capability),
-or module loading is disabled
-(see
-.I /proc/sys/kernel/modules_disabled
-in
-.BR proc (5)).
-.P
-The following errors may additionally occur for
-.BR init_module ():
-.TP
-.B EEXIST
-A module with this name is already loaded.
-.TP
-.B EINVAL
-.I param_values
-is invalid, or some part of the ELF image in
-.I module_image
-contains inconsistencies.
-.\" .TP
-.\" .BR EINVAL " (Linux 2.4 and earlier)"
-.\" Some
-.\" .I image
-.\" slot is filled in incorrectly,
-.\" .I image\->name
-.\" does not correspond to the original module name, some
-.\" .I image\->deps
-.\" entry does not correspond to a loaded module,
-.\" or some other similar inconsistency.
-.TP
-.B ENOEXEC
-The binary image supplied in
-.I module_image
-is not an ELF image,
-or is an ELF image that is invalid or for a different architecture.
-.P
-The following errors may additionally occur for
-.BR finit_module ():
-.TP
-.B EBADF
-The file referred to by
-.I fd
-is not opened for reading.
-.TP
-.B EFBIG
-The file referred to by
-.I fd
-is too large.
-.TP
-.B EINVAL
-.I flags
-is invalid.
-.TP
-.B EINVAL
-The decompressor sanity checks failed,
-while loading a compressed module with flag
-.B MODULE_INIT_COMPRESSED_FILE
-set.
-.TP
-.B ENOEXEC
-.I fd
-does not refer to an open file.
-.TP
-.BR EOPNOTSUPP " (since Linux 5.17)"
-The flag
-.B MODULE_INIT_COMPRESSED_FILE
-is set to load a compressed module,
-and the kernel was built without
-.BR CONFIG_MODULE_DECOMPRESS .
-.TP
-.BR ETXTBSY " (since Linux 4.7)"
-.\" commit 39d637af5aa7577f655c58b9e55587566c63a0af
-The file referred to by
-.I fd
-is opened for read-write.
-.P
-In addition to the above errors, if the module's
-.I init
-function is executed and returns an error, then
-.BR init_module ()
-or
-.BR finit_module ()
-fails and
-.I errno
-is set to the value returned by the
-.I init
-function.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR finit_module ()
-Linux 3.8.
-.P
-The
-.BR init_module ()
-system call is not supported by glibc.
-No declaration is provided in glibc headers, but, through a quirk of history,
-glibc versions before glibc 2.23 did export an ABI for this system call.
-Therefore, in order to employ this system call,
-it is (before glibc 2.23) sufficient to
-manually declare the interface in your code;
-alternatively, you can invoke the system call using
-.BR syscall (2).
-.SS Linux 2.4 and earlier
-In Linux 2.4 and earlier, the
-.BR init_module ()
-system call was rather different:
-.P
-.B " #include <linux/module.h>"
-.P
-.BI " int init_module(const char *" name ", struct module *" image );
-.P
-(User-space applications can detect which version of
-.BR init_module ()
-is available by calling
-.BR query_module ();
-the latter call fails with the error
-.B ENOSYS
-on Linux 2.6 and later.)
-.P
-The older version of the system call
-loads the relocated module image pointed to by
-.I image
-into kernel space and runs the module's
-.I init
-function.
-The caller is responsible for providing the relocated image (since
-Linux 2.6, the
-.BR init_module ()
-system call does the relocation).
-.P
-The module image begins with a module structure and is followed by
-code and data as appropriate.
-Since Linux 2.2, the module structure is defined as follows:
-.P
-.in +4n
-.EX
-struct module {
- unsigned long size_of_struct;
- struct module *next;
- const char *name;
- unsigned long size;
- long usecount;
- unsigned long flags;
- unsigned int nsyms;
- unsigned int ndeps;
- struct module_symbol *syms;
- struct module_ref *deps;
- struct module_ref *refs;
- int (*init)(void);
- void (*cleanup)(void);
- const struct exception_table_entry *ex_table_start;
- const struct exception_table_entry *ex_table_end;
-#ifdef __alpha__
- unsigned long gp;
-#endif
-};
-.EE
-.in
-.P
-All of the pointer fields, with the exception of
-.I next
-and
-.IR refs ,
-are expected to point within the module body and be
-initialized as appropriate for kernel space, that is, relocated with
-the rest of the module.
-.SH NOTES
-Information about currently loaded modules can be found in
-.I /proc/modules
-and in the file trees under the per-module subdirectories under
-.IR /sys/module .
-.P
-See the Linux kernel source file
-.I include/linux/module.h
-for some useful background information.
-.SH SEE ALSO
-.BR create_module (2),
-.BR delete_module (2),
-.BR query_module (2),
-.BR lsmod (8),
-.BR modprobe (8)
diff --git a/man2/inl.2 b/man2/inl.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/inl.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/inl_p.2 b/man2/inl_p.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/inl_p.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/inotify_add_watch.2 b/man2/inotify_add_watch.2
deleted file mode 100644
index 61d3987ec..000000000
--- a/man2/inotify_add_watch.2
+++ /dev/null
@@ -1,135 +0,0 @@
-.\" Copyright (C) 2005 Robert Love
-.\" and Copyright, 2006 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 2005-07-19 Robert Love <rlove@rlove.org> - initial version
-.\" 2006-02-07 mtk, various changes
-.\"
-.TH inotify_add_watch 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-inotify_add_watch \- add a watch to an initialized inotify instance
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/inotify.h>
-.P
-.BI "int inotify_add_watch(int " fd ", const char *" pathname ", uint32_t " mask );
-.fi
-.SH DESCRIPTION
-.BR inotify_add_watch ()
-adds a new watch, or modifies an existing watch,
-for the file whose location is specified in
-.IR pathname ;
-the caller must have read permission for this file.
-The
-.I fd
-argument is a file descriptor referring to the
-inotify instance whose watch list is to be modified.
-The events to be monitored for
-.I pathname
-are specified in the
-.I mask
-bit-mask argument.
-See
-.BR inotify (7)
-for a description of the bits that can be set in
-.IR mask .
-.P
-A successful call to
-.BR inotify_add_watch ()
-returns a unique watch descriptor for this inotify instance,
-for the filesystem object (inode) that corresponds to
-.IR pathname .
-If the filesystem object
-was not previously being watched by this inotify instance,
-then the watch descriptor is newly allocated.
-If the filesystem object was already being watched
-(perhaps via a different link to the same object), then the descriptor
-for the existing watch is returned.
-.P
-The watch descriptor is returned by later
-.BR read (2)s
-from the inotify file descriptor.
-These reads fetch
-.I inotify_event
-structures (see
-.BR inotify (7))
-indicating filesystem events;
-the watch descriptor inside this structure identifies
-the object for which the event occurred.
-.SH RETURN VALUE
-On success,
-.BR inotify_add_watch ()
-returns a watch descriptor (a nonnegative integer).
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Read access to the given file is not permitted.
-.TP
-.B EBADF
-The given file descriptor is not valid.
-.TP
-.B EEXIST
-.I mask
-contains
-.B IN_MASK_CREATE
-and
-.I pathname
-refers to a file already being watched by the same
-.IR fd .
-.TP
-.B EFAULT
-.I pathname
-points outside of the process's accessible address space.
-.TP
-.B EINVAL
-The given event mask contains no valid events; or
-.I mask
-contains both
-.B IN_MASK_ADD
-and
-.BR IN_MASK_CREATE ;
-or
-.I fd
-is not an inotify file descriptor.
-.TP
-.B ENAMETOOLONG
-.I pathname
-is too long.
-.TP
-.B ENOENT
-A directory component in
-.I pathname
-does not exist or is a dangling symbolic link.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOSPC
-The user limit on the total number of inotify watches was reached or the
-kernel failed to allocate a needed resource.
-.TP
-.B ENOTDIR
-.I mask
-contains
-.B IN_ONLYDIR
-and
-.I pathname
-is not a directory.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.13.
-.SH EXAMPLES
-See
-.BR inotify (7).
-.SH SEE ALSO
-.BR inotify_init (2),
-.BR inotify_rm_watch (2),
-.BR inotify (7)
diff --git a/man2/inotify_init.2 b/man2/inotify_init.2
deleted file mode 100644
index c5072ef70..000000000
--- a/man2/inotify_init.2
+++ /dev/null
@@ -1,97 +0,0 @@
-.\" Copyright (C) 2005 Robert Love
-.\" and Copyright (C) 2008, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 2005-07-19 Robert Love <rlove@rlove.org> - initial version
-.\" 2006-02-07 mtk, minor changes
-.\" 2008-10-10 mtk: add description of inotify_init1()
-.\"
-.TH inotify_init 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-inotify_init, inotify_init1 \- initialize an inotify instance
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/inotify.h>
-.P
-.B "int inotify_init(void);"
-.BI "int inotify_init1(int " flags );
-.fi
-.SH DESCRIPTION
-For an overview of the inotify API, see
-.BR inotify (7).
-.P
-.BR inotify_init ()
-initializes a new inotify instance and returns a file descriptor associated
-with a new inotify event queue.
-.P
-If
-.I flags
-is 0, then
-.BR inotify_init1 ()
-is the same as
-.BR inotify_init ().
-The following values can be bitwise ORed in
-.I flags
-to obtain different behavior:
-.TP
-.B IN_NONBLOCK
-Set the
-.B O_NONBLOCK
-file status flag on the open file description (see
-.BR open (2))
-referred to by the new file descriptor.
-Using this flag saves extra calls to
-.BR fcntl (2)
-to achieve the same result.
-.TP
-.B IN_CLOEXEC
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.SH RETURN VALUE
-On success, these system calls return a new file descriptor.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.RB ( inotify_init1 ())
-An invalid value was specified in
-.IR flags .
-.TP
-.B EMFILE
-The user limit on the total number of inotify instances has been reached.
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOMEM
-Insufficient kernel memory is available.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR inotify_init ()
-Linux 2.6.13,
-glibc 2.4.
-.TP
-.BR inotify_init1 ()
-Linux 2.6.27,
-glibc 2.9.
-.SH SEE ALSO
-.BR inotify_add_watch (2),
-.BR inotify_rm_watch (2),
-.BR inotify (7)
diff --git a/man2/inotify_init1.2 b/man2/inotify_init1.2
deleted file mode 100644
index 62c5b44e6..000000000
--- a/man2/inotify_init1.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/inotify_init.2
diff --git a/man2/inotify_rm_watch.2 b/man2/inotify_rm_watch.2
deleted file mode 100644
index 2fde52c61..000000000
--- a/man2/inotify_rm_watch.2
+++ /dev/null
@@ -1,60 +0,0 @@
-.\" Copyright (C) 2005 Robert Love
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 2005-07-19 Robert Love <rlove@rlove.org> - initial version
-.\" 2006-02-07 mtk, minor changes
-.\"
-.TH inotify_rm_watch 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-inotify_rm_watch \- remove an existing watch from an inotify instance
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/inotify.h>
-.P
-.BI "int inotify_rm_watch(int " fd ", int " wd );
-.\" Before glibc 2.10, the second argument was typed as uint32_t.
-.\" https://www.sourceware.org/bugzilla/show_bug.cgi?id=7040
-.fi
-.SH DESCRIPTION
-.BR inotify_rm_watch ()
-removes the watch associated with the watch descriptor
-.I wd
-from the inotify instance associated with the file descriptor
-.IR fd .
-.P
-Removing a watch causes an
-.B IN_IGNORED
-event to be generated for this watch descriptor.
-(See
-.BR inotify (7).)
-.SH RETURN VALUE
-On success,
-.BR inotify_rm_watch ()
-returns zero.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.B EINVAL
-The watch descriptor
-.I wd
-is not valid; or
-.I fd
-is not an inotify file descriptor.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.13.
-.SH SEE ALSO
-.BR inotify_add_watch (2),
-.BR inotify_init (2),
-.BR inotify (7)
diff --git a/man2/insb.2 b/man2/insb.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/insb.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/insl.2 b/man2/insl.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/insl.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/insw.2 b/man2/insw.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/insw.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/intro.2 b/man2/intro.2
deleted file mode 100644
index 3986bb0a1..000000000
--- a/man2/intro.2
+++ /dev/null
@@ -1,115 +0,0 @@
-.\" Copyright (C) 2007 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2007-10-23 mtk: moved the _syscallN specific material to the
-.\" new _syscall(2) page, and substantially enhanced and rewrote
-.\" the remaining material on this page.
-.\"
-.TH intro 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-intro \- introduction to system calls
-.SH DESCRIPTION
-Section 2 of the manual describes the Linux system calls.
-A system call is an entry point into the Linux kernel.
-Usually, system calls are not invoked directly:
-instead, most system calls have corresponding C library
-wrapper functions which perform the steps required
-(e.g., trapping to kernel mode) in order to invoke
-the system call.
-Thus, making a system call looks the same as invoking a normal
-library function.
-.P
-In many cases, the C library wrapper function does nothing more than:
-.IP \[bu] 3
-copying arguments and the unique system call number to the
-registers where the kernel expects them;
-.IP \[bu]
-trapping to kernel mode,
-at which point the kernel does the real work of the system call;
-.IP \[bu]
-setting
-.I errno
-if the system call returns an error number when the kernel returns the
-CPU to user mode.
-.P
-However, in a few cases, a wrapper function may do rather more than this,
-for example, performing some preprocessing
-of the arguments before trapping to kernel mode,
-or postprocessing of values returned by the system call.
-Where this is the case, the manual pages in Section 2 generally
-try to note the details of both the (usually GNU) C library API
-interface and the raw system call.
-Most commonly, the main DESCRIPTION will focus on the C library interface,
-and differences for the system call are covered in the NOTES section.
-.P
-For a list of the Linux system calls, see
-.BR syscalls (2).
-.SH RETURN VALUE
-On error, most system calls return a negative error number
-(i.e., the negated value of one of the constants described in
-.BR errno (3)).
-The C library wrapper hides this detail from the caller: when a
-system call returns a negative value, the wrapper copies the
-absolute value into the
-.I errno
-variable, and returns \-1 as the return value of the wrapper.
-.P
-The value returned by a successful system call depends on the call.
-Many system calls return 0 on success, but some can return nonzero
-values from a successful call.
-The details are described in the individual manual pages.
-.P
-In some cases,
-the programmer must define a feature test macro in order to obtain
-the declaration of a system call from the header file specified
-in the man page SYNOPSIS section.
-(Where required, these feature test macros must be defined before including
-.I any
-header files.)
-In such cases, the required macro is described in the man page.
-For further information on feature test macros, see
-.BR feature_test_macros (7).
-.SH STANDARDS
-Certain terms and abbreviations are used to indicate UNIX variants
-and standards to which calls in this section conform.
-See
-.BR standards (7).
-.SH NOTES
-.SS Calling directly
-In most cases, it is unnecessary to invoke a system call directly,
-but there are times when the Standard C library does not implement
-a nice wrapper function for you.
-In this case, the programmer must manually invoke the system call using
-.BR syscall (2).
-Historically, this was also possible using one of the _syscall macros
-described in
-.BR _syscall (2).
-.SS Authors and copyright conditions
-Look at the header of the manual page source for the author(s) and copyright
-conditions.
-Note that these can be different from page to page!
-.SH SEE ALSO
-.ad l
-.nh
-.BR _syscall (2),
-.BR syscall (2),
-.BR syscalls (2),
-.BR errno (3),
-.BR intro (3),
-.BR capabilities (7),
-.BR credentials (7),
-.BR feature_test_macros (7),
-.BR mq_overview (7),
-.BR path_resolution (7),
-.BR pipe (7),
-.BR pty (7),
-.BR sem_overview (7),
-.BR shm_overview (7),
-.BR signal (7),
-.BR socket (7),
-.BR standards (7),
-.BR symlink (7),
-.BR system_data_types (7),
-.BR sysvipc (7),
-.BR time (7)
diff --git a/man2/inw.2 b/man2/inw.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/inw.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/inw_p.2 b/man2/inw_p.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/inw_p.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/io_cancel.2 b/man2/io_cancel.2
deleted file mode 100644
index 3820a916b..000000000
--- a/man2/io_cancel.2
+++ /dev/null
@@ -1,106 +0,0 @@
-.\" Copyright (C) 2003 Free Software Foundation, Inc.
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.TH io_cancel 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-io_cancel \- cancel an outstanding asynchronous I/O operation
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.P
-Alternatively, Asynchronous I/O library
-.RI ( libaio ", " \-laio );
-see VERSIONS.
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/aio_abi.h>" " /* Definition of needed types */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_io_cancel, aio_context_t " ctx_id ", struct iocb *" iocb ,
-.BI " struct io_event *" result );
-.fi
-.SH DESCRIPTION
-.IR Note :
-this page describes the raw Linux system call interface.
-The wrapper function provided by
-.I libaio
-uses a different type for the
-.I ctx_id
-argument.
-See VERSIONS.
-.P
-The
-.BR io_cancel ()
-system call
-attempts to cancel an asynchronous I/O operation previously submitted with
-.BR io_submit (2).
-The
-.I iocb
-argument describes the operation to be canceled and the
-.I ctx_id
-argument is the AIO context to which the operation was submitted.
-If the operation is successfully canceled, the event will be copied into
-the memory pointed to by
-.I result
-without being placed into the
-completion queue.
-.SH RETURN VALUE
-On success,
-.BR io_cancel ()
-returns 0.
-For the failure return, see VERSIONS.
-.SH ERRORS
-.TP
-.B EAGAIN
-The \fIiocb\fP specified was not canceled.
-.TP
-.B EFAULT
-One of the data structures points to invalid data.
-.TP
-.B EINVAL
-The AIO context specified by \fIctx_id\fP is invalid.
-.TP
-.B ENOSYS
-.BR io_cancel ()
-is not implemented on this architecture.
-.SH VERSIONS
-You probably want to use the
-.BR io_cancel ()
-wrapper function provided by
-.\" http://git.fedorahosted.org/git/?p=libaio.git
-.IR libaio .
-.P
-Note that the
-.I libaio
-wrapper function uses a different type
-.RI ( io_context_t )
-.\" But glibc is confused, since <libaio.h> uses 'io_context_t' to declare
-.\" the system call.
-for the
-.I ctx_id
-argument.
-Note also that the
-.I libaio
-wrapper does not follow the usual C library conventions for indicating errors:
-on error it returns a negated error number
-(the negative of one of the values listed in ERRORS).
-If the system call is invoked via
-.BR syscall (2),
-then the return value follows the usual conventions for
-indicating an error: \-1, with
-.I errno
-set to a (positive) value that indicates the error.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.
-.SH SEE ALSO
-.BR io_destroy (2),
-.BR io_getevents (2),
-.BR io_setup (2),
-.BR io_submit (2),
-.BR aio (7)
-.\" .SH AUTHOR
-.\" Kent Yoder.
diff --git a/man2/io_destroy.2 b/man2/io_destroy.2
deleted file mode 100644
index 451cf82ff..000000000
--- a/man2/io_destroy.2
+++ /dev/null
@@ -1,97 +0,0 @@
-.\" Copyright (C) 2003 Free Software Foundation, Inc.
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.TH io_destroy 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-io_destroy \- destroy an asynchronous I/O context
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/aio_abi.h>" " /* Definition of " aio_context_t " */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_io_destroy, aio_context_t " ctx_id );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR io_destroy (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.IR Note :
-this page describes the raw Linux system call interface.
-The wrapper function provided by
-.I libaio
-uses a different type for the
-.I ctx_id
-argument.
-See VERSIONS.
-.P
-The
-.BR io_destroy ()
-system call
-will attempt to cancel all outstanding asynchronous I/O operations against
-.IR ctx_id ,
-will block on the completion of all operations
-that could not be canceled, and will destroy the
-.IR ctx_id .
-.SH RETURN VALUE
-On success,
-.BR io_destroy ()
-returns 0.
-For the failure return, see VERSIONS.
-.SH ERRORS
-.TP
-.B EFAULT
-The context pointed to is invalid.
-.TP
-.B EINVAL
-The AIO context specified by \fIctx_id\fP is invalid.
-.TP
-.B ENOSYS
-.BR io_destroy ()
-is not implemented on this architecture.
-.SH VERSIONS
-You probably want to use the
-.BR io_destroy ()
-wrapper function provided by
-.\" http://git.fedorahosted.org/git/?p=libaio.git
-.IR libaio .
-.P
-Note that the
-.I libaio
-wrapper function uses a different type
-.RI ( io_context_t )
-.\" But glibc is confused, since <libaio.h> uses 'io_context_t' to declare
-.\" the system call.
-for the
-.I ctx_id
-argument.
-Note also that the
-.I libaio
-wrapper does not follow the usual C library conventions for indicating errors:
-on error it returns a negated error number
-(the negative of one of the values listed in ERRORS).
-If the system call is invoked via
-.BR syscall (2),
-then the return value follows the usual conventions for
-indicating an error: \-1, with
-.I errno
-set to a (positive) value that indicates the error.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.
-.SH SEE ALSO
-.BR io_cancel (2),
-.BR io_getevents (2),
-.BR io_setup (2),
-.BR io_submit (2),
-.BR aio (7)
-.\" .SH AUTHOR
-.\" Kent Yoder.
diff --git a/man2/io_getevents.2 b/man2/io_getevents.2
deleted file mode 100644
index e426c1eae..000000000
--- a/man2/io_getevents.2
+++ /dev/null
@@ -1,137 +0,0 @@
-.\" Copyright (C) 2003 Free Software Foundation, Inc.
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.TH io_getevents 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-io_getevents \- read asynchronous I/O events from the completion queue
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.P
-Alternatively, Asynchronous I/O library
-.RI ( libaio ", " \-laio );
-see VERSIONS.
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/aio_abi.h>" " /* Definition of " *io_* " types */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_io_getevents, aio_context_t " ctx_id ,
-.BI " long " min_nr ", long " nr ", struct io_event *" events ,
-.BI " struct timespec *" timeout );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR io_getevents (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.IR Note :
-this page describes the raw Linux system call interface.
-The wrapper function provided by
-.I libaio
-uses a different type for the
-.I ctx_id
-argument.
-See VERSIONS.
-.P
-The
-.BR io_getevents ()
-system call
-attempts to read at least \fImin_nr\fP events and
-up to \fInr\fP events from the completion queue of the AIO context
-specified by \fIctx_id\fP.
-.P
-The \fItimeout\fP argument specifies the amount of time to wait for events,
-and is specified as a relative timeout in a
-.BR timespec (3)
-structure.
-.P
-The specified time will be rounded up to the system clock granularity
-and is guaranteed not to expire early.
-.P
-Specifying
-.I timeout
-as NULL means block indefinitely until at least
-.I min_nr
-events have been obtained.
-.SH RETURN VALUE
-On success,
-.BR io_getevents ()
-returns the number of events read.
-This may be 0, or a value less than
-.IR min_nr ,
-if the
-.I timeout
-expired.
-It may also be a nonzero value less than
-.IR min_nr ,
-if the call was interrupted by a signal handler.
-.P
-For the failure return, see VERSIONS.
-.SH ERRORS
-.TP
-.B EFAULT
-Either \fIevents\fP or \fItimeout\fP is an invalid pointer.
-.TP
-.B EINTR
-Interrupted by a signal handler; see
-.BR signal (7).
-.TP
-.B EINVAL
-\fIctx_id\fP is invalid.
-\fImin_nr\fP is out of range or \fInr\fP is
-out of range.
-.TP
-.B ENOSYS
-.BR io_getevents ()
-is not implemented on this architecture.
-.SH VERSIONS
-You probably want to use the
-.BR io_getevents ()
-wrapper function provided by
-.\" http://git.fedorahosted.org/git/?p=libaio.git
-.IR libaio .
-.P
-Note that the
-.I libaio
-wrapper function uses a different type
-.RI ( io_context_t )
-.\" But glibc is confused, since <libaio.h> uses 'io_context_t' to declare
-.\" the system call.
-for the
-.I ctx_id
-argument.
-Note also that the
-.I libaio
-wrapper does not follow the usual C library conventions for indicating errors:
-on error it returns a negated error number
-(the negative of one of the values listed in ERRORS).
-If the system call is invoked via
-.BR syscall (2),
-then the return value follows the usual conventions for
-indicating an error: \-1, with
-.I errno
-set to a (positive) value that indicates the error.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.
-.SH BUGS
-An invalid
-.I ctx_id
-may cause a segmentation fault instead of generating the error
-.BR EINVAL .
-.SH SEE ALSO
-.BR io_cancel (2),
-.BR io_destroy (2),
-.BR io_setup (2),
-.BR io_submit (2),
-.BR timespec (3),
-.BR aio (7),
-.BR time (7)
-.\" .SH AUTHOR
-.\" Kent Yoder.
diff --git a/man2/io_setup.2 b/man2/io_setup.2
deleted file mode 100644
index 32db66422..000000000
--- a/man2/io_setup.2
+++ /dev/null
@@ -1,114 +0,0 @@
-.\" Copyright (C) 2003 Free Software Foundation, Inc.
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.TH io_setup 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-io_setup \- create an asynchronous I/O context
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.P
-Alternatively, Asynchronous I/O library
-.RI ( libaio ", " \-laio );
-see VERSIONS.
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/aio_abi.h>" " /* Defines needed types */"
-.P
-.BI "long io_setup(unsigned int " nr_events ", aio_context_t *" ctx_idp );
-.fi
-.P
-.IR Note :
-There is no glibc wrapper for this system call; see VERSIONS.
-.SH DESCRIPTION
-.IR Note :
-this page describes the raw Linux system call interface.
-The wrapper function provided by
-.I libaio
-uses a different type for the
-.I ctx_idp
-argument.
-See VERSIONS.
-.P
-The
-.BR io_setup ()
-system call
-creates an asynchronous I/O context suitable for concurrently processing
-\fInr_events\fP operations.
-The
-.I ctx_idp
-argument must not point to an AIO context that already exists, and must
-be initialized to 0 prior to the call.
-On successful creation of the AIO context, \fI*ctx_idp\fP is filled in
-with the resulting handle.
-.SH RETURN VALUE
-On success,
-.BR io_setup ()
-returns 0.
-For the failure return, see VERSIONS.
-.SH ERRORS
-.TP
-.B EAGAIN
-The specified \fInr_events\fP exceeds the limit of available events,
-as defined in
-.I /proc/sys/fs/aio\-max\-nr
-(see
-.BR proc (5)).
-.TP
-.B EFAULT
-An invalid pointer is passed for \fIctx_idp\fP.
-.TP
-.B EINVAL
-\fIctx_idp\fP is not initialized, or the specified \fInr_events\fP
-exceeds internal limits.
-\fInr_events\fP should be greater than 0.
-.TP
-.B ENOMEM
-Insufficient kernel resources are available.
-.TP
-.B ENOSYS
-.BR io_setup ()
-is not implemented on this architecture.
-.SH VERSIONS
-glibc does not provide a wrapper for this system call.
-You could invoke it using
-.BR syscall (2).
-But instead, you probably want to use the
-.BR io_setup ()
-wrapper function provided by
-.\" http://git.fedorahosted.org/git/?p=libaio.git
-.IR libaio .
-.P
-Note that the
-.I libaio
-wrapper function uses a different type
-.RI ( "io_context_t\ *" )
-.\" But glibc is confused, since <libaio.h> uses 'io_context_t' to declare
-.\" the system call.
-for the
-.I ctx_idp
-argument.
-Note also that the
-.I libaio
-wrapper does not follow the usual C library conventions for indicating errors:
-on error it returns a negated error number
-(the negative of one of the values listed in ERRORS).
-If the system call is invoked via
-.BR syscall (2),
-then the return value follows the usual conventions for
-indicating an error: \-1, with
-.I errno
-set to a (positive) value that indicates the error.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.
-.SH SEE ALSO
-.BR io_cancel (2),
-.BR io_destroy (2),
-.BR io_getevents (2),
-.BR io_submit (2),
-.BR aio (7)
-.\" .SH AUTHOR
-.\" Kent Yoder.
diff --git a/man2/io_submit.2 b/man2/io_submit.2
deleted file mode 100644
index c53ae9aaf..000000000
--- a/man2/io_submit.2
+++ /dev/null
@@ -1,289 +0,0 @@
-.\" Copyright (C) 2003 Free Software Foundation, Inc.
-.\" and Copyright (C) 2017 Goldwyn Rodrigues <rgoldwyn@suse.de>
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.TH io_submit 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-io_submit \- submit asynchronous I/O blocks for processing
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.P
-Alternatively, Asynchronous I/O library
-.RI ( libaio ", " \-laio );
-see VERSIONS.
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/aio_abi.h>" " /* Defines needed types */"
-.P
-.BI "int io_submit(aio_context_t " ctx_id ", long " nr \
-", struct iocb **" iocbpp );
-.fi
-.P
-.IR Note :
-There is no glibc wrapper for this system call; see VERSIONS.
-.SH DESCRIPTION
-.IR Note :
-this page describes the raw Linux system call interface.
-The wrapper function provided by
-.I libaio
-uses a different type for the
-.I ctx_id
-argument.
-See VERSIONS.
-.P
-The
-.BR io_submit ()
-system call
-queues \fInr\fP I/O request blocks for processing in
-the AIO context \fIctx_id\fP.
-The
-.I iocbpp
-argument should be an array of \fInr\fP AIO control blocks,
-which will be submitted to context \fIctx_id\fP.
-.P
-The
-.I iocb
-(I/O control block) structure defined in
-.I linux/aio_abi.h
-defines the parameters that control the I/O operation.
-.P
-.in +4n
-.EX
-#include <linux/aio_abi.h>
-\&
-struct iocb {
- __u64 aio_data;
- __u32 PADDED(aio_key, aio_rw_flags);
- __u16 aio_lio_opcode;
- __s16 aio_reqprio;
- __u32 aio_fildes;
- __u64 aio_buf;
- __u64 aio_nbytes;
- __s64 aio_offset;
- __u64 aio_reserved2;
- __u32 aio_flags;
- __u32 aio_resfd;
-};
-.EE
-.in
-.P
-The fields of this structure are as follows:
-.TP
-.I aio_data
-This data is copied into the
-.I data
-field of the
-.I io_event
-structure upon I/O completion (see
-.BR io_getevents (2)).
-.TP
-.I aio_key
-This is an internal field used by the kernel.
-Do not modify this field after an
-.BR io_submit ()
-call.
-.TP
-.I aio_rw_flags
-This defines the R/W flags passed with the structure.
-The valid values are:
-.RS
-.TP
-.BR RWF_APPEND " (since Linux 4.16)"
-.\" commit e1fc742e14e01d84d9693c4aca4ab23da65811fb
-Append data to the end of the file.
-See the description of the flag of the same name in
-.BR pwritev2 (2)
-as well as the description of
-.B O_APPEND
-in
-.BR open (2).
-The
-.I aio_offset
-field is ignored.
-The file offset is not changed.
-.TP
-.BR RWF_DSYNC " (since Linux 4.13)"
-Write operation completes according to the requirements of
-synchronized I/O data integrity.
-See the description of the flag of the same name in
-.BR pwritev2 (2)
-as well as the description of
-.B O_DSYNC
-in
-.BR open (2).
-.TP
-.BR RWF_HIPRI " (since Linux 4.13)"
-High priority request, poll if possible.
-.TP
-.BR RWF_NOWAIT " (since Linux 4.14)"
-Don't wait if the I/O will block for operations such as
-file block allocations, dirty page flush, mutex locks,
-or a congested block device inside the kernel.
-If any of these conditions are met, the control block is returned
-immediately with a return value of
-.B \-EAGAIN
-in the
-.I res
-field of the
-.I io_event
-structure (see
-.BR io_getevents (2)).
-.TP
-.BR RWF_SYNC " (since Linux 4.13)"
-Write operation completes according to the requirements of
-synchronized I/O file integrity.
-See the description of the flag of the same name in
-.BR pwritev2 (2)
-as well as the description of
-.B O_SYNC
-in
-.BR open (2).
-.RE
-.TP
-.I aio_lio_opcode
-This defines the type of I/O to be performed by the
-.I iocb
-structure.
-The
-valid values are defined by the enum defined in
-.IR linux/aio_abi.h :
-.IP
-.in +4n
-.EX
-enum {
- IOCB_CMD_PREAD = 0,
- IOCB_CMD_PWRITE = 1,
- IOCB_CMD_FSYNC = 2,
- IOCB_CMD_FDSYNC = 3,
- IOCB_CMD_POLL = 5,
- IOCB_CMD_NOOP = 6,
- IOCB_CMD_PREADV = 7,
- IOCB_CMD_PWRITEV = 8,
-};
-.EE
-.in
-.TP
-.I aio_reqprio
-This defines the request's priority.
-.TP
-.I aio_fildes
-The file descriptor on which the I/O operation is to be performed.
-.TP
-.I aio_buf
-This is the buffer used to transfer data for a read or write operation.
-.TP
-.I aio_nbytes
-This is the size of the buffer pointed to by
-.IR aio_buf .
-.TP
-.I aio_offset
-This is the file offset at which the I/O operation is to be performed.
-.TP
-.I aio_flags
-This is the set of flags associated with the
-.I iocb
-structure.
-The valid values are:
-.RS
-.TP
-.B IOCB_FLAG_RESFD
-Asynchronous I/O control must signal the file
-descriptor mentioned in
-.I aio_resfd
-upon completion.
-.TP
-.BR IOCB_FLAG_IOPRIO " (since Linux 4.18)"
-.\" commit d9a08a9e616beeccdbd0e7262b7225ffdfa49e92
-Interpret the
-.I aio_reqprio
-field as an
-.B IOPRIO_VALUE
-as defined by
-.IR linux/ioprio.h .
-.RE
-.TP
-.I aio_resfd
-The file descriptor to signal in the event of asynchronous I/O completion.
-.SH RETURN VALUE
-On success,
-.BR io_submit ()
-returns the number of \fIiocb\fPs submitted (which may be
-less than \fInr\fP, or 0 if \fInr\fP is zero).
-For the failure return, see VERSIONS.
-.SH ERRORS
-.TP
-.B EAGAIN
-Insufficient resources are available to queue any \fIiocb\fPs.
-.TP
-.B EBADF
-The file descriptor specified in the first \fIiocb\fP is invalid.
-.TP
-.B EFAULT
-One of the data structures points to invalid data.
-.TP
-.B EINVAL
-The AIO context specified by \fIctx_id\fP is invalid.
-\fInr\fP is less than 0.
-The \fIiocb\fP at
-.I *iocbpp[0]
-is not properly initialized, the operation specified is invalid for the file
-descriptor in the \fIiocb\fP, or the value in the
-.I aio_reqprio
-field is invalid.
-.TP
-.B ENOSYS
-.BR io_submit ()
-is not implemented on this architecture.
-.TP
-.B EPERM
-The
-.I aio_reqprio
-field is set with the class
-.BR IOPRIO_CLASS_RT ,
-but the submitting context does not have the
-.B CAP_SYS_ADMIN
-capability.
-.SH VERSIONS
-glibc does not provide a wrapper for this system call.
-You could invoke it using
-.BR syscall (2).
-But instead, you probably want to use the
-.BR io_submit ()
-wrapper function provided by
-.\" http://git.fedorahosted.org/git/?p=libaio.git
-.IR libaio .
-.P
-Note that the
-.I libaio
-wrapper function uses a different type
-.RI ( io_context_t )
-.\" But glibc is confused, since <libaio.h> uses 'io_context_t' to declare
-.\" the system call.
-for the
-.I ctx_id
-argument.
-Note also that the
-.I libaio
-wrapper does not follow the usual C library conventions for indicating errors:
-on error it returns a negated error number
-(the negative of one of the values listed in ERRORS).
-If the system call is invoked via
-.BR syscall (2),
-then the return value follows the usual conventions for
-indicating an error: \-1, with
-.I errno
-set to a (positive) value that indicates the error.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.
-.SH SEE ALSO
-.BR io_cancel (2),
-.BR io_destroy (2),
-.BR io_getevents (2),
-.BR io_setup (2),
-.BR aio (7)
-.\" .SH AUTHOR
-.\" Kent Yoder.
diff --git a/man2/ioctl.2 b/man2/ioctl.2
deleted file mode 100644
index 5b8c28a9c..000000000
--- a/man2/ioctl.2
+++ /dev/null
@@ -1,231 +0,0 @@
-.\" Copyright (c) 1980, 1991 Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)ioctl.2 6.4 (Berkeley) 3/10/91
-.\"
-.\" Modified 1993-07-23 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1999-06-25 by Rachael Munns <vashti@dream.org.uk>
-.\" Modified 2000-09-21 by Andries Brouwer <aeb@cwi.nl>
-.\"
-.TH ioctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl \- control device
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " fd ", unsigned long " op ", ...);" "\f[R] /* glibc, BSD */\f[]"
-.BI "int ioctl(int " fd ", int " op ", ...);" "\f[R] /* musl, other UNIX */\f[]"
-.fi
-.SH DESCRIPTION
-The
-.BR ioctl ()
-system call manipulates the underlying device parameters of special files.
-In particular, many operating characteristics of character special files
-(e.g., terminals) may be controlled with
-.BR ioctl ()
-operations.
-The argument
-.I fd
-must be an open file descriptor.
-.P
-The second argument is a device-dependent operation code.
-The third argument is an untyped pointer to memory.
-It's traditionally
-.BI "char *" argp
-(from the days before
-.B "void *"
-was valid C), and will be so named for this discussion.
-.P
-An
-.BR ioctl ()
-.I op
-has encoded in it whether the argument is an
-.I in
-parameter or
-.I out
-parameter, and the size of the argument
-.I argp
-in bytes.
-Macros and defines used in specifying an
-.BR ioctl ()
-.I op
-are located in the file
-.IR <sys/ioctl.h> .
-See NOTES.
-.SH RETURN VALUE
-Usually, on success zero is returned.
-A few
-.BR ioctl ()
-operations use the return value as an output parameter
-and return a nonnegative value on success.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.B EFAULT
-.I argp
-references an inaccessible memory area.
-.TP
-.B EINVAL
-.I op
-or
-.I argp
-is not valid.
-.TP
-.B ENOTTY
-.I fd
-is not associated with a character special device.
-.TP
-.B ENOTTY
-The specified operation does not apply to the kind of object that the
-file descriptor
-.I fd
-references.
-.SH VERSIONS
-Arguments, returns, and semantics of
-.BR ioctl ()
-vary according to the device driver in question (the call is used as a
-catch-all for operations that don't cleanly fit the UNIX stream I/O
-model).
-.SH STANDARDS
-None.
-.SH HISTORY
-Version\~7 AT&T UNIX has
-.PD 0
-.in +4n
-.nf
-.BI "ioctl(int " fildes ", int " op ", struct sgttyb *" argp );
-.fi
-.in
-.P
-.PD
-(where
-.B struct sgttyb
-has historically been used by
-.BR stty (2)
-and
-.BR gtty (2),
-and is polymorphic by operation type (like a
-.B void *
-would be, if it had been available)).
-.P
-SysIII documents
-.I arg
-without a type at all.
-.P
-4.3BSD has
-.PD 0
-.in +4n
-.nf
-.BI "ioctl(int " d ", unsigned long " op ", char *" argp );
-.fi
-.in
-.P
-.PD
-(with
-.B char *
-similarly standing in for
-.BR "void *" ).
-.P
-SysVr4 has
-.PD 0
-.in +4n
-.nf
-.BI "int ioctl(int " fildes ", int " op ", ... /* " arg " */);"
-.fi
-.in
-.P
-.PD
-.SH NOTES
-In order to use this call, one needs an open file descriptor.
-Often the
-.BR open (2)
-call has unwanted side effects, that can be avoided under Linux
-by giving it the
-.B O_NONBLOCK
-flag.
-.\"
-.SS ioctl structure
-.\" added two sections - aeb
-Ioctl
-.I op
-values are 32-bit constants.
-In principle these constants are completely arbitrary, but people have
-tried to build some structure into them.
-.P
-The old Linux situation was that of mostly 16-bit constants, where the
-last byte is a serial number, and the preceding byte(s) give a type
-indicating the driver.
-Sometimes the major number was used: 0x03
-for the
-.B HDIO_*
-ioctls, 0x06 for the
-.B LP*
-ioctls.
-And sometimes
-one or more ASCII letters were used.
-For example,
-.B TCGETS
-has value
-0x00005401, with 0x54 = \[aq]T\[aq] indicating the terminal driver, and
-.B CYGETTIMEOUT
-has value 0x00435906, with 0x43 0x59 = \[aq]C\[aq] \[aq]Y\[aq]
-indicating the cyclades driver.
-.P
-Later (0.98p5) some more information was built into the number.
-One has 2 direction bits
-(00: none, 01: write, 10: read, 11: read/write)
-followed by 14 size bits (giving the size of the argument),
-followed by an 8-bit type (collecting the ioctls in groups
-for a common purpose or a common driver), and an 8-bit
-serial number.
-.P
-The macros describing this structure live in
-.I <asm/ioctl.h>
-and are
-.B _IO(type,nr)
-and
-.BR "{_IOR,_IOW,_IOWR}(type,nr,size)" .
-They use
-.I sizeof(size)
-so that size is a
-misnomer here: this third argument is a data type.
-.P
-Note that the size bits are very unreliable: in lots of cases
-they are wrong, either because of buggy macros using
-.IR sizeof(sizeof(struct)) ,
-or because of legacy values.
-.P
-Thus, it seems that the new structure only gave disadvantages:
-it does not help in checking, but it causes varying values
-for the various architectures.
-.SH SEE ALSO
-.BR execve (2),
-.BR fcntl (2),
-.BR ioctl_console (2),
-.BR ioctl_fat (2),
-.BR ioctl_ficlone (2),
-.BR ioctl_ficlonerange (2),
-.BR ioctl_fideduperange (2),
-.BR ioctl_fslabel (2),
-.BR ioctl_getfsmap (2),
-.BR ioctl_iflags (2),
-.BR ioctl_ns (2),
-.BR ioctl_tty (2),
-.BR ioctl_userfaultfd (2),
-.BR open (2),
-.\" .BR mt (4),
-.BR sd (4),
-.BR tty (4)
diff --git a/man2/ioctl_console.2 b/man2/ioctl_console.2
deleted file mode 100644
index 93a992264..000000000
--- a/man2/ioctl_console.2
+++ /dev/null
@@ -1,915 +0,0 @@
-'\" t
-.\" Copyright (c) 1995 Jim Van Zandt <jrv@vanzandt.mv.com> and aeb
-.\" Sun Feb 26 11:46:23 MET 1995
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified, Sun Feb 26 15:04:20 1995, faith@cs.unc.edu
-.\" Modified, Thu Apr 20 22:08:17 1995, jrv@vanzandt.mv.com
-.\" Modified, Mon Sep 18 22:32:47 1995, hpa@storm.net (H. Peter Anvin)
-.\" FIXME The following are not documented:
-.\" KDFONTOP (since Linux 2.1.111)
-.\" KDGKBDIACRUC (since Linux 2.6.24)
-.\" KDSKBDIACR
-.\" KDSKBDIACRUC (since Linux 2.6.24)
-.\" KDKBDREP (since Linux 2.1.113)
-.\" KDMAPDISP (not implemented as at Linux 2.6.27)
-.\" KDUNMAPDISP (not implemented as at Linux 2.6.27)
-.\" VT_LOCKSWITCH (since Linux 1.3.47, needs CAP_SYS_TTY_CONFIG)
-.\" VT_UNLOCKSWITCH (since Linux 1.3.47, needs CAP_SYS_TTY_CONFIG)
-.\" VT_GETHIFONTMASK (since Linux 2.6.18)
-.\"
-.TH ioctl_console 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_console \- ioctls for console terminal and virtual consoles
-.SH DESCRIPTION
-The following Linux-specific
-.BR ioctl (2)
-operations are supported for console terminals and virtual consoles.
-Each operation requires a third argument, assumed here to be
-.IR argp .
-.TP
-.B KDGETLED
-Get state of LEDs.
-.I argp
-points to a
-.IR char .
-The lower three bits
-of
-.I *argp
-are set to the state of the LEDs, as follows:
-.TS
-l l l.
-LED_CAP 0x04 caps lock led
-LED_NUM 0x02 num lock led
-LED_SCR 0x01 scroll lock led
-.TE
-.TP
-.B KDSETLED
-Set the LEDs.
-The LEDs are set to correspond to the lower three bits of the
-unsigned long integer in
-.IR argp .
-However, if a higher order bit is set,
-the LEDs revert to normal: displaying the state of the
-keyboard functions of caps lock, num lock, and scroll lock.
-.P
-Before Linux 1.1.54, the LEDs just reflected the state of the corresponding
-keyboard flags, and KDGETLED/KDSETLED would also change the keyboard
-flags.
-Since Linux 1.1.54 the LEDs can be made to display arbitrary
-information, but by default they display the keyboard flags.
-The following two ioctls are used to access the keyboard flags.
-.TP
-.B KDGKBLED
-Get keyboard flags CapsLock, NumLock, ScrollLock (not lights).
-.I argp
-points to a char which is set to the flag state.
-The low order three bits (mask 0x7) get the current flag state,
-and the low order bits of the next nibble (mask 0x70) get
-the default flag state.
-(Since Linux 1.1.54.)
-.TP
-.B KDSKBLED
-Set keyboard flags CapsLock, NumLock, ScrollLock (not lights).
-.I argp
-is an unsigned long integer that has the desired flag state.
-The low order three bits (mask 0x7) have the flag state,
-and the low order bits of the next nibble (mask 0x70) have
-the default flag state.
-(Since Linux 1.1.54.)
-.TP
-.B KDGKBTYPE
-Get keyboard type.
-This returns the value KB_101, defined as 0x02.
-.TP
-.B KDADDIO
-Add I/O port as valid.
-Equivalent to
-.IR ioperm(arg,1,1) .
-.TP
-.B KDDELIO
-Delete I/O port as valid.
-Equivalent to
-.IR ioperm(arg,1,0) .
-.TP
-.B KDENABIO
-Enable I/O to video board.
-Equivalent to
-.IR "ioperm(0x3b4, 0x3df\-0x3b4+1, 1)" .
-.TP
-.B KDDISABIO
-Disable I/O to video board.
-Equivalent to
-.IR "ioperm(0x3b4, 0x3df\-0x3b4+1, 0)" .
-.TP
-.B KDSETMODE
-Set text/graphics mode.
-.I argp
-is an unsigned integer containing one of:
-.TS
-l l.
-KD_TEXT 0x00
-KD_GRAPHICS 0x01
-.TE
-.TP
-.B KDGETMODE
-Get text/graphics mode.
-.I argp
-points to an
-.I int
-which is set to one
-of the values shown above for
-.BR KDSETMODE .
-.TP
-.B KDMKTONE
-Generate tone of specified length.
-The lower 16 bits of the unsigned long integer in
-.I argp
-specify the period in clock cycles,
-and the upper 16 bits give the duration in msec.
-If the duration is zero, the sound is turned off.
-Control returns immediately.
-For example,
-.I argp
-= (125<<16) + 0x637 would specify
-the beep normally associated with a ctrl-G.
-(Thus since Linux 0.99pl1; broken in Linux 2.1.49-50.)
-.TP
-.B KIOCSOUND
-Start or stop sound generation.
-The lower 16 bits of
-.I argp
-specify the period in clock cycles
-(that is,
-.I argp
-= 1193180/frequency).
-.I argp
-= 0 turns sound off.
-In either case, control returns immediately.
-.TP
-.B GIO_CMAP
-Get the current default color map from kernel.
-.I argp
-points to
-a 48-byte array.
-(Since Linux 1.3.3.)
-.TP
-.B PIO_CMAP
-Change the default text-mode color map.
-.I argp
-points to a
-48-byte array which contains, in order, the Red, Green, and Blue
-values for the 16 available screen colors: 0 is off, and 255 is full
-intensity.
-The default colors are, in order: black, dark red, dark
-green, brown, dark blue, dark purple, dark cyan, light grey, dark
-grey, bright red, bright green, yellow, bright blue, bright purple,
-bright cyan, and white.
-(Since Linux 1.3.3.)
-.TP
-.B GIO_FONT
-Gets 256-character screen font in expanded form.
-.I argp
-points to an 8192-byte array.
-Fails with error code
-.B EINVAL
-if the
-currently loaded font is a 512-character font, or if the console is
-not in text mode.
-.TP
-.B GIO_FONTX
-Gets screen font and associated information.
-.I argp
-points to a
-.I "struct consolefontdesc"
-(see
-.BR PIO_FONTX ).
-On call, the
-.I charcount
-field should be set to the maximum number of
-characters that would fit in the buffer pointed to by
-.IR chardata .
-On return, the
-.I charcount
-and
-.I charheight
-are filled with
-the respective data for the currently loaded font, and the
-.I chardata
-array contains the font data if the initial value of
-.I charcount
-indicated enough space was available; otherwise the
-buffer is untouched and
-.I errno
-is set to
-.BR ENOMEM .
-(Since Linux 1.3.1.)
-.TP
-.B PIO_FONT
-Sets 256-character screen font.
-Load font into the EGA/VGA character
-generator.
-.I argp
-points to an 8192-byte map, with 32 bytes per
-character.
-Only the first
-.I N
-of them are used for an 8x\fIN\fP font
-(0 <
-.I N
-<= 32).
-This call also invalidates the Unicode mapping.
-.TP
-.B PIO_FONTX
-Sets screen font and associated rendering information.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct consolefontdesc {
- unsigned short charcount; /* characters in font
- (256 or 512) */
- unsigned short charheight; /* scan lines per
- character (1\-32) */
- char *chardata; /* font data in
- expanded form */
-};
-.EE
-.in
-.IP
-If necessary, the screen will be appropriately resized, and
-.B SIGWINCH
-sent to the appropriate processes.
-This call also invalidates the Unicode mapping.
-(Since Linux 1.3.1.)
-.TP
-.B PIO_FONTRESET
-Resets the screen font, size, and Unicode mapping to the bootup
-defaults.
-.I argp
-is unused, but should be set to NULL to
-ensure compatibility with future versions of Linux.
-(Since Linux 1.3.28.)
-.TP
-.B GIO_SCRNMAP
-Get screen mapping from kernel.
-.I argp
-points to an area of size
-E_TABSZ, which is loaded with the font positions used to display each
-character.
-This call is likely to return useless information if the
-currently loaded font is more than 256 characters.
-.TP
-.B GIO_UNISCRNMAP
-Get full Unicode screen mapping from kernel.
-.I argp
-points to an
-area of size
-.IR "E_TABSZ*sizeof(unsigned short)" ,
-which is loaded with the
-Unicodes each character represents.
-A special set of Unicodes,
-starting at U+F000, are used to represent "direct to font" mappings.
-(Since Linux 1.3.1.)
-.TP
-.B PIO_SCRNMAP
-Loads the "user definable" (fourth) table in the kernel which maps
-bytes into console screen symbols.
-.I argp
-points to an area of
-size E_TABSZ.
-.TP
-.B PIO_UNISCRNMAP
-Loads the "user definable" (fourth) table in the kernel which maps
-bytes into Unicodes, which are then translated into screen symbols
-according to the currently loaded Unicode-to-font map.
-Special Unicodes starting at U+F000 can be used to map directly to the font
-symbols.
-(Since Linux 1.3.1.)
-.TP
-.B GIO_UNIMAP
-Get Unicode-to-font mapping from kernel.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct unimapdesc {
- unsigned short entry_ct;
- struct unipair *entries;
-};
-.EE
-.in
-.IP
-where
-.I entries
-points to an array of
-.IP
-.in +4n
-.EX
-struct unipair {
- unsigned short unicode;
- unsigned short fontpos;
-};
-.EE
-.in
-.IP
-(Since Linux 1.1.92.)
-.TP
-.B PIO_UNIMAP
-Put Unicode-to-font mapping in kernel.
-.I argp
-points to a
-.IR "struct unimapdesc" .
-(Since Linux 1.1.92.)
-.TP
-.B PIO_UNIMAPCLR
-Clear table, possibly advise hash algorithm.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct unimapinit {
- unsigned short advised_hashsize; /* 0 if no opinion */
- unsigned short advised_hashstep; /* 0 if no opinion */
- unsigned short advised_hashlevel; /* 0 if no opinion */
-};
-.EE
-.in
-.IP
-(Since Linux 1.1.92.)
-.TP
-.B KDGKBMODE
-Gets current keyboard mode.
-.I argp
-points to a
-.I long
-which is set to one
-of these:
-.TS
-l l.
-K_RAW 0x00 /* Raw (scancode) mode */
-K_XLATE 0x01 /* Translate keycodes using keymap */
-K_MEDIUMRAW 0x02 /* Medium raw (scancode) mode */
-K_UNICODE 0x03 /* Unicode mode */
-K_OFF 0x04 /* Disabled mode; since Linux 2.6.39 */
-.\" K_OFF: commit 9fc3de9c83565fcaa23df74c2fc414bb6e7efb0a
-.TE
-.TP
-.B KDSKBMODE
-Sets current keyboard mode.
-.I argp
-is a
-.I long
-equal to one of the values shown for
-.BR KDGKBMODE .
-.TP
-.B KDGKBMETA
-Gets meta key handling mode.
-.I argp
-points to a
-.I long
-which is
-set to one of these:
-.TS
-l l l.
-K_METABIT 0x03 set high order bit
-K_ESCPREFIX 0x04 escape prefix
-.TE
-.TP
-.B KDSKBMETA
-Sets meta key handling mode.
-.I argp
-is a
-.I long
-equal to one of the values shown above for
-.BR KDGKBMETA .
-.TP
-.B KDGKBENT
-Gets one entry in key translation table (keycode to action code).
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct kbentry {
- unsigned char kb_table;
- unsigned char kb_index;
- unsigned short kb_value;
-};
-.EE
-.in
-.IP
-with the first two members filled in:
-.I kb_table
-selects the key table (0 <=
-.I kb_table
-< MAX_NR_KEYMAPS),
-and
-.I kb_index
-is the keycode (0 <=
-.I kb_index
-< NR_KEYS).
-.I kb_value
-is set to the corresponding action code,
-or K_HOLE if there is no such key,
-or K_NOSUCHMAP if
-.I kb_table
-is invalid.
-.TP
-.B KDSKBENT
-Sets one entry in translation table.
-.I argp
-points to a
-.IR "struct kbentry" .
-.TP
-.B KDGKBSENT
-Gets one function key string.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct kbsentry {
- unsigned char kb_func;
- unsigned char kb_string[512];
-};
-.EE
-.in
-.IP
-.I kb_string
-is set to the (null-terminated) string corresponding to
-the
-.IR kb_func th
-function key action code.
-.TP
-.B KDSKBSENT
-Sets one function key string entry.
-.I argp
-points to a
-.IR "struct kbsentry" .
-.TP
-.B KDGKBDIACR
-Read kernel accent table.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct kbdiacrs {
- unsigned int kb_cnt;
- struct kbdiacr kbdiacr[256];
-};
-.EE
-.in
-.IP
-where
-.I kb_cnt
-is the number of entries in the array, each of which
-is a
-.IP
-.in +4n
-.EX
-struct kbdiacr {
- unsigned char diacr;
- unsigned char base;
- unsigned char result;
-};
-.EE
-.in
-.TP
-.B KDGETKEYCODE
-Read kernel keycode table entry (scan code to keycode).
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct kbkeycode {
- unsigned int scancode;
- unsigned int keycode;
-};
-.EE
-.in
-.IP
-.I keycode
-is set to correspond to the given
-.IR scancode .
-(89 <=
-.I scancode
-<= 255 only.
-For 1 <=
-.I scancode
-<= 88,
-.IR keycode == scancode .)
-(Since Linux 1.1.63.)
-.TP
-.B KDSETKEYCODE
-Write kernel keycode table entry.
-.I argp
-points to a
-.IR "struct kbkeycode" .
-(Since Linux 1.1.63.)
-.TP
-.B KDSIGACCEPT
-The calling process indicates its willingness to accept the signal
-.I argp
-when it is generated by pressing an appropriate key combination.
-(1 <=
-.I argp
-<= NSIG).
-(See
-.IR spawn_console ()
-in
-.IR linux/drivers/char/keyboard.c .)
-.TP
-.B VT_OPENQRY
-Returns the first available (non-opened) console.
-.I argp
-points to an
-.I int
-which is set to the
-number of the vt (1 <=
-.I *argp
-<= MAX_NR_CONSOLES).
-.TP
-.B VT_GETMODE
-Get mode of active vt.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct vt_mode {
- char mode; /* vt mode */
- char waitv; /* if set, hang on writes if not active */
- short relsig; /* signal to raise on release op */
- short acqsig; /* signal to raise on acquisition */
- short frsig; /* unused (set to 0) */
-};
-.EE
-.in
-.IP
-which is set to the mode of the active vt.
-.I mode
-is set to one of these values:
-.TS
-l l.
-VT_AUTO auto vt switching
-VT_PROCESS process controls switching
-VT_ACKACQ acknowledge switch
-.TE
-.TP
-.B VT_SETMODE
-Set mode of active vt.
-.I argp
-points to a
-.IR "struct vt_mode" .
-.TP
-.B VT_GETSTATE
-Get global vt state info.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct vt_stat {
- unsigned short v_active; /* active vt */
- unsigned short v_signal; /* signal to send */
- unsigned short v_state; /* vt bit mask */
-};
-.EE
-.in
-.IP
-For each vt in use, the corresponding bit in the
-.I v_state
-member is set.
-(Linux 1.0 through Linux 1.1.92.)
-.TP
-.B VT_RELDISP
-Release a display.
-.TP
-.B VT_ACTIVATE
-Switch to vt
-.I argp
-(1 <=
-.I argp
-<= MAX_NR_CONSOLES).
-.TP
-.B VT_WAITACTIVE
-Wait until vt
-.I argp
-has been activated.
-.TP
-.B VT_DISALLOCATE
-Deallocate the memory associated with vt
-.IR argp .
-(Since Linux 1.1.54.)
-.TP
-.B VT_RESIZE
-Set the kernel's idea of screensize.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct vt_sizes {
- unsigned short v_rows; /* # rows */
- unsigned short v_cols; /* # columns */
- unsigned short v_scrollsize; /* no longer used */
-};
-.EE
-.in
-.IP
-Note that this does not change the videomode.
-See
-.BR resizecons (8).
-(Since Linux 1.1.54.)
-.TP
-.B VT_RESIZEX
-Set the kernel's idea of various screen parameters.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct vt_consize {
- unsigned short v_rows; /* number of rows */
- unsigned short v_cols; /* number of columns */
- unsigned short v_vlin; /* number of pixel rows
- on screen */
- unsigned short v_clin; /* number of pixel rows
- per character */
- unsigned short v_vcol; /* number of pixel columns
- on screen */
- unsigned short v_ccol; /* number of pixel columns
- per character */
-};
-.EE
-.in
-.IP
-Any parameter may be set to zero, indicating "no change", but if
-multiple parameters are set, they must be self-consistent.
-Note that this does not change the videomode.
-See
-.BR resizecons (8).
-(Since Linux 1.3.3.)
-.P
-The action of the following ioctls depends on the first byte in the struct
-pointed to by
-.IR argp ,
-referred to here as the
-.IR subcode .
-These are legal only for the superuser or the owner of the current terminal.
-Symbolic
-.IR subcode s
-are available in
-.I <linux/tiocl.h>
-since
-Linux 2.5.71.
-.TP
-.BR TIOCLINUX ", " subcode = 0
-Dump the screen.
-Disappeared in Linux 1.1.92.
-(With Linux 1.1.92 or later, read from
-.I /dev/vcsN
-or
-.I /dev/vcsaN
-instead.)
-.TP
-.BR TIOCLINUX ", " subcode = 1
-Get task information.
-Disappeared in Linux 1.1.92.
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_SETSEL
-Set selection.
-.I argp
-points to a
-.IP
-.in +4n
-.EX
-struct {
- char subcode;
- short xs, ys, xe, ye;
- short sel_mode;
-};
-.EE
-.in
-.IP
-.I xs
-and
-.I ys
-are the starting column and row.
-.I xe
-and
-.I ye
-are the ending
-column and row.
-(Upper left corner is row=column=1.)
-.I sel_mode
-is 0 for character-by-character selection,
-1 for word-by-word selection,
-or 2 for line-by-line selection.
-The indicated screen characters are highlighted and saved
-in a kernel buffer.
-.IP
-Since Linux 6.7, using this subcode requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_PASTESEL
-Paste selection.
-The characters in the selection buffer are
-written to
-.IR fd .
-.IP
-Since Linux 6.7, using this subcode requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_UNBLANKSCREEN
-Unblank the screen.
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_SELLOADLUT
-Sets contents of a 256-bit look up table defining characters in a "word",
-for word-by-word selection.
-(Since Linux 1.1.32.)
-.IP
-Since Linux 6.7, using this subcode requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_GETSHIFTSTATE
-.I argp
-points to a char which is set to the value of the kernel
-variable
-.IR shift_state .
-(Since Linux 1.1.32.)
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_GETMOUSEREPORTING
-.I argp
-points to a char which is set to the value of the kernel
-variable
-.IR report_mouse .
-(Since Linux 1.1.33.)
-.TP
-.BR TIOCLINUX ", " subcode = 8
-Dump screen width and height, cursor position, and all the
-character-attribute pairs.
-(Linux 1.1.67 through Linux 1.1.91 only.
-With Linux 1.1.92 or later, read from
-.I /dev/vcsa*
-instead.)
-.TP
-.BR TIOCLINUX ", " subcode = 9
-Restore screen width and height, cursor position, and all the
-character-attribute pairs.
-(Linux 1.1.67 through Linux 1.1.91 only.
-With Linux 1.1.92 or later, write to
-.I /dev/vcsa*
-instead.)
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_SETVESABLANK
-Handles the Power Saving
-feature of the new generation of monitors.
-VESA screen blanking mode is set to
-.IR argp[1] ,
-which governs what
-screen blanking does:
-.RS
-.TP
-.B 0
-Screen blanking is disabled.
-.TP
-.B 1
-The current video adapter
-register settings are saved, then the controller is programmed to turn off
-the vertical synchronization pulses.
-This puts the monitor into "standby" mode.
-If your monitor has an Off_Mode timer, then
-it will eventually power down by itself.
-.TP
-.B 2
-The current settings are saved, then both the vertical and horizontal
-synchronization pulses are turned off.
-This puts the monitor into "off" mode.
-If your monitor has no Off_Mode timer,
-or if you want your monitor to power down immediately when the
-blank_timer times out, then you choose this option.
-.RI ( Caution:
-Powering down frequently will damage the monitor.)
-(Since Linux 1.1.76.)
-.RE
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_SETKMSGREDIRECT
-Change target of kernel messages ("console"):
-by default, and if this is set to
-.BR 0 ,
-messages are written to the currently active VT.
-The VT to write to is a single byte following
-.BR subcode .
-(Since Linux 2.5.36.)
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_GETFGCONSOLE
-Returns the number of VT currently in foreground.
-(Since Linux 2.5.36.)
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_SCROLLCONSOLE
-Scroll the foreground VT by the specified amount of
-.I lines
-down,
-or half the screen if
-.BR 0 .
-.I lines
-is *(((int32_t *)&subcode) + 1).
-(Since Linux 2.5.67.)
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_BLANKSCREEN
-Blank the foreground VT, ignoring "pokes" (typing):
-can only be unblanked explicitly (by switching VTs, to text mode, etc.).
-(Since Linux 2.5.71.)
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_BLANKEDSCREEN
-Returns the number of VT currently blanked,
-.B 0
-if none.
-(Since Linux 2.5.71.)
-.TP
-.BR TIOCLINUX ", " subcode = 16
-Never used.
-.TP
-.BR TIOCLINUX ", " subcode = TIOCL_GETKMSGREDIRECT
-Returns target of kernel messages.
-(Since Linux 2.6.17.)
-.SH RETURN VALUE
-On success, 0 is returned (except where indicated).
-On failure, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-The file descriptor is invalid.
-.TP
-.B EINVAL
-The file descriptor or
-.I argp
-is invalid.
-.TP
-.B ENOTTY
-The file descriptor is not associated with a character special device,
-or the specified operation does not apply to it.
-.TP
-.B EPERM
-Insufficient permission.
-.SH NOTES
-.BR Warning :
-Do not regard this man page as documentation of the Linux console ioctls.
-This is provided for the curious only, as an alternative to reading the
-source.
-Ioctls are undocumented Linux internals, liable to be changed
-without warning.
-(And indeed, this page more or less describes the
-situation as of kernel version 1.1.94;
-there are many minor and not-so-minor
-differences with earlier versions.)
-.P
-Very often, ioctls are introduced for communication between the
-kernel and one particular well-known program (fdisk, hdparm, setserial,
-tunelp, loadkeys, selection, setfont, etc.), and their behavior will be
-changed when required by this particular program.
-.P
-Programs using these ioctls will not be portable to other versions
-of UNIX, will not work on older versions of Linux, and will not work
-on future versions of Linux.
-.P
-Use POSIX functions.
-.SH SEE ALSO
-.BR dumpkeys (1),
-.BR kbd_mode (1),
-.BR loadkeys (1),
-.BR mknod (1),
-.BR setleds (1),
-.BR setmetamode (1),
-.BR execve (2),
-.BR fcntl (2),
-.BR ioctl_tty (2),
-.BR ioperm (2),
-.BR termios (3),
-.BR console_codes (4),
-.BR mt (4),
-.BR sd (4),
-.BR tty (4),
-.BR ttyS (4),
-.BR vcs (4),
-.BR vcsa (4),
-.BR charsets (7),
-.BR mapscrn (8),
-.BR resizecons (8),
-.BR setfont (8)
-.P
-.IR /usr/include/linux/kd.h ,
-.I /usr/include/linux/vt.h
diff --git a/man2/ioctl_fat.2 b/man2/ioctl_fat.2
deleted file mode 100644
index 294acc6ff..000000000
--- a/man2/ioctl_fat.2
+++ /dev/null
@@ -1,489 +0,0 @@
-.\" Copyright (C) 2014, Heinrich Schuchardt <xypron.glpk@gmx.de>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.TH ioctl_fat 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_fat \- manipulating the FAT filesystem
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/msdos_fs.h>" " /* Definition of [" V ] FAT_* " and"
-.BR " ATTR_* " "constants */"
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " fd ", FAT_IOCTL_GET_ATTRIBUTES, uint32_t *" attr );
-.BI "int ioctl(int " fd ", FAT_IOCTL_SET_ATTRIBUTES, uint32_t *" attr );
-.BI "int ioctl(int " fd ", FAT_IOCTL_GET_VOLUME_ID, uint32_t *" id );
-.BI "int ioctl(int " fd ", VFAT_IOCTL_READDIR_BOTH,"
-.BI " struct __fat_dirent " entry [2]);
-.BI "int ioctl(int " fd ", VFAT_IOCTL_READDIR_SHORT,"
-.BI " struct __fat_dirent " entry [2]);
-.fi
-.SH DESCRIPTION
-The
-.BR ioctl (2)
-system call can be used to read and write metadata of FAT filesystems that
-are not accessible using other system calls.
-.SS Reading and setting file attributes
-Files and directories in the FAT filesystem possess an attribute bit mask that
-can be read with
-.B FAT_IOCTL_GET_ATTRIBUTES
-and written with
-.BR FAT_IOCTL_SET_ATTRIBUTES .
-.P
-The
-.I fd
-argument contains a file descriptor for a file or directory.
-It is sufficient to create the file descriptor by calling
-.BR open (2)
-with the
-.B O_RDONLY
-flag.
-.P
-The
-.I attr
-argument contains a pointer to a bit mask.
-The bits of the bit mask are:
-.TP
-.B ATTR_RO
-This bit specifies that the file or directory is read-only.
-.TP
-.B ATTR_HIDDEN
-This bit specifies that the file or directory is hidden.
-.TP
-.B ATTR_SYS
-This bit specifies that the file is a system file.
-.TP
-.B ATTR_VOLUME
-This bit specifies that the file is a volume label.
-This attribute is read-only.
-.TP
-.B ATTR_DIR
-This bit specifies that this is a directory.
-This attribute is read-only.
-.TP
-.B ATTR_ARCH
-This bit indicates that this file or directory should be archived.
-It is set when a file is created or modified.
-It is reset by an archiving system.
-.P
-The zero value
-.B ATTR_NONE
-can be used to indicate that no attribute bit is set.
-.SS Reading the volume ID
-FAT filesystems are identified by a volume ID.
-The volume ID can be read with
-.BR FAT_IOCTL_GET_VOLUME_ID .
-.P
-The
-.I fd
-argument can be a file descriptor for any file or directory of the
-filesystem.
-It is sufficient to create the file descriptor by calling
-.BR open (2)
-with the
-.B O_RDONLY
-flag.
-.P
-The
-.I id
-argument is a pointer to the field that will be filled with the volume ID.
-Typically the volume ID is displayed to the user as a group of two
-16-bit fields:
-.P
-.in +4n
-.EX
-printf("Volume ID %04x\-%04x\en", id >> 16, id & 0xFFFF);
-.EE
-.in
-.SS Reading short filenames of a directory
-A file or directory on a FAT filesystem always has a short filename
-consisting of up to 8 capital letters, optionally followed by a period
-and up to 3 capital letters for the file extension.
-If the actual filename does not fit into this scheme, it is stored
-as a long filename of up to 255 UTF-16 characters.
-.P
-The short filenames in a directory can be read with
-.BR VFAT_IOCTL_READDIR_SHORT .
-.B VFAT_IOCTL_READDIR_BOTH
-reads both the short and the long filenames.
-.P
-The
-.I fd
-argument must be a file descriptor for a directory.
-It is sufficient to create the file descriptor by calling
-.BR open (2)
-with the
-.B O_RDONLY
-flag.
-The file descriptor can be used only once to iterate over the directory
-entries by calling
-.BR ioctl (2)
-repeatedly.
-.P
-The
-.I entry
-argument is a two-element array of the following structures:
-.P
-.in +4n
-.EX
-struct __fat_dirent {
- long d_ino;
- __kernel_off_t d_off;
- unsigned short d_reclen;
- char d_name[256];
-};
-.EE
-.in
-.P
-The first entry in the array is for the short filename.
-The second entry is for the long filename.
-.P
-The
-.I d_ino
-and
-.I d_off
-fields are filled only for long filenames.
-The
-.I d_ino
-field holds the inode number of the directory.
-The
-.I d_off
-field holds the offset of the file entry in the directory.
-As these values are not available for short filenames, the user code should
-simply ignore them.
-.P
-The field
-.I d_reclen
-contains the length of the filename in the field
-.IR d_name .
-To keep backward compatibility, a length of 0 for the short filename signals
-that the end of the directory has been reached.
-However, the preferred method for detecting the end of the directory
-is to test the
-.BR ioctl (2)
-return value.
-If no long filename exists, field
-.I d_reclen
-is set to 0 and
-.I d_name
-is a character string of length 0 for the long filename.
-.SH RETURN VALUE
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-For
-.B VFAT_IOCTL_READDIR_BOTH
-and
-.B VFAT_IOCTL_READDIR_SHORT
-a return value of 1 signals that a new directory entry has been read and
-a return value of 0 signals that the end of the directory has been reached.
-.SH ERRORS
-.TP
-.B ENOENT
-This error is returned by
-.B VFAT_IOCTL_READDIR_BOTH
-and
-.B VFAT_IOCTL_READDIR_SHORT
-if the file descriptor
-.I fd
-refers to a removed, but still open directory.
-.TP
-.B ENOTDIR
-This error is returned by
-.B VFAT_IOCTL_READDIR_BOTH
-and
-.B VFAT_IOCTL_READDIR_SHORT
-if the file descriptor
-.I fd
-does not refer to a directory.
-.TP
-.B ENOTTY
-The file descriptor
-.I fd
-does not refer to an object in a FAT filesystem.
-.P
-For further error values, see
-.BR ioctl (2).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.B VFAT_IOCTL_READDIR_BOTH
-.TQ
-.B VFAT_IOCTL_READDIR_SHORT
-Linux 2.0.
-.TP
-.B FAT_IOCTL_GET_ATTRIBUTES
-.TQ
-.B FAT_IOCTL_SET_ATTRIBUTES
-.\" just before we got Git history
-Linux 2.6.12.
-.TP
-.B FAT_IOCTL_GET_VOLUME_ID
-Linux 3.11.
-.\" commit 6e5b93ee55d401f1619092fb675b57c28c9ed7ec
-.SH EXAMPLES
-.SS Toggling the archive flag
-The following program demonstrates the usage of
-.BR ioctl (2)
-to manipulate file attributes.
-The program reads and displays the archive attribute of a file.
-After inverting the value of the attribute,
-the program reads and displays the attribute again.
-.P
-The following was recorded when applying the program for the file
-.IR /mnt/user/foo :
-.P
-.in +4n
-.EX
-# ./toggle_fat_archive_flag /mnt/user/foo
-Archive flag is set
-Toggling archive flag
-Archive flag is not set
-.EE
-.in
-.SS Program source (toggle_fat_archive_flag.c)
-\&
-.\" SRC BEGIN (toggle_fat_archive_flag.c)
-.EX
-#include <fcntl.h>
-#include <linux/msdos_fs.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-\&
-/*
- * Read file attributes of a file on a FAT filesystem.
- * Output the state of the archive flag.
- */
-static uint32_t
-readattr(int fd)
-{
- int ret;
- uint32_t attr;
-\&
- ret = ioctl(fd, FAT_IOCTL_GET_ATTRIBUTES, &attr);
- if (ret == \-1) {
- perror("ioctl");
- exit(EXIT_FAILURE);
- }
-\&
- if (attr & ATTR_ARCH)
- printf("Archive flag is set\en");
- else
- printf("Archive flag is not set\en");
-\&
- return attr;
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- int ret;
- uint32_t attr;
-\&
- if (argc != 2) {
- printf("Usage: %s FILENAME\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- fd = open(argv[1], O_RDONLY);
- if (fd == \-1) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Read and display the FAT file attributes.
- */
- attr = readattr(fd);
-\&
- /*
- * Invert archive attribute.
- */
- printf("Toggling archive flag\en");
- attr \[ha]= ATTR_ARCH;
-\&
- /*
- * Write the changed FAT file attributes.
- */
- ret = ioctl(fd, FAT_IOCTL_SET_ATTRIBUTES, &attr);
- if (ret == \-1) {
- perror("ioctl");
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Read and display the FAT file attributes.
- */
- readattr(fd);
-\&
- close(fd);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SS Reading the volume ID
-The following program demonstrates the use of
-.BR ioctl (2)
-to display the volume ID of a FAT filesystem.
-.P
-The following output was recorded when applying the program for
-directory
-.IR /mnt/user :
-.P
-.in +4n
-.EX
-$ ./display_fat_volume_id /mnt/user
-Volume ID 6443\-6241
-.EE
-.in
-.SS Program source (display_fat_volume_id.c)
-\&
-.\" SRC BEGIN (display_fat_volume_id.c)
-.EX
-#include <fcntl.h>
-#include <linux/msdos_fs.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- int ret;
- uint32_t id;
-\&
- if (argc != 2) {
- printf("Usage: %s FILENAME\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- fd = open(argv[1], O_RDONLY);
- if (fd == \-1) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Read volume ID.
- */
- ret = ioctl(fd, FAT_IOCTL_GET_VOLUME_ID, &id);
- if (ret == \-1) {
- perror("ioctl");
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Format the output as two groups of 16 bits each.
- */
- printf("Volume ID %04x\-%04x\en", id >> 16, id & 0xFFFF);
-\&
- close(fd);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SS Listing a directory
-The following program demonstrates the use of
-.BR ioctl (2)
-to list a directory.
-.P
-The following was recorded when applying the program to the directory
-.IR /mnt/user :
-.P
-.in +4n
-.EX
-$ \fB./fat_dir /mnt/user\fP
-\&. \-> \[aq]\[aq]
-\&.. \-> \[aq]\[aq]
-ALONGF\[ti]1.TXT \-> \[aq]a long filename.txt\[aq]
-UPPER.TXT \-> \[aq]\[aq]
-LOWER.TXT \-> \[aq]lower.txt\[aq]
-.EE
-.in
-.\"
-.SS Program source
-.in +4n
-.\" SRC BEGIN (ioctl_fat.c)
-.EX
-#include <fcntl.h>
-#include <linux/msdos_fs.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- int ret;
- struct __fat_dirent entry[2];
-\&
- if (argc != 2) {
- printf("Usage: %s DIRECTORY\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Open file descriptor for the directory.
- */
- fd = open(argv[1], O_RDONLY | O_DIRECTORY);
- if (fd == \-1) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-\&
- for (;;) {
-\&
- /*
- * Read next directory entry.
- */
- ret = ioctl(fd, VFAT_IOCTL_READDIR_BOTH, entry);
-\&
- /*
- * If an error occurs, the return value is \-1.
- * If the end of the directory list has been reached,
- * the return value is 0.
- * For backward compatibility the end of the directory
- * list is also signaled by d_reclen == 0.
- */
- if (ret < 1)
- break;
-\&
- /*
- * Write both the short name and the long name.
- */
- printf("%s \-> \[aq]%s\[aq]\en", entry[0].d_name, entry[1].d_name);
- }
-\&
- if (ret == \-1) {
- perror("VFAT_IOCTL_READDIR_BOTH");
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Close the file descriptor.
- */
- close(fd);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.in
-.SH SEE ALSO
-.BR ioctl (2)
diff --git a/man2/ioctl_ficlone.2 b/man2/ioctl_ficlone.2
deleted file mode 100644
index 19bb3482d..000000000
--- a/man2/ioctl_ficlone.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/ioctl_ficlonerange.2
diff --git a/man2/ioctl_ficlonerange.2 b/man2/ioctl_ficlonerange.2
deleted file mode 100644
index 63a430dd4..000000000
--- a/man2/ioctl_ficlonerange.2
+++ /dev/null
@@ -1,129 +0,0 @@
-.\" Copyright (c) 2016, Oracle. All rights reserved.
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.TH ioctl_ficlonerange 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_ficlonerange, ioctl_ficlone \-
-share some of the data of one file with another file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/fs.h>" " /* Definition of " FICLONE* " constants */"
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " dest_fd ", FICLONERANGE, struct file_clone_range *" arg );
-.BI "int ioctl(int " dest_fd ", FICLONE, int " src_fd );
-.fi
-.SH DESCRIPTION
-If a filesystem supports files sharing physical storage between multiple
-files ("reflink"), this
-.BR ioctl (2)
-operation can be used to make some of the data in the
-.I src_fd
-file appear in the
-.I dest_fd
-file by sharing the underlying storage, which is faster than making a separate
-physical copy of the data.
-Both files must reside within the same filesystem.
-If a file write should occur to a shared region,
-the filesystem must ensure that the changes remain private to the file being
-written.
-This behavior is commonly referred to as "copy on write".
-.P
-This ioctl reflinks up to
-.I src_length
-bytes from file descriptor
-.I src_fd
-at offset
-.I src_offset
-into the file
-.I dest_fd
-at offset
-.IR dest_offset ,
-provided that both are files.
-If
-.I src_length
-is zero, the ioctl reflinks to the end of the source file.
-This information is conveyed in a structure of
-the following form:
-.P
-.in +4n
-.EX
-struct file_clone_range {
- __s64 src_fd;
- __u64 src_offset;
- __u64 src_length;
- __u64 dest_offset;
-};
-.EE
-.in
-.P
-Clones are atomic with regard to concurrent writes, so no locks need to be
-taken to obtain a consistent cloned copy.
-.P
-The
-.B FICLONE
-ioctl clones entire files.
-.SH RETURN VALUE
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Error codes can be one of, but are not limited to, the following:
-.TP
-.B EBADF
-.I src_fd
-is not open for reading;
-.I dest_fd
-is not open for writing or is open for append-only writes;
-or the filesystem which
-.I src_fd
-resides on does not support reflink.
-.TP
-.B EINVAL
-The filesystem does not support reflinking the ranges of the given files.
-This error can also appear if either file descriptor represents
-a device, FIFO, or socket.
-Disk filesystems generally require the offset and length arguments
-to be aligned to the fundamental block size.
-XFS and Btrfs do not support
-overlapping reflink ranges in the same file.
-.TP
-.B EISDIR
-One of the files is a directory and the filesystem does not support shared
-regions in directories.
-.TP
-.B EOPNOTSUPP
-This can appear if the filesystem does not support reflinking either file
-descriptor, or if either file descriptor refers to special inodes.
-.TP
-.B EPERM
-.I dest_fd
-is immutable.
-.TP
-.B ETXTBSY
-One of the files is a swap file.
-Swap files cannot share storage.
-.TP
-.B EXDEV
-.IR dest_fd " and " src_fd
-are not on the same mounted filesystem.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 4.5.
-.P
-They were previously known as
-.B BTRFS_IOC_CLONE
-and
-.BR BTRFS_IOC_CLONE_RANGE ,
-and were private to Btrfs.
-.SH NOTES
-Because a copy-on-write operation requires the allocation of new storage, the
-.BR fallocate (2)
-operation may unshare shared blocks to guarantee that subsequent writes will
-not fail because of lack of disk space.
-.SH SEE ALSO
-.BR ioctl (2)
diff --git a/man2/ioctl_fideduperange.2 b/man2/ioctl_fideduperange.2
deleted file mode 100644
index 1942c8fd0..000000000
--- a/man2/ioctl_fideduperange.2
+++ /dev/null
@@ -1,200 +0,0 @@
-.\" Copyright (c) 2016, Oracle. All rights reserved.
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.TH ioctl_fideduperange 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_fideduperange \- share some of the data of one file with another file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/fs.h>" " /* Definition of " FIDEDUPERANGE " and"
-.BR " FILE_DEDUPE_* " "constants */"
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " src_fd ", FIDEDUPERANGE, struct file_dedupe_range *" arg );
-.fi
-.SH DESCRIPTION
-If a filesystem supports files sharing physical storage between multiple
-files, this
-.BR ioctl (2)
-operation can be used to make some of the data in the
-.I src_fd
-file appear in the
-.I dest_fd
-file by sharing the underlying storage if the file data is identical
-("deduplication").
-Both files must reside within the same filesystem.
-This reduces storage consumption by allowing the filesystem
-to store one shared copy of the data.
-If a file write should occur to a shared
-region, the filesystem must ensure that the changes remain private to the file
-being written.
-This behavior is commonly referred to as "copy on write".
-.P
-This ioctl performs the "compare and share if identical" operation on up to
-.I src_length
-bytes from file descriptor
-.I src_fd
-at offset
-.IR src_offset .
-This information is conveyed in a structure of the following form:
-.P
-.in +4n
-.EX
-struct file_dedupe_range {
- __u64 src_offset;
- __u64 src_length;
- __u16 dest_count;
- __u16 reserved1;
- __u32 reserved2;
- struct file_dedupe_range_info info[0];
-};
-.EE
-.in
-.P
-Deduplication is atomic with regard to concurrent writes, so no locks need to
-be taken to obtain a consistent deduplicated copy.
-.P
-The fields
-.IR reserved1 " and " reserved2
-must be zero.
-.P
-Destinations for the deduplication operation are conveyed in the array at the
-end of the structure.
-The number of destinations is given in
-.IR dest_count ,
-and the destination information is conveyed in the following form:
-.P
-.in +4n
-.EX
-struct file_dedupe_range_info {
- __s64 dest_fd;
- __u64 dest_offset;
- __u64 bytes_deduped;
- __s32 status;
- __u32 reserved;
-};
-.EE
-.in
-.P
-Each deduplication operation targets
-.I src_length
-bytes in file descriptor
-.I dest_fd
-at offset
-.IR dest_offset .
-The field
-.I reserved
-must be zero.
-During the call,
-.I src_fd
-must be open for reading and
-.I dest_fd
-must be open for writing.
-The combined size of the struct
-.I file_dedupe_range
-and the struct
-.I file_dedupe_range_info
-array must not exceed the system page size.
-The maximum size of
-.I src_length
-is filesystem dependent and is typically 16\~MiB.
-This limit will be enforced silently by the filesystem.
-By convention, the storage used by
-.I src_fd
-is mapped into
-.I dest_fd
-and the previous contents in
-.I dest_fd
-are freed.
-.P
-Upon successful completion of this ioctl, the number of bytes successfully
-deduplicated is returned in
-.I bytes_deduped
-and a status code for the deduplication operation is returned in
-.IR status .
-If even a single byte in the range does not match, the deduplication
-operation request will be ignored and
-.I status
-set to
-.BR FILE_DEDUPE_RANGE_DIFFERS .
-The
-.I status
-code is set to
-.B FILE_DEDUPE_RANGE_SAME
-for success, a negative error code in case of error, or
-.B FILE_DEDUPE_RANGE_DIFFERS
-if the data did not match.
-.SH RETURN VALUE
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Possible errors include (but are not limited to) the following:
-.TP
-.B EBADF
-.I src_fd
-is not open for reading;
-.I dest_fd
-is not open for writing or is open for append-only writes; or the filesystem
-which
-.I src_fd
-resides on does not support deduplication.
-.TP
-.B EINVAL
-The filesystem does not support deduplicating the ranges of the given files.
-This error can also appear if either file descriptor represents
-a device, FIFO, or socket.
-Disk filesystems generally require the offset and length arguments
-to be aligned to the fundamental block size.
-Neither Btrfs nor XFS supports
-overlapping deduplication ranges in the same file.
-.TP
-.B EISDIR
-One of the files is a directory and the filesystem does not support shared
-regions in directories.
-.TP
-.B ENOMEM
-The kernel was unable to allocate sufficient memory to perform the
-operation or
-.I dest_count
-is so large that the input argument description spans more than a single
-page of memory.
-.TP
-.B EOPNOTSUPP
-This can appear if the filesystem does not support deduplicating either file
-descriptor, or if either file descriptor refers to special inodes.
-.TP
-.B EPERM
-.I dest_fd
-is immutable.
-.TP
-.B ETXTBSY
-One of the files is a swap file.
-Swap files cannot share storage.
-.TP
-.B EXDEV
-.I dest_fd
-and
-.I src_fd
-are not on the same mounted filesystem.
-.SH VERSIONS
-Some filesystems may limit the amount of data that can be deduplicated in a
-single call.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 4.5.
-.P
-It was previously known as
-.B BTRFS_IOC_FILE_EXTENT_SAME
-and was private to Btrfs.
-.SH NOTES
-Because a copy-on-write operation requires the allocation of new storage, the
-.BR fallocate (2)
-operation may unshare shared blocks to guarantee that subsequent writes will
-not fail because of lack of disk space.
-.SH SEE ALSO
-.BR ioctl (2)
diff --git a/man2/ioctl_fslabel.2 b/man2/ioctl_fslabel.2
deleted file mode 100644
index c4a211c4b..000000000
--- a/man2/ioctl_fslabel.2
+++ /dev/null
@@ -1,72 +0,0 @@
-.\" Copyright (c) 2018, Red Hat, Inc. All rights reserved.
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.TH ioctl_fslabel 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_fslabel \- get or set a filesystem label
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/fs.h>" " /* Definition of " *FSLABEL* " constants */"
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " fd ", FS_IOC_GETFSLABEL, char " label [FSLABEL_MAX]);
-.BI "int ioctl(int " fd ", FS_IOC_SETFSLABEL, char " label [FSLABEL_MAX]);
-.fi
-.SH DESCRIPTION
-If a filesystem supports online label manipulation, these
-.BR ioctl (2)
-operations can be used to get or set the filesystem label for the filesystem
-on which
-.I fd
-resides.
-The
-.B FS_IOC_SETFSLABEL
-operation requires privilege
-.RB ( CAP_SYS_ADMIN ).
-.SH RETURN VALUE
-On success zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Possible errors include (but are not limited to) the following:
-.TP
-.B EFAULT
-.I label
-references an inaccessible memory area.
-.TP
-.B EINVAL
-The specified label exceeds the maximum label length for the filesystem.
-.TP
-.B ENOTTY
-This can appear if the filesystem does not support online label manipulation.
-.TP
-.B EPERM
-The calling process does not have sufficient permissions to set the label.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 4.18.
-.P
-They were previously known as
-.B BTRFS_IOC_GET_FSLABEL
-and
-.B BTRFS_IOC_SET_FSLABEL
-and were private to Btrfs.
-.SH NOTES
-The maximum string length for this interface is
-.BR FSLABEL_MAX ,
-including the terminating null byte (\[aq]\\0\[aq]).
-Filesystems have differing maximum label lengths, which may or
-may not include the terminating null.
-The string provided to
-.B FS_IOC_SETFSLABEL
-must always be null-terminated, and the string returned by
-.B FS_IOC_GETFSLABEL
-will always be null-terminated.
-.SH SEE ALSO
-.BR ioctl (2),
-.BR blkid (8)
diff --git a/man2/ioctl_getfsmap.2 b/man2/ioctl_getfsmap.2
deleted file mode 100644
index fa9d91894..000000000
--- a/man2/ioctl_getfsmap.2
+++ /dev/null
@@ -1,351 +0,0 @@
-.\" Copyright (c) 2017, Oracle. All rights reserved.
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.TH ioctl_getfsmap 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_getfsmap \- retrieve the physical layout of the filesystem
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/fsmap.h> " "/* Definition of " FS_IOC_GETFSMAP ,
-.BR " FM?_OF_*" ", and " *FMR_OWN_* " constants */"
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " fd ", FS_IOC_GETFSMAP, struct fsmap_head * " arg );
-.fi
-.SH DESCRIPTION
-This
-.BR ioctl (2)
-operation retrieves physical extent mappings for a filesystem.
-This information can be used to discover which files are mapped to a physical
-block, examine free space, or find known bad blocks, among other things.
-.P
-The sole argument to this operation should be a pointer to a single
-.IR "struct fsmap_head" ":"
-.P
-.in +4n
-.EX
-struct fsmap {
- __u32 fmr_device; /* Device ID */
- __u32 fmr_flags; /* Mapping flags */
- __u64 fmr_physical; /* Device offset of segment */
- __u64 fmr_owner; /* Owner ID */
- __u64 fmr_offset; /* File offset of segment */
- __u64 fmr_length; /* Length of segment */
- __u64 fmr_reserved[3]; /* Must be zero */
-};
-\&
-struct fsmap_head {
- __u32 fmh_iflags; /* Control flags */
- __u32 fmh_oflags; /* Output flags */
- __u32 fmh_count; /* # of entries in array incl. input */
- __u32 fmh_entries; /* # of entries filled in (output) */
- __u64 fmh_reserved[6]; /* Must be zero */
-\&
- struct fsmap fmh_keys[2]; /* Low and high keys for
- the mapping search */
- struct fsmap fmh_recs[]; /* Returned records */
-};
-.EE
-.in
-.P
-The two
-.I fmh_keys
-array elements specify the lowest and highest reverse-mapping
-key for which the application would like physical mapping
-information.
-A reverse mapping key consists of the tuple (device, block, owner, offset).
-The owner and offset fields are part of the key because some filesystems
-support sharing physical blocks between multiple files and
-therefore may return multiple mappings for a given physical block.
-.P
-Filesystem mappings are copied into the
-.I fmh_recs
-array, which immediately follows the header data.
-.\"
-.SS Fields of struct fsmap_head
-The
-.I fmh_iflags
-field is a bit mask passed to the kernel to alter the output.
-No flags are currently defined, so the caller must set this value to zero.
-.P
-The
-.I fmh_oflags
-field is a bit mask of flags set by the kernel concerning the returned mappings.
-If
-.B FMH_OF_DEV_T
-is set, then the
-.I fmr_device
-field represents a
-.I dev_t
-structure containing the major and minor numbers of the block device.
-.P
-The
-.I fmh_count
-field contains the number of elements in the array being passed to the
-kernel.
-If this value is 0,
-.I fmh_entries
-will be set to the number of records that would have been returned had
-the array been large enough;
-no mapping information will be returned.
-.P
-The
-.I fmh_entries
-field contains the number of elements in the
-.I fmh_recs
-array that contain useful information.
-.P
-The
-.I fmh_reserved
-fields must be set to zero.
-.\"
-.SS Keys
-The two key records in
-.I fsmap_head.fmh_keys
-specify the lowest and highest extent records in the keyspace that the caller
-wants returned.
-A filesystem that can share blocks between files likely requires the tuple
-.RI "(" "device" ", " "physical" ", " "owner" ", " "offset" ", " "flags" ")"
-to uniquely index any filesystem mapping record.
-Classic non-sharing filesystems might be able to identify any record with only
-.RI "(" "device" ", " "physical" ", " "flags" ")."
-For example, if the low key is set to (8:0, 36864, 0, 0, 0), the filesystem will
-only return records for extents starting at or above 36\ KiB on disk.
-If the high key is set to (8:0, 1048576, 0, 0, 0),
-only records below 1\ MiB will be returned.
-The format of
-.I fmr_device
-in the keys must match the format of the same field in the output records,
-as defined below.
-By convention, the field
-.I fsmap_head.fmh_keys[0]
-must contain the low key and
-.I fsmap_head.fmh_keys[1]
-must contain the high key for the operation.
-.P
-For convenience, if
-.I fmr_length
-is set in the low key, it will be added to
-.IR fmr_physical " or " fmr_offset
-as appropriate.
-The caller can take advantage of this subtlety to set up subsequent calls
-by copying
-.I fsmap_head.fmh_recs[fsmap_head.fmh_entries \- 1]
-into the low key.
-The function
-.I fsmap_advance
-(defined in
-.IR linux/fsmap.h )
-provides this functionality.
-.\"
-.SS Fields of struct fsmap
-The
-.I fmr_device
-field uniquely identifies the underlying storage device.
-If the
-.B FMH_OF_DEV_T
-flag is set in the header's
-.I fmh_oflags
-field, this field contains a
-.I dev_t
-from which major and minor numbers can be extracted.
-If the flag is not set, this field contains a value that must be unique
-for each unique storage device.
-.P
-The
-.I fmr_physical
-field contains the disk address of the extent in bytes.
-.P
-The
-.I fmr_owner
-field contains the owner of the extent.
-This is an inode number unless
-.B FMR_OF_SPECIAL_OWNER
-is set in the
-.I fmr_flags
-field, in which case the value is determined by the filesystem.
-See the section below about owner values for more details.
-.P
-The
-.I fmr_offset
-field contains the logical address in the mapping record in bytes.
-This field has no meaning if the
-.BR FMR_OF_SPECIAL_OWNER " or " FMR_OF_EXTENT_MAP
-flags are set in
-.IR fmr_flags "."
-.P
-The
-.I fmr_length
-field contains the length of the extent in bytes.
-.P
-The
-.I fmr_flags
-field is a bit mask of extent state flags.
-The bits are:
-.RS 0.4i
-.TP
-.B FMR_OF_PREALLOC
-The extent is allocated but not yet written.
-.TP
-.B FMR_OF_ATTR_FORK
-This extent contains extended attribute data.
-.TP
-.B FMR_OF_EXTENT_MAP
-This extent contains extent map information for the owner.
-.TP
-.B FMR_OF_SHARED
-Parts of this extent may be shared.
-.TP
-.B FMR_OF_SPECIAL_OWNER
-The
-.I fmr_owner
-field contains a special value instead of an inode number.
-.TP
-.B FMR_OF_LAST
-This is the last record in the data set.
-.RE
-.P
-The
-.I fmr_reserved
-field will be set to zero.
-.\"
-.SS Owner values
-Generally, the value of the
-.I fmr_owner
-field for non-metadata extents should be an inode number.
-However, filesystems are under no obligation to report inode numbers;
-they may instead report
-.B FMR_OWN_UNKNOWN
-if the inode number cannot easily be retrieved, if the caller lacks
-sufficient privilege, if the filesystem does not support stable
-inode numbers, or for any other reason.
-If a filesystem wishes to condition the reporting of inode numbers based
-on process capabilities, it is strongly urged that the
-.B CAP_SYS_ADMIN
-capability be used for this purpose.
-.P
-The following special owner values are generic to all filesystems:
-.RS 0.4i
-.TP
-.B FMR_OWN_FREE
-Free space.
-.TP
-.B FMR_OWN_UNKNOWN
-This extent is in use but its owner is not known or not easily retrieved.
-.TP
-.B FMR_OWN_METADATA
-This extent is filesystem metadata.
-.RE
-.P
-XFS can return the following special owner values:
-.RS 0.4i
-.TP
-.B XFS_FMR_OWN_FREE
-Free space.
-.TP
-.B XFS_FMR_OWN_UNKNOWN
-This extent is in use but its owner is not known or not easily retrieved.
-.TP
-.B XFS_FMR_OWN_FS
-Static filesystem metadata which exists at a fixed address.
-These are the AG superblock, the AGF, the AGFL, and the AGI headers.
-.TP
-.B XFS_FMR_OWN_LOG
-The filesystem journal.
-.TP
-.B XFS_FMR_OWN_AG
-Allocation group metadata, such as the free space btrees and the
-reverse mapping btrees.
-.TP
-.B XFS_FMR_OWN_INOBT
-The inode and free inode btrees.
-.TP
-.B XFS_FMR_OWN_INODES
-Inode records.
-.TP
-.B XFS_FMR_OWN_REFC
-Reference count information.
-.TP
-.B XFS_FMR_OWN_COW
-This extent is being used to stage a copy-on-write.
-.TP
-.B XFS_FMR_OWN_DEFECTIVE
-This extent has been marked defective either by the filesystem or the
-underlying device.
-.RE
-.P
-ext4 can return the following special owner values:
-.RS 0.4i
-.TP
-.B EXT4_FMR_OWN_FREE
-Free space.
-.TP
-.B EXT4_FMR_OWN_UNKNOWN
-This extent is in use but its owner is not known or not easily retrieved.
-.TP
-.B EXT4_FMR_OWN_FS
-Static filesystem metadata which exists at a fixed address.
-This is the superblock and the group descriptors.
-.TP
-.B EXT4_FMR_OWN_LOG
-The filesystem journal.
-.TP
-.B EXT4_FMR_OWN_INODES
-Inode records.
-.TP
-.B EXT4_FMR_OWN_BLKBM
-Block bit map.
-.TP
-.B EXT4_FMR_OWN_INOBM
-Inode bit map.
-.RE
-.SH RETURN VALUE
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-The error placed in
-.I errno
-can be one of, but is not limited to, the following:
-.TP
-.B EBADF
-.I fd
-is not open for reading.
-.TP
-.B EBADMSG
-The filesystem has detected a checksum error in the metadata.
-.TP
-.B EFAULT
-The pointer passed in was not mapped to a valid memory address.
-.TP
-.B EINVAL
-The array is not long enough, the keys do not point to a valid part of
-the filesystem, the low key points to a higher point in the filesystem's
-physical storage address space than the high key, or a nonzero value
-was passed in one of the fields that must be zero.
-.TP
-.B ENOMEM
-Insufficient memory to process the operation.
-.TP
-.B EOPNOTSUPP
-The filesystem does not support this operation.
-.TP
-.B EUCLEAN
-The filesystem metadata is corrupt and needs repair.
-.SH STANDARDS
-Linux.
-.P
-Not all filesystems support it.
-.SH HISTORY
-Linux 4.12.
-.SH EXAMPLES
-See
-.I io/fsmap.c
-in the
-.I xfsprogs
-distribution for a sample program.
-.SH SEE ALSO
-.BR ioctl (2)
diff --git a/man2/ioctl_iflags.2 b/man2/ioctl_iflags.2
deleted file mode 100644
index 0f4c533bc..000000000
--- a/man2/ioctl_iflags.2
+++ /dev/null
@@ -1,202 +0,0 @@
-.\" Copyright (c) 2017 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\"
-.TH ioctl_iflags 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_iflags \- ioctl() operations for inode flags
-.SH DESCRIPTION
-Various Linux filesystems support the notion of
-.IR "inode flags" \[em]attributes
-that modify the semantics of files and directories.
-These flags can be retrieved and modified using two
-.BR ioctl (2)
-operations:
-.P
-.in +4n
-.EX
-int attr;
-fd = open("pathname", ...);
-\&
-ioctl(fd, FS_IOC_GETFLAGS, &attr); /* Place current flags
- in \[aq]attr\[aq] */
-attr |= FS_NOATIME_FL; /* Tweak returned bit mask */
-ioctl(fd, FS_IOC_SETFLAGS, &attr); /* Update flags for inode
- referred to by \[aq]fd\[aq] */
-.EE
-.in
-.P
-The
-.BR lsattr (1)
-and
-.BR chattr (1)
-shell commands provide interfaces to these two operations,
-allowing a user to view and modify the inode flags associated with a file.
-.P
-The following flags are supported
-(shown along with the corresponding letter used to indicate the flag by
-.BR lsattr (1)
-and
-.BR chattr (1)):
-.TP
-.BR FS_APPEND_FL " \[aq]a\[aq]"
-The file can be opened only with the
-.B O_APPEND
-flag.
-(This restriction applies even to the superuser.)
-Only a privileged process
-.RB ( CAP_LINUX_IMMUTABLE )
-can set or clear this attribute.
-.TP
-.BR FS_COMPR_FL " \[aq]c\[aq]"
-Store the file in a compressed format on disk.
-This flag is
-.I not
-supported by most of the mainstream filesystem implementations;
-one exception is
-.BR btrfs (5).
-.TP
-.BR FS_DIRSYNC_FL " \[aq]D\[aq] (since Linux 2.6.0)"
-Write directory changes synchronously to disk.
-This flag provides semantics equivalent to the
-.BR mount (2)
-.B MS_DIRSYNC
-option, but on a per-directory basis.
-This flag can be applied only to directories.
-.\" .TP
-.\" .BR FS_EXTENT_FL " \[aq]e\[aq]"
-.\" FIXME Some support on ext4? (EXT4_EXTENTS_FL)
-.TP
-.BR FS_IMMUTABLE_FL " \[aq]i\[aq]"
-The file is immutable:
-no changes are permitted to the file contents or metadata
-(permissions, timestamps, ownership, link count, and so on).
-(This restriction applies even to the superuser.)
-Only a privileged process
-.RB ( CAP_LINUX_IMMUTABLE )
-can set or clear this attribute.
-.TP
-.BR FS_JOURNAL_DATA_FL " \[aq]j\[aq]"
-Enable journaling of file data on
-.BR ext3 (5)
-and
-.BR ext4 (5)
-filesystems.
-On a filesystem that is journaling in
-.I ordered
-or
-.I writeback
-mode, a privileged
-.RB ( CAP_SYS_RESOURCE )
-process can set this flag to enable journaling of data updates on
-a per-file basis.
-.TP
-.BR FS_NOATIME_FL " \[aq]A\[aq]"
-Don't update the file last access time when the file is accessed.
-This can provide I/O performance benefits for applications that do not care
-about the accuracy of this timestamp.
-This flag provides functionality similar to the
-.BR mount (2)
-.B MS_NOATIME
-flag, but on a per-file basis.
-.\" .TP
-.\" .BR FS_NOCOMP_FL " \[aq]\[aq]"
-.\" FIXME Support for FS_NOCOMP_FL on Btrfs?
-.TP
-.BR FS_NOCOW_FL " \[aq]C\[aq] (since Linux 2.6.39)"
-The file will not be subject to copy-on-write updates.
-This flag has an effect only on filesystems that support copy-on-write
-semantics, such as Btrfs.
-See
-.BR chattr (1)
-and
-.BR btrfs (5).
-.TP
-.BR FS_NODUMP_FL " \[aq]d\[aq]"
-Don't include this file in backups made using
-.BR dump (8).
-.TP
-.BR FS_NOTAIL_FL " \[aq]t\[aq]"
-This flag is supported only on Reiserfs.
-It disables the Reiserfs tail-packing feature,
-which tries to pack small files (and the final fragment of larger files)
-into the same disk block as the file metadata.
-.TP
-.BR FS_PROJINHERIT_FL " \[aq]P\[aq] (since Linux 4.5)"
-.\" commit 040cb3786d9b25293b8b0b05b90da0f871e1eb9b
-.\" Flag name was added in Linux 4.4
-.\" FIXME Not currently supported because not in FS_FL_USER_MODIFIABLE?
-Inherit the quota project ID.
-Files and subdirectories will inherit the project ID of the directory.
-This flag can be applied only to directories.
-.TP
-.BR FS_SECRM_FL " \[aq]s\[aq]"
-Mark the file for secure deletion.
-This feature is not implemented by any filesystem,
-since the task of securely erasing a file from a recording medium
-is surprisingly difficult.
-.TP
-.BR FS_SYNC_FL " \[aq]S\[aq]"
-Make file updates synchronous.
-For files, this makes all writes synchronous
-(as though all opens of the file were with the
-.B O_SYNC
-flag).
-For directories, this has the same effect as the
-.B FS_DIRSYNC_FL
-flag.
-.TP
-.BR FS_TOPDIR_FL " \[aq]T\[aq]"
-Mark a directory for special treatment under the Orlov block-allocation
-strategy.
-See
-.BR chattr (1)
-for details.
-This flag can be applied only to directories and
-has an effect only for ext2, ext3, and ext4.
-.TP
-.BR FS_UNRM_FL " \[aq]u\[aq]"
-Allow the file to be undeleted if it is deleted.
-This feature is not implemented by any filesystem,
-since it is possible to implement file-recovery mechanisms outside the kernel.
-.P
-In most cases,
-when any of the above flags is set on a directory,
-the flag is inherited by files and subdirectories
-created inside that directory.
-Exceptions include
-.BR FS_TOPDIR_FL ,
-which is not inheritable, and
-.BR FS_DIRSYNC_FL ,
-which is inherited only by subdirectories.
-.SH STANDARDS
-Linux.
-.SH NOTES
-In order to change the inode flags of a file using the
-.B FS_IOC_SETFLAGS
-operation,
-the effective user ID of the caller must match the owner of the file,
-or the caller must have the
-.B CAP_FOWNER
-capability.
-.P
-The type of the argument given to the
-.B FS_IOC_GETFLAGS
-and
-.B FS_IOC_SETFLAGS
-operations is
-.IR int\~* ,
-notwithstanding the implication in the kernel source file
-.I include/uapi/linux/fs.h
-that the argument is
-.IR long\~* .
-.SH SEE ALSO
-.BR chattr (1),
-.BR lsattr (1),
-.BR mount (2),
-.BR btrfs (5),
-.BR ext4 (5),
-.BR xfs (5),
-.BR xattr (7),
-.BR mount (8)
diff --git a/man2/ioctl_ns.2 b/man2/ioctl_ns.2
deleted file mode 100644
index 958a686b0..000000000
--- a/man2/ioctl_ns.2
+++ /dev/null
@@ -1,342 +0,0 @@
-.\" Copyright (c) 2017 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\"
-.TH ioctl_ns 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_ns \- ioctl() operations for Linux namespaces
-.SH DESCRIPTION
-.\" ============================================================
-.\"
-.SS Discovering namespace relationships
-The following
-.BR ioctl (2)
-operations are provided to allow discovery of namespace relationships (see
-.BR user_namespaces (7)
-and
-.BR pid_namespaces (7)).
-The form of the calls is:
-.P
-.in +4n
-.EX
-new_fd = ioctl(fd, op);
-.EE
-.in
-.P
-In each case,
-.I fd
-refers to a
-.IR /proc/ pid /ns/*
-file.
-Both operations return a new file descriptor on success.
-.TP
-.BR NS_GET_USERNS " (since Linux 4.9)"
-.\" commit bcac25a58bfc6bd79191ac5d7afb49bea96da8c9
-.\" commit 6786741dbf99e44fb0c0ed85a37582b8a26f1c3b
-Returns a file descriptor that refers to the owning user namespace
-for the namespace referred to by
-.IR fd .
-.TP
-.BR NS_GET_PARENT " (since Linux 4.9)"
-.\" commit a7306ed8d94af729ecef8b6e37506a1c6fc14788
-Returns a file descriptor that refers to the parent namespace of
-the namespace referred to by
-.IR fd .
-This operation is valid only for hierarchical namespaces
-(i.e., PID and user namespaces).
-For user namespaces,
-.B NS_GET_PARENT
-is synonymous with
-.BR NS_GET_USERNS .
-.P
-The new file descriptor returned by these operations is opened with the
-.B O_RDONLY
-and
-.B O_CLOEXEC
-(close-on-exec; see
-.BR fcntl (2))
-flags.
-.P
-By applying
-.BR fstat (2)
-to the returned file descriptor, one obtains a
-.I stat
-structure whose
-.I st_dev
-(resident device) and
-.I st_ino
-(inode number) fields together identify the owning/parent namespace.
-This inode number can be matched with the inode number of another
-.IR /proc/ pid /ns/ { pid , user }
-file to determine whether that is the owning/parent namespace.
-.P
-Either of these
-.BR ioctl (2)
-operations can fail with the following errors:
-.TP
-.B EPERM
-The requested namespace is outside of the caller's namespace scope.
-This error can occur if, for example, the owning user namespace is an
-ancestor of the caller's current user namespace.
-It can also occur on attempts to obtain the parent of the initial
-user or PID namespace.
-.TP
-.B ENOTTY
-The operation is not supported by this kernel version.
-.P
-Additionally, the
-.B NS_GET_PARENT
-operation can fail with the following error:
-.TP
-.B EINVAL
-.I fd
-refers to a nonhierarchical namespace.
-.P
-See the EXAMPLES section for an example of the use of these operations.
-.\" ============================================================
-.\"
-.SS Discovering the namespace type
-The
-.B NS_GET_NSTYPE
-.\" commit e5ff5ce6e20ee22511398bb31fb912466cf82a36
-operation (available since Linux 4.11) can be used to discover
-the type of namespace referred to by the file descriptor
-.IR fd :
-.P
-.in +4n
-.EX
-nstype = ioctl(fd, NS_GET_NSTYPE);
-.EE
-.in
-.P
-.I fd
-refers to a
-.IR /proc/ pid /ns/*
-file.
-.P
-The return value is one of the
-.B CLONE_NEW*
-values that can be specified to
-.BR clone (2)
-or
-.BR unshare (2)
-in order to create a namespace.
-.\" ============================================================
-.\"
-.SS Discovering the owner of a user namespace
-The
-.B NS_GET_OWNER_UID
-.\" commit 015bb305b8ebe8d601a238ab70ebdc394c7a19ba
-operation (available since Linux 4.11) can be used to discover
-the owner user ID of a user namespace (i.e., the effective user ID
-of the process that created the user namespace).
-The form of the call is:
-.P
-.in +4n
-.EX
-uid_t uid;
-ioctl(fd, NS_GET_OWNER_UID, &uid);
-.EE
-.in
-.P
-.I fd
-refers to a
-.IR /proc/ pid /ns/user
-file.
-.P
-The owner user ID is returned in the
-.I uid_t
-pointed to by the third argument.
-.P
-This operation can fail with the following error:
-.TP
-.B EINVAL
-.I fd
-does not refer to a user namespace.
-.SH ERRORS
-Any of the above
-.BR ioctl ()
-operations can return the following errors:
-.TP
-.B ENOTTY
-.I fd
-does not refer to a
-.IR /proc/ pid /ns/ *
-file.
-.SH STANDARDS
-Linux.
-.SH EXAMPLES
-The example shown below uses the
-.BR ioctl (2)
-operations described above to perform simple
-discovery of namespace relationships.
-The following shell sessions show various examples of the use
-of this program.
-.P
-Trying to get the parent of the initial user namespace fails,
-since it has no parent:
-.P
-.in +4n
-.EX
-$ \fB./ns_show /proc/self/ns/user p\fP
-The parent namespace is outside your namespace scope
-.EE
-.in
-.P
-Create a process running
-.BR sleep (1)
-that resides in new user and UTS namespaces,
-and show that the new UTS namespace is associated with the new user namespace:
-.P
-.in +4n
-.EX
-$ \fBunshare \-Uu sleep 1000 &\fP
-[1] 23235
-$ \fB./ns_show /proc/23235/ns/uts u\fP
-Device/Inode of owning user namespace is: [0,3] / 4026532448
-$ \fBreadlink /proc/23235/ns/user\fP
-user:[4026532448]
-.EE
-.in
-.P
-Then show that the parent of the new user namespace in the preceding
-example is the initial user namespace:
-.P
-.in +4n
-.EX
-$ \fBreadlink /proc/self/ns/user\fP
-user:[4026531837]
-$ \fB./ns_show /proc/23235/ns/user p\fP
-Device/Inode of parent namespace is: [0,3] / 4026531837
-.EE
-.in
-.P
-Start a shell in a new user namespace, and show that from within
-this shell, the parent user namespace can't be discovered.
-Similarly, the UTS namespace
-(which is associated with the initial user namespace)
-can't be discovered.
-.P
-.in +4n
-.EX
-$ \fBPS1="sh2$ " unshare \-U bash\fP
-sh2$ \fB./ns_show /proc/self/ns/user p\fP
-The parent namespace is outside your namespace scope
-sh2$ \fB./ns_show /proc/self/ns/uts u\fP
-The owning user namespace is outside your namespace scope
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (ns_show.c)
-.EX
-/* ns_show.c
-\&
- Licensed under the GNU General Public License v2 or later.
-*/
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/nsfs.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/stat.h>
-#include <sys/sysmacros.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int fd, userns_fd, parent_fd;
- struct stat sb;
-\&
- if (argc < 2) {
- fprintf(stderr, "Usage: %s /proc/[pid]/ns/[file] [p|u]\en",
- argv[0]);
- fprintf(stderr, "\enDisplay the result of one or both "
- "of NS_GET_USERNS (u) or NS_GET_PARENT (p)\en"
- "for the specified /proc/[pid]/ns/[file]. If neither "
- "\[aq]p\[aq] nor \[aq]u\[aq] is specified,\en"
- "NS_GET_USERNS is the default.\en");
- exit(EXIT_FAILURE);
- }
-\&
- /* Obtain a file descriptor for the \[aq]ns\[aq] file specified
- in argv[1]. */
-\&
- fd = open(argv[1], O_RDONLY);
- if (fd == \-1) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-\&
- /* Obtain a file descriptor for the owning user namespace and
- then obtain and display the inode number of that namespace. */
-\&
- if (argc < 3 || strchr(argv[2], \[aq]u\[aq])) {
- userns_fd = ioctl(fd, NS_GET_USERNS);
-\&
- if (userns_fd == \-1) {
- if (errno == EPERM)
- printf("The owning user namespace is outside "
- "your namespace scope\en");
- else
- perror("ioctl\-NS_GET_USERNS");
- exit(EXIT_FAILURE);
- }
-\&
- if (fstat(userns_fd, &sb) == \-1) {
- perror("fstat\-userns");
- exit(EXIT_FAILURE);
- }
- printf("Device/Inode of owning user namespace is: "
- "[%x,%x] / %ju\en",
- major(sb.st_dev),
- minor(sb.st_dev),
- (uintmax_t) sb.st_ino);
-\&
- close(userns_fd);
- }
-\&
- /* Obtain a file descriptor for the parent namespace and
- then obtain and display the inode number of that namespace. */
-\&
- if (argc > 2 && strchr(argv[2], \[aq]p\[aq])) {
- parent_fd = ioctl(fd, NS_GET_PARENT);
-\&
- if (parent_fd == \-1) {
- if (errno == EINVAL)
- printf("Can\[aq]t get parent namespace of a "
- "nonhierarchical namespace\en");
- else if (errno == EPERM)
- printf("The parent namespace is outside "
- "your namespace scope\en");
- else
- perror("ioctl\-NS_GET_PARENT");
- exit(EXIT_FAILURE);
- }
-\&
- if (fstat(parent_fd, &sb) == \-1) {
- perror("fstat\-parentns");
- exit(EXIT_FAILURE);
- }
- printf("Device/Inode of parent namespace is: [%x,%x] / %ju\en",
- major(sb.st_dev),
- minor(sb.st_dev),
- (uintmax_t) sb.st_ino);
-\&
- close(parent_fd);
- }
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR fstat (2),
-.BR ioctl (2),
-.BR proc (5),
-.BR namespaces (7)
diff --git a/man2/ioctl_pagemap_scan.2 b/man2/ioctl_pagemap_scan.2
deleted file mode 100644
index f7235b8be..000000000
--- a/man2/ioctl_pagemap_scan.2
+++ /dev/null
@@ -1,206 +0,0 @@
-.\" This manpage is Copyright (C) 2023 Collabora;
-.\" Written by Muhammad Usama Anjum <usama.anjum@collabora.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH ioctl_pagemap_scan 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_pagemap_scan \- get and/or clear page flags
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/fs.h>" " /* Definition of " "struct pm_scan_arg" ,
-.BR " struct page_region" ", and " PAGE_IS_* " constants */"
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " pagemap_fd ", PAGEMAP_SCAN, struct pm_scan_arg *" arg );
-.fi
-.SH DESCRIPTION
-This
-.BR ioctl (2)
-is used to get and optionally clear some specific flags from page table entries.
-The information is returned with
-.B PAGE_SIZE
-granularity.
-.P
-To start tracking the written state (flag) of a page or range of memory,
-the
-.B UFFD_FEATURE_WP_ASYNC
-must be enabled by
-.B UFFDIO_API
-.BR ioctl (2)
-on
-.B userfaultfd
-and memory range must be registered with
-.B UFFDIO_REGISTER
-.BR ioctl (2)
-in
-.B UFFDIO_REGISTER_MODE_WP
-mode.
-.SS Supported page flags
-The following page table entry flags are supported:
-.TP
-.B PAGE_IS_WPALLOWED
-The page has asynchronous write-protection enabled.
-.TP
-.B PAGE_IS_WRITTEN
-The page has been written to since it was write-protected.
-.TP
-.B PAGE_IS_FILE
-The page is file backed.
-.TP
-.B PAGE_IS_PRESENT
-The page is present in the memory.
-.TP
-.B PAGE_IS_SWAPPED
-The page is swapped.
-.TP
-.B PAGE_IS_PFNZERO
-The page has zero PFN.
-.TP
-.B PAGE_IS_HUGE
-The page is THP or Hugetlb backed.
-.SS Supported operations
-The get operation is always performed
-if the output buffer is specified.
-The other operations are as follows:
-.TP
-.B PM_SCAN_WP_MATCHING
-Write protect the matched pages.
-.TP
-.B PM_SCAN_CHECK_WPASYNC
-Abort the scan
-when a page is found
-which doesn't have the Userfaultfd Asynchronous Write protection enabled.
-.SS The \f[I]struct pm_scan_arg\f[] argument
-.EX
-struct pm_scan_arg {
- __u64 size;
- __u64 flags;
- __u64 start;
- __u64 end;
- __u64 walk_end;
- __u64 vec;
- __u64 vec_len;
- __u64 max_pages;
- __u64 category_inverted;
- __u64 category_mask;
- __u64 category_anyof_mask;
- __u64 return_mask;
-};
-.EE
-.TP
-.B size
-This field should be set to the size of the structure in bytes,
-as in
-.IR sizeof(struct\~pm_scan_arg) .
-.TP
-.B flags
-The operations to be performed are specified in it.
-.TP
-.B start
-The starting address of the scan is specified in it.
-.TP
-.B end
-The ending address of the scan is specified in it.
-.TP
-.B walk_end
-The kernel returns the scan's ending address in it.
-The
-.I walk_end
-equal to
-.I end
-means that the scan has completed on the entire range.
-.TP
-.B vec
-The address of
-.I page_region
-array for output.
-.IP
-.in +4n
-.EX
-struct page_region {
- __u64 start;
- __u64 end;
- __u64 categories;
-};
-.EE
-.in
-.TP
-.B vec_len
-The length of the
-.I page_region
-struct array.
-.TP
-.B max_pages
-It is the optional limit for the number of output pages required.
-.TP
-.B category_inverted
-.BI PAGE_IS_ *
-categories whose values match if 0 instead of 1.
-.TP
-.B category_mask
-Skip pages for which any
-.BI PAGE_IS_ *
-category doesn't match.
-.TP
-.B category_anyof_mask
-Skip pages for which no
-.BI PAGE_IS_ *
-category matches.
-.TP
-.B return_mask
-.BI PAGE_IS_ *
-categories that are to be reported in
-.IR page_region .
-.SH RETURN VALUE
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Error codes can be one of, but are not limited to, the following:
-.TP
-.B EINVAL
-Invalid arguments i.e.,
-invalid
-.I size
-of the argument,
-invalid
-.IR flags ,
-invalid
-.IR categories ,
-the
-.I start
-address isn't aligned with
-.BR PAGE_SIZE ,
-or
-.I vec_len
-is specified when
-.I vec
-is NULL.
-.TP
-.B EFAULT
-Invalid
-.I arg
-pointer,
-invalid
-.I vec
-pointer,
-or invalid address range specified by
-.I start
-and
-.IR end .
-.TP
-.B ENOMEM
-No memory is available.
-.TP
-.B EINTR
-Fatal signal is pending.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 6.7.
-.SH SEE ALSO
-.BR ioctl (2)
diff --git a/man2/ioctl_pipe.2 b/man2/ioctl_pipe.2
deleted file mode 100644
index 5f5c42652..000000000
--- a/man2/ioctl_pipe.2
+++ /dev/null
@@ -1,64 +0,0 @@
-.\" Copyright (c) 2022 by Cyril Hrubis <chrubi@suse.cz>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH ioctl_pipe 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_pipe \- ioctl() operations for the general notification mechanism
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/watch_queue.h>" " /* Definition of " IOC_WATCH_QUEUE_ "* */"
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " pipefd "[1], IOC_WATCH_QUEUE_SET_SIZE, int " size );
-.BI "int ioctl(int " pipefd "[1], IOC_WATCH_QUEUE_SET_FILTER,"
-.BI " struct watch_notification_filter *" filter );
-.fi
-.SH DESCRIPTION
-The following
-.BR ioctl (2)
-operations are provided to set up general notification queue parameters.
-The notification queue is built on the top of a
-.BR pipe (2)
-opened with the
-.B O_NOTIFICATION_PIPE
-flag.
-.TP
-.BR IOC_WATCH_QUEUE_SET_SIZE " (since Linux 5.8)"
-.\" commit c73be61cede5882f9605a852414db559c0ebedfd
-Preallocates the pipe buffer memory so that
-it can fit
-.I size
-notification messages.
-Currently,
-.I size
-must be between 1 and 512.
-.TP
-.BR IOC_WATCH_QUEUE_SET_FILTER " (since Linux 5.8)"
-.\" commit c73be61cede5882f9605a852414db559c0ebedfd
-Watch queue filter can limit events that are received.
-Filters are passed in a
-.I struct watch_notification_filter
-and each filter is described by a
-.I struct watch_notification_type_filter
-structure.
-.IP
-.in +4n
-.EX
-struct watch_notification_filter {
- __u32 nr_filters;
- __u32 __reserved;
- struct watch_notification_type_filter filters[];
-};
-\&
-struct watch_notification_type_filter {
- __u32 type;
- __u32 info_filter;
- __u32 info_mask;
- __u32 subtype_filter[8];
-};
-.EE
-.in
-.SH SEE ALSO
-.BR pipe (2),
-.BR ioctl (2)
diff --git a/man2/ioctl_tty.2 b/man2/ioctl_tty.2
deleted file mode 100644
index c458933a2..000000000
--- a/man2/ioctl_tty.2
+++ /dev/null
@@ -1,922 +0,0 @@
-'\" t
-.\" Copyright 2002 Walter Harms <walter.harms@informatik.uni-oldenburg.de>
-.\" and Andries Brouwer <aeb@cwi.nl>.
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.TH ioctl_tty 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_tty \- ioctls for terminals and serial lines
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/ioctl.h>
-.BR "#include <asm/termbits.h>" " /* Definition of " "struct termios" ,
-.BR " struct termios2" ", and"
-.BR " Bnnn" ", " BOTHER ", " CBAUD ", " CLOCAL ,
-.BR " TC*" { FLUSH , ON , OFF "} and other constants */"
-.P
-.BI "int ioctl(int " fd ", int " op ", ...);"
-.fi
-.SH DESCRIPTION
-The
-.BR ioctl (2)
-call for terminals and serial ports accepts many possible operation arguments.
-Most require a third argument, of varying type, here called
-.I argp
-or
-.IR arg .
-.P
-Use of
-.BR ioctl ()
-makes for nonportable programs.
-Use the POSIX interface described in
-.BR termios (3)
-whenever possible.
-.P
-Please note that
-.B struct termios
-from
-.I <asm/termbits.h>
-is different and incompatible with
-.B struct termios
-from
-.IR <termios.h> .
-These ioctl calls require
-.B struct termios
-from
-.IR <asm/termbits.h> .
-.SS Get and set terminal attributes
-.TP
-.B TCGETS
-Argument:
-.BI "struct termios\~*" argp
-.IP
-Equivalent to
-.IR "tcgetattr(fd, argp)" .
-.IP
-Get the current serial port settings.
-.TP
-.B TCSETS
-Argument:
-.BI "const struct termios\~*" argp
-.IP
-Equivalent to
-.IR "tcsetattr(fd, TCSANOW, argp)" .
-.IP
-Set the current serial port settings.
-.TP
-.B TCSETSW
-Argument:
-.BI "const struct termios\~*" argp
-.IP
-Equivalent to
-.IR "tcsetattr(fd, TCSADRAIN, argp)" .
-.IP
-Allow the output buffer to drain, and
-set the current serial port settings.
-.TP
-.B TCSETSF
-Argument:
-.BI "const struct termios\~*" argp
-.IP
-Equivalent to
-.IR "tcsetattr(fd, TCSAFLUSH, argp)" .
-.IP
-Allow the output buffer to drain, discard pending input, and
-set the current serial port settings.
-.P
-The following four ioctls, added in Linux 2.6.20,
-.\" commit 64bb6c5e1ddcd47c951740485026ef08975ee2e6
-.\" commit 592ee3a5e5e2a981ef2829a0380093006d045661
-are just like
-.BR TCGETS ,
-.BR TCSETS ,
-.BR TCSETSW ,
-.BR TCSETSF ,
-except that they take a
-.I "struct termios2\~*"
-instead of a
-.IR "struct termios\~*" .
-If the structure member
-.B c_cflag
-contains the flag
-.BR BOTHER ,
-then the baud rate is stored in the structure members
-.B c_ispeed
-and
-.B c_ospeed
-as integer values.
-These ioctls are not supported on all architectures.
-.RS
-.TS
-lb l.
-TCGETS2 \fBstruct termios2 *\fPargp
-TCSETS2 \fBconst struct termios2 *\fPargp
-TCSETSW2 \fBconst struct termios2 *\fPargp
-TCSETSF2 \fBconst struct termios2 *\fPargp
-.TE
-.RE
-.P
-The following four ioctls are just like
-.BR TCGETS ,
-.BR TCSETS ,
-.BR TCSETSW ,
-.BR TCSETSF ,
-except that they take a
-.I "struct termio\~*"
-instead of a
-.IR "struct termios\~*" .
-.RS
-.TS
-lb l.
-TCGETA \fBstruct termio *\fPargp
-TCSETA \fBconst struct termio *\fPargp
-TCSETAW \fBconst struct termio *\fPargp
-TCSETAF \fBconst struct termio *\fPargp
-.TE
-.RE
-.SS Locking the termios structure
-The
-.I termios
-structure of a terminal can be locked.
-The lock is itself a
-.I termios
-structure, with nonzero bits or fields indicating a
-locked value.
-.TP
-.B TIOCGLCKTRMIOS
-Argument:
-.BI "struct termios\~*" argp
-.IP
-Gets the locking status of the
-.I termios
-structure of the terminal.
-.TP
-.B TIOCSLCKTRMIOS
-Argument:
-.BI "const struct termios\~*" argp
-.IP
-Sets the locking status of the
-.I termios
-structure of the terminal.
-Only a process with the
-.B CAP_SYS_ADMIN
-capability can do this.
-.SS Get and set window size
-Window sizes are kept in the kernel, but not used by the kernel
-(except in the case of virtual consoles, where the kernel will
-update the window size when the size of the virtual console changes,
-for example, by loading a new font).
-.TP
-.B TIOCGWINSZ
-Argument:
-.BI "struct winsize\~*" argp
-.IP
-Get window size.
-.TP
-.B TIOCSWINSZ
-Argument:
-.BI "const struct winsize\~*" argp
-.IP
-Set window size.
-.P
-The struct used by these ioctls is defined as
-.P
-.in +4n
-.EX
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel; /* unused */
- unsigned short ws_ypixel; /* unused */
-};
-.EE
-.in
-.P
-When the window size changes, a
-.B SIGWINCH
-signal is sent to the
-foreground process group.
-.SS Sending a break
-.TP
-.B TCSBRK
-Argument:
-.BI "int " arg
-.IP
-Equivalent to
-.IR "tcsendbreak(fd, arg)" .
-.IP
-If the terminal is using asynchronous serial data transmission, and
-.I arg
-is zero, then send a break (a stream of zero bits) for between
-0.25 and 0.5 seconds.
-If the terminal is not using asynchronous
-serial data transmission, then either a break is sent, or the function
-returns without doing anything.
-When
-.I arg
-is nonzero, nobody knows what will happen.
-.IP
-(SVr4, UnixWare, Solaris, and Linux treat
-.I "tcsendbreak(fd,arg)"
-with nonzero
-.I arg
-like
-.IR "tcdrain(fd)" .
-SunOS treats
-.I arg
-as a multiplier, and sends a stream of bits
-.I arg
-times as long as done for zero
-.IR arg .
-DG/UX and AIX treat
-.I arg
-(when nonzero) as a time interval measured in milliseconds.
-HP-UX ignores
-.IR arg .)
-.TP
-.B TCSBRKP
-Argument:
-.BI "int " arg
-.IP
-So-called "POSIX version" of
-.BR TCSBRK .
-It treats nonzero
-.I arg
-as a time interval measured in deciseconds, and does nothing
-when the driver does not support breaks.
-.TP
-.B TIOCSBRK
-Argument:
-.B void
-.IP
-Turn break on, that is, start sending zero bits.
-.TP
-.B TIOCCBRK
-Argument:
-.B void
-.IP
-Turn break off, that is, stop sending zero bits.
-.SS Software flow control
-.TP
-.B TCXONC
-Argument:
-.BI "int " arg
-.IP
-Equivalent to
-.IR "tcflow(fd, arg)" .
-.IP
-See
-.BR tcflow (3)
-for the argument values
-.BR TCOOFF ,
-.BR TCOON ,
-.BR TCIOFF ,
-.BR TCION .
-.SS Buffer count and flushing
-.TP
-.B FIONREAD
-Argument:
-.BI "int\~*" argp
-.IP
-Get the number of bytes in the input buffer.
-.TP
-.B TIOCINQ
-Argument:
-.BI "int\~*" argp
-.IP
-Same as
-.BR FIONREAD .
-.TP
-.B TIOCOUTQ
-Argument:
-.BI "int\~*" argp
-.IP
-Get the number of bytes in the output buffer.
-.TP
-.B TCFLSH
-Argument:
-.BI "int " arg
-.IP
-Equivalent to
-.IR "tcflush(fd, arg)" .
-.IP
-See
-.BR tcflush (3)
-for the argument values
-.BR TCIFLUSH ,
-.BR TCOFLUSH ,
-.BR TCIOFLUSH .
-.TP
-.B TIOCSERGETLSR
-Argument:
-.BI "int\~*" argp
-.IP
-Get line status register.
-The status register has the
-.B TIOCSER_TEMT
-bit set when the
-output buffer is empty and the hardware transmitter is also physically empty.
-.IP
-This operation is not necessarily supported by all serial tty drivers.
-.IP
-.BR tcdrain (3)
-does not wait and returns immediately when
-.B TIOCSER_TEMT
-bit is set.
-.SS Faking input
-.TP
-.B TIOCSTI
-Argument:
-.BI "const char\~*" argp
-.IP
-Insert the given byte in the input queue.
-.IP
-Since Linux 6.2,
-.\" commit 690c8b804ad2eafbd35da5d3c95ad325ca7d5061
-.\" commit 83efeeeb3d04b22aaed1df99bc70a48fe9d22c4d
-this operation may require the
-.B CAP_SYS_ADMIN
-capability (if the
-.I dev.tty.legacy_tiocsti
-sysctl variable is set to false).
-.SS Redirecting console output
-.TP
-.B TIOCCONS
-Argument:
-.B void
-.IP
-Redirect output that would have gone to
-.I /dev/console
-or
-.I /dev/tty0
-to the given terminal.
-If that was a pseudoterminal master, send it to the slave.
-Before Linux 2.6.10,
-anybody could do this as long as the output had not been redirected yet;
-since Linux 2.6.10, only a process with the
-.B CAP_SYS_ADMIN
-capability may do this.
-If output was redirected already, then
-.B EBUSY
-is returned,
-but redirection can be stopped by using this ioctl with
-.I fd
-pointing at
-.I /dev/console
-or
-.IR /dev/tty0 .
-.SS Controlling terminal
-.TP
-.B TIOCSCTTY
-Argument:
-.BI "int " arg
-.IP
-Make the given terminal the controlling terminal of the calling process.
-The calling process must be a session leader and not have a
-controlling terminal already.
-For this case,
-.I arg
-should be specified as zero.
-.IP
-If this terminal is already the controlling terminal
-of a different session group, then the ioctl fails with
-.BR EPERM ,
-unless the caller has the
-.B CAP_SYS_ADMIN
-capability and
-.I arg
-equals 1, in which case the terminal is stolen, and all processes that had
-it as controlling terminal lose it.
-.TP
-.B TIOCNOTTY
-Argument:
-.B void
-.IP
-If the given terminal was the controlling terminal of the calling process,
-give up this controlling terminal.
-If the process was session leader,
-then send
-.B SIGHUP
-and
-.B SIGCONT
-to the foreground process group
-and all processes in the current session lose their controlling terminal.
-.SS Process group and session ID
-.TP
-.B TIOCGPGRP
-Argument:
-.BI "pid_t\~*" argp
-.IP
-When successful, equivalent to
-.IR "*argp = tcgetpgrp(fd)" .
-.IP
-Get the process group ID of the foreground process group on this terminal.
-.TP
-.B TIOCSPGRP
-Argument:
-.BI "const pid_t\~*" argp
-.IP
-Equivalent to
-.IR "tcsetpgrp(fd, *argp)" .
-.IP
-Set the foreground process group ID of this terminal.
-.TP
-.B TIOCGSID
-Argument:
-.BI "pid_t\~*" argp
-.IP
-When successful, equivalent to
-.IR "*argp = tcgetsid(fd)" .
-.IP
-Get the session ID of the given terminal.
-This fails with the error
-.B ENOTTY
-if the terminal is not a master pseudoterminal
-and not our controlling terminal.
-Strange.
-.SS Exclusive mode
-.TP
-.B TIOCEXCL
-Argument:
-.B void
-.IP
-Put the terminal into exclusive mode.
-No further
-.BR open (2)
-operations on the terminal are permitted.
-(They fail with
-.BR EBUSY ,
-except for a process with the
-.B CAP_SYS_ADMIN
-capability.)
-.TP
-.B TIOCGEXCL
-Argument:
-.BI "int\~*" argp
-.IP
-(since Linux 3.8)
-If the terminal is currently in exclusive mode,
-place a nonzero value in the location pointed to by
-.IR argp ;
-otherwise, place zero in
-.IR *argp .
-.TP
-.B TIOCNXCL
-Argument:
-.B void
-.IP
-Disable exclusive mode.
-.SS Line discipline
-.TP
-.B TIOCGETD
-Argument:
-.BI "int\~*" argp
-.IP
-Get the line discipline of the terminal.
-.TP
-.B TIOCSETD
-Argument:
-.BI "const int\~*" argp
-.IP
-Set the line discipline of the terminal.
-.SS Pseudoterminal ioctls
-.TP
-.B TIOCPKT
-Argument:
-.BI "const int\~*" argp
-.IP
-Enable (when
-.RI * argp
-is nonzero) or disable packet mode.
-Can be applied to the master side of a pseudoterminal only (and will return
-.B ENOTTY
-otherwise).
-In packet mode, each subsequent
-.BR read (2)
-will return a packet that either contains a single nonzero control byte,
-or has a single byte containing zero (\[aq]\e0\[aq]) followed by data
-written on the slave side of the pseudoterminal.
-If the first byte is not
-.B TIOCPKT_DATA
-(0), it is an OR of one
-or more of the following bits:
-.IP
-.ad l
-.TS
-lb l.
-TIOCPKT_FLUSHREAD T{
-The read queue for the terminal is flushed.
-T}
-TIOCPKT_FLUSHWRITE T{
-The write queue for the terminal is flushed.
-T}
-TIOCPKT_STOP T{
-Output to the terminal is stopped.
-T}
-TIOCPKT_START T{
-Output to the terminal is restarted.
-T}
-TIOCPKT_DOSTOP T{
-The start and stop characters are \fB\[ha]S\fP/\fB\[ha]Q\fP.
-T}
-TIOCPKT_NOSTOP T{
-The start and stop characters are not \fB\[ha]S\fP/\fB\[ha]Q\fP.
-T}
-.TE
-.ad
-.IP
-While packet mode is in use, the presence
-of control status information to be read
-from the master side may be detected by a
-.BR select (2)
-for exceptional conditions or a
-.BR poll (2)
-for the
-.B POLLPRI
-event.
-.IP
-This mode is used by
-.BR rlogin (1)
-and
-.BR rlogind (8)
-to implement a remote-echoed,
-locally \fB\[ha]S\fP/\fB\[ha]Q\fP flow-controlled remote login.
-.TP
-.B TIOCGPKT
-Argument:
-.BI "int\~*" argp
-.IP
-(since Linux 3.8)
-Return the current packet mode setting in the integer pointed to by
-.IR argp .
-.TP
-.B TIOCSPTLCK
-Argument:
-.BI "int\~*" argp
-.IP
-Set (if
-.I *argp
-is nonzero) or remove (if
-.I *argp
-is zero) the lock on the pseudoterminal slave device.
-(See also
-.BR unlockpt (3).)
-.TP
-.B TIOCGPTLCK
-Argument:
-.BI "int\~*" argp
-.IP
-(since Linux 3.8)
-Place the current lock state of the pseudoterminal slave device
-in the location pointed to by
-.IR argp .
-.TP
-.B TIOCGPTPEER
-Argument:
-.BI "int " flags
-.IP
-.\" commit 54ebbfb1603415d9953c150535850d30609ef077
-(since Linux 4.13)
-Given a file descriptor in
-.I fd
-that refers to a pseudoterminal master,
-open (with the given
-.BR open (2)-style
-.IR flags )
-and return a new file descriptor that refers to the peer
-pseudoterminal slave device.
-This operation can be performed
-regardless of whether the pathname of the slave device
-is accessible through the calling process's mount namespace.
-.IP
-Security-conscious programs interacting with namespaces may wish to use this
-operation rather than
-.BR open (2)
-with the pathname returned by
-.BR ptsname (3),
-and similar library functions that have insecure APIs.
-(For example, confusion can occur in some cases using
-.BR ptsname (3)
-with a pathname where a devpts filesystem
-has been mounted in a different mount namespace.)
-.P
-The BSD ioctls
-.BR TIOCSTOP ,
-.BR TIOCSTART ,
-.BR TIOCUCNTL ,
-and
-.B TIOCREMOTE
-have not been implemented under Linux.
-.SS Modem control
-.TP
-.B TIOCMGET
-Argument:
-.BI "int\~*" argp
-.IP
-Get the status of modem bits.
-.TP
-.B TIOCMSET
-Argument:
-.BI "const int\~*" argp
-.IP
-Set the status of modem bits.
-.TP
-.B TIOCMBIC
-Argument:
-.BI "const int\~*" argp
-.IP
-Clear the indicated modem bits.
-.TP
-.B TIOCMBIS
-Argument:
-.BI "const int\~*" argp
-.IP
-Set the indicated modem bits.
-.P
-The following bits are used by the above ioctls:
-.P
-.TS
-lb l.
-TIOCM_LE DSR (data set ready/line enable)
-TIOCM_DTR DTR (data terminal ready)
-TIOCM_RTS RTS (request to send)
-TIOCM_ST Secondary TXD (transmit)
-TIOCM_SR Secondary RXD (receive)
-TIOCM_CTS CTS (clear to send)
-TIOCM_CAR DCD (data carrier detect)
-TIOCM_CD see TIOCM_CAR
-TIOCM_RNG RNG (ring)
-TIOCM_RI see TIOCM_RNG
-TIOCM_DSR DSR (data set ready)
-.TE
-.TP
-.B TIOCMIWAIT
-Argument:
-.BI "int " arg
-.IP
-Wait for any of the 4 modem bits (DCD, RI, DSR, CTS) to change.
-The bits of interest are specified as a bit mask in
-.IR arg ,
-by ORing together any of the bit values,
-.BR TIOCM_RNG ,
-.BR TIOCM_DSR ,
-.BR TIOCM_CD ,
-and
-.BR TIOCM_CTS .
-The caller should use
-.B TIOCGICOUNT
-to see which bit has changed.
-.TP
-.B TIOCGICOUNT
-Argument:
-.BI "struct serial_icounter_struct\~*" argp
-.IP
-Get counts of input serial line interrupts (DCD, RI, DSR, CTS).
-The counts are written to the
-.I serial_icounter_struct
-structure pointed to by
-.IR argp .
-.IP
-Note: both 1->0 and 0->1 transitions are counted, except for
-RI, where only 0->1 transitions are counted.
-.SS Marking a line as local
-.TP
-.B TIOCGSOFTCAR
-Argument:
-.BI "int\~*" argp
-.IP
-("Get software carrier flag")
-Get the status of the CLOCAL flag in the c_cflag field of the
-.I termios
-structure.
-.TP
-.B TIOCSSOFTCAR
-Argument:
-.BI "const int\~*" argp
-.IP
-("Set software carrier flag")
-Set the CLOCAL flag in the
-.I termios
-structure when
-.RI * argp
-is nonzero, and clear it otherwise.
-.P
-If the
-.B CLOCAL
-flag for a line is off, the hardware carrier detect (DCD)
-signal is significant, and an
-.BR open (2)
-of the corresponding terminal will block until DCD is asserted,
-unless the
-.B O_NONBLOCK
-flag is given.
-If
-.B CLOCAL
-is set, the line behaves as if DCD is always asserted.
-The software carrier flag is usually turned on for local devices,
-and is off for lines with modems.
-.SS Linux-specific
-For the
-.B TIOCLINUX
-ioctl, see
-.BR ioctl_console (2).
-.SS Kernel debugging
-.B "#include <linux/tty.h>"
-.TP
-.B TIOCTTYGSTRUCT
-Argument:
-.BI "struct tty_struct\~*" argp
-.IP
-Get the
-.I tty_struct
-corresponding to
-.IR fd .
-This operation was removed in Linux 2.5.67.
-.\" commit b3506a09d15dc5aee6d4bb88d759b157016e1864
-.\" Author: Andries E. Brouwer <andries.brouwer@cwi.nl>
-.\" Date: Tue Apr 1 04:42:46 2003 -0800
-.\"
-.\" [PATCH] kill TIOCTTYGSTRUCT
-.\"
-.\" Only used for (dubious) debugging purposes, and exposes
-.\" internal kernel state.
-.\"
-.\" .SS Serial info
-.\" .BR "#include <linux/serial.h>"
-.\" .P
-.\" .TP
-.\" .BI "TIOCGSERIAL struct serial_struct *" argp
-.\" Get serial info.
-.\" .TP
-.\" .BI "TIOCSSERIAL const struct serial_struct *" argp
-.\" Set serial info.
-.SH RETURN VALUE
-The
-.BR ioctl (2)
-system call returns 0 on success.
-On error, it returns \-1 and sets
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-Invalid operation parameter.
-.TP
-.B ENOIOCTLCMD
-Unknown operation.
-.TP
-.B ENOTTY
-Inappropriate
-.IR fd .
-.TP
-.B EPERM
-Insufficient permission.
-.SH EXAMPLES
-Check the condition of DTR on the serial port.
-.P
-.\" SRC BEGIN (tiocmget.c)
-.EX
-#include <fcntl.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-\&
-int
-main(void)
-{
- int fd, serial;
-\&
- fd = open("/dev/ttyS0", O_RDONLY);
- ioctl(fd, TIOCMGET, &serial);
- if (serial & TIOCM_DTR)
- puts("TIOCM_DTR is set");
- else
- puts("TIOCM_DTR is not set");
- close(fd);
-}
-.EE
-.\" SRC END
-.P
-Get or set arbitrary baudrate on the serial port.
-.P
-.\" SRC BEGIN (tcgets.c)
-.EX
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-\&
-#include <asm/termbits.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
-#if !defined BOTHER
- fprintf(stderr, "BOTHER is unsupported\en");
- /* Program may fall back to TCGETS/TCSETS with Bnnn constants */
- exit(EXIT_FAILURE);
-#else
- /* Declare tio structure, its type depends on supported ioctl */
-# if defined TCGETS2
- struct termios2 tio;
-# else
- struct termios tio;
-# endif
- int fd, rc;
-\&
- if (argc != 2 && argc != 3 && argc != 4) {
- fprintf(stderr, "Usage: %s device [output [input] ]\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- fd = open(argv[1], O_RDWR | O_NONBLOCK | O_NOCTTY);
- if (fd < 0) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-\&
- /* Get the current serial port settings via supported ioctl */
-# if defined TCGETS2
- rc = ioctl(fd, TCGETS2, &tio);
-# else
- rc = ioctl(fd, TCGETS, &tio);
-# endif
- if (rc) {
- perror("TCGETS");
- close(fd);
- exit(EXIT_FAILURE);
- }
-\&
- /* Change baud rate when more arguments were provided */
- if (argc == 3 || argc == 4) {
- /* Clear the current output baud rate and fill a new value */
- tio.c_cflag &= \[ti]CBAUD;
- tio.c_cflag |= BOTHER;
- tio.c_ospeed = atoi(argv[2]);
-\&
- /* Clear the current input baud rate and fill a new value */
- tio.c_cflag &= \[ti](CBAUD << IBSHIFT);
- tio.c_cflag |= BOTHER << IBSHIFT;
- /* When 4th argument is not provided reuse output baud rate */
- tio.c_ispeed = (argc == 4) ? atoi(argv[3]) : atoi(argv[2]);
-\&
- /* Set new serial port settings via supported ioctl */
-# if defined TCSETS2
- rc = ioctl(fd, TCSETS2, &tio);
-# else
- rc = ioctl(fd, TCSETS, &tio);
-# endif
- if (rc) {
- perror("TCSETS");
- close(fd);
- exit(EXIT_FAILURE);
- }
-\&
- /* And get new values which were really configured */
-# if defined TCGETS2
- rc = ioctl(fd, TCGETS2, &tio);
-# else
- rc = ioctl(fd, TCGETS, &tio);
-# endif
- if (rc) {
- perror("TCGETS");
- close(fd);
- exit(EXIT_FAILURE);
- }
- }
-\&
- close(fd);
-\&
- printf("output baud rate: %u\en", tio.c_ospeed);
- printf("input baud rate: %u\en", tio.c_ispeed);
-\&
- exit(EXIT_SUCCESS);
-#endif
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR ldattach (8),
-.BR ioctl (2),
-.BR ioctl_console (2),
-.BR termios (3),
-.BR pty (7)
-.\"
-.\" FIONBIO const int *
-.\" FIONCLEX void
-.\" FIOCLEX void
-.\" FIOASYNC const int *
-.\" from serial.c:
-.\" TIOCSERCONFIG void
-.\" TIOCSERGWILD int *
-.\" TIOCSERSWILD const int *
-.\" TIOCSERGSTRUCT struct async_struct *
-.\" TIOCSERGETMULTI struct serial_multiport_struct *
-.\" TIOCSERSETMULTI const struct serial_multiport_struct *
-.\" TIOCGSERIAL, TIOCSSERIAL (see above)
diff --git a/man2/ioctl_userfaultfd.2 b/man2/ioctl_userfaultfd.2
deleted file mode 100644
index cbd0c7978..000000000
--- a/man2/ioctl_userfaultfd.2
+++ /dev/null
@@ -1,1072 +0,0 @@
-.\" Copyright (c) 2016, IBM Corporation.
-.\" Written by Mike Rapoport <rppt@linux.vnet.ibm.com>
-.\" and Copyright (C) 2016 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\"
-.TH ioctl_userfaultfd 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioctl_userfaultfd \- create a file descriptor for handling page faults in user
-space
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/userfaultfd.h>" " /* Definition of " UFFD* " constants */"
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " fd ", int " op ", ...);"
-.fi
-.SH DESCRIPTION
-Various
-.BR ioctl (2)
-operations can be performed on a userfaultfd object (created by a call to
-.BR userfaultfd (2))
-using calls of the form:
-.P
-.in +4n
-.EX
-ioctl(fd, op, argp);
-.EE
-.in
-.P
-In the above,
-.I fd
-is a file descriptor referring to a userfaultfd object,
-.I op
-is one of the operations listed below, and
-.I argp
-is a pointer to a data structure that is specific to
-.IR op .
-.P
-The various
-.BR ioctl (2)
-operations are described below.
-The
-.BR UFFDIO_API ,
-.BR UFFDIO_REGISTER ,
-and
-.B UFFDIO_UNREGISTER
-operations are used to
-.I configure
-userfaultfd behavior.
-These operations allow the caller to choose what features will be enabled and
-what kinds of events will be delivered to the application.
-The remaining operations are
-.I range
-operations.
-These operations enable the calling application to resolve page-fault
-events.
-.\"
-.SS UFFDIO_API
-(Since Linux 4.3.)
-Enable operation of the userfaultfd and perform API handshake.
-.P
-The
-.I argp
-argument is a pointer to a
-.I uffdio_api
-structure, defined as:
-.P
-.in +4n
-.EX
-struct uffdio_api {
- __u64 api; /* Requested API version (input) */
- __u64 features; /* Requested features (input/output) */
- __u64 ioctls; /* Available ioctl() operations (output) */
-};
-.EE
-.in
-.P
-The
-.I api
-field denotes the API version requested by the application.
-The kernel verifies that it can support the requested API version,
-and sets the
-.I features
-and
-.I ioctls
-fields to bit masks representing all the available features and the generic
-.BR ioctl (2)
-operations available.
-.P
-Since Linux 4.11,
-applications should use the
-.I features
-field to perform a two-step handshake.
-First,
-.B UFFDIO_API
-is called with the
-.I features
-field set to zero.
-The kernel responds by setting all supported feature bits.
-.P
-Applications which do not require any specific features
-can begin using the userfaultfd immediately.
-Applications which do need specific features
-should call
-.B UFFDIO_API
-again with a subset of the reported feature bits set
-to enable those features.
-.P
-Before Linux 4.11, the
-.I features
-field must be initialized to zero before the call to
-.BR UFFDIO_API ,
-and zero (i.e., no feature bits) is placed in the
-.I features
-field by the kernel upon return from
-.BR ioctl (2).
-.P
-If the application sets unsupported feature bits,
-the kernel will zero out the returned
-.I uffdio_api
-structure and return
-.BR EINVAL .
-.P
-The following feature bits may be set:
-.TP
-.BR UFFD_FEATURE_EVENT_FORK " (since Linux 4.11)"
-When this feature is enabled,
-the userfaultfd objects associated with a parent process are duplicated
-into the child process during
-.BR fork (2)
-and a
-.B UFFD_EVENT_FORK
-event is delivered to the userfaultfd monitor.
-.TP
-.BR UFFD_FEATURE_EVENT_REMAP " (since Linux 4.11)"
-If this feature is enabled,
-when the faulting process invokes
-.BR mremap (2),
-the userfaultfd monitor will receive an event of type
-.BR UFFD_EVENT_REMAP .
-.TP
-.BR UFFD_FEATURE_EVENT_REMOVE " (since Linux 4.11)"
-If this feature is enabled,
-when the faulting process calls
-.BR madvise (2)
-with the
-.B MADV_DONTNEED
-or
-.B MADV_REMOVE
-advice value to free a virtual memory area
-the userfaultfd monitor will receive an event of type
-.BR UFFD_EVENT_REMOVE .
-.TP
-.BR UFFD_FEATURE_EVENT_UNMAP " (since Linux 4.11)"
-If this feature is enabled,
-when the faulting process unmaps virtual memory either explicitly with
-.BR munmap (2),
-or implicitly during either
-.BR mmap (2)
-or
-.BR mremap (2),
-the userfaultfd monitor will receive an event of type
-.BR UFFD_EVENT_UNMAP .
-.TP
-.BR UFFD_FEATURE_MISSING_HUGETLBFS " (since Linux 4.11)"
-If this feature bit is set,
-the kernel supports registering userfaultfd ranges on hugetlbfs
-virtual memory areas.
-.TP
-.BR UFFD_FEATURE_MISSING_SHMEM " (since Linux 4.11)"
-If this feature bit is set,
-the kernel supports registering userfaultfd ranges on shared memory areas.
-This includes all kernel shared memory APIs:
-System V shared memory,
-.BR tmpfs (5),
-shared mappings of
-.IR /dev/zero ,
-.BR mmap (2)
-with the
-.B MAP_SHARED
-flag set,
-.BR memfd_create (2),
-and so on.
-.TP
-.BR UFFD_FEATURE_SIGBUS " (since Linux 4.14)"
-.\" commit 2d6d6f5a09a96cc1fec7ed992b825e05f64cb50e
-If this feature bit is set, no page-fault events
-.RB ( UFFD_EVENT_PAGEFAULT )
-will be delivered.
-Instead, a
-.B SIGBUS
-signal will be sent to the faulting process.
-Applications using this
-feature will not require the use of a userfaultfd monitor for processing
-memory accesses to the regions registered with userfaultfd.
-.TP
-.BR UFFD_FEATURE_THREAD_ID " (since Linux 4.14)"
-If this feature bit is set,
-.I uffd_msg.pagefault.feat.ptid
-will be set to the faulted thread ID for each page-fault message.
-.TP
-.BR UFFD_FEATURE_PAGEFAULT_FLAG_WP " (since Linux 5.10)"
-If this feature bit is set,
-userfaultfd supports write-protect faults
-for anonymous memory.
-(Note that shmem / hugetlbfs support
-is indicated by a separate feature.)
-.TP
-.BR UFFD_FEATURE_MINOR_HUGETLBFS " (since Linux 5.13)"
-If this feature bit is set,
-the kernel supports registering userfaultfd ranges
-in minor mode on hugetlbfs-backed memory areas.
-.TP
-.BR UFFD_FEATURE_MINOR_SHMEM " (since Linux 5.14)"
-If this feature bit is set,
-the kernel supports registering userfaultfd ranges
-in minor mode on shmem-backed memory areas.
-.TP
-.BR UFFD_FEATURE_EXACT_ADDRESS " (since Linux 5.18)"
-If this feature bit is set,
-.I uffd_msg.pagefault.address
-will be set to the exact page-fault address that was reported by the hardware,
-and will not mask the offset within the page.
-Note that old Linux versions might indicate the exact address as well,
-even though the feature bit is not set.
-.TP
-.BR UFFD_FEATURE_WP_HUGETLBFS_SHMEM " (since Linux 5.19)"
-If this feature bit is set,
-userfaultfd supports write-protect faults
-for hugetlbfs and shmem / tmpfs memory.
-.TP
-.BR UFFD_FEATURE_WP_UNPOPULATED " (since Linux 6.4)"
-If this feature bit is set,
-the kernel will handle anonymous memory the same way as file memory,
-by allowing the user to write-protect unpopulated page table entries.
-.TP
-.BR UFFD_FEATURE_POISON " (since Linux 6.6)"
-If this feature bit is set,
-the kernel supports resolving faults with the
-.B UFFDIO_POISON
-ioctl.
-.TP
-.BR UFFD_FEATURE_WP_ASYNC " (since Linux 6.7)"
-If this feature bit is set,
-write-protection faults will be resolved asynchronously
-by the kernel.
-.P
-The returned
-.I ioctls
-field can contain the following bits:
-.\" FIXME This user-space API seems not fully polished. Why are there
-.\" not constants defined for each of the bit-mask values listed below?
-.TP
-.B 1 << _UFFDIO_API
-The
-.B UFFDIO_API
-operation is supported.
-.TP
-.B 1 << _UFFDIO_REGISTER
-The
-.B UFFDIO_REGISTER
-operation is supported.
-.TP
-.B 1 << _UFFDIO_UNREGISTER
-The
-.B UFFDIO_UNREGISTER
-operation is supported.
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-If an error occurs,
-the kernel may zero the provided
-.I uffdio_api
-structure.
-The caller should treat its contents as unspecified,
-and reinitialize it before re-attempting another
-.B UFFDIO_API
-call.
-Possible errors include:
-.TP
-.B EFAULT
-.I argp
-refers to an address that is outside the calling process's
-accessible address space.
-.TP
-.B EINVAL
-The API version requested in the
-.I api
-field is not supported by this kernel, or the
-.I features
-field passed to the kernel includes feature bits that are not supported
-by the current kernel version.
-.TP
-.B EINVAL
-A previous
-.B UFFDIO_API
-call already enabled one or more features for this userfaultfd.
-Calling
-.B UFFDIO_API
-twice,
-the first time with no features set,
-is explicitly allowed
-as per the two-step feature detection handshake.
-.TP
-.B EPERM
-The
-.B UFFD_FEATURE_EVENT_FORK
-feature was enabled,
-but the calling process doesn't have the
-.B CAP_SYS_PTRACE
-capability.
-.SS UFFDIO_REGISTER
-(Since Linux 4.3.)
-Register a memory address range with the userfaultfd object.
-The pages in the range must be \[lq]compatible\[rq].
-Please refer to the list of register modes below
-for the compatible memory backends for each mode.
-.P
-The
-.I argp
-argument is a pointer to a
-.I uffdio_register
-structure, defined as:
-.P
-.in +4n
-.EX
-struct uffdio_range {
- __u64 start; /* Start of range */
- __u64 len; /* Length of range (bytes) */
-};
-\&
-struct uffdio_register {
- struct uffdio_range range;
- __u64 mode; /* Desired mode of operation (input) */
- __u64 ioctls; /* Available ioctl() operations (output) */
-};
-.EE
-.in
-.P
-The
-.I range
-field defines a memory range starting at
-.I start
-and continuing for
-.I len
-bytes that should be handled by the userfaultfd.
-.P
-The
-.I mode
-field defines the mode of operation desired for this memory region.
-The following values may be bitwise ORed to set the userfaultfd mode for
-the specified range:
-.TP
-.B UFFDIO_REGISTER_MODE_MISSING
-Track page faults on missing pages.
-Since Linux 4.3,
-only private anonymous ranges are compatible.
-Since Linux 4.11,
-hugetlbfs and shared memory ranges are also compatible.
-.TP
-.B UFFDIO_REGISTER_MODE_WP
-Track page faults on write-protected pages.
-Since Linux 5.7,
-only private anonymous ranges are compatible.
-.TP
-.B UFFDIO_REGISTER_MODE_MINOR
-Track minor page faults.
-Since Linux 5.13,
-only hugetlbfs ranges are compatible.
-Since Linux 5.14,
-compatibility with shmem ranges was added.
-.P
-If the operation is successful, the kernel modifies the
-.I ioctls
-bit-mask field to indicate which
-.BR ioctl (2)
-operations are available for the specified range.
-This returned bit mask can contain the following bits:
-.TP
-.B 1 << _UFFDIO_COPY
-The
-.B UFFDIO_COPY
-operation is supported.
-.TP
-.B 1 << _UFFDIO_WAKE
-The
-.B UFFDIO_WAKE
-operation is supported.
-.TP
-.B 1 << _UFFDIO_WRITEPROTECT
-The
-.B UFFDIO_WRITEPROTECT
-operation is supported.
-.TP
-.B 1 << _UFFDIO_ZEROPAGE
-The
-.B UFFDIO_ZEROPAGE
-operation is supported.
-.TP
-.B 1 << _UFFDIO_CONTINUE
-The
-.B UFFDIO_CONTINUE
-operation is supported.
-.TP
-.B 1 << _UFFDIO_POISON
-The
-.B UFFDIO_POISON
-operation is supported.
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-Possible errors include:
-.\" FIXME Is the following error list correct?
-.\"
-.TP
-.B EBUSY
-A mapping in the specified range is registered with another
-userfaultfd object.
-.TP
-.B EFAULT
-.I argp
-refers to an address that is outside the calling process's
-accessible address space.
-.TP
-.B EINVAL
-An invalid or unsupported bit was specified in the
-.I mode
-field; or the
-.I mode
-field was zero.
-.TP
-.B EINVAL
-There is no mapping in the specified address range.
-.TP
-.B EINVAL
-.I range.start
-or
-.I range.len
-is not a multiple of the system page size; or,
-.I range.len
-is zero; or these fields are otherwise invalid.
-.TP
-.B EINVAL
-There was an incompatible mapping in the specified address range.
-.\" Mike Rapoport:
-.\" ENOMEM if the process is exiting and the
-.\" mm_struct has gone by the time userfault grabs it.
-.SS UFFDIO_UNREGISTER
-(Since Linux 4.3.)
-Unregister a memory address range from userfaultfd.
-The pages in the range must be \[lq]compatible\[rq]
-(see the description of
-.BR UFFDIO_REGISTER ).
-.P
-The address range to unregister is specified in the
-.I uffdio_range
-structure pointed to by
-.IR argp .
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-Possible errors include:
-.TP
-.B EINVAL
-Either the
-.I start
-or the
-.I len
-field of the
-.I uffdio_range
-structure was not a multiple of the system page size; or the
-.I len
-field was zero; or these fields were otherwise invalid.
-.TP
-.B EINVAL
-There was an incompatible mapping in the specified address range.
-.TP
-.B EINVAL
-There was no mapping in the specified address range.
-.\"
-.SS UFFDIO_COPY
-(Since Linux 4.3.)
-Atomically copy a continuous memory chunk into the userfault registered
-range and optionally wake up the blocked thread.
-The source and destination addresses and the number of bytes to copy are
-specified by the
-.IR src ,
-.IR dst ,
-and
-.I len
-fields of the
-.I uffdio_copy
-structure pointed to by
-.IR argp :
-.P
-.in +4n
-.EX
-struct uffdio_copy {
- __u64 dst; /* Destination of copy */
- __u64 src; /* Source of copy */
- __u64 len; /* Number of bytes to copy */
- __u64 mode; /* Flags controlling behavior of copy */
- __s64 copy; /* Number of bytes copied, or negated error */
-};
-.EE
-.in
-.P
-The following values may be bitwise ORed in
-.I mode
-to change the behavior of the
-.B UFFDIO_COPY
-operation:
-.TP
-.B UFFDIO_COPY_MODE_DONTWAKE
-Do not wake up the thread that waits for page-fault resolution.
-.TP
-.B UFFDIO_COPY_MODE_WP
-Copy the page with read-only permission.
-This allows the user to trap the next write to the page,
-which will block and generate another write-protect userfault message.
-This is used only when both
-.B UFFDIO_REGISTER_MODE_MISSING
-and
-.B UFFDIO_REGISTER_MODE_WP
-modes are enabled for the registered range.
-.P
-The
-.I copy
-field is used by the kernel to return the number of bytes
-that was actually copied, or an error (a negated
-.IR errno -style
-value).
-.\" FIXME Above: Why is the 'copy' field used to return error values?
-.\" This should be explained in the manual page.
-If the value returned in
-.I copy
-doesn't match the value that was specified in
-.IR len ,
-the operation fails with the error
-.BR EAGAIN .
-The
-.I copy
-field is output-only;
-it is not read by the
-.B UFFDIO_COPY
-operation.
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-In this case, the entire area was copied.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-Possible errors include:
-.TP
-.B EAGAIN
-The number of bytes copied (i.e., the value returned in the
-.I copy
-field)
-does not equal the value that was specified in the
-.I len
-field.
-.TP
-.B EINVAL
-Either
-.I dst
-or
-.I len
-was not a multiple of the system page size, or the range specified by
-.I src
-and
-.I len
-or
-.I dst
-and
-.I len
-was invalid.
-.TP
-.B EINVAL
-An invalid bit was specified in the
-.I mode
-field.
-.TP
-.BR ENOENT " (since Linux 4.11)"
-The faulting process has changed
-its virtual memory layout simultaneously with an outstanding
-.B UFFDIO_COPY
-operation.
-.TP
-.BR ENOSPC " (from Linux 4.11 until Linux 4.13)"
-The faulting process has exited at the time of a
-.B UFFDIO_COPY
-operation.
-.TP
-.BR ESRCH " (since Linux 4.13)"
-The faulting process has exited at the time of a
-.B UFFDIO_COPY
-operation.
-.\"
-.SS UFFDIO_ZEROPAGE
-(Since Linux 4.3.)
-Zero out a memory range registered with userfaultfd.
-.P
-The requested range is specified by the
-.I range
-field of the
-.I uffdio_zeropage
-structure pointed to by
-.IR argp :
-.P
-.in +4n
-.EX
-struct uffdio_zeropage {
- struct uffdio_range range;
- __u64 mode; /* Flags controlling behavior of copy */
- __s64 zeropage; /* Number of bytes zeroed, or negated error */
-};
-.EE
-.in
-.P
-The following value may be bitwise ORed in
-.I mode
-to change the behavior of the
-.B UFFDIO_ZEROPAGE
-operation:
-.TP
-.B UFFDIO_ZEROPAGE_MODE_DONTWAKE
-Do not wake up the thread that waits for page-fault resolution.
-.P
-The
-.I zeropage
-field is used by the kernel to return the number of bytes
-that was actually zeroed,
-or an error in the same manner as
-.BR UFFDIO_COPY .
-.\" FIXME Why is the 'zeropage' field used to return error values?
-.\" This should be explained in the manual page.
-If the value returned in the
-.I zeropage
-field doesn't match the value that was specified in
-.IR range.len ,
-the operation fails with the error
-.BR EAGAIN .
-The
-.I zeropage
-field is output-only;
-it is not read by the
-.B UFFDIO_ZEROPAGE
-operation.
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-In this case, the entire area was zeroed.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-Possible errors include:
-.TP
-.B EAGAIN
-The number of bytes zeroed (i.e., the value returned in the
-.I zeropage
-field)
-does not equal the value that was specified in the
-.I range.len
-field.
-.TP
-.B EINVAL
-Either
-.I range.start
-or
-.I range.len
-was not a multiple of the system page size; or
-.I range.len
-was zero; or the range specified was invalid.
-.TP
-.B EINVAL
-An invalid bit was specified in the
-.I mode
-field.
-.TP
-.BR ESRCH " (since Linux 4.13)"
-The faulting process has exited at the time of a
-.B UFFDIO_ZEROPAGE
-operation.
-.\"
-.SS UFFDIO_WAKE
-(Since Linux 4.3.)
-Wake up the thread waiting for page-fault resolution on
-a specified memory address range.
-.P
-The
-.B UFFDIO_WAKE
-operation is used in conjunction with
-.B UFFDIO_COPY
-and
-.B UFFDIO_ZEROPAGE
-operations that have the
-.B UFFDIO_COPY_MODE_DONTWAKE
-or
-.B UFFDIO_ZEROPAGE_MODE_DONTWAKE
-bit set in the
-.I mode
-field.
-The userfault monitor can perform several
-.B UFFDIO_COPY
-and
-.B UFFDIO_ZEROPAGE
-operations in a batch and then explicitly wake up the faulting thread using
-.BR UFFDIO_WAKE .
-.P
-The
-.I argp
-argument is a pointer to a
-.I uffdio_range
-structure (shown above) that specifies the address range.
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-Possible errors include:
-.TP
-.B EINVAL
-The
-.I start
-or the
-.I len
-field of the
-.I uffdio_range
-structure was not a multiple of the system page size; or
-.I len
-was zero; or the specified range was otherwise invalid.
-.SS UFFDIO_WRITEPROTECT
-(Since Linux 5.7.)
-Write-protect or write-unprotect a userfaultfd-registered memory range
-registered with mode
-.BR UFFDIO_REGISTER_MODE_WP .
-.P
-The
-.I argp
-argument is a pointer to a
-.I uffdio_writeprotect
-structure as shown below:
-.P
-.in +4n
-.EX
-struct uffdio_writeprotect {
- struct uffdio_range range; /* Range to change write permission*/
- __u64 mode; /* Mode to change write permission */
-};
-.EE
-.in
-.P
-There are two mode bits that are supported in this structure:
-.TP
-.B UFFDIO_WRITEPROTECT_MODE_WP
-When this mode bit is set,
-the ioctl will be a write-protect operation upon the memory range specified by
-.IR range .
-Otherwise it will be a write-unprotect operation upon the specified range,
-which can be used to resolve a userfaultfd write-protect page fault.
-.TP
-.B UFFDIO_WRITEPROTECT_MODE_DONTWAKE
-When this mode bit is set,
-do not wake up any thread that waits for
-page-fault resolution after the operation.
-This can be specified only if
-.B UFFDIO_WRITEPROTECT_MODE_WP
-is not specified.
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-Possible errors include:
-.TP
-.B EINVAL
-The
-.I start
-or the
-.I len
-field of the
-.I uffdio_range
-structure was not a multiple of the system page size; or
-.I len
-was zero; or the specified range was otherwise invalid.
-.TP
-.B EAGAIN
-The process was interrupted; retry this call.
-.TP
-.B ENOENT
-The range specified in
-.I range
-is not valid.
-For example, the virtual address does not exist,
-or is not registered with userfaultfd write-protect mode.
-.TP
-.B EFAULT
-Encountered a generic fault during processing.
-.\"
-.SS UFFDIO_CONTINUE
-(Since Linux 5.13.)
-Resolve a minor page fault
-by installing page table entries
-for existing pages in the page cache.
-.P
-The
-.I argp
-argument is a pointer to a
-.I uffdio_continue
-structure as shown below:
-.P
-.in +4n
-.EX
-struct uffdio_continue {
- struct uffdio_range range;
- /* Range to install PTEs for and continue */
- __u64 mode; /* Flags controlling the behavior of continue */
- __s64 mapped; /* Number of bytes mapped, or negated error */
-};
-.EE
-.in
-.P
-The following value may be bitwise ORed in
-.I mode
-to change the behavior of the
-.B UFFDIO_CONTINUE
-operation:
-.TP
-.B UFFDIO_CONTINUE_MODE_DONTWAKE
-Do not wake up the thread that waits for page-fault resolution.
-.P
-The
-.I mapped
-field is used by the kernel
-to return the number of bytes that were actually mapped,
-or an error in the same manner as
-.BR UFFDIO_COPY .
-If the value returned in the
-.I mapped
-field doesn't match the value that was specified in
-.IR range.len ,
-the operation fails with the error
-.BR EAGAIN .
-The
-.I mapped
-field is output-only;
-it is not read by the
-.B UFFDIO_CONTINUE
-operation.
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-In this case,
-the entire area was mapped.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-Possible errors include:
-.TP
-.B EAGAIN
-The number of bytes mapped
-(i.e., the value returned in the
-.I mapped
-field)
-does not equal the value that was specified in the
-.I range.len
-field.
-.TP
-.B EEXIST
-One or more pages were already mapped in the given range.
-.TP
-.B EFAULT
-No existing page could be found in the page cache for the given range.
-.TP
-.B EINVAL
-Either
-.I range.start
-or
-.I range.len
-was not a multiple of the system page size; or
-.I range.len
-was zero; or the range specified was invalid.
-.TP
-.B EINVAL
-An invalid bit was specified in the
-.I mode
-field.
-.TP
-.B ENOENT
-The faulting process has changed its virtual memory layout simultaneously with
-an outstanding
-.B UFFDIO_CONTINUE
-operation.
-.TP
-.B ENOMEM
-Allocating memory needed to set up the page table mappings failed.
-.TP
-.B ESRCH
-The faulting process has exited at the time of a
-.B UFFDIO_CONTINUE
-operation.
-.\"
-.SS UFFDIO_POISON
-(Since Linux 6.6.)
-Mark an address range as "poisoned".
-Future accesses to these addresses will raise a
-.B SIGBUS
-signal.
-Unlike
-.BR MADV_HWPOISON ,
-this works by installing page table entries,
-rather than "really" poisoning the underlying physical pages.
-This means it only affects this particular address space.
-.P
-The
-.I argp
-argument is a pointer to a
-.I uffdio_poison
-structure as shown below:
-.P
-.in +4n
-.EX
-struct uffdio_poison {
- struct uffdio_range range;
- /* Range to install poison PTE markers in */
- __u64 mode; /* Flags controlling the behavior of poison */
- __s64 updated; /* Number of bytes poisoned, or negated error */
-};
-.EE
-.in
-.P
-The following value may be bitwise ORed in
-.I mode
-to change the behavior of the
-.B UFFDIO_POISON
-operation:
-.TP
-.B UFFDIO_POISON_MODE_DONTWAKE
-Do not wake up the thread that waits for page-fault resolution.
-.P
-The
-.I updated
-field is used by the kernel
-to return the number of bytes that were actually poisoned,
-or an error in the same manner as
-.BR UFFDIO_COPY .
-If the value returned in the
-.I updated
-field doesn't match the value that was specified in
-.IR range.len ,
-the operation fails with the error
-.BR EAGAIN .
-The
-.I updated
-field is output-only;
-it is not read by the
-.B UFFDIO_POISON
-operation.
-.P
-This
-.BR ioctl (2)
-operation returns 0 on success.
-In this case,
-the entire area was poisoned.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-Possible errors include:
-.TP
-.B EAGAIN
-The number of bytes poisoned
-(i.e., the value returned in the
-.I updated
-field)
-does not equal the value that was specified in the
-.I range.len
-field.
-.TP
-.B EINVAL
-Either
-.I range.start
-or
-.I range.len
-was not a multiple of the system page size; or
-.I range.len
-was zero; or the range specified was invalid.
-.TP
-.B EINVAL
-An invalid bit was specified in the
-.I mode
-field.
-.TP
-.B EEXIST
-One or more pages were already mapped in the given range.
-.TP
-.B ENOENT
-The faulting process has changed its virtual memory layout simultaneously with
-an outstanding
-.B UFFDIO_POISON
-operation.
-.TP
-.B ENOMEM
-Allocating memory for page table entries failed.
-.TP
-.B ESRCH
-The faulting process has exited at the time of a
-.B UFFDIO_POISON
-operation.
-.\"
-.SH RETURN VALUE
-See descriptions of the individual operations, above.
-.SH ERRORS
-See descriptions of the individual operations, above.
-In addition, the following general errors can occur for all of the
-operations described above:
-.TP
-.B EFAULT
-.I argp
-does not point to a valid memory address.
-.TP
-.B EINVAL
-(For all operations except
-.BR UFFDIO_API .)
-The userfaultfd object has not yet been enabled (via the
-.B UFFDIO_API
-operation).
-.SH STANDARDS
-Linux.
-.SH BUGS
-In order to detect available userfault features and
-enable some subset of those features,
-the userfaultfd file descriptor must be closed after the first
-.B UFFDIO_API
-operation that queries features availability and reopened before
-the second
-.B UFFDIO_API
-operation that actually enables the desired features.
-.SH EXAMPLES
-See
-.BR userfaultfd (2).
-.SH SEE ALSO
-.BR ioctl (2),
-.BR mmap (2),
-.BR userfaultfd (2)
-.P
-.I Documentation/admin\-guide/mm/userfaultfd.rst
-in the Linux kernel source tree
diff --git a/man2/ioperm.2 b/man2/ioperm.2
deleted file mode 100644
index a2cde4721..000000000
--- a/man2/ioperm.2
+++ /dev/null
@@ -1,105 +0,0 @@
-.\" Copyright (c) 1993 Michael Haardt
-.\" (michael@moria.de)
-.\" Fri Apr 2 11:32:09 MET DST 1993
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified Sat Jul 24 15:12:05 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Tue Aug 1 16:27 1995 by Jochen Karrer
-.\" <cip307@cip.physik.uni-wuerzburg.de>
-.\" Modified Tue Oct 22 08:11:14 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Mon Feb 15 17:28:41 CET 1999 by Andries E. Brouwer <aeb@cwi.nl>
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\"
-.TH ioperm 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioperm \- set port input/output permissions
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/io.h>
-.P
-.BI "int ioperm(unsigned long " from ", unsigned long " num ", int " turn_on );
-.fi
-.SH DESCRIPTION
-.BR ioperm ()
-sets the port access permission bits for the calling thread for
-.I num
-bits starting from port address
-.IR from .
-If
-.I turn_on
-is nonzero, then permission for the specified bits is enabled;
-otherwise it is disabled.
-If
-.I turn_on
-is nonzero, the calling thread must be privileged
-.RB ( CAP_SYS_RAWIO ).
-.P
-Before Linux 2.6.8,
-only the first 0x3ff I/O ports could be specified in this manner.
-For more ports, the
-.BR iopl (2)
-system call had to be used (with a
-.I level
-argument of 3).
-Since Linux 2.6.8, 65,536 I/O ports can be specified.
-.P
-Permissions are inherited by the child created by
-.BR fork (2)
-(but see NOTES).
-Permissions are preserved across
-.BR execve (2);
-this is useful for giving port access permissions to unprivileged
-programs.
-.P
-This call is mostly for the i386 architecture.
-On many other architectures it does not exist or will always
-return an error.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-Invalid values for
-.I from
-or
-.IR num .
-.TP
-.B EIO
-(on PowerPC) This call is not supported.
-.TP
-.B ENOMEM
-.\" Could not allocate I/O bitmap.
-Out of memory.
-.TP
-.B EPERM
-The calling thread has insufficient privilege.
-.SH VERSIONS
-glibc has an
-.BR ioperm ()
-prototype both in
-.I <sys/io.h>
-and in
-.IR <sys/perm.h> .
-Avoid the latter; it is available on i386 only.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Before Linux 2.4,
-permissions were not inherited by a child created by
-.BR fork (2).
-.SH NOTES
-The
-.I /proc/ioports
-file shows the I/O ports that are currently allocated on the system.
-.SH SEE ALSO
-.BR iopl (2),
-.BR outb (2),
-.BR capabilities (7)
diff --git a/man2/iopl.2 b/man2/iopl.2
deleted file mode 100644
index 8279336e3..000000000
--- a/man2/iopl.2
+++ /dev/null
@@ -1,92 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\" Portions extracted from linux/kernel/ioport.c (no copyright notice).
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Tue Aug 1 16:47 1995 by Jochen Karrer
-.\" <cip307@cip.physik.uni-wuerzburg.de>
-.\" Modified Tue Oct 22 08:11:14 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Fri Nov 27 14:50:36 CET 1998 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\"
-.TH iopl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-iopl \- change I/O privilege level
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/io.h>
-.P
-.BI "[[deprecated]] int iopl(int " level );
-.fi
-.SH DESCRIPTION
-.BR iopl ()
-changes the I/O privilege level of the calling thread,
-as specified by the two least significant bits in
-.IR level .
-.P
-The I/O privilege level for a normal thread is 0.
-Permissions are inherited from parents to children.
-.P
-This call is deprecated, is significantly slower than
-.BR ioperm (2),
-and is only provided for older X servers which require
-access to all 65536 I/O ports.
-It is mostly for the i386 architecture.
-On many other architectures it does not exist or will always
-return an error.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I level
-is greater than 3.
-.TP
-.B ENOSYS
-This call is unimplemented.
-.TP
-.B EPERM
-The calling thread has insufficient privilege to call
-.BR iopl ();
-the
-.B CAP_SYS_RAWIO
-capability is required to raise the I/O privilege level
-above its current value.
-.SH VERSIONS
-.\" Libc5 treats it as a system call and has a prototype in
-.\" .IR <unistd.h> .
-.\" glibc1 does not have a prototype.
-glibc2 has a prototype both in
-.I <sys/io.h>
-and in
-.IR <sys/perm.h> .
-Avoid the latter; it is available on i386 only.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Prior to Linux 5.5,
-.BR iopl ()
-allowed the thread to disable interrupts while running
-at a higher I/O privilege level.
-This will probably crash the system, and is not recommended.
-.P
-Prior to Linux 3.7,
-on some architectures (such as i386), permissions
-.I were
-inherited by the child produced by
-.BR fork (2)
-and were preserved across
-.BR execve (2).
-This behavior was inadvertently changed in Linux 3.7,
-and won't be reinstated.
-.SH SEE ALSO
-.BR ioperm (2),
-.BR outb (2),
-.BR capabilities (7)
diff --git a/man2/ioprio_get.2 b/man2/ioprio_get.2
deleted file mode 100644
index d6d5b3b68..000000000
--- a/man2/ioprio_get.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/ioprio_set.2
diff --git a/man2/ioprio_set.2 b/man2/ioprio_set.2
deleted file mode 100644
index b4b9fe080..000000000
--- a/man2/ioprio_set.2
+++ /dev/null
@@ -1,362 +0,0 @@
-.\" Copyright (c) International Business Machines Corp., 2006
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" HISTORY:
-.\" 2006-04-27, created by Eduardo M. Fleury <efleury@br.ibm.com>
-.\" with various additions by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\"
-.TH ioprio_set 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ioprio_get, ioprio_set \- get/set I/O scheduling class and priority
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/ioprio.h> " "/* Definition of " IOPRIO_* " constants */"
-.BR "#include <sys/syscall.h> " "/* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_ioprio_get, int " which ", int " who );
-.BI "int syscall(SYS_ioprio_set, int " which ", int " who ", int " ioprio );
-.fi
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR ioprio_get ()
-and
-.BR ioprio_set ()
-system calls get and set the I/O scheduling class and
-priority of one or more threads.
-.P
-The
-.I which
-and
-.I who
-arguments identify the thread(s) on which the system
-calls operate.
-The
-.I which
-argument determines how
-.I who
-is interpreted, and has one of the following values:
-.TP
-.B IOPRIO_WHO_PROCESS
-.I who
-is a process ID or thread ID identifying a single process or thread.
-If
-.I who
-is 0, then operate on the calling thread.
-.TP
-.B IOPRIO_WHO_PGRP
-.I who
-is a process group ID identifying all the members of a process group.
-If
-.I who
-is 0, then operate on the process group of which the caller is a member.
-.TP
-.B IOPRIO_WHO_USER
-.I who
-is a user ID identifying all of the processes that
-have a matching real UID.
-.\" FIXME . Need to document the behavior when 'who' is specified as 0
-.\" See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=652443
-.P
-If
-.I which
-is specified as
-.B IOPRIO_WHO_PGRP
-or
-.B IOPRIO_WHO_USER
-when calling
-.BR ioprio_get (),
-and more than one process matches
-.IR who ,
-then the returned priority will be the highest one found among
-all of the matching processes.
-One priority is said to be
-higher than another one if it belongs to a higher priority
-class
-.RB ( IOPRIO_CLASS_RT
-is the highest priority class;
-.B IOPRIO_CLASS_IDLE
-is the lowest)
-or if it belongs to the same priority class as the other process but
-has a higher priority level (a lower priority number means a
-higher priority level).
-.P
-The
-.I ioprio
-argument given to
-.BR ioprio_set ()
-is a bit mask that specifies both the scheduling class and the
-priority to be assigned to the target process(es).
-The following macros are used for assembling and dissecting
-.I ioprio
-values:
-.TP
-.BI IOPRIO_PRIO_VALUE( class ", " data )
-Given a scheduling
-.I class
-and priority
-.RI ( data ),
-this macro combines the two values to produce an
-.I ioprio
-value, which is returned as the result of the macro.
-.TP
-.BI IOPRIO_PRIO_CLASS( mask )
-Given
-.I mask
-(an
-.I ioprio
-value), this macro returns its I/O class component, that is,
-one of the values
-.BR IOPRIO_CLASS_RT ,
-.BR IOPRIO_CLASS_BE ,
-or
-.BR IOPRIO_CLASS_IDLE .
-.TP
-.BI IOPRIO_PRIO_DATA( mask )
-Given
-.I mask
-(an
-.I ioprio
-value), this macro returns its priority
-.RI ( data )
-component.
-.P
-See the NOTES section for more
-information on scheduling classes and priorities,
-as well as the meaning of specifying
-.I ioprio
-as 0.
-.P
-I/O priorities are supported for reads and for synchronous
-.RB ( O_DIRECT ,
-.BR O_SYNC )
-writes.
-I/O priorities are not supported for asynchronous
-writes because they are issued outside the context of the program
-dirtying the memory, and thus program-specific priorities do not apply.
-.SH RETURN VALUE
-On success,
-.BR ioprio_get ()
-returns the
-.I ioprio
-value of the process with highest I/O priority of any of the processes
-that match the criteria specified in
-.I which
-and
-.IR who .
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-On success,
-.BR ioprio_set ()
-returns 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-Invalid value for
-.I which
-or
-.IR ioprio .
-Refer to the NOTES section for available scheduler
-classes and priority levels for
-.IR ioprio .
-.TP
-.B EPERM
-The calling process does not have the privilege needed to assign this
-.I ioprio
-to the specified process(es).
-See the NOTES section for more information on required
-privileges for
-.BR ioprio_set ().
-.TP
-.B ESRCH
-No process(es) could be found that matched the specification in
-.I which
-and
-.IR who .
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.13.
-.SH NOTES
-Two or more processes or threads can share an I/O context.
-This will be the case when
-.BR clone (2)
-was called with the
-.B CLONE_IO
-flag.
-However, by default, the distinct threads of a process will
-.B not
-share the same I/O context.
-This means that if you want to change the I/O
-priority of all threads in a process, you may need to call
-.BR ioprio_set ()
-on each of the threads.
-The thread ID that you would need for this operation
-is the one that is returned by
-.BR gettid (2)
-or
-.BR clone (2).
-.P
-These system calls have an effect only when used
-in conjunction with an I/O scheduler that supports I/O priorities.
-As at kernel 2.6.17 the only such scheduler is the Completely Fair Queuing
-(CFQ) I/O scheduler.
-.P
-If no I/O scheduler has been set for a thread,
-then by default the I/O priority will follow the CPU nice value
-.RB ( setpriority (2)).
-Before Linux 2.6.24,
-once an I/O priority had been set using
-.BR ioprio_set (),
-there was no way to reset the I/O scheduling behavior to the default.
-Since Linux 2.6.24,
-.\" commit 8ec680e4c3ec818efd1652f15199ed1c216ab550
-specifying
-.I ioprio
-as 0 can be used to reset to the default I/O scheduling behavior.
-.SS Selecting an I/O scheduler
-I/O schedulers are selected on a per-device basis via the special
-file
-.IR /sys/block/ device /queue/scheduler .
-.P
-One can view the current I/O scheduler via the
-.I /sys
-filesystem.
-For example, the following command
-displays a list of all schedulers currently loaded in the kernel:
-.P
-.in +4n
-.EX
-.RB "$" " cat /sys/block/sda/queue/scheduler"
-noop anticipatory deadline [cfq]
-.EE
-.in
-.P
-The scheduler surrounded by brackets is the one actually
-in use for the device
-.RI ( sda
-in the example).
-Setting another scheduler is done by writing the name of the
-new scheduler to this file.
-For example, the following command will set the
-scheduler for the
-.I sda
-device to
-.IR cfq :
-.P
-.in +4n
-.EX
-.RB "$" " su"
-Password:
-.RB "#" " echo cfq > /sys/block/sda/queue/scheduler"
-.EE
-.in
-.\"
-.SS The Completely Fair Queuing (CFQ) I/O scheduler
-Since version 3 (also known as CFQ Time Sliced), CFQ implements
-I/O nice levels similar to those
-of CPU scheduling.
-These nice levels are grouped into three scheduling classes,
-each one containing one or more priority levels:
-.TP
-.BR IOPRIO_CLASS_RT " (1)"
-This is the real-time I/O class.
-This scheduling class is given
-higher priority than any other class:
-processes from this class are
-given first access to the disk every time.
-Thus, this I/O class needs to be used with some
-care: one I/O real-time process can starve the entire system.
-Within the real-time class,
-there are 8 levels of class data (priority) that determine exactly
-how much time this process needs the disk for on each service.
-The highest real-time priority level is 0; the lowest is 7.
-In the future, this might change to be more directly mappable to
-performance, by passing in a desired data rate instead.
-.TP
-.BR IOPRIO_CLASS_BE " (2)"
-This is the best-effort scheduling class,
-which is the default for any process
-that hasn't set a specific I/O priority.
-The class data (priority) determines how much
-I/O bandwidth the process will get.
-Best-effort priority levels are analogous to CPU nice values
-(see
-.BR getpriority (2)).
-The priority level determines a priority relative
-to other processes in the best-effort scheduling class.
-Priority levels range from 0 (highest) to 7 (lowest).
-.TP
-.BR IOPRIO_CLASS_IDLE " (3)"
-This is the idle scheduling class.
-Processes running at this level get I/O
-time only when no one else needs the disk.
-The idle class has no class data.
-Attention is required when assigning this priority class to a process,
-since it may become starved if higher priority processes are
-constantly accessing the disk.
-.P
-Refer to the kernel source file
-.I Documentation/block/ioprio.txt
-for more information on the CFQ I/O Scheduler and an example program.
-.SS Required permissions to set I/O priorities
-Permission to change a process's priority is granted or denied based
-on two criteria:
-.TP
-.B "Process ownership"
-An unprivileged process may set the I/O priority only for a process
-whose real UID
-matches the real or effective UID of the calling process.
-A process which has the
-.B CAP_SYS_NICE
-capability can change the priority of any process.
-.TP
-.B "What is the desired priority"
-Attempts to set very high priorities
-.RB ( IOPRIO_CLASS_RT )
-require the
-.B CAP_SYS_ADMIN
-capability.
-Linux versions up to 2.6.24 also required
-.B CAP_SYS_ADMIN
-to set a very low priority
-.RB ( IOPRIO_CLASS_IDLE ),
-but since Linux 2.6.25, this is no longer required.
-.P
-A call to
-.BR ioprio_set ()
-must follow both rules, or the call will fail with the error
-.BR EPERM .
-.SH BUGS
-.\" 6 May 07: Bug report raised:
-.\" https://www.sourceware.org/bugzilla/show_bug.cgi?id=4464
-.\" Ulrich Drepper replied that he wasn't going to add these
-.\" to glibc.
-glibc does not yet provide a suitable header file defining
-the function prototypes and macros described on this page.
-Suitable definitions can be found in
-.IR linux/ioprio.h .
-.SH SEE ALSO
-.BR ionice (1),
-.BR getpriority (2),
-.BR open (2),
-.BR capabilities (7),
-.BR cgroups (7)
-.P
-.I Documentation/block/ioprio.txt
-in the Linux kernel source tree
diff --git a/man2/ipc.2 b/man2/ipc.2
deleted file mode 100644
index 91eb1ac50..000000000
--- a/man2/ipc.2
+++ /dev/null
@@ -1,63 +0,0 @@
-.\" Copyright (c) 1995 Michael Chastain (mec@shell.portal.com), 15 April 1995.
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified Tue Oct 22 08:11:14 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.TH ipc 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ipc \- System V IPC system calls
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/ipc.h>" " /* Definition of needed constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_ipc, unsigned int " call ", int " first ,
-.BI " unsigned long " second ", unsigned long " third \
-", void *" ptr ,
-.BI " long " fifth );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR ipc (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR ipc ()
-is a common kernel entry point for the System\ V IPC calls
-for messages, semaphores, and shared memory.
-.I call
-determines which IPC function to invoke;
-the other arguments are passed through to the appropriate call.
-.P
-User-space programs should call the appropriate functions by their usual names.
-Only standard library implementors and kernel hackers need to know about
-.BR ipc ().
-.SH VERSIONS
-On some architectures\[em]for example x86-64 and ARM\[em]there is no
-.BR ipc ()
-system call; instead,
-.BR msgctl (2),
-.BR semctl (2),
-.BR shmctl (2),
-and so on really are implemented as separate system calls.
-.SH STANDARDS
-Linux.
-.SH SEE ALSO
-.BR msgctl (2),
-.BR msgget (2),
-.BR msgrcv (2),
-.BR msgsnd (2),
-.BR semctl (2),
-.BR semget (2),
-.BR semop (2),
-.BR semtimedop (2),
-.BR shmat (2),
-.BR shmctl (2),
-.BR shmdt (2),
-.BR shmget (2),
-.BR sysvipc (7)
diff --git a/man2/isastream.2 b/man2/isastream.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/isastream.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/kcmp.2 b/man2/kcmp.2
deleted file mode 100644
index 0ea604eac..000000000
--- a/man2/kcmp.2
+++ /dev/null
@@ -1,420 +0,0 @@
-.\" Copyright (C) 2012, Cyrill Gorcunov <gorcunov@openvz.org>
-.\" and Copyright (C) 2012, 2016, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Kernel commit d97b46a64674a267bc41c9e16132ee2a98c3347d
-.\"
-.TH kcmp 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-kcmp \- compare two processes to determine if they share a kernel resource
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/kcmp.h>" " /* Definition of " KCMP_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_kcmp, pid_t " pid1 ", pid_t " pid2 ", int " type ,
-.BI " unsigned long " idx1 ", unsigned long " idx2 );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR kcmp (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR kcmp ()
-system call can be used to check whether the two processes identified by
-.I pid1
-and
-.I pid2
-share a kernel resource such as virtual memory, file descriptors,
-and so on.
-.P
-Permission to employ
-.BR kcmp ()
-is governed by ptrace access mode
-.B PTRACE_MODE_READ_REALCREDS
-checks against both
-.I pid1
-and
-.IR pid2 ;
-see
-.BR ptrace (2).
-.P
-The
-.I type
-argument specifies which resource is to be compared in the two processes.
-It has one of the following values:
-.TP
-.B KCMP_FILE
-Check whether a file descriptor
-.I idx1
-in the process
-.I pid1
-refers to the same open file description (see
-.BR open (2))
-as file descriptor
-.I idx2
-in the process
-.IR pid2 .
-The existence of two file descriptors that refer to the same
-open file description can occur as a result of
-.BR dup (2)
-(and similar),
-.BR fork (2),
-or passing file descriptors via a domain socket (see
-.BR unix (7)).
-.TP
-.B KCMP_FILES
-Check whether the processes share the same set of open file descriptors.
-The arguments
-.I idx1
-and
-.I idx2
-are ignored.
-See the discussion of the
-.B CLONE_FILES
-flag in
-.BR clone (2).
-.TP
-.B KCMP_FS
-Check whether the processes share the same filesystem information
-(i.e., file mode creation mask, working directory, and filesystem root).
-The arguments
-.I idx1
-and
-.I idx2
-are ignored.
-See the discussion of the
-.B CLONE_FS
-flag in
-.BR clone (2).
-.TP
-.B KCMP_IO
-Check whether the processes share I/O context.
-The arguments
-.I idx1
-and
-.I idx2
-are ignored.
-See the discussion of the
-.B CLONE_IO
-flag in
-.BR clone (2).
-.TP
-.B KCMP_SIGHAND
-Check whether the processes share the same table of signal dispositions.
-The arguments
-.I idx1
-and
-.I idx2
-are ignored.
-See the discussion of the
-.B CLONE_SIGHAND
-flag in
-.BR clone (2).
-.TP
-.B KCMP_SYSVSEM
-Check whether the processes share the same
-list of System\ V semaphore undo operations.
-The arguments
-.I idx1
-and
-.I idx2
-are ignored.
-See the discussion of the
-.B CLONE_SYSVSEM
-flag in
-.BR clone (2).
-.TP
-.B KCMP_VM
-Check whether the processes share the same address space.
-The arguments
-.I idx1
-and
-.I idx2
-are ignored.
-See the discussion of the
-.B CLONE_VM
-flag in
-.BR clone (2).
-.TP
-.BR KCMP_EPOLL_TFD " (since Linux 4.13)"
-.\" commit 0791e3644e5ef21646fe565b9061788d05ec71d4
-Check whether the file descriptor
-.I idx1
-of the process
-.I pid1
-is present in the
-.BR epoll (7)
-instance described by
-.I idx2
-of the process
-.IR pid2 .
-The argument
-.I idx2
-is a pointer to a structure where the target file is described.
-This structure has the form:
-.P
-.in +4n
-.EX
-struct kcmp_epoll_slot {
- __u32 efd;
- __u32 tfd;
- __u64 toff;
-};
-.EE
-.in
-.P
-Within this structure,
-.I efd
-is an epoll file descriptor returned from
-.BR epoll_create (2),
-.I tfd
-is a target file descriptor number, and
-.I toff
-is a target file offset counted from zero.
-Several different targets may be registered with
-the same file descriptor number and setting a specific
-offset helps to investigate each of them.
-.P
-Note that
-.BR kcmp ()
-is not protected against false positives which may occur if
-the processes are currently running.
-One should stop the processes by sending
-.B SIGSTOP
-(see
-.BR signal (7))
-prior to inspection with this system call to obtain meaningful results.
-.SH RETURN VALUE
-The return value of a successful call to
-.BR kcmp ()
-is simply the result of arithmetic comparison
-of kernel pointers (when the kernel compares resources, it uses their
-memory addresses).
-.P
-The easiest way to explain is to consider an example.
-Suppose that
-.I v1
-and
-.I v2
-are the addresses of appropriate resources, then the return value
-is one of the following:
-.RS
-.TP
-.B 0
-.I v1
-is equal to
-.IR v2 ;
-in other words, the two processes share the resource.
-.TP
-.B 1
-.I v1
-is less than
-.IR v2 .
-.TP
-.B 2
-.I v1
-is greater than
-.IR v2 .
-.TP
-.B 3
-.I v1
-is not equal to
-.IR v2 ,
-but ordering information is unavailable.
-.RE
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-.BR kcmp ()
-was designed to return values suitable for sorting.
-This is particularly handy if one needs to compare
-a large number of file descriptors.
-.SH ERRORS
-.TP
-.B EBADF
-.I type
-is
-.B KCMP_FILE
-and
-.I idx1
-or
-.I idx2
-is not an open file descriptor.
-.TP
-.B EFAULT
-The epoll slot addressed by
-.I idx2
-is outside of the user's address space.
-.TP
-.B EINVAL
-.I type
-is invalid.
-.TP
-.B ENOENT
-The target file is not present in the
-.BR epoll (7)
-instance.
-.TP
-.B EPERM
-Insufficient permission to inspect process resources.
-The
-.B CAP_SYS_PTRACE
-capability is required to inspect processes that you do not own.
-Other ptrace limitations may also apply, such as
-.BR CONFIG_SECURITY_YAMA ,
-which, when
-.I /proc/sys/kernel/yama/ptrace_scope
-is 2, limits
-.BR kcmp ()
-to child processes;
-see
-.BR ptrace (2).
-.TP
-.B ESRCH
-Process
-.I pid1
-or
-.I pid2
-does not exist.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.5.
-.P
-Before Linux 5.12,
-this system call is available only if the kernel is configured with
-.BR CONFIG_CHECKPOINT_RESTORE ,
-since the original purpose of the system call was for the
-checkpoint/restore in user space (CRIU) feature.
-(The alternative to this system call would have been to expose suitable
-process information via the
-.BR proc (5)
-filesystem; this was deemed to be unsuitable for security reasons.)
-Since Linux 5.12,
-this system call is also available if the kernel is configured with
-.BR CONFIG_KCMP .
-.SH NOTES
-See
-.BR clone (2)
-for some background information on the shared resources
-referred to on this page.
-.SH EXAMPLES
-The program below uses
-.BR kcmp ()
-to test whether pairs of file descriptors refer to
-the same open file description.
-The program tests different cases for the file descriptor pairs,
-as described in the program output.
-An example run of the program is as follows:
-.P
-.in +4n
-.EX
-$ \fB./a.out\fP
-Parent PID is 1144
-Parent opened file on FD 3
-\&
-PID of child of fork() is 1145
- Compare duplicate FDs from different processes:
- kcmp(1145, 1144, KCMP_FILE, 3, 3) ==> same
-Child opened file on FD 4
- Compare FDs from distinct open()s in same process:
- kcmp(1145, 1145, KCMP_FILE, 3, 4) ==> different
-Child duplicated FD 3 to create FD 5
- Compare duplicated FDs in same process:
- kcmp(1145, 1145, KCMP_FILE, 3, 5) ==> same
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (kcmp.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <fcntl.h>
-#include <linux/kcmp.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/syscall.h>
-#include <sys/wait.h>
-#include <unistd.h>
-\&
-static int
-kcmp(pid_t pid1, pid_t pid2, int type,
- unsigned long idx1, unsigned long idx2)
-{
- return syscall(SYS_kcmp, pid1, pid2, type, idx1, idx2);
-}
-\&
-static void
-test_kcmp(char *msg, pid_t pid1, pid_t pid2, int fd_a, int fd_b)
-{
- printf("\et%s\en", msg);
- printf("\et\etkcmp(%jd, %jd, KCMP_FILE, %d, %d) ==> %s\en",
- (intmax_t) pid1, (intmax_t) pid2, fd_a, fd_b,
- (kcmp(pid1, pid2, KCMP_FILE, fd_a, fd_b) == 0) ?
- "same" : "different");
-}
-\&
-int
-main(void)
-{
- int fd1, fd2, fd3;
- static const char pathname[] = "/tmp/kcmp.test";
-\&
- fd1 = open(pathname, O_CREAT | O_RDWR, 0600);
- if (fd1 == \-1)
- err(EXIT_FAILURE, "open");
-\&
- printf("Parent PID is %jd\en", (intmax_t) getpid());
- printf("Parent opened file on FD %d\en\en", fd1);
-\&
- switch (fork()) {
- case \-1:
- err(EXIT_FAILURE, "fork");
-\&
- case 0:
- printf("PID of child of fork() is %jd\en", (intmax_t) getpid());
-\&
- test_kcmp("Compare duplicate FDs from different processes:",
- getpid(), getppid(), fd1, fd1);
-\&
- fd2 = open(pathname, O_CREAT | O_RDWR, 0600);
- if (fd2 == \-1)
- err(EXIT_FAILURE, "open");
- printf("Child opened file on FD %d\en", fd2);
-\&
- test_kcmp("Compare FDs from distinct open()s in same process:",
- getpid(), getpid(), fd1, fd2);
-\&
- fd3 = dup(fd1);
- if (fd3 == \-1)
- err(EXIT_FAILURE, "dup");
- printf("Child duplicated FD %d to create FD %d\en", fd1, fd3);
-\&
- test_kcmp("Compare duplicated FDs in same process:",
- getpid(), getpid(), fd1, fd3);
- break;
-\&
- default:
- wait(NULL);
- }
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR clone (2),
-.BR unshare (2)
diff --git a/man2/kexec_file_load.2 b/man2/kexec_file_load.2
deleted file mode 100644
index 6c20331d3..000000000
--- a/man2/kexec_file_load.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/kexec_load.2
diff --git a/man2/kexec_load.2 b/man2/kexec_load.2
deleted file mode 100644
index 58a894f30..000000000
--- a/man2/kexec_load.2
+++ /dev/null
@@ -1,331 +0,0 @@
-.\" Copyright (C) 2010 Intel Corporation, Author: Andi Kleen
-.\" and Copyright 2014, Vivek Goyal <vgoyal@redhat.com>
-.\" and Copyright (c) 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH kexec_load 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-kexec_load, kexec_file_load \- load a new kernel for later execution
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/kexec.h>" " /* Definition of " KEXEC_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "long syscall(SYS_kexec_load, unsigned long " entry ,
-.BI " unsigned long " nr_segments \
-", struct kexec_segment *" segments ,
-.BI " unsigned long " flags );
-.BI "long syscall(SYS_kexec_file_load, int " kernel_fd ", int " initrd_fd ,
-.BI " unsigned long " cmdline_len ", const char *" cmdline ,
-.BI " unsigned long " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR kexec_load ()
-system call loads a new kernel that can be executed later by
-.BR reboot (2).
-.P
-The
-.I flags
-argument is a bit mask that controls the operation of the call.
-The following values can be specified in
-.IR flags :
-.TP
-.BR KEXEC_ON_CRASH " (since Linux 2.6.13)"
-Execute the new kernel automatically on a system crash.
-This "crash kernel" is loaded into an area of reserved memory that
-is determined at boot time using the
-.I crashkernel
-kernel command-line parameter.
-The location of this reserved memory is exported to user space via the
-.I /proc/iomem
-file, in an entry labeled "Crash kernel".
-A user-space application can parse this file and prepare a list of
-segments (see below) that specify this reserved memory as destination.
-If this flag is specified, the kernel checks that the
-target segments specified in
-.I segments
-fall within the reserved region.
-.TP
-.BR KEXEC_PRESERVE_CONTEXT " (since Linux 2.6.27)"
-Preserve the system hardware and
-software states before executing the new kernel.
-This could be used for system suspend.
-This flag is available only if the kernel was configured with
-.BR CONFIG_KEXEC_JUMP ,
-and is effective only if
-.I nr_segments
-is greater than 0.
-.P
-The high-order bits (corresponding to the mask 0xffff0000) of
-.I flags
-contain the architecture of the to-be-executed kernel.
-Specify (OR) the constant
-.B KEXEC_ARCH_DEFAULT
-to use the current architecture,
-or one of the following architecture constants
-.BR KEXEC_ARCH_386 ,
-.BR KEXEC_ARCH_68K ,
-.BR KEXEC_ARCH_X86_64 ,
-.BR KEXEC_ARCH_PPC ,
-.BR KEXEC_ARCH_PPC64 ,
-.BR KEXEC_ARCH_IA_64 ,
-.BR KEXEC_ARCH_ARM ,
-.BR KEXEC_ARCH_S390 ,
-.BR KEXEC_ARCH_SH ,
-.BR KEXEC_ARCH_MIPS ,
-and
-.BR KEXEC_ARCH_MIPS_LE .
-The architecture must be executable on the CPU of the system.
-.P
-The
-.I entry
-argument is the physical entry address in the kernel image.
-The
-.I nr_segments
-argument is the number of segments pointed to by the
-.I segments
-pointer;
-the kernel imposes an (arbitrary) limit of 16 on the number of segments.
-The
-.I segments
-argument is an array of
-.I kexec_segment
-structures which define the kernel layout:
-.P
-.in +4n
-.EX
-struct kexec_segment {
- void *buf; /* Buffer in user space */
- size_t bufsz; /* Buffer length in user space */
- void *mem; /* Physical address of kernel */
- size_t memsz; /* Physical address length */
-};
-.EE
-.in
-.P
-The kernel image defined by
-.I segments
-is copied from the calling process into
-the kernel either in regular
-memory or in reserved memory (if
-.B KEXEC_ON_CRASH
-is set).
-The kernel first performs various sanity checks on the
-information passed in
-.IR segments .
-If these checks pass, the kernel copies the segment data to kernel memory.
-Each segment specified in
-.I segments
-is copied as follows:
-.IP \[bu] 3
-.I buf
-and
-.I bufsz
-identify a memory region in the caller's virtual address space
-that is the source of the copy.
-The value in
-.I bufsz
-may not exceed the value in the
-.I memsz
-field.
-.IP \[bu]
-.I mem
-and
-.I memsz
-specify a physical address range that is the target of the copy.
-The values specified in both fields must be multiples of
-the system page size.
-.IP \[bu]
-.I bufsz
-bytes are copied from the source buffer to the target kernel buffer.
-If
-.I bufsz
-is less than
-.IR memsz ,
-then the excess bytes in the kernel buffer are zeroed out.
-.P
-In case of a normal kexec (i.e., the
-.B KEXEC_ON_CRASH
-flag is not set), the segment data is loaded in any available memory
-and is moved to the final destination at kexec reboot time (e.g., when the
-.BR kexec (8)
-command is executed with the
-.I \-e
-option).
-.P
-In case of kexec on panic (i.e., the
-.B KEXEC_ON_CRASH
-flag is set), the segment data is
-loaded to reserved memory at the time of the call, and, after a crash,
-the kexec mechanism simply passes control to that kernel.
-.P
-The
-.BR kexec_load ()
-system call is available only if the kernel was configured with
-.BR CONFIG_KEXEC .
-.SS kexec_file_load()
-The
-.BR kexec_file_load ()
-system call is similar to
-.BR kexec_load (),
-but it takes a different set of arguments.
-It reads the kernel to be loaded from the file referred to by
-the file descriptor
-.IR kernel_fd ,
-and the initrd (initial RAM disk)
-to be loaded from the file referred to by the file descriptor
-.IR initrd_fd .
-The
-.I cmdline
-argument is a pointer to a buffer containing the command line
-for the new kernel.
-The
-.I cmdline_len
-argument specifies the size of the buffer.
-The last byte in the buffer must be a null byte (\[aq]\e0\[aq]).
-.P
-The
-.I flags
-argument is a bit mask which modifies the behavior of the call.
-The following values can be specified in
-.IR flags :
-.TP
-.B KEXEC_FILE_UNLOAD
-Unload the currently loaded kernel.
-.TP
-.B KEXEC_FILE_ON_CRASH
-Load the new kernel in the memory region reserved for the crash kernel
-(as for
-.BR KEXEC_ON_CRASH ).
-This kernel is booted if the currently running kernel crashes.
-.TP
-.B KEXEC_FILE_NO_INITRAMFS
-Loading initrd/initramfs is optional.
-Specify this flag if no initramfs is being loaded.
-If this flag is set, the value passed in
-.I initrd_fd
-is ignored.
-.P
-The
-.BR kexec_file_load ()
-.\" See also http://lwn.net/Articles/603116/
-system call was added to provide support for systems
-where "kexec" loading should be restricted to
-only kernels that are signed.
-This system call is available only if the kernel was configured with
-.BR CONFIG_KEXEC_FILE .
-.SH RETURN VALUE
-On success, these system calls return 0.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EADDRNOTAVAIL
-.\" See kernel/kexec.c::sanity_check_segment_list in the 3.19 kernel source
-The
-.B KEXEC_ON_CRASH
-flag was specified, but the region specified by the
-.I mem
-and
-.I memsz
-fields of one of the
-.I segments
-entries lies outside the range of memory reserved for the crash kernel.
-.TP
-.B EADDRNOTAVAIL
-The value in a
-.I mem
-or
-.I memsz
-field in one of the
-.I segments
-entries is not a multiple of the system page size.
-.TP
-.B EBADF
-.I kernel_fd
-or
-.I initrd_fd
-is not a valid file descriptor.
-.TP
-.B EBUSY
-Another crash kernel is already being loaded
-or a crash kernel is already in use.
-.TP
-.B EINVAL
-.I flags
-is invalid.
-.TP
-.B EINVAL
-The value of a
-.I bufsz
-field in one of the
-.I segments
-entries exceeds the value in the corresponding
-.I memsz
-field.
-.TP
-.B EINVAL
-.I nr_segments
-exceeds
-.B KEXEC_SEGMENT_MAX
-(16).
-.TP
-.B EINVAL
-Two or more of the kernel target buffers overlap.
-.TP
-.B EINVAL
-The value in
-.I cmdline[cmdline_len\-1]
-is not \[aq]\e0\[aq].
-.TP
-.B EINVAL
-The file referred to by
-.I kernel_fd
-or
-.I initrd_fd
-is empty (length zero).
-.TP
-.B ENOEXEC
-.I kernel_fd
-does not refer to an open file, or the kernel can't load this file.
-Currently, the file must be a bzImage and contain an x86 kernel that
-is loadable above 4\ GiB in memory (see the kernel source file
-.IR Documentation/x86/boot.txt ).
-.TP
-.B ENOMEM
-Could not allocate memory.
-.TP
-.B EPERM
-The caller does not have the
-.B CAP_SYS_BOOT
-capability.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR kexec_load ()
-Linux 2.6.13.
-.TP
-.BR kexec_file_load ()
-Linux 3.17.
-.SH SEE ALSO
-.BR reboot (2),
-.BR syscall (2),
-.BR kexec (8)
-.P
-The kernel source files
-.I Documentation/kdump/kdump.txt
-and
-.I Documentation/admin\-guide/kernel\-parameters.txt
diff --git a/man2/keyctl.2 b/man2/keyctl.2
deleted file mode 100644
index a2a4ab047..000000000
--- a/man2/keyctl.2
+++ /dev/null
@@ -1,2297 +0,0 @@
-.\" Copyright (C) 2016 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (C) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
-.\" A very few fragments remain from an earlier version of this page
-.\" written by David Howells (dhowells@redhat.com)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH keyctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-keyctl \- manipulate the kernel's key management facility
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.P
-Alternatively, Linux Key Management Utilities
-.RI ( libkeyutils ", " \-lkeyutils );
-see VERSIONS.
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/keyctl.h>" " /* Definition of " KEY* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "long syscall(SYS_keyctl, int " operation ", unsigned long " arg2 ,
-.BI " unsigned long " arg3 ", unsigned long " arg4 ,
-.BI " unsigned long " arg5 );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR keyctl (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR keyctl ()
-allows user-space programs to perform key manipulation.
-.P
-The operation performed by
-.BR keyctl ()
-is determined by the value of the
-.I operation
-argument.
-Each of these operations is wrapped by the
-.I libkeyutils
-library (provided by the
-.I keyutils
-package) into individual functions (noted below)
-to permit the compiler to check types.
-.P
-The permitted values for
-.I operation
-are:
-.TP
-.BR KEYCTL_GET_KEYRING_ID " (since Linux 2.6.10)"
-Map a special key ID to a real key ID for this process.
-.IP
-This operation looks up the special key whose ID is provided in
-.I arg2
-(cast to
-.IR key_serial_t ).
-If the special key is found,
-the ID of the corresponding real key is returned as the function result.
-The following values may be specified in
-.IR arg2 :
-.RS
-.TP
-.B KEY_SPEC_THREAD_KEYRING
-This specifies the calling thread's thread-specific keyring.
-See
-.BR thread\-keyring (7).
-.TP
-.B KEY_SPEC_PROCESS_KEYRING
-This specifies the caller's process-specific keyring.
-See
-.BR process\-keyring (7).
-.TP
-.B KEY_SPEC_SESSION_KEYRING
-This specifies the caller's session-specific keyring.
-See
-.BR session\-keyring (7).
-.TP
-.B KEY_SPEC_USER_KEYRING
-This specifies the caller's UID-specific keyring.
-See
-.BR user\-keyring (7).
-.TP
-.B KEY_SPEC_USER_SESSION_KEYRING
-This specifies the caller's UID-session keyring.
-See
-.BR user\-session\-keyring (7).
-.TP
-.BR KEY_SPEC_REQKEY_AUTH_KEY " (since Linux 2.6.16)"
-.\" commit b5f545c880a2a47947ba2118b2509644ab7a2969
-This specifies the authorization key created by
-.BR request_key (2)
-and passed to the process it spawns to generate a key.
-This key is available only in a
-.BR request\-key (8)-style
-program that was passed an authorization key by the kernel and
-ceases to be available once the requested key has been instantiated; see
-.BR request_key (2).
-.TP
-.BR KEY_SPEC_REQUESTOR_KEYRING " (since Linux 2.6.29)"
-.\" commit 8bbf4976b59fc9fc2861e79cab7beb3f6d647640
-This specifies the key ID for the
-.BR request_key (2)
-destination keyring.
-This keyring is available only in a
-.BR request\-key (8)-style
-program that was passed an authorization key by the kernel and
-ceases to be available once the requested key has been instantiated; see
-.BR request_key (2).
-.RE
-.IP
-The behavior if the key specified in
-.I arg2
-does not exist depends on the value of
-.I arg3
-(cast to
-.IR int ).
-If
-.I arg3
-contains a nonzero value, then\[em]if it is appropriate to do so
-(e.g., when looking up the user, user-session, or session key)\[em]a new key
-is created and its real key ID returned as the function result.
-.\" The keyctl_get_keyring_ID.3 page says that a new key
-.\" "will be created *if it is appropriate to do so*". What is the
-.\" determiner for appropriate?
-.\" David Howells: Some special keys such as KEY_SPEC_REQKEY_AUTH_KEY
-.\" wouldn't get created but user/user-session/session keyring would
-.\" be created.
-Otherwise, the operation fails with the error
-.BR ENOKEY .
-.IP
-If a valid key ID is specified in
-.IR arg2 ,
-and the key exists, then this operation simply returns the key ID.
-If the key does not exist, the call fails with error
-.BR ENOKEY .
-.IP
-The caller must have
-.I search
-permission on a keyring in order for it to be found.
-.IP
-The arguments
-.I arg4
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_get_keyring_ID (3).
-.TP
-.BR KEYCTL_JOIN_SESSION_KEYRING " (since Linux 2.6.10)"
-Replace the session keyring this process subscribes to with
-a new session keyring.
-.\" This may be useful in conjunction with some sort of
-.\" session management framework that is employed by the application.
-.IP
-If
-.I arg2
-is NULL,
-an anonymous keyring with the description "_ses" is created
-and the process is subscribed to that keyring as its session keyring,
-displacing the previous session keyring.
-.IP
-Otherwise,
-.I arg2
-(cast to
-.IR "char\ *" )
-is treated as the description (name) of a keyring,
-and the behavior is as follows:
-.RS
-.IP \[bu] 3
-If a keyring with a matching description exists,
-the process will attempt to subscribe to that keyring
-as its session keyring if possible;
-if that is not possible, an error is returned.
-In order to subscribe to the keyring,
-the caller must have
-.I search
-permission on the keyring.
-.IP \[bu]
-If a keyring with a matching description does not exist,
-then a new keyring with the specified description is created,
-and the process is subscribed to that keyring as its session keyring.
-.RE
-.IP
-The arguments
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_join_session_keyring (3).
-.TP
-.BR KEYCTL_UPDATE " (since Linux 2.6.10)"
-Update a key's data payload.
-.IP
-The
-.I arg2
-argument (cast to
-.IR key_serial_t )
-specifies the ID of the key to be updated.
-The
-.I arg3
-argument (cast to
-.IR "void\ *" )
-points to the new payload and
-.I arg4
-(cast to
-.IR size_t )
-contains the new payload size in bytes.
-.IP
-The caller must have
-.I write
-permission on the key specified and the key type must support updating.
-.IP
-A negatively instantiated key (see the description of
-.BR KEYCTL_REJECT )
-can be positively instantiated with this operation.
-.IP
-The
-.I arg5
-argument is ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_update (3).
-.TP
-.BR KEYCTL_REVOKE " (since Linux 2.6.10)"
-Revoke the key with the ID provided in
-.I arg2
-(cast to
-.IR key_serial_t ).
-The key is scheduled for garbage collection;
-it will no longer be findable,
-and will be unavailable for further operations.
-Further attempts to use the key will fail with the error
-.BR EKEYREVOKED .
-.IP
-The caller must have
-.I write
-or
-.I setattr
-permission on the key.
-.\" Keys with the KEY_FLAG_KEEP bit set cause an EPERM
-.\" error for KEYCTL_REVOKE. Does this need to be documented?
-.\" David Howells: No significance for user space.
-.IP
-The arguments
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_revoke (3).
-.TP
-.BR KEYCTL_CHOWN " (since Linux 2.6.10)"
-Change the ownership (user and group ID) of a key.
-.IP
-The
-.I arg2
-argument (cast to
-.IR key_serial_t )
-contains the key ID.
-The
-.I arg3
-argument (cast to
-.IR uid_t )
-contains the new user ID (or \-1 in case the user ID shouldn't be changed).
-The
-.I arg4
-argument (cast to
-.IR gid_t )
-contains the new group ID (or \-1 in case the group ID shouldn't be changed).
-.IP
-The key must grant the caller
-.I setattr
-permission.
-.IP
-For the UID to be changed, or for the GID to be changed to a group
-the caller is not a member of, the caller must have the
-.B CAP_SYS_ADMIN
-capability (see
-.BR capabilities (7)).
-.IP
-If the UID is to be changed, the new user must have sufficient
-quota to accept the key.
-The quota deduction will be moved from the old user
-to the new user should the UID be changed.
-.IP
-The
-.I arg5
-argument is ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_chown (3).
-.TP
-.BR KEYCTL_SETPERM " (since Linux 2.6.10)"
-Change the permissions of the key with the ID provided in the
-.I arg2
-argument (cast to
-.IR key_serial_t )
-to the permissions provided in the
-.I arg3
-argument (cast to
-.IR key_perm_t ).
-.IP
-If the caller doesn't have the
-.B CAP_SYS_ADMIN
-capability, it can change permissions only for the keys it owns.
-(More precisely: the caller's filesystem UID must match the UID of the key.)
-.IP
-The key must grant
-.I setattr
-permission to the caller
-.I regardless
-of the caller's capabilities.
-.\" FIXME Above, is it really intended that a privileged process can't
-.\" override the lack of the 'setattr' permission?
-.IP
-The permissions in
-.I arg3
-specify masks of available operations
-for each of the following user categories:
-.RS
-.TP
-.IR possessor " (since Linux 2.6.14)"
-.\" commit 664cceb0093b755739e56572b836a99104ee8a75
-This is the permission granted to a process that possesses the key
-(has it attached searchably to one of the process's keyrings);
-see
-.BR keyrings (7).
-.TP
-.I user
-This is the permission granted to a process
-whose filesystem UID matches the UID of the key.
-.TP
-.I group
-This is the permission granted to a process
-whose filesystem GID or any of its supplementary GIDs
-matches the GID of the key.
-.TP
-.I other
-This is the permission granted to other processes
-that do not match the
-.I user
-and
-.I group
-categories.
-.RE
-.IP
-The
-.IR user ,
-.IR group ,
-and
-.I other
-categories are exclusive: if a process matches the
-.I user
-category, it will not receive permissions granted in the
-.I group
-category; if a process matches the
-.I user
-or
-.I group
-category, then it will not receive permissions granted in the
-.I other
-category.
-.IP
-The
-.I possessor
-category grants permissions that are cumulative with the grants from the
-.IR user ,
-.IR group ,
-or
-.I other
-category.
-.IP
-Each permission mask is eight bits in size,
-with only six bits currently used.
-The available permissions are:
-.RS
-.TP
-.I view
-This permission allows reading attributes of a key.
-.IP
-This permission is required for the
-.B KEYCTL_DESCRIBE
-operation.
-.IP
-The permission bits for each category are
-.BR KEY_POS_VIEW ,
-.BR KEY_USR_VIEW ,
-.BR KEY_GRP_VIEW ,
-and
-.BR KEY_OTH_VIEW .
-.TP
-.I read
-This permission allows reading a key's payload.
-.IP
-This permission is required for the
-.B KEYCTL_READ
-operation.
-.IP
-The permission bits for each category are
-.BR KEY_POS_READ ,
-.BR KEY_USR_READ ,
-.BR KEY_GRP_READ ,
-and
-.BR KEY_OTH_READ .
-.TP
-.I write
-This permission allows update or instantiation of a key's payload.
-For a keyring, it allows keys to be linked and unlinked from the keyring.
-.IP
-This permission is required for the
-.BR KEYCTL_UPDATE ,
-.BR KEYCTL_REVOKE ,
-.BR KEYCTL_CLEAR ,
-.BR KEYCTL_LINK ,
-and
-.B KEYCTL_UNLINK
-operations.
-.IP
-The permission bits for each category are
-.BR KEY_POS_WRITE ,
-.BR KEY_USR_WRITE ,
-.BR KEY_GRP_WRITE ,
-and
-.BR KEY_OTH_WRITE .
-.TP
-.I search
-This permission allows keyrings to be searched and keys to be found.
-Searches can recurse only into nested keyrings that have
-.I search
-permission set.
-.IP
-This permission is required for the
-.BR KEYCTL_GET_KEYRING_ID ,
-.BR KEYCTL_JOIN_SESSION_KEYRING ,
-.BR KEYCTL_SEARCH ,
-and
-.B KEYCTL_INVALIDATE
-operations.
-.IP
-The permission bits for each category are
-.BR KEY_POS_SEARCH ,
-.BR KEY_USR_SEARCH ,
-.BR KEY_GRP_SEARCH ,
-and
-.BR KEY_OTH_SEARCH .
-.TP
-.I link
-This permission allows a key or keyring to be linked to.
-.IP
-This permission is required for the
-.B KEYCTL_LINK
-and
-.B KEYCTL_SESSION_TO_PARENT
-operations.
-.IP
-The permission bits for each category are
-.BR KEY_POS_LINK ,
-.BR KEY_USR_LINK ,
-.BR KEY_GRP_LINK ,
-and
-.BR KEY_OTH_LINK .
-.TP
-.IR setattr " (since Linux 2.6.15)"
-This permission allows a key's UID, GID, and permissions mask to be changed.
-.IP
-This permission is required for the
-.BR KEYCTL_REVOKE ,
-.BR KEYCTL_CHOWN ,
-and
-.B KEYCTL_SETPERM
-operations.
-.IP
-The permission bits for each category are
-.BR KEY_POS_SETATTR ,
-.BR KEY_USR_SETATTR ,
-.BR KEY_GRP_SETATTR ,
-and
-.BR KEY_OTH_SETATTR .
-.RE
-.IP
-As a convenience, the following macros are defined as masks for
-all of the permission bits in each of the user categories:
-.BR KEY_POS_ALL ,
-.BR KEY_USR_ALL ,
-.BR KEY_GRP_ALL ,
-and
-.BR KEY_OTH_ALL .
-.IP
-The
-.I arg4
-and
-.I arg5
-arguments are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_setperm (3).
-.TP
-.BR KEYCTL_DESCRIBE " (since Linux 2.6.10)"
-Obtain a string describing the attributes of a specified key.
-.IP
-The ID of the key to be described is specified in
-.I arg2
-(cast to
-.IR key_serial_t ).
-The descriptive string is returned in the buffer pointed to by
-.I arg3
-(cast to
-.IR char\~* );
-.I arg4
-(cast to
-.IR size_t )
-specifies the size of that buffer in bytes.
-.IP
-The key must grant the caller
-.I view
-permission.
-.IP
-The returned string is null-terminated and
-contains the following information about the key:
-.IP
-.in +4n
-.IR type ; uid ; gid ; perm ; description
-.in
-.IP
-In the above,
-.I type
-and
-.I description
-are strings,
-.I uid
-and
-.I gid
-are decimal strings, and
-.I perm
-is a hexadecimal permissions mask.
-The descriptive string is written with the following format:
-.IP
-.in +4n
-.EX
-%s;%d;%d;%08x;%s
-.EE
-.in
-.IP
-.B Note: the intention is that the descriptive string should
-.B be extensible in future kernel versions.
-In particular, the
-.I description
-field will not contain semicolons;
-.\" FIXME But, the kernel does not enforce the requirement
-.\" that the key description contains no semicolons!
-.\" So, user space has no guarantee here??
-.\" Either something more needs to be said here,
-.\" or a kernel fix is required.
-it should be parsed by working backwards from the end of the string
-to find the last semicolon.
-This allows future semicolon-delimited fields to be inserted
-in the descriptive string in the future.
-.IP
-Writing to the buffer is attempted only when
-.I arg3
-is non-NULL and the specified buffer size
-is large enough to accept the descriptive string
-(including the terminating null byte).
-.\" Function commentary says it copies up to buflen bytes, but see the
-.\" (buffer && buflen >= ret) condition in keyctl_describe_key() in
-.\" security/keyctl.c
-In order to determine whether the buffer size was too small,
-check to see if the return value of the operation is greater than
-.IR arg4 .
-.IP
-The
-.I arg5
-argument is ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_describe (3).
-.TP
-.B KEYCTL_CLEAR
-Clear the contents of (i.e., unlink all keys from) a keyring.
-.IP
-The ID of the key
-(which must be of keyring type)
-.\" or the error ENOTDIR results
-is provided in
-.I arg2
-(cast to
-.IR key_serial_t ).
-.\" According to Documentation/security/keys.txt:
-.\" This function can also be used to clear special kernel keyrings if they
-.\" are appropriately marked if the user has CAP_SYS_ADMIN capability. The
-.\" DNS resolver cache keyring is an example of this.
-.IP
-The caller must have
-.I write
-permission on the keyring.
-.IP
-The arguments
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_clear (3).
-.TP
-.BR KEYCTL_LINK " (since Linux 2.6.10)"
-Create a link from a keyring to a key.
-.IP
-The key to be linked is specified in
-.I arg2
-(cast to
-.IR key_serial_t );
-the keyring is specified in
-.I arg3
-(cast to
-.IR key_serial_t ).
-.IP
-If a key with the same type and description is already linked in the keyring,
-then that key is displaced from the keyring.
-.IP
-Before creating the link,
-the kernel checks the nesting of the keyrings and returns appropriate errors
-if the link would produce a cycle
-or if the nesting of keyrings would be too deep
-(the limit on the nesting of keyrings is determined by the kernel constant
-.BR KEYRING_SEARCH_MAX_DEPTH ,
-defined with the value 6, and is necessary to prevent overflows
-on the kernel stack when recursively searching keyrings).
-.IP
-The caller must have
-.I link
-permission on the key being added and
-.I write
-permission on the keyring.
-.IP
-The arguments
-.I arg4
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_link (3).
-.TP
-.BR KEYCTL_UNLINK " (since Linux 2.6.10)"
-Unlink a key from a keyring.
-.IP
-The ID of the key to be unlinked is specified in
-.I arg2
-(cast to
-.IR key_serial_t );
-the ID of the keyring from which it is to be unlinked is specified in
-.I arg3
-(cast to
-.IR key_serial_t ).
-.IP
-If the key is not currently linked into the keyring, an error results.
-.IP
-The caller must have
-.I write
-permission on the keyring from which the key is being removed.
-.IP
-If the last link to a key is removed,
-then that key will be scheduled for destruction.
-.IP
-The arguments
-.I arg4
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_unlink (3).
-.TP
-.BR KEYCTL_SEARCH " (since Linux 2.6.10)"
-Search for a key in a keyring tree,
-returning its ID and optionally linking it to a specified keyring.
-.IP
-The tree to be searched is specified by passing
-the ID of the head keyring in
-.I arg2
-(cast to
-.IR key_serial_t ).
-The search is performed breadth-first and recursively.
-.IP
-The
-.I arg3
-and
-.I arg4
-arguments specify the key to be searched for:
-.I arg3
-(cast as
-.IR char\~* )
-contains the key type
-(a null-terminated character string up to 32 bytes in size,
-including the terminating null byte), and
-.I arg4
-(cast as
-.IR char\~* )
-contains the description of the key
-(a null-terminated character string up to 4096 bytes in size,
-including the terminating null byte).
-.IP
-The source keyring must grant
-.I search
-permission to the caller.
-When performing the recursive search, only keyrings that grant the caller
-.I search
-permission will be searched.
-Only keys for which the caller has
-.I search
-permission can be found.
-.IP
-If the key is found, its ID is returned as the function result.
-.IP
-If the key is found and
-.I arg5
-(cast to
-.IR key_serial_t )
-is nonzero, then, subject to the same constraints and rules as
-.BR KEYCTL_LINK ,
-the key is linked into the keyring whose ID is specified in
-.IR arg5 .
-If the destination keyring specified in
-.I arg5
-already contains a link to a key that has the same type and description,
-then that link will be displaced by a link to
-the key found by this operation.
-.IP
-Instead of valid existing keyring IDs, the source
-.RI ( arg2 )
-and destination
-.RI ( arg5 )
-keyrings can be one of the special keyring IDs listed under
-.BR KEYCTL_GET_KEYRING_ID .
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_search (3).
-.TP
-.BR KEYCTL_READ " (since Linux 2.6.10)"
-Read the payload data of a key.
-.IP
-The ID of the key whose payload is to be read is specified in
-.I arg2
-(cast to
-.IR key_serial_t ).
-This can be the ID of an existing key,
-or any of the special key IDs listed for
-.BR KEYCTL_GET_KEYRING_ID .
-.\" including KEY_SPEC_REQKEY_AUTH_KEY
-.IP
-The payload is placed in the buffer pointed to by
-.I arg3
-(cast to
-.IR "char\ *" );
-the size of that buffer must be specified in
-.I arg4
-(cast to
-.IR size_t ).
-.IP
-The returned data will be processed for presentation
-according to the key type.
-For example, a keyring will return an array of
-.I key_serial_t
-entries representing the IDs of all the keys that are linked to it.
-The
-.I user
-key type will return its data as is.
-If a key type does not implement this function,
-the operation fails with the error
-.BR EOPNOTSUPP .
-.IP
-If
-.I arg3
-is not NULL,
-as much of the payload data as will fit is copied into the buffer.
-On a successful return,
-the return value is always the total size of the payload data.
-To determine whether the buffer was of sufficient size,
-check to see that the return value is less than or equal to
-the value supplied in
-.IR arg4 .
-.IP
-The key must either grant the caller
-.I read
-permission, or grant the caller
-.I search
-permission when searched for from the process keyrings
-(i.e., the key is possessed).
-.IP
-The
-.I arg5
-argument is ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_read (3).
-.TP
-.BR KEYCTL_INSTANTIATE " (since Linux 2.6.10)"
-(Positively) instantiate an uninstantiated key with a specified payload.
-.IP
-The ID of the key to be instantiated is provided in
-.I arg2
-(cast to
-.IR key_serial_t ).
-.IP
-The key payload is specified in the buffer pointed to by
-.I arg3
-(cast to
-.IR "void\ *" );
-the size of that buffer is specified in
-.I arg4
-(cast to
-.IR size_t ).
-.IP
-The payload may be a null pointer and the buffer size may be 0
-if this is supported by the key type (e.g., it is a keyring).
-.IP
-The operation may fail if the payload data is in the wrong format
-or is otherwise invalid.
-.IP
-If
-.I arg5
-(cast to
-.IR key_serial_t )
-is nonzero, then, subject to the same constraints and rules as
-.BR KEYCTL_LINK ,
-the instantiated key is linked into the keyring whose ID is specified in
-.IR arg5 .
-.IP
-The caller must have the appropriate authorization key,
-and once the uninstantiated key has been instantiated,
-the authorization key is revoked.
-In other words, this operation is available only from a
-.BR request\-key (8)-style
-program.
-See
-.BR request_key (2)
-for an explanation of uninstantiated keys and key instantiation.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_instantiate (3).
-.TP
-.BR KEYCTL_NEGATE " (since Linux 2.6.10)"
-Negatively instantiate an uninstantiated key.
-.IP
-This operation is equivalent to the call:
-.IP
-.in +4n
-.EX
-keyctl(KEYCTL_REJECT, arg2, arg3, ENOKEY, arg4);
-.EE
-.in
-.IP
-The
-.I arg5
-argument is ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_negate (3).
-.TP
-.BR KEYCTL_SET_REQKEY_KEYRING " (since Linux 2.6.13)"
-Set the default keyring to which implicitly requested keys
-will be linked for this thread, and return the previous setting.
-Implicit key requests are those made by internal kernel components,
-.\" I.e., calls to the kernel's internal request_key() interface,
-.\" which is distinct from the request_key(2) system call (which
-.\" ultimately employs the kernel-internal interface).
-such as can occur when, for example, opening files
-on an AFS or NFS filesystem.
-Setting the default keyring also has an effect when requesting
-a key from user space; see
-.BR request_key (2)
-for details.
-.IP
-The
-.I arg2
-argument (cast to
-.IR int )
-should contain one of the following values,
-to specify the new default keyring:
-.RS
-.TP
-.B KEY_REQKEY_DEFL_NO_CHANGE
-Don't change the default keyring.
-This can be used to discover the current default keyring
-(without changing it).
-.TP
-.B KEY_REQKEY_DEFL_DEFAULT
-This selects the default behaviour,
-which is to use the thread-specific keyring if there is one,
-otherwise the process-specific keyring if there is one,
-otherwise the session keyring if there is one,
-otherwise the UID-specific session keyring,
-otherwise the user-specific keyring.
-.TP
-.B KEY_REQKEY_DEFL_THREAD_KEYRING
-Use the thread-specific keyring
-.RB ( thread\-keyring (7))
-as the new default keyring.
-.TP
-.B KEY_REQKEY_DEFL_PROCESS_KEYRING
-Use the process-specific keyring
-.RB ( process\-keyring (7))
-as the new default keyring.
-.TP
-.B KEY_REQKEY_DEFL_SESSION_KEYRING
-Use the session-specific keyring
-.RB ( session\-keyring (7))
-as the new default keyring.
-.TP
-.B KEY_REQKEY_DEFL_USER_KEYRING
-Use the UID-specific keyring
-.RB ( user\-keyring (7))
-as the new default keyring.
-.TP
-.B KEY_REQKEY_DEFL_USER_SESSION_KEYRING
-Use the UID-specific session keyring
-.RB ( user\-session\-keyring (7))
-as the new default keyring.
-.TP
-.BR KEY_REQKEY_DEFL_REQUESTOR_KEYRING " (since Linux 2.6.29)"
-.\" 8bbf4976b59fc9fc2861e79cab7beb3f6d647640
-Use the requestor keyring.
-.\" FIXME The preceding explanation needs to be expanded.
-.\" Is the following correct:
-.\"
-.\" The requestor keyring is the dest_keyring that
-.\" was supplied to a call to request_key(2)?
-.\"
-.\" David Howells said: to be checked
-.RE
-.IP
-All other values are invalid.
-.\" (including the still-unsupported KEY_REQKEY_DEFL_GROUP_KEYRING)
-.IP
-The arguments
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.IP
-The setting controlled by this operation is inherited by the child of
-.BR fork (2)
-and preserved across
-.BR execve (2).
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_set_reqkey_keyring (3).
-.TP
-.BR KEYCTL_SET_TIMEOUT " (since Linux 2.6.16)"
-Set a timeout on a key.
-.IP
-The ID of the key is specified in
-.I arg2
-(cast to
-.IR key_serial_t ).
-The timeout value, in seconds from the current time,
-is specified in
-.I arg3
-(cast to
-.IR "unsigned int" ).
-The timeout is measured against the realtime clock.
-.IP
-Specifying the timeout value as 0 clears any existing timeout on the key.
-.IP
-The
-.I /proc/keys
-file displays the remaining time until each key will expire.
-(This is the only method of discovering the timeout on a key.)
-.IP
-The caller must either have the
-.I setattr
-permission on the key
-or hold an instantiation authorization token for the key (see
-.BR request_key (2)).
-.IP
-The key and any links to the key will be
-automatically garbage collected after the timeout expires.
-Subsequent attempts to access the key will then fail with the error
-.BR EKEYEXPIRED .
-.IP
-This operation cannot be used to set timeouts on revoked, expired,
-or negatively instantiated keys.
-.IP
-The arguments
-.I arg4
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_set_timeout (3).
-.TP
-.BR KEYCTL_ASSUME_AUTHORITY " (since Linux 2.6.16)"
-Assume (or divest) the authority for the calling thread
-to instantiate a key.
-.IP
-The
-.I arg2
-argument (cast to
-.IR key_serial_t )
-specifies either a nonzero key ID to assume authority,
-or the value 0 to divest authority.
-.IP
-If
-.I arg2
-is nonzero, then it specifies the ID of an uninstantiated key for which
-authority is to be assumed.
-That key can then be instantiated using one of
-.BR KEYCTL_INSTANTIATE ,
-.BR KEYCTL_INSTANTIATE_IOV ,
-.BR KEYCTL_REJECT ,
-or
-.BR KEYCTL_NEGATE .
-Once the key has been instantiated,
-the thread is automatically divested of authority to instantiate the key.
-.IP
-Authority over a key can be assumed only if the calling thread has present
-in its keyrings the authorization key that is
-associated with the specified key.
-(In other words, the
-.B KEYCTL_ASSUME_AUTHORITY
-operation is available only from a
-.BR request\-key (8)-style
-program; see
-.BR request_key (2)
-for an explanation of how this operation is used.)
-The caller must have
-.I search
-permission on the authorization key.
-.IP
-If the specified key has a matching authorization key,
-then the ID of that key is returned.
-The authorization key can be read
-.RB ( KEYCTL_READ )
-to obtain the callout information passed to
-.BR request_key (2).
-.IP
-If the ID given in
-.I arg2
-is 0, then the currently assumed authority is cleared (divested),
-and the value 0 is returned.
-.IP
-The
-.B KEYCTL_ASSUME_AUTHORITY
-mechanism allows a program such as
-.BR request\-key (8)
-to assume the necessary authority to instantiate a new uninstantiated key
-that was created as a consequence of a call to
-.BR request_key (2).
-For further information, see
-.BR request_key (2)
-and the kernel source file
-.IR Documentation/security/keys\-request\-key.txt .
-.IP
-The arguments
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_assume_authority (3).
-.TP
-.BR KEYCTL_GET_SECURITY " (since Linux 2.6.26)"
-.\" commit 70a5bb72b55e82fbfbf1e22cae6975fac58a1e2d
-Get the LSM (Linux Security Module) security label of the specified key.
-.IP
-The ID of the key whose security label is to be fetched is specified in
-.I arg2
-(cast to
-.IR key_serial_t ).
-The security label (terminated by a null byte)
-will be placed in the buffer pointed to by
-.I arg3
-argument (cast to
-.IR "char\ *" );
-the size of the buffer must be provided in
-.I arg4
-(cast to
-.IR size_t ).
-.IP
-If
-.I arg3
-is specified as NULL or the buffer size specified in
-.I arg4
-is too small, the full size of the security label string
-(including the terminating null byte)
-is returned as the function result,
-and nothing is copied to the buffer.
-.IP
-The caller must have
-.I view
-permission on the specified key.
-.IP
-The returned security label string will be rendered in a form appropriate
-to the LSM in force.
-For example, with SELinux, it may look like:
-.IP
-.in +4n
-.EX
-unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
-.EE
-.in
-.IP
-If no LSM is currently in force,
-then an empty string is placed in the buffer.
-.IP
-The
-.I arg5
-argument is ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the functions
-.BR keyctl_get_security (3)
-and
-.BR keyctl_get_security_alloc (3).
-.TP
-.BR KEYCTL_SESSION_TO_PARENT " (since Linux 2.6.32)"
-.\" commit ee18d64c1f632043a02e6f5ba5e045bb26a5465f
-Replace the session keyring to which the
-.I parent
-of the calling process
-subscribes with the session keyring of the calling process.
-.\" What is the use case for KEYCTL_SESSION_TO_PARENT?
-.\" David Howells: the Process Authentication Groups people requested this,
-.\" but then didn't use it; maybe there are no users.
-.IP
-The keyring will be replaced in the parent process at the point
-where the parent next transitions from kernel space to user space.
-.IP
-The keyring must exist and must grant the caller
-.I link
-permission.
-The parent process must be single-threaded and have
-the same effective ownership as this process
-and must not be set-user-ID or set-group-ID.
-The UID of the parent process's existing session keyring (if it has one),
-as well as the UID of the caller's session keyring
-must match the caller's effective UID.
-.IP
-The fact that it is the parent process that is affected by this operation
-allows a program such as the shell to start a child process that
-uses this operation to change the shell's session keyring.
-(This is what the
-.BR keyctl (1)
-.B new_session
-command does.)
-.IP
-The arguments
-.IR arg2 ,
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_session_to_parent (3).
-.TP
-.BR KEYCTL_REJECT " (since Linux 2.6.39)"
-.\" commit fdd1b94581782a2ddf9124414e5b7a5f48ce2f9c
-Mark a key as negatively instantiated and set an expiration timer
-on the key.
-This operation provides a superset of the functionality of the earlier
-.B KEYCTL_NEGATE
-operation.
-.IP
-The ID of the key that is to be negatively instantiated is specified in
-.I arg2
-(cast to
-.IR key_serial_t ).
-The
-.I arg3
-(cast to
-.IR "unsigned int" )
-argument specifies the lifetime of the key, in seconds.
-The
-.I arg4
-argument (cast to
-.IR "unsigned int" )
-specifies the error to be returned when a search hits this key;
-typically, this is one of
-.BR EKEYREJECTED ,
-.BR EKEYREVOKED ,
-or
-.BR EKEYEXPIRED .
-.IP
-If
-.I arg5
-(cast to
-.IR key_serial_t )
-is nonzero, then, subject to the same constraints and rules as
-.BR KEYCTL_LINK ,
-the negatively instantiated key is linked into the keyring
-whose ID is specified in
-.IR arg5 .
-.IP
-The caller must have the appropriate authorization key.
-In other words, this operation is available only from a
-.BR request\-key (8)-style
-program.
-See
-.BR request_key (2).
-.IP
-The caller must have the appropriate authorization key,
-and once the uninstantiated key has been instantiated,
-the authorization key is revoked.
-In other words, this operation is available only from a
-.BR request\-key (8)-style
-program.
-See
-.BR request_key (2)
-for an explanation of uninstantiated keys and key instantiation.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_reject (3).
-.TP
-.BR KEYCTL_INSTANTIATE_IOV " (since Linux 2.6.39)"
-.\" commit ee009e4a0d4555ed522a631bae9896399674f063
-Instantiate an uninstantiated key with a payload specified
-via a vector of buffers.
-.IP
-This operation is the same as
-.BR KEYCTL_INSTANTIATE ,
-but the payload data is specified as an array of
-.I iovec
-structures (see
-.BR iovec (3type)).
-.IP
-The pointer to the payload vector is specified in
-.I arg3
-(cast as
-.IR "const struct iovec\~*" ).
-The number of items in the vector is specified in
-.I arg4
-(cast as
-.IR "unsigned int" ).
-.IP
-The
-.I arg2
-(key ID)
-and
-.I arg5
-(keyring ID)
-are interpreted as for
-.BR KEYCTL_INSTANTIATE .
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_instantiate_iov (3).
-.TP
-.BR KEYCTL_INVALIDATE " (since Linux 3.5)"
-.\" commit fd75815f727f157a05f4c96b5294a4617c0557da
-Mark a key as invalid.
-.IP
-The ID of the key to be invalidated is specified in
-.I arg2
-(cast to
-.IR key_serial_t ).
-.IP
-To invalidate a key,
-the caller must have
-.I search
-permission on the key.
-.\" CAP_SYS_ADMIN is permitted to invalidate certain special keys
-.IP
-This operation marks the key as invalid
-and schedules immediate garbage collection.
-The garbage collector removes the invalidated key from all keyrings and
-deletes the key when its reference count reaches zero.
-After this operation,
-the key will be ignored by all searches,
-even if it is not yet deleted.
-.IP
-Keys that are marked invalid become invisible to normal key operations
-immediately, though they are still visible in
-.I /proc/keys
-(marked with an 'i' flag)
-until they are actually removed.
-.IP
-The arguments
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_invalidate (3).
-.TP
-.BR KEYCTL_GET_PERSISTENT " (since Linux 3.13)"
-.\" commit f36f8c75ae2e7d4da34f4c908cebdb4aa42c977e
-Get the persistent keyring
-.RB ( persistent\-keyring (7))
-for a specified user and link it to a specified keyring.
-.IP
-The user ID is specified in
-.I arg2
-(cast to
-.IR uid_t ).
-If the value \-1 is specified, the caller's real user ID is used.
-The ID of the destination keyring is specified in
-.I arg3
-(cast to
-.IR key_serial_t ).
-.IP
-The caller must have the
-.B CAP_SETUID
-capability in its user namespace in order to fetch the persistent keyring
-for a user ID that does not match either the real or effective user ID
-of the caller.
-.IP
-If the call is successful,
-a link to the persistent keyring is added to the keyring
-whose ID was specified in
-.IR arg3 .
-.IP
-The caller must have
-.I write
-permission on the keyring.
-.IP
-The persistent keyring will be created by the kernel
-if it does not yet exist.
-.IP
-Each time the
-.B KEYCTL_GET_PERSISTENT
-operation is performed, the persistent keyring will
-have its expiration timeout reset to the value in:
-.IP
-.in +4n
-.EX
-/proc/sys/kernel/keys/persistent_keyring_expiry
-.EE
-.in
-.IP
-Should the timeout be reached,
-the persistent keyring will be removed and
-everything it pins can then be garbage collected.
-.IP
-Persistent keyrings were added in Linux 3.13.
-.IP
-The arguments
-.I arg4
-and
-.I arg5
-are ignored.
-.IP
-This operation is exposed by
-.I libkeyutils
-via the function
-.BR keyctl_get_persistent (3).
-.TP
-.BR KEYCTL_DH_COMPUTE " (since Linux 4.7)"
-.\" commit ddbb41148724367394d0880c516bfaeed127b52e
-Compute a Diffie-Hellman shared secret or public key,
-optionally applying a key derivation function (KDF) to the result.
-.IP
-The
-.I arg2
-argument is a pointer to a set of parameters containing
-serial numbers for three
-.I \[dq]user\[dq]
-keys used in the Diffie-Hellman calculation,
-packaged in a structure of the following form:
-.IP
-.in +4n
-.EX
-struct keyctl_dh_params {
- int32_t private; /* The local private key */
- int32_t prime; /* The prime, known to both parties */
- int32_t base; /* The base integer: either a shared
- generator or the remote public key */
-};
-.EE
-.in
-.IP
-Each of the three keys specified in this structure must grant the caller
-.I read
-permission.
-The payloads of these keys are used to calculate the Diffie-Hellman
-result as:
-.IP
-.in +4n
-.EX
-base \[ha] private mod prime
-.EE
-.in
-.IP
-If the base is the shared generator, the result is the local public key.
-If the base is the remote public key, the result is the shared secret.
-.IP
-The
-.I arg3
-argument (cast to
-.IR char\~* )
-points to a buffer where the result of the calculation is placed.
-The size of that buffer is specified in
-.I arg4
-(cast to
-.IR size_t ).
-.IP
-The buffer must be large enough to accommodate the output data,
-otherwise an error is returned.
-If
-.I arg4
-is specified as zero,
-the buffer is not used and
-the operation returns the minimum required buffer size
-(i.e., the length of the prime).
-.IP
-Diffie-Hellman computations can be performed in user space,
-but require a multiple-precision integer (MPI) library.
-Moving the implementation into the kernel gives access to
-the kernel MPI implementation,
-and allows access to secure or acceleration hardware.
-.IP
-Adding support for DH computation to the
-.BR keyctl ()
-system call was considered a good fit due to the DH algorithm's use
-for deriving shared keys;
-it also allows the type of the key to determine
-which DH implementation (software or hardware) is appropriate.
-.\" commit f1c316a3ab9d24df6022682422fe897492f2c0c8
-.IP
-If the
-.I arg5
-argument is
-.BR NULL ,
-then the DH result itself is returned.
-Otherwise (since Linux 4.12), it is a pointer to a structure which specifies
-parameters of the KDF operation to be applied:
-.IP
-.in +4n
-.EX
-struct keyctl_kdf_params {
- char *hashname; /* Hash algorithm name */
- char *otherinfo; /* SP800\-56A OtherInfo */
- __u32 otherinfolen; /* Length of otherinfo data */
- __u32 __spare[8]; /* Reserved */
-};
-.EE
-.in
-.IP
-The
-.I hashname
-field is a null-terminated string which specifies a hash name
-(available in the kernel's crypto API; the list of the hashes available
-is rather tricky to observe; please refer to the
-.UR https://www.kernel.org\:/doc\:/html\:/latest\:/crypto\:/architecture.html
-"Kernel Crypto API Architecture"
-.UE
-documentation for the information regarding how hash names are constructed and
-your kernel's source and configuration regarding what ciphers
-and templates with type
-.B CRYPTO_ALG_TYPE_SHASH
-are available)
-to be applied to DH result in KDF operation.
-.IP
-The
-.I otherinfo
-field is
-.I OtherInfo
-data as described in SP800-56A section 5.8.1.2 and is algorithm-specific.
-This data is concatenated with the result of DH operation and is provided as
-an input to the KDF operation.
-Its size is provided in the
-.I otherinfolen
-field and is limited by the
-.B KEYCTL_KDF_MAX_OI_LEN
-constant that is defined in
-.I security/keys/internal.h
-to a value of 64.
-.IP
-The
-.B __spare
-field is currently unused.
-.\" commit 4f9dabfaf8df971f8a3b6aa324f8f817be38d538
-It was ignored until Linux 4.13 (but still should be
-user-addressable since it is copied to the kernel),
-and should contain zeros since Linux 4.13.
-.IP
-The KDF implementation complies with SP800-56A as well
-as with SP800-108 (the counter KDF).
-.IP
-.\" keyutils commit 742c9d7b94051d3b21f9f61a73ed6b5f3544cb82
-.\" keyutils commit d68a981e5db41d059ac782071c35d1e8f3aaf61c
-This operation is exposed by
-.I libkeyutils
-(from
-.I libkeyutils
-1.5.10 onwards) via the functions
-.BR keyctl_dh_compute (3)
-and
-.BR keyctl_dh_compute_alloc (3).
-.TP
-.BR KEYCTL_RESTRICT_KEYRING " (since Linux 4.12)"
-.\" commit 6563c91fd645556c7801748f15bc727c77fcd311
-.\" commit 7228b66aaf723a623e578aa4db7d083bb39546c9
-Apply a key-linking restriction to the keyring with the ID provided in
-.I arg2
-(cast to
-.IR key_serial_t ).
-The caller must have
-.I setattr
-permission on the key.
-If
-.I arg3
-is NULL, any attempt to add a key to the keyring is blocked;
-otherwise it contains a pointer to a string with a key type name and
-.I arg4
-contains a pointer to a string that describes the type-specific restriction.
-As of Linux 4.12, only the type "asymmetric" has restrictions defined:
-.RS
-.TP
-.B builtin_trusted
-Allows only keys that are signed by a key linked to the built-in keyring
-(".builtin_trusted_keys").
-.TP
-.B builtin_and_secondary_trusted
-Allows only keys that are signed by a key linked to the secondary keyring
-(".secondary_trusted_keys") or, by extension, a key in a built-in keyring,
-as the latter is linked to the former.
-.TP
-.BI key_or_keyring: key
-.TQ
-.BI key_or_keyring: key :chain
-If
-.I key
-specifies the ID of a key of type "asymmetric",
-then only keys that are signed by this key are allowed.
-.IP
-If
-.I key
-specifies the ID of a keyring,
-then only keys that are signed by a key linked
-to this keyring are allowed.
-.IP
-If ":chain" is specified, keys that are signed by a key linked to the
-destination keyring (that is, the keyring with the ID specified in the
-.I arg2
-argument) are also allowed.
-.RE
-.IP
-Note that a restriction can be configured only once for the specified keyring;
-once a restriction is set, it can't be overridden.
-.IP
-The argument
-.I arg5
-is ignored.
-.\" FIXME Document KEYCTL_RESTRICT_KEYRING, added in Linux 4.12
-.\" commit 6563c91fd645556c7801748f15bc727c77fcd311
-.\" Author: Mat Martineau <mathew.j.martineau@linux.intel.com>
-.\" See Documentation/security/keys.txt
-.SH RETURN VALUE
-For a successful call, the return value depends on the operation:
-.TP
-.B KEYCTL_GET_KEYRING_ID
-The ID of the requested keyring.
-.TP
-.B KEYCTL_JOIN_SESSION_KEYRING
-The ID of the joined session keyring.
-.TP
-.B KEYCTL_DESCRIBE
-The size of the description (including the terminating null byte),
-irrespective of the provided buffer size.
-.TP
-.B KEYCTL_SEARCH
-The ID of the key that was found.
-.TP
-.B KEYCTL_READ
-The amount of data that is available in the key,
-irrespective of the provided buffer size.
-.TP
-.B KEYCTL_SET_REQKEY_KEYRING
-The ID of the previous default keyring
-to which implicitly requested keys were linked
-(one of
-.BR KEY_REQKEY_DEFL_* ).
-.TP
-.B KEYCTL_ASSUME_AUTHORITY
-Either 0, if the ID given was 0,
-or the ID of the authorization key matching the specified key,
-if a nonzero key ID was provided.
-.TP
-.B KEYCTL_GET_SECURITY
-The size of the LSM security label string
-(including the terminating null byte),
-irrespective of the provided buffer size.
-.TP
-.B KEYCTL_GET_PERSISTENT
-The ID of the persistent keyring.
-.TP
-.B KEYCTL_DH_COMPUTE
-The number of bytes copied to the buffer, or, if
-.I arg4
-is 0, the required buffer size.
-.TP
-All other operations
-Zero.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The requested operation wasn't permitted.
-.TP
-.B EAGAIN
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and there was an error during crypto module initialization.
-.TP
-.B EDEADLK
-.I operation
-was
-.B KEYCTL_LINK
-and the requested link would result in a cycle.
-.TP
-.B EDEADLK
-.I operation
-was
-.B KEYCTL_RESTRICT_KEYRING
-and the requested keyring restriction would result in a cycle.
-.TP
-.B EDQUOT
-The key quota for the caller's user would be exceeded by creating a key or
-linking it to the keyring.
-.TP
-.B EEXIST
-.I operation
-was
-.B KEYCTL_RESTRICT_KEYRING
-and the keyring provided in the
-.I arg2
-argument already has a restriction set.
-.TP
-.B EFAULT
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and one of the following has failed:
-.RS
-.IP \[bu] 3
-copying of the
-.IR "struct keyctl_dh_params" ,
-provided in the
-.I arg2
-argument, from user space;
-.IP \[bu]
-copying of the
-.IR "struct keyctl_kdf_params" ,
-provided in the non-NULL
-.I arg5
-argument, from user space
-(in case kernel supports performing KDF operation on DH operation result);
-.IP \[bu]
-copying of data pointed to by the
-.I hashname
-field of the
-.I "struct keyctl_kdf_params"
-from user space;
-.IP \[bu]
-copying of data pointed to by the
-.I otherinfo
-field of the
-.I struct keyctl_kdf_params
-from user space if the
-.I otherinfolen
-field was nonzero;
-.IP \[bu]
-copying of the result to user space.
-.RE
-.TP
-.B EINVAL
-.I operation
-was
-.B KEYCTL_SETPERM
-and an invalid permission bit was specified in
-.IR arg3 .
-.TP
-.B EINVAL
-.I operation
-was
-.B KEYCTL_SEARCH
-and the size of the description in
-.I arg4
-(including the terminating null byte) exceeded 4096 bytes.
-.TP
-.B EINVAL
-The size of the string (including the terminating null byte) specified in
-.I arg3
-(the key type)
-or
-.I arg4
-(the key description)
-exceeded the limit (32 bytes and 4096 bytes respectively).
-.TP
-.BR EINVAL " (before Linux 4.12)"
-.I operation
-was
-.BR KEYCTL_DH_COMPUTE ,
-argument
-.I arg5
-was non-NULL.
-.TP
-.B EINVAL
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and the digest size of the hashing algorithm supplied is zero.
-.TP
-.B EINVAL
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and the buffer size provided is not enough to hold the result.
-Provide 0 as a buffer size in order to obtain the minimum buffer size.
-.TP
-.B EINVAL
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and the hash name provided in the
-.I hashname
-field of the
-.I struct keyctl_kdf_params
-pointed to by
-.I arg5
-argument is too big (the limit is implementation-specific and varies between
-kernel versions, but it is deemed big enough for all valid algorithm names).
-.TP
-.B EINVAL
-.\" commit 4f9dabfaf8df971f8a3b6aa324f8f817be38d538
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and the
-.I __spare
-field of the
-.I struct keyctl_kdf_params
-provided in the
-.I arg5
-argument contains nonzero values.
-.TP
-.B EKEYEXPIRED
-An expired key was found or specified.
-.TP
-.B EKEYREJECTED
-A rejected key was found or specified.
-.TP
-.B EKEYREVOKED
-A revoked key was found or specified.
-.TP
-.B ELOOP
-.I operation
-was
-.B KEYCTL_LINK
-and the requested link would cause the maximum nesting depth
-for keyrings to be exceeded.
-.TP
-.B EMSGSIZE
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and the buffer length exceeds
-.B KEYCTL_KDF_MAX_OUTPUT_LEN
-(which is 1024 currently)
-or the
-.I otherinfolen
-field of the
-.I struct keyctl_kdf_params
-passed in
-.I arg5
-exceeds
-.B KEYCTL_KDF_MAX_OI_LEN
-(which is 64 currently).
-.TP
-.BR ENFILE " (before Linux 3.13)"
-.I operation
-was
-.B KEYCTL_LINK
-and the keyring is full.
-(Before Linux 3.13,
-.\" commit b2a4df200d570b2c33a57e1ebfa5896e4bc81b69
-the available space for storing keyring links was limited to
-a single page of memory; since Linux 3.13, there is no fixed limit.)
-.TP
-.B ENOENT
-.I operation
-was
-.B KEYCTL_UNLINK
-and the key to be unlinked isn't linked to the keyring.
-.TP
-.B ENOENT
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and the hashing algorithm specified in the
-.I hashname
-field of the
-.I struct keyctl_kdf_params
-pointed to by
-.I arg5
-argument hasn't been found.
-.TP
-.B ENOENT
-.I operation
-was
-.B KEYCTL_RESTRICT_KEYRING
-and the type provided in
-.I arg3
-argument doesn't support setting key linking restrictions.
-.TP
-.B ENOKEY
-No matching key was found or an invalid key was specified.
-.TP
-.B ENOKEY
-The value
-.B KEYCTL_GET_KEYRING_ID
-was specified in
-.IR operation ,
-the key specified in
-.I arg2
-did not exist, and
-.I arg3
-was zero (meaning don't create the key if it didn't exist).
-.TP
-.B ENOMEM
-One of the kernel memory allocation routines failed during the execution of the
-syscall.
-.TP
-.B ENOTDIR
-A key of keyring type was expected but the ID of a key with
-a different type was provided.
-.TP
-.B EOPNOTSUPP
-.I operation
-was
-.B KEYCTL_READ
-and the key type does not support reading
-(e.g., the type is
-.IR \[dq]login\[dq] ).
-.TP
-.B EOPNOTSUPP
-.I operation
-was
-.B KEYCTL_UPDATE
-and the key type does not support updating.
-.TP
-.B EOPNOTSUPP
-.I operation
-was
-.BR KEYCTL_RESTRICT_KEYRING ,
-the type provided in
-.I arg3
-argument was "asymmetric",
-and the key specified in the restriction specification provided in
-.I arg4
-has type other than "asymmetric" or "keyring".
-.TP
-.B EPERM
-.I operation
-was
-.BR KEYCTL_GET_PERSISTENT ,
-.I arg2
-specified a UID other than the calling thread's real or effective UID,
-and the caller did not have the
-.B CAP_SETUID
-capability.
-.TP
-.B EPERM
-.I operation
-was
-.B KEYCTL_SESSION_TO_PARENT
-and either:
-all of the UIDs (GIDs) of the parent process do not match
-the effective UID (GID) of the calling process;
-the UID of the parent's existing session keyring or
-the UID of the caller's session keyring did not match
-the effective UID of the caller;
-the parent process is not single-threaded;
-or the parent process is
-.BR init (1)
-or a kernel thread.
-.TP
-.B ETIMEDOUT
-.I operation
-was
-.B KEYCTL_DH_COMPUTE
-and the initialization of crypto modules has timed out.
-.SH VERSIONS
-A wrapper is provided in the
-.I libkeyutils
-library.
-(The accompanying package provides the
-.I <keyutils.h>
-header file.)
-However, rather than using this system call directly,
-you probably want to use the various library functions
-mentioned in the descriptions of individual operations above.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.10.
-.SH EXAMPLES
-The program below provides a subset of the functionality of the
-.BR request\-key (8)
-program provided by the
-.I keyutils
-package.
-For informational purposes,
-the program records various information in a log file.
-.P
-As described in
-.BR request_key (2),
-the
-.BR request\-key (8)
-program is invoked with command-line arguments that
-describe a key that is to be instantiated.
-The example program fetches and logs these arguments.
-The program assumes authority to instantiate the requested key,
-and then instantiates that key.
-.P
-The following shell session demonstrates the use of this program.
-In the session,
-we compile the program and then use it to temporarily replace the standard
-.BR request\-key (8)
-program.
-(Note that temporarily disabling the standard
-.BR request\-key (8)
-program may not be safe on some systems.)
-While our example program is installed,
-we use the example program shown in
-.BR request_key (2)
-to request a key.
-.P
-.in +4n
-.EX
-$ \fBcc \-o key_instantiate key_instantiate.c \-lkeyutils\fP
-$ \fBsudo mv /sbin/request\-key /sbin/request\-key.backup\fP
-$ \fBsudo cp key_instantiate /sbin/request\-key\fP
-$ \fB./t_request_key user mykey somepayloaddata\fP
-Key ID is 20d035bf
-$ \fBsudo mv /sbin/request\-key.backup /sbin/request\-key\fP
-.EE
-.in
-.P
-Looking at the log file created by this program,
-we can see the command-line arguments supplied to our example program:
-.P
-.in +4n
-.EX
-$ \fBcat /tmp/key_instantiate.log\fP
-Time: Mon Nov 7 13:06:47 2016
-\&
-Command line arguments:
- argv[0]: /sbin/request\-key
- operation: create
- key_to_instantiate: 20d035bf
- UID: 1000
- GID: 1000
- thread_keyring: 0
- process_keyring: 0
- session_keyring: 256e6a6
-\&
-Key description: user;1000;1000;3f010000;mykey
-Auth key payload: somepayloaddata
-Destination keyring: 256e6a6
-Auth key description: .request_key_auth;1000;1000;0b010000;20d035bf
-.EE
-.in
-.P
-The last few lines of the above output show that the example program
-was able to fetch:
-.IP \[bu] 3
-the description of the key to be instantiated,
-which included the name of the key
-.RI ( mykey );
-.IP \[bu]
-the payload of the authorization key, which consisted of the data
-.RI ( somepayloaddata )
-passed to
-.BR request_key (2);
-.IP \[bu]
-the destination keyring that was specified in the call to
-.BR request_key (2);
-and
-.IP \[bu]
-the description of the authorization key,
-where we can see that the name of the authorization key matches
-the ID of the key that is to be instantiated
-.RI ( 20d035bf ).
-.P
-The example program in
-.BR request_key (2)
-specified the destination keyring as
-.BR KEY_SPEC_SESSION_KEYRING .
-By examining the contents of
-.IR /proc/keys ,
-we can see that this was translated to the ID of the destination keyring
-.RI ( 0256e6a6 )
-shown in the log output above;
-we can also see the newly created key with the name
-.I mykey
-and ID
-.IR 20d035bf .
-.P
-.in +4n
-.EX
-$ \fBcat /proc/keys | egrep \[aq]mykey|256e6a6\[aq]\fP
-0256e6a6 I\-\-Q\-\-\- 194 perm 3f030000 1000 1000 keyring _ses: 3
-20d035bf I\-\-Q\-\-\- 1 perm 3f010000 1000 1000 user mykey: 16
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (key_instantiate.c)
-.EX
-/* key_instantiate.c */
-\&
-#include <errno.h>
-#include <keyutils.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <time.h>
-\&
-#ifndef KEY_SPEC_REQUESTOR_KEYRING
-#define KEY_SPEC_REQUESTOR_KEYRING (\-8)
-#endif
-\&
-int
-main(int argc, char *argv[])
-{
- int akp_size; /* Size of auth_key_payload */
- int auth_key;
- char dbuf[256];
- char auth_key_payload[256];
- char *operation;
- FILE *fp;
- gid_t gid;
- uid_t uid;
- time_t t;
- key_serial_t key_to_instantiate, dest_keyring;
- key_serial_t thread_keyring, process_keyring, session_keyring;
-\&
- if (argc != 8) {
- fprintf(stderr, "Usage: %s op key uid gid thread_keyring "
- "process_keyring session_keyring\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- fp = fopen("/tmp/key_instantiate.log", "w");
- if (fp == NULL)
- exit(EXIT_FAILURE);
-\&
- setbuf(fp, NULL);
-\&
- t = time(NULL);
- fprintf(fp, "Time: %s\en", ctime(&t));
-\&
- /*
- * The kernel passes a fixed set of arguments to the program
- * that it execs; fetch them.
- */
- operation = argv[1];
- key_to_instantiate = atoi(argv[2]);
- uid = atoi(argv[3]);
- gid = atoi(argv[4]);
- thread_keyring = atoi(argv[5]);
- process_keyring = atoi(argv[6]);
- session_keyring = atoi(argv[7]);
-\&
- fprintf(fp, "Command line arguments:\en");
- fprintf(fp, " argv[0]: %s\en", argv[0]);
- fprintf(fp, " operation: %s\en", operation);
- fprintf(fp, " key_to_instantiate: %jx\en",
- (uintmax_t) key_to_instantiate);
- fprintf(fp, " UID: %jd\en", (intmax_t) uid);
- fprintf(fp, " GID: %jd\en", (intmax_t) gid);
- fprintf(fp, " thread_keyring: %jx\en",
- (uintmax_t) thread_keyring);
- fprintf(fp, " process_keyring: %jx\en",
- (uintmax_t) process_keyring);
- fprintf(fp, " session_keyring: %jx\en",
- (uintmax_t) session_keyring);
- fprintf(fp, "\en");
-\&
- /*
- * Assume the authority to instantiate the key named in argv[2].
- */
- if (keyctl(KEYCTL_ASSUME_AUTHORITY, key_to_instantiate) == \-1) {
- fprintf(fp, "KEYCTL_ASSUME_AUTHORITY failed: %s\en",
- strerror(errno));
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Fetch the description of the key that is to be instantiated.
- */
- if (keyctl(KEYCTL_DESCRIBE, key_to_instantiate,
- dbuf, sizeof(dbuf)) == \-1) {
- fprintf(fp, "KEYCTL_DESCRIBE failed: %s\en", strerror(errno));
- exit(EXIT_FAILURE);
- }
-\&
- fprintf(fp, "Key description: %s\en", dbuf);
-\&
- /*
- * Fetch the payload of the authorization key, which is
- * actually the callout data given to request_key().
- */
- akp_size = keyctl(KEYCTL_READ, KEY_SPEC_REQKEY_AUTH_KEY,
- auth_key_payload, sizeof(auth_key_payload));
- if (akp_size == \-1) {
- fprintf(fp, "KEYCTL_READ failed: %s\en", strerror(errno));
- exit(EXIT_FAILURE);
- }
-\&
- auth_key_payload[akp_size] = \[aq]\e0\[aq];
- fprintf(fp, "Auth key payload: %s\en", auth_key_payload);
-\&
- /*
- * For interest, get the ID of the authorization key and
- * display it.
- */
- auth_key = keyctl(KEYCTL_GET_KEYRING_ID,
- KEY_SPEC_REQKEY_AUTH_KEY);
- if (auth_key == \-1) {
- fprintf(fp, "KEYCTL_GET_KEYRING_ID failed: %s\en",
- strerror(errno));
- exit(EXIT_FAILURE);
- }
-\&
- fprintf(fp, "Auth key ID: %jx\en", (uintmax_t) auth_key);
-\&
- /*
- * Fetch key ID for the request_key(2) destination keyring.
- */
- dest_keyring = keyctl(KEYCTL_GET_KEYRING_ID,
- KEY_SPEC_REQUESTOR_KEYRING);
- if (dest_keyring == \-1) {
- fprintf(fp, "KEYCTL_GET_KEYRING_ID failed: %s\en",
- strerror(errno));
- exit(EXIT_FAILURE);
- }
-\&
- fprintf(fp, "Destination keyring: %jx\en", (uintmax_t) dest_keyring);
-\&
- /*
- * Fetch the description of the authorization key. This
- * allows us to see the key type, UID, GID, permissions,
- * and description (name) of the key. Among other things,
- * we will see that the name of the key is a hexadecimal
- * string representing the ID of the key to be instantiated.
- */
- if (keyctl(KEYCTL_DESCRIBE, KEY_SPEC_REQKEY_AUTH_KEY,
- dbuf, sizeof(dbuf)) == \-1)
- {
- fprintf(fp, "KEYCTL_DESCRIBE failed: %s\en", strerror(errno));
- exit(EXIT_FAILURE);
- }
-\&
- fprintf(fp, "Auth key description: %s\en", dbuf);
-\&
- /*
- * Instantiate the key using the callout data that was supplied
- * in the payload of the authorization key.
- */
- if (keyctl(KEYCTL_INSTANTIATE, key_to_instantiate,
- auth_key_payload, akp_size + 1, dest_keyring) == \-1)
- {
- fprintf(fp, "KEYCTL_INSTANTIATE failed: %s\en",
- strerror(errno));
- exit(EXIT_FAILURE);
- }
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.ad l
-.nh
-.BR keyctl (1),
-.BR add_key (2),
-.BR request_key (2),
-.\" .BR find_key_by_type_and_name (3)
-.\" There is a man page, but this function seems not to exist
-.BR keyctl (3),
-.BR keyctl_assume_authority (3),
-.BR keyctl_chown (3),
-.BR keyctl_clear (3),
-.BR keyctl_describe (3),
-.BR keyctl_describe_alloc (3),
-.BR keyctl_dh_compute (3),
-.BR keyctl_dh_compute_alloc (3),
-.BR keyctl_get_keyring_ID (3),
-.BR keyctl_get_persistent (3),
-.BR keyctl_get_security (3),
-.BR keyctl_get_security_alloc (3),
-.BR keyctl_instantiate (3),
-.BR keyctl_instantiate_iov (3),
-.BR keyctl_invalidate (3),
-.BR keyctl_join_session_keyring (3),
-.BR keyctl_link (3),
-.BR keyctl_negate (3),
-.BR keyctl_read (3),
-.BR keyctl_read_alloc (3),
-.BR keyctl_reject (3),
-.BR keyctl_revoke (3),
-.BR keyctl_search (3),
-.BR keyctl_session_to_parent (3),
-.BR keyctl_set_reqkey_keyring (3),
-.BR keyctl_set_timeout (3),
-.BR keyctl_setperm (3),
-.BR keyctl_unlink (3),
-.BR keyctl_update (3),
-.BR recursive_key_scan (3),
-.BR recursive_session_key_scan (3),
-.BR capabilities (7),
-.BR credentials (7),
-.BR keyrings (7),
-.BR keyutils (7),
-.BR persistent\-keyring (7),
-.BR process\-keyring (7),
-.BR session\-keyring (7),
-.BR thread\-keyring (7),
-.BR user\-keyring (7),
-.BR user_namespaces (7),
-.BR user\-session\-keyring (7),
-.BR request\-key (8)
-.P
-The kernel source files under
-.I Documentation/security/keys/
-(or, before Linux 4.13, in the file
-.IR Documentation/security/keys.txt ).
diff --git a/man2/kill.2 b/man2/kill.2
deleted file mode 100644
index 16500fba2..000000000
--- a/man2/kill.2
+++ /dev/null
@@ -1,165 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified by Thomas Koenig <ig25@rz.uni-karlsruhe.de>
-.\" Modified 1993-07-23 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1993-07-25 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-11-01 by Michael Haardt
-.\" <michael@cantor.informatik.rwth-aachen.de>
-.\" Modified 1996-04-14 by Andries Brouwer <aeb@cwi.nl>
-.\" [added some polishing contributed by Mike Battersby <mib@deakin.edu.au>]
-.\" Modified 1996-07-21 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 1997-01-17 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2001-12-18 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2002-07-24 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added note on historical rules enforced when an unprivileged process
-.\" sends a signal.
-.\" Modified 2004-06-16 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added note on CAP_KILL
-.\" Modified 2004-06-24 by aeb
-.\" Modified, 2004-11-30, after idea from emmanuel.colbus@ensimag.imag.fr
-.\"
-.TH kill 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-kill \- send signal to a process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <signal.h>
-.P
-.BI "int kill(pid_t " pid ", int " sig );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR kill ():
-.nf
- _POSIX_C_SOURCE
-.fi
-.SH DESCRIPTION
-The
-.BR kill ()
-system call
-can be used to send any signal to any process group or process.
-.P
-If \fIpid\fP is positive, then signal \fIsig\fP is sent to the
-process with the ID specified by \fIpid\fP.
-.P
-If \fIpid\fP equals 0, then \fIsig\fP is sent to every process in the
-process group of the calling process.
-.P
-If \fIpid\fP equals \-1, then \fIsig\fP is sent to every process
-for which the calling process has permission to send signals,
-except for process 1 (\fIinit\fP), but see below.
-.P
-If \fIpid\fP is less than \-1, then \fIsig\fP is sent to every process
-in the process group whose ID is \fI\-pid\fP.
-.P
-If \fIsig\fP is 0, then no signal is sent,
-but existence and permission checks are still performed;
-this can be used to check for the existence of a process ID or
-process group ID that the caller is permitted to signal.
-.P
-For a process to have permission to send a signal,
-it must either be privileged (under Linux: have the
-.B CAP_KILL
-capability in the user namespace of the target process),
-or the real or effective user ID of the sending process must equal
-the real or saved set-user-ID of the target process.
-In the case of
-.BR SIGCONT ,
-it suffices when the sending and receiving
-processes belong to the same session.
-(Historically, the rules were different; see NOTES.)
-.SH RETURN VALUE
-On success (at least one signal was sent), zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-An invalid signal was specified.
-.TP
-.B EPERM
-The calling process does not have permission to send the signal
-to any of the target processes.
-.TP
-.B ESRCH
-The target process or process group does not exist.
-Note that an existing process might be a zombie,
-a process that has terminated execution, but
-has not yet been
-.BR wait (2)ed
-for.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.SS Linux notes
-Across different kernel versions, Linux has enforced different rules
-for the permissions required for an unprivileged process
-to send a signal to another process.
-.\" In the 0.* kernels things chopped and changed quite
-.\" a bit - MTK, 24 Jul 02
-In Linux 1.0 to 1.2.2, a signal could be sent if the
-effective user ID of the sender matched the effective user ID of the target,
-or the real user ID of the sender matched the real user ID of the target.
-From Linux 1.2.3 until 1.3.77, a signal could be sent if the
-effective user ID of the sender matched either the real or effective
-user ID of the target.
-The current rules, which conform to POSIX.1, were adopted
-in Linux 1.3.78.
-.SH NOTES
-The only signals that can be sent to process ID 1, the
-.I init
-process, are those for which
-.I init
-has explicitly installed signal handlers.
-This is done to ensure the
-system is not brought down accidentally.
-.P
-POSIX.1 requires that \fIkill(\-1,sig)\fP send \fIsig\fP
-to all processes that the calling process may send signals to,
-except possibly for some implementation-defined system processes.
-Linux allows a process to signal itself, but on Linux the call
-\fIkill(\-1,sig)\fP does not signal the calling process.
-.P
-POSIX.1 requires that if a process sends a signal to itself,
-and the sending thread does not have the signal blocked,
-and no other thread
-has it unblocked or is waiting for it in
-.BR sigwait (3),
-at least one
-unblocked signal must be delivered to the sending thread before the
-.BR kill ()
-returns.
-.SH BUGS
-In Linux 2.6 up to and including Linux 2.6.7,
-there was a bug that meant that when sending signals to a process group,
-.BR kill ()
-failed with the error
-.B EPERM
-if the caller did not have permission to send the signal to \fIany\fP (rather
-than \fIall\fP) of the members of the process group.
-Notwithstanding this error return, the signal was still delivered
-to all of the processes for which the caller had permission to signal.
-.SH SEE ALSO
-.BR kill (1),
-.BR _exit (2),
-.BR pidfd_send_signal (2),
-.BR signal (2),
-.BR tkill (2),
-.BR exit (3),
-.BR killpg (3),
-.BR sigqueue (3),
-.BR capabilities (7),
-.BR credentials (7),
-.BR signal (7)
diff --git a/man2/landlock_add_rule.2 b/man2/landlock_add_rule.2
deleted file mode 100644
index 4b95afb07..000000000
--- a/man2/landlock_add_rule.2
+++ /dev/null
@@ -1,131 +0,0 @@
-.\" Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
-.\" Copyright © 2019-2020 ANSSI
-.\" Copyright © 2021 Microsoft Corporation
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH landlock_add_rule 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-landlock_add_rule \- add a new Landlock rule to a ruleset
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/landlock.h>" " /* Definition of " LANDLOCK_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.P
-.BI "int syscall(SYS_landlock_add_rule, int " ruleset_fd ,
-.BI " enum landlock_rule_type " rule_type ,
-.BI " const void *" rule_attr ", uint32_t " flags );
-.fi
-.SH DESCRIPTION
-A Landlock rule describes an action on an object.
-An object is currently a file hierarchy,
-and the related filesystem actions
-are defined with a set of access rights.
-This
-.BR landlock_add_rule ()
-system call enables adding a new Landlock rule to an existing ruleset
-created with
-.BR landlock_create_ruleset (2).
-See
-.BR landlock (7)
-for a global overview.
-.P
-.I ruleset_fd
-is a Landlock ruleset file descriptor obtained with
-.BR landlock_create_ruleset (2).
-.P
-.I rule_type
-identifies the structure type pointed to by
-.IR rule_attr .
-Currently, Linux supports the following
-.I rule_type
-value:
-.TP
-.B LANDLOCK_RULE_PATH_BENEATH
-This defines the object type as a file hierarchy.
-In this case,
-.I rule_attr
-points to the following structure:
-.IP
-.in +4n
-.EX
-struct landlock_path_beneath_attr {
- __u64 allowed_access;
- __s32 parent_fd;
-} __attribute__((packed));
-.EE
-.in
-.IP
-.I allowed_access
-contains a bitmask of allowed filesystem actions for this file hierarchy
-(see
-.B Filesystem actions
-in
-.BR landlock (7)).
-.IP
-.I parent_fd
-is an opened file descriptor, preferably with the
-.I O_PATH
-flag,
-which identifies the parent directory of the file hierarchy or
-just a file.
-.P
-.I flags
-must be 0.
-.SH RETURN VALUE
-On success,
-.BR landlock_add_rule ()
-returns 0.
-.SH ERRORS
-.BR landlock_add_rule ()
-can fail for the following reasons:
-.TP
-.B EOPNOTSUPP
-Landlock is supported by the kernel but disabled at boot time.
-.TP
-.B EINVAL
-.I flags
-is not 0, or the rule accesses are inconsistent (i.e.,
-.I rule_attr\->allowed_access
-is not a subset of the ruleset handled accesses).
-.TP
-.B ENOMSG
-Empty accesses (i.e.,
-.I rule_attr\->allowed_access
-is 0).
-.TP
-.B EBADF
-.I ruleset_fd
-is not a file descriptor for the current thread,
-or a member of
-.I rule_attr
-is not a file descriptor as expected.
-.TP
-.B EBADFD
-.I ruleset_fd
-is not a ruleset file descriptor,
-or a member of
-.I rule_attr
-is not the expected file descriptor type.
-.TP
-.B EPERM
-.I ruleset_fd
-has no write access to the underlying ruleset.
-.TP
-.B EFAULT
-.I rule_attr
-was not a valid address.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.13.
-.SH EXAMPLES
-See
-.BR landlock (7).
-.SH SEE ALSO
-.BR landlock_create_ruleset (2),
-.BR landlock_restrict_self (2),
-.BR landlock (7)
diff --git a/man2/landlock_create_ruleset.2 b/man2/landlock_create_ruleset.2
deleted file mode 100644
index e62a3f9b9..000000000
--- a/man2/landlock_create_ruleset.2
+++ /dev/null
@@ -1,124 +0,0 @@
-.\" Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
-.\" Copyright © 2019-2020 ANSSI
-.\" Copyright © 2021 Microsoft Corporation
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH landlock_create_ruleset 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-landlock_create_ruleset \- create a new Landlock ruleset
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/landlock.h>" " /* Definition of " LANDLOCK_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.P
-.B int syscall(SYS_landlock_create_ruleset,
-.BI " const struct landlock_ruleset_attr *" attr ,
-.BI " size_t " size ", uint32_t " flags );
-.fi
-.SH DESCRIPTION
-A Landlock ruleset identifies a set of rules (i.e., actions on objects).
-This
-.BR landlock_create_ruleset ()
-system call enables creating a new file descriptor identifying a ruleset.
-This file descriptor can then be used by
-.BR landlock_add_rule (2)
-and
-.BR landlock_restrict_self (2).
-See
-.BR landlock (7)
-for a global overview.
-.P
-.I attr
-specifies the properties of the new ruleset.
-It points to the following structure:
-.IP
-.in +4n
-.EX
-struct landlock_ruleset_attr {
- __u64 handled_access_fs;
-};
-.EE
-.in
-.IP
-.I handled_access_fs
-is a bitmask of actions that are handled by this ruleset and
-should then be forbidden if no rule explicitly allows them
-(see
-.B Filesystem actions
-in
-.BR landlock (7)).
-This enables simply restricting ambient rights
-(e.g., global filesystem access) and is needed for compatibility reasons.
-.P
-.I size
-must be specified as
-.I sizeof(struct landlock_ruleset_attr)
-for compatibility reasons.
-.P
-.I flags
-must be 0 if
-.I attr
-is used.
-Otherwise,
-.I flags
-can be set to:
-.TP
-.B LANDLOCK_CREATE_RULESET_VERSION
-If
-.I attr
-is NULL and
-.I size
-is 0, then the returned value is the highest supported Landlock ABI version
-(starting at 1).
-This version can be used for a best-effort security approach,
-which is encouraged when user space is not pinned to a specific kernel
-version.
-All features documented in these man pages are available with version
-1.
-.SH RETURN VALUE
-On success,
-.BR landlock_create_ruleset ()
-returns a new Landlock ruleset file descriptor,
-or a Landlock ABI version,
-according to
-.IR flags .
-.SH ERRORS
-.BR landlock_create_ruleset ()
-can fail for the following reasons:
-.TP
-.B EOPNOTSUPP
-Landlock is supported by the kernel but disabled at boot time.
-.TP
-.B EINVAL
-Unknown
-.IR flags ,
-or unknown access, or too small
-.IR size .
-.TP
-.B E2BIG
-.I size
-is too big.
-.TP
-.B EFAULT
-.I attr
-was not a valid address.
-.TP
-.B ENOMSG
-Empty accesses (i.e.,
-.I attr\->handled_access_fs
-is 0).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.13.
-.SH EXAMPLES
-See
-.BR landlock (7).
-.SH SEE ALSO
-.BR landlock_add_rule (2),
-.BR landlock_restrict_self (2),
-.BR landlock (7)
diff --git a/man2/landlock_restrict_self.2 b/man2/landlock_restrict_self.2
deleted file mode 100644
index 43f15c932..000000000
--- a/man2/landlock_restrict_self.2
+++ /dev/null
@@ -1,116 +0,0 @@
-.\" Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
-.\" Copyright © 2019-2020 ANSSI
-.\" Copyright © 2021 Microsoft Corporation
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH landlock_restrict_self 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-landlock_restrict_self \- enforce a Landlock ruleset
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/landlock.h>" " /* Definition of " LANDLOCK_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.P
-.BI "int syscall(SYS_landlock_restrict_self, int " ruleset_fd ,
-.BI " uint32_t " flags );
-.SH DESCRIPTION
-Once a Landlock ruleset is populated with the desired rules, the
-.BR landlock_restrict_self ()
-system call enables enforcing this ruleset on the calling thread.
-See
-.BR landlock (7)
-for a global overview.
-.P
-A thread can be restricted with multiple rulesets that are then
-composed together to form the thread's Landlock domain.
-This can be seen as a stack of rulesets but
-it is implemented in a more efficient way.
-A domain can only be updated in such a way that
-the constraints of each past and future composed ruleset
-will restrict the thread and its future children for their entire life.
-It is then possible to gradually enforce tailored access control policies
-with multiple independent rulesets coming from different sources
-(e.g., init system configuration, user session policy,
-built-in application policy).
-However, most applications should only need one call to
-.BR landlock_restrict_self ()
-and they should avoid arbitrary numbers of such calls because of the
-composed rulesets limit.
-Instead, developers are encouraged to build a tailored ruleset thanks to
-multiple calls to
-.BR landlock_add_rule (2).
-.P
-In order to enforce a ruleset, either the caller must have the
-.B CAP_SYS_ADMIN
-capability in its user namespace, or the thread must already have the
-.I no_new_privs
-bit set.
-As for
-.BR seccomp (2),
-this avoids scenarios where unprivileged processes can affect
-the behavior of privileged children (e.g., because of set-user-ID binaries).
-If that bit was not already set by an ancestor of this thread,
-the thread must make the following call:
-.IP
-.EX
-prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
-.EE
-.P
-.I ruleset_fd
-is a Landlock ruleset file descriptor obtained with
-.BR landlock_create_ruleset (2)
-and fully populated with a set of calls to
-.BR landlock_add_rule (2).
-.P
-.I flags
-must be 0.
-.SH RETURN VALUE
-On success,
-.BR landlock_restrict_self ()
-returns 0.
-.SH ERRORS
-.BR landlock_restrict_self ()
-can fail for the following reasons:
-.TP
-.B EOPNOTSUPP
-Landlock is supported by the kernel but disabled at boot time.
-.TP
-.B EINVAL
-.I flags
-is not 0.
-.TP
-.B EBADF
-.I ruleset_fd
-is not a file descriptor for the current thread.
-.TP
-.B EBADFD
-.I ruleset_fd
-is not a ruleset file descriptor.
-.TP
-.B EPERM
-.I ruleset_fd
-has no read access to the underlying ruleset,
-or the calling thread is not running with
-.IR no_new_privs ,
-or it doesn't have the
-.B CAP_SYS_ADMIN
-capability in its user namespace.
-.TP
-.B E2BIG
-The maximum number of composed rulesets is reached for the calling thread.
-This limit is currently 64.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.13.
-.SH EXAMPLES
-See
-.BR landlock (7).
-.SH SEE ALSO
-.BR landlock_create_ruleset (2),
-.BR landlock_add_rule (2),
-.BR landlock (7)
diff --git a/man2/lchown.2 b/man2/lchown.2
deleted file mode 100644
index f0a5635ae..000000000
--- a/man2/lchown.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/chown.2
diff --git a/man2/lchown32.2 b/man2/lchown32.2
deleted file mode 100644
index 8ed3964e0..000000000
--- a/man2/lchown32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/lchown.2
diff --git a/man2/lgetxattr.2 b/man2/lgetxattr.2
deleted file mode 100644
index d9e5d9037..000000000
--- a/man2/lgetxattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getxattr.2
diff --git a/man2/link.2 b/man2/link.2
deleted file mode 100644
index 576b5510b..000000000
--- a/man2/link.2
+++ /dev/null
@@ -1,425 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\" and Copyright (C) 2006, 2014 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-23 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1994-08-21 by Michael Haardt
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2005-04-04, as per suggestion by Michael Hardt for rename.2
-.\"
-.TH link 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-link, linkat \- make a new name for a file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int link(const char *" oldpath ", const char *" newpath );
-.P
-.BR "#include <fcntl.h> " "/* Definition of " AT_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int linkat(int " olddirfd ", const char *" oldpath ,
-.BI " int " newdirfd ", const char *" newpath ", int " flags );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR linkat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-.BR link ()
-creates a new link (also known as a hard link) to an existing file.
-.P
-If
-.I newpath
-exists, it will
-.I not
-be overwritten.
-.P
-This new name may be used exactly as the old one for any operation;
-both names refer to the same file (and so have the same permissions
-and ownership) and it is impossible to tell which name was the
-"original".
-.SS linkat()
-The
-.BR linkat ()
-system call operates in exactly the same way as
-.BR link (),
-except for the differences described here.
-.P
-If the pathname given in
-.I oldpath
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I olddirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR link ()
-for a relative pathname).
-.P
-If
-.I oldpath
-is relative and
-.I olddirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I oldpath
-is interpreted relative to the current working
-directory of the calling process (like
-.BR link ()).
-.P
-If
-.I oldpath
-is absolute, then
-.I olddirfd
-is ignored.
-.P
-The interpretation of
-.I newpath
-is as for
-.IR oldpath ,
-except that a relative pathname is interpreted relative
-to the directory referred to by the file descriptor
-.IR newdirfd .
-.P
-The following values can be bitwise ORed in
-.IR flags :
-.TP
-.BR AT_EMPTY_PATH " (since Linux 2.6.39)"
-.\" commit 11a7b371b64ef39fc5fb1b6f2218eef7c4d035e3
-If
-.I oldpath
-is an empty string, create a link to the file referenced by
-.I olddirfd
-(which may have been obtained using the
-.BR open (2)
-.B O_PATH
-flag).
-In this case,
-.I olddirfd
-can refer to any type of file except a directory.
-This will generally not work if the file has a link count of zero (files
-created with
-.B O_TMPFILE
-and without
-.B O_EXCL
-are an exception).
-The caller must have the
-.B CAP_DAC_READ_SEARCH
-capability in order to use this flag.
-This flag is Linux-specific; define
-.B _GNU_SOURCE
-.\" Before glibc 2.16, defining _ATFILE_SOURCE sufficed
-to obtain its definition.
-.TP
-.BR AT_SYMLINK_FOLLOW " (since Linux 2.6.18)"
-By default,
-.BR linkat ()
-does not dereference
-.I oldpath
-if it is a symbolic link (like
-.BR link ()).
-The flag
-.B AT_SYMLINK_FOLLOW
-can be specified in
-.I flags
-to cause
-.I oldpath
-to be dereferenced if it is a symbolic link.
-If procfs is mounted,
-this can be used as an alternative to
-.BR AT_EMPTY_PATH ,
-like this:
-.IP
-.in +4n
-.EX
-linkat(AT_FDCWD, "/proc/self/fd/<fd>", newdirfd,
- newname, AT_SYMLINK_FOLLOW);
-.EE
-.in
-.P
-Before Linux 2.6.18, the
-.I flags
-argument was unused, and had to be specified as 0.
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR linkat ().
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Write access to the directory containing
-.I newpath
-is denied, or search permission is denied for one of the directories
-in the path prefix of
-.I oldpath
-or
-.IR newpath .
-(See also
-.BR path_resolution (7).)
-.TP
-.B EDQUOT
-The user's quota of disk blocks on the filesystem has been exhausted.
-.TP
-.B EEXIST
-.I newpath
-already exists.
-.TP
-.B EFAULT
-.IR oldpath " or " newpath " points outside your accessible address space."
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR oldpath " or " newpath .
-.TP
-.B EMLINK
-The file referred to by
-.I oldpath
-already has the maximum number of links to it.
-For example, on an
-.BR ext4 (5)
-filesystem that does not employ the
-.I dir_index
-feature, the limit on the number of hard links to a file is 65,000; on
-.BR btrfs (5),
-the limit is 65,535 links.
-.TP
-.B ENAMETOOLONG
-.IR oldpath " or " newpath " was too long."
-.TP
-.B ENOENT
-A directory component in
-.IR oldpath " or " newpath
-does not exist or is a dangling symbolic link.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOSPC
-The device containing the file has no room for the new directory
-entry.
-.TP
-.B ENOTDIR
-A component used as a directory in
-.IR oldpath " or " newpath
-is not, in fact, a directory.
-.TP
-.B EPERM
-.I oldpath
-is a directory.
-.TP
-.B EPERM
-The filesystem containing
-.IR oldpath " and " newpath
-does not support the creation of hard links.
-.TP
-.BR EPERM " (since Linux 3.6)"
-The caller does not have permission to create a hard link to this file
-(see the description of
-.I /proc/sys/fs/protected_hardlinks
-in
-.BR proc (5)).
-.TP
-.B EPERM
-.I oldpath
-is marked immutable or append-only.
-(See
-.BR ioctl_iflags (2).)
-.TP
-.B EROFS
-The file is on a read-only filesystem.
-.TP
-.B EXDEV
-.IR oldpath " and " newpath
-are not on the same mounted filesystem.
-(Linux permits a filesystem to be mounted at multiple points, but
-.BR link ()
-does not work across different mounts,
-even if the same filesystem is mounted on both.)
-.P
-The following additional errors can occur for
-.BR linkat ():
-.TP
-.B EBADF
-.I oldpath
-.RI ( newpath )
-is relative but
-.I olddirfd
-.RI ( newdirfd )
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EINVAL
-An invalid flag value was specified in
-.IR flags .
-.TP
-.B ENOENT
-.B AT_EMPTY_PATH
-was specified in
-.IR flags ,
-but the caller did not have the
-.B CAP_DAC_READ_SEARCH
-capability.
-.TP
-.B ENOENT
-An attempt was made to link to the
-.I /proc/self/fd/NN
-file corresponding to a file descriptor created with
-.IP
-.in +4n
-.EX
-open(path, O_TMPFILE | O_EXCL, mode);
-.EE
-.in
-.IP
-See
-.BR open (2).
-.TP
-.B ENOENT
-An attempt was made to link to a
-.I /proc/self/fd/NN
-file corresponding to a file that has been deleted.
-.TP
-.B ENOENT
-.I oldpath
-is a relative pathname and
-.I olddirfd
-refers to a directory that has been deleted,
-or
-.I newpath
-is a relative pathname and
-.I newdirfd
-refers to a directory that has been deleted.
-.TP
-.B ENOTDIR
-.I oldpath
-is relative and
-.I olddirfd
-is a file descriptor referring to a file other than a directory;
-or similar for
-.I newpath
-and
-.IR newdirfd .
-.TP
-.B EPERM
-.B AT_EMPTY_PATH
-was specified in
-.IR flags ,
-.I oldpath
-is an empty string, and
-.I olddirfd
-refers to a directory.
-.SH VERSIONS
-POSIX.1-2001 says that
-.BR link ()
-should dereference
-.I oldpath
-if it is a symbolic link.
-However, since Linux 2.0,
-.\" more precisely: since Linux 1.3.56
-Linux does not do so: if
-.I oldpath
-is a symbolic link, then
-.I newpath
-is created as a (hard) link to the same symbolic link file
-(i.e.,
-.I newpath
-becomes a symbolic link to the same file that
-.I oldpath
-refers to).
-Some other implementations behave in the same manner as Linux.
-.\" For example, the default Solaris compilation environment
-.\" behaves like Linux, and contributors to a March 2005
-.\" thread in the Austin mailing list reported that some
-.\" other (System V) implementations did/do the same -- MTK, Apr 05
-POSIX.1-2008 changes the specification of
-.BR link (),
-making it implementation-dependent whether or not
-.I oldpath
-is dereferenced if it is a symbolic link.
-For precise control over the treatment of symbolic links when
-creating a link, use
-.BR linkat ().
-.SS glibc
-On older kernels where
-.BR linkat ()
-is unavailable, the glibc wrapper function falls back to the use of
-.BR link (),
-unless the
-.B AT_SYMLINK_FOLLOW
-flag is specified.
-When
-.I oldpath
-and
-.I newpath
-are relative pathnames,
-glibc constructs pathnames based on the symbolic links in
-.I /proc/self/fd
-that correspond to the
-.I olddirfd
-and
-.I newdirfd
-arguments.
-.SH STANDARDS
-.TP
-.BR link ()
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR link ()
-SVr4, 4.3BSD, POSIX.1-2001 (but see VERSIONS).
-.\" SVr4 documents additional ENOLINK and
-.\" EMULTIHOP error conditions; POSIX.1 does not document ELOOP.
-.\" X/OPEN does not document EFAULT, ENOMEM or EIO.
-.TP
-.BR linkat ()
-POSIX.1-2008.
-Linux 2.6.16,
-glibc 2.4.
-.SH NOTES
-Hard links, as created by
-.BR link (),
-cannot span filesystems.
-Use
-.BR symlink (2)
-if this is required.
-.SH BUGS
-On NFS filesystems, the return code may be wrong in case the NFS server
-performs the link creation and dies before it can say so.
-Use
-.BR stat (2)
-to find out if the link got created.
-.SH SEE ALSO
-.BR ln (1),
-.BR open (2),
-.BR rename (2),
-.BR stat (2),
-.BR symlink (2),
-.BR unlink (2),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/linkat.2 b/man2/linkat.2
deleted file mode 100644
index a7d6da568..000000000
--- a/man2/linkat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/link.2
diff --git a/man2/listen.2 b/man2/listen.2
deleted file mode 100644
index 6ee5ad11a..000000000
--- a/man2/listen.2
+++ /dev/null
@@ -1,155 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" and Copyright (C) 2007, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" $Id: listen.2,v 1.6 1999/05/18 14:10:32 freitag Exp $
-.\"
-.\" Modified Fri Jul 23 22:07:54 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 950727 by aeb, following a suggestion by Urs Thuermann
-.\" <urs@isnogud.escape.de>
-.\" Modified Tue Oct 22 08:11:14 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998 by Andi Kleen
-.\" Modified 11 May 2001 by Sam Varshavchik <mrsam@courier-mta.com>
-.\"
-.\"
-.TH listen 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-listen \- listen for connections on a socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int listen(int " sockfd ", int " backlog );
-.fi
-.SH DESCRIPTION
-.BR listen ()
-marks the socket referred to by
-.I sockfd
-as a passive socket, that is, as a socket that will
-be used to accept incoming connection requests using
-.BR accept (2).
-.P
-The
-.I sockfd
-argument is a file descriptor that refers to a socket of type
-.B SOCK_STREAM
-or
-.BR SOCK_SEQPACKET .
-.P
-The
-.I backlog
-argument defines the maximum length
-to which the queue of pending connections for
-.I sockfd
-may grow.
-If a connection request arrives when the queue is full, the client
-may receive an error with an indication of
-.B ECONNREFUSED
-or, if the underlying protocol supports retransmission, the request may be
-ignored so that a later reattempt at connection succeeds.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EADDRINUSE
-Another socket is already listening on the same port.
-.TP
-.B EADDRINUSE
-(Internet domain sockets)
-The socket referred to by
-.I sockfd
-had not previously been bound to an address and,
-upon attempting to bind it to an ephemeral port,
-it was determined that all port numbers in the ephemeral port range
-are currently in use.
-See the discussion of
-.I /proc/sys/net/ipv4/ip_local_port_range
-in
-.BR ip (7).
-.TP
-.B EBADF
-The argument
-.I sockfd
-is not a valid file descriptor.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.TP
-.B EOPNOTSUPP
-The socket is not of a type that supports the
-.BR listen ()
-operation.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.4BSD
-(first appeared in 4.2BSD).
-.SH NOTES
-To accept connections, the following steps are performed:
-.RS 4
-.IP (1) 5
-A socket is created with
-.BR socket (2).
-.IP (2)
-The socket is bound to a local address using
-.BR bind (2),
-so that other sockets may be
-.BR connect (2)ed
-to it.
-.IP (3)
-A willingness to accept incoming connections and a queue limit for incoming
-connections are specified with
-.BR listen ().
-.IP (4)
-Connections are accepted with
-.BR accept (2).
-.RE
-.P
-The behavior of the
-.I backlog
-argument on TCP sockets changed with Linux 2.2.
-Now it specifies the queue length for
-.I completely
-established sockets waiting to be accepted,
-instead of the number of incomplete connection requests.
-The maximum length of the queue for incomplete sockets
-can be set using
-.IR /proc/sys/net/ipv4/tcp_max_syn_backlog .
-When syncookies are enabled there is no logical maximum
-length and this setting is ignored.
-See
-.BR tcp (7)
-for more information.
-.P
-If the
-.I backlog
-argument is greater than the value in
-.IR /proc/sys/net/core/somaxconn ,
-then it is silently capped to that value.
-Since Linux 5.4, the default in this file is 4096;
-in earlier kernels, the default value is 128.
-Before Linux 2.4.25, this limit was a hard-coded value,
-.BR SOMAXCONN ,
-with the value 128.
-.\" The following is now rather historic information (MTK, Jun 05)
-.\" Don't rely on this value in portable applications since BSD
-.\" (and some BSD-derived systems) limit the backlog to 5.
-.SH EXAMPLES
-See
-.BR bind (2).
-.SH SEE ALSO
-.BR accept (2),
-.BR bind (2),
-.BR connect (2),
-.BR socket (2),
-.BR socket (7)
diff --git a/man2/listxattr.2 b/man2/listxattr.2
deleted file mode 100644
index 72b769a0e..000000000
--- a/man2/listxattr.2
+++ /dev/null
@@ -1,322 +0,0 @@
-.\" Copyright (C) Andreas Gruenbacher, February 2001
-.\" Copyright (C) Silicon Graphics Inc, September 2001
-.\" Copyright (C) 2015 Heinrich Schuchardt <xypron.glpk@gmx.de>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH listxattr 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-listxattr, llistxattr, flistxattr \- list extended attribute names
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/xattr.h>
-.P
-.BI "ssize_t listxattr(const char *" path ", char *_Nullable " list \
-", size_t " size );
-.BI "ssize_t llistxattr(const char *" path ", char *_Nullable " list \
-", size_t " size );
-.BI "ssize_t flistxattr(int " fd ", char *_Nullable " list ", size_t " size );
-.fi
-.SH DESCRIPTION
-Extended attributes are
-.IR name : value
-pairs associated with inodes (files, directories, symbolic links, etc.).
-They are extensions to the normal attributes which are associated
-with all inodes in the system (i.e., the
-.BR stat (2)
-data).
-A complete overview of extended attributes concepts can be found in
-.BR xattr (7).
-.P
-.BR listxattr ()
-retrieves the list
-of extended attribute names associated with the given
-.I path
-in the filesystem.
-The retrieved list is placed in
-.IR list ,
-a caller-allocated buffer whose size (in bytes) is specified in the argument
-.IR size .
-The list is the set of (null-terminated) names, one after the other.
-Names of extended attributes to which the calling process does not
-have access may be omitted from the list.
-The length of the attribute name
-.I list
-is returned.
-.P
-.BR llistxattr ()
-is identical to
-.BR listxattr (),
-except in the case of a symbolic link, where the list of names of
-extended attributes associated with the link itself is retrieved,
-not the file that it refers to.
-.P
-.BR flistxattr ()
-is identical to
-.BR listxattr (),
-only the open file referred to by
-.I fd
-(as returned by
-.BR open (2))
-is interrogated in place of
-.IR path .
-.P
-A single extended attribute
-.I name
-is a null-terminated string.
-The name includes a namespace prefix; there may be several, disjoint
-namespaces associated with an individual inode.
-.P
-If
-.I size
-is specified as zero, these calls return the current size of the
-list of extended attribute names (and leave
-.I list
-unchanged).
-This can be used to determine the size of the buffer that
-should be supplied in a subsequent call.
-(But, bear in mind that there is a possibility that the
-set of extended attributes may change between the two calls,
-so that it is still necessary to check the return status
-from the second call.)
-.SS Example
-The
-.I list
-of names is returned as an unordered array of null-terminated character strings
-(attribute names are separated by null bytes (\[aq]\e0\[aq])),
-like this:
-.P
-.in +4n
-.EX
-user.name1\e0system.name1\e0user.name2\e0
-.EE
-.in
-.P
-Filesystems that implement POSIX ACLs using
-extended attributes might return a
-.I list
-like this:
-.P
-.in +4n
-.EX
-system.posix_acl_access\e0system.posix_acl_default\e0
-.EE
-.in
-.SH RETURN VALUE
-On success, a nonnegative number is returned indicating the size of the
-extended attribute name list.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B E2BIG
-The size of the list of extended attribute names is larger than the maximum
-size allowed; the list cannot be retrieved.
-This can happen on filesystems that support an unlimited number of
-extended attributes per file such as XFS, for example.
-See BUGS.
-.TP
-.B ENOTSUP
-Extended attributes are not supported by the filesystem, or are disabled.
-.TP
-.B ERANGE
-The
-.I size
-of the
-.I list
-buffer is too small to hold the result.
-.P
-In addition, the errors documented in
-.BR stat (2)
-can also occur.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.4,
-glibc 2.3.
-.\" .SH AUTHORS
-.\" Andreas Gruenbacher,
-.\" .RI < a.gruenbacher@computer.org >
-.\" and the SGI XFS development team,
-.\" .RI < linux-xfs@oss.sgi.com >.
-.\" Please send any bug reports or comments to these addresses.
-.SH BUGS
-.\" The xattr(7) page refers to this text:
-As noted in
-.BR xattr (7),
-the VFS imposes a limit of 64\ kB on the size of the extended
-attribute name list returned by
-.BR listxattr ().
-If the total size of attribute names attached to a file exceeds this limit,
-it is no longer possible to retrieve the list of attribute names.
-.SH EXAMPLES
-The following program demonstrates the usage of
-.BR listxattr ()
-and
-.BR getxattr (2).
-For the file whose pathname is provided as a command-line argument,
-it lists all extended file attributes and their values.
-.P
-To keep the code simple, the program assumes that attribute keys and
-values are constant during the execution of the program.
-A production program should expect and handle changes during
-execution of the program.
-For example,
-the number of bytes required for attribute keys
-might increase between the two calls to
-.BR listxattr ().
-An application could handle this possibility using
-a loop that retries the call
-(perhaps up to a predetermined maximum number of attempts)
-with a larger buffer each time it fails with the error
-.BR ERANGE .
-Calls to
-.BR getxattr (2)
-could be handled similarly.
-.P
-The following output was recorded by first creating a file, setting
-some extended file attributes,
-and then listing the attributes with the example program.
-.SS Example output
-.in +4n
-.EX
-$ \fBtouch /tmp/foo\fP
-$ \fBsetfattr \-n user.fred \-v chocolate /tmp/foo\fP
-$ \fBsetfattr \-n user.frieda \-v bar /tmp/foo\fP
-$ \fBsetfattr \-n user.empty /tmp/foo\fP
-$ \fB./listxattr /tmp/foo\fP
-user.fred: chocolate
-user.frieda: bar
-user.empty: <no value>
-.EE
-.in
-.SS Program source (listxattr.c)
-.\" SRC BEGIN (listxattr.c)
-.EX
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/xattr.h>
-\&
-int
-main(int argc, char *argv[])
-{
- char *buf, *key, *val;
- ssize_t buflen, keylen, vallen;
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s path\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Determine the length of the buffer needed.
- */
- buflen = listxattr(argv[1], NULL, 0);
- if (buflen == \-1) {
- perror("listxattr");
- exit(EXIT_FAILURE);
- }
- if (buflen == 0) {
- printf("%s has no attributes.\en", argv[1]);
- exit(EXIT_SUCCESS);
- }
-\&
- /*
- * Allocate the buffer.
- */
- buf = malloc(buflen);
- if (buf == NULL) {
- perror("malloc");
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Copy the list of attribute keys to the buffer.
- */
- buflen = listxattr(argv[1], buf, buflen);
- if (buflen == \-1) {
- perror("listxattr");
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Loop over the list of null-terminated strings with the
- * attribute keys. Use the remaining buffer length to determine
- * the end of the list.
- */
- key = buf;
- while (buflen > 0) {
-\&
- /*
- * Output attribute key.
- */
- printf("%s: ", key);
-\&
- /*
- * Determine length of the value.
- */
- vallen = getxattr(argv[1], key, NULL, 0);
- if (vallen == \-1)
- perror("getxattr");
-\&
- if (vallen > 0) {
-\&
- /*
- * Allocate value buffer.
- * One extra byte is needed to append 0x00.
- */
- val = malloc(vallen + 1);
- if (val == NULL) {
- perror("malloc");
- exit(EXIT_FAILURE);
- }
-\&
- /*
- * Copy value to buffer.
- */
- vallen = getxattr(argv[1], key, val, vallen);
- if (vallen == \-1) {
- perror("getxattr");
- } else {
- /*
- * Output attribute value.
- */
- val[vallen] = 0;
- printf("%s", val);
- }
-\&
- free(val);
- } else if (vallen == 0) {
- printf("<no value>");
- }
-\&
- printf("\en");
-\&
- /*
- * Forward to next attribute key.
- */
- keylen = strlen(key) + 1;
- buflen \-= keylen;
- key += keylen;
- }
-\&
- free(buf);
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR getfattr (1),
-.BR setfattr (1),
-.BR getxattr (2),
-.BR open (2),
-.BR removexattr (2),
-.BR setxattr (2),
-.BR stat (2),
-.BR symlink (7),
-.BR xattr (7)
diff --git a/man2/llistxattr.2 b/man2/llistxattr.2
deleted file mode 100644
index 117bd2b53..000000000
--- a/man2/llistxattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/listxattr.2
diff --git a/man2/llseek.2 b/man2/llseek.2
deleted file mode 100644
index 1a3f365f2..000000000
--- a/man2/llseek.2
+++ /dev/null
@@ -1,92 +0,0 @@
-.\" Copyright (C) 1995 Andries Brouwer (aeb@cwi.nl)
-.\" Written 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
-.\" and Copyright (C) 2007, 2015, 2020, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Thu Oct 31 15:16:23 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\"
-.TH _llseek 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-_llseek \- reposition read/write file offset
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS__llseek, unsigned int " fd ", unsigned long " offset_high ,
-.BI " unsigned long " offset_low ", loff_t *" result ,
-.BI " unsigned int " whence );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR _llseek (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-Note: for information about the
-.BR llseek (3)
-library function, see
-.BR lseek64 (3).
-.P
-The
-.BR _llseek ()
-system call repositions the offset of the open file description associated
-with the file descriptor
-.I fd
-to the value
-.IP
-(offset_high << 32) | offset_low
-.P
-This new offset is a byte offset
-relative to the beginning of the file, the current file offset,
-or the end of the file, depending on whether
-.I whence
-is
-.BR SEEK_SET ,
-.BR SEEK_CUR ,
-or
-.BR SEEK_END ,
-respectively.
-.P
-The new file offset is returned in the argument
-.IR result .
-The type
-.I loff_t
-is a 64-bit signed type.
-.P
-This system call exists on various 32-bit platforms to support
-seeking to large file offsets.
-.SH RETURN VALUE
-Upon successful completion,
-.BR _llseek ()
-returns 0.
-Otherwise, a value of \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not an open file descriptor.
-.TP
-.B EFAULT
-Problem with copying results to user space.
-.TP
-.B EINVAL
-.I whence
-is invalid.
-.SH VERSIONS
-You probably want to use the
-.BR lseek (2)
-wrapper function instead.
-.SH STANDARDS
-Linux.
-.SH SEE ALSO
-.BR lseek (2),
-.BR open (2),
-.BR lseek64 (3)
diff --git a/man2/lock.2 b/man2/lock.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/lock.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/lookup_dcookie.2 b/man2/lookup_dcookie.2
deleted file mode 100644
index acc5e43e6..000000000
--- a/man2/lookup_dcookie.2
+++ /dev/null
@@ -1,86 +0,0 @@
-.\" Copyright (C) 2003 John Levon <levon@movementarian.org>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 2004-06-17 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH lookup_dcookie 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-lookup_dcookie \- return a directory entry's path
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_lookup_dcookie, uint64_t " cookie ", char *" buffer ,
-.BI " size_t " len );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR lookup_dcookie (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-Look up the full path of the directory entry specified by the value
-.IR cookie .
-The cookie is an opaque identifier uniquely identifying a particular
-directory entry.
-The buffer given is filled in with the full path of the directory entry.
-.P
-For
-.BR lookup_dcookie ()
-to return successfully,
-the kernel must still hold a cookie reference to the directory entry.
-.SH RETURN VALUE
-On success,
-.BR lookup_dcookie ()
-returns the length of the path string copied into the buffer.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-The buffer was not valid.
-.TP
-.B EINVAL
-The kernel has no registered cookie/directory entry mappings at the
-time of lookup, or the cookie does not refer to a valid directory entry.
-.TP
-.B ENAMETOOLONG
-The name could not fit in the buffer.
-.TP
-.B ENOMEM
-The kernel could not allocate memory for the temporary buffer holding
-the path.
-.TP
-.B EPERM
-The process does not have the capability
-.B CAP_SYS_ADMIN
-required to look up cookie values.
-.TP
-.B ERANGE
-The buffer was not large enough to hold the path of the directory entry.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.43.
-.P
-The
-.B ENAMETOOLONG
-error was added in Linux 2.5.70.
-.SH NOTES
-.BR lookup_dcookie ()
-is a special-purpose system call, currently used only by the
-.BR oprofile (1)
-profiler.
-It relies on a kernel driver to register cookies for directory entries.
-.P
-The path returned may be suffixed by the string " (deleted)" if the directory
-entry has been removed.
-.SH SEE ALSO
-.BR oprofile (1)
diff --git a/man2/lremovexattr.2 b/man2/lremovexattr.2
deleted file mode 100644
index 38d01ccde..000000000
--- a/man2/lremovexattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/removexattr.2
diff --git a/man2/lseek.2 b/man2/lseek.2
deleted file mode 100644
index 14fc13734..000000000
--- a/man2/lseek.2
+++ /dev/null
@@ -1,252 +0,0 @@
-.\" Copyright (c) 1980, 1991 Regents of the University of California.
-.\" and Copyright (c) 2011, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)lseek.2 6.5 (Berkeley) 3/10/91
-.\"
-.\" Modified 1993-07-23 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-06-10 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 1996-10-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998-01-17 by Michael Haardt
-.\" <michael@cantor.informatik.rwth-aachen.de>
-.\" Modified 2001-09-24 by Michael Haardt <michael@moria.de>
-.\" Modified 2003-08-21 by Andries Brouwer <aeb@cwi.nl>
-.\" 2011-09-18, mtk, Added SEEK_DATA + SEEK_HOLE
-.\"
-.TH lseek 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-lseek \- reposition read/write file offset
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "off_t lseek(int " fd ", off_t " offset ", int " whence );
-.fi
-.SH DESCRIPTION
-.BR lseek ()
-repositions the file offset of the open file description
-associated with the file descriptor
-.I fd
-to the argument
-.I offset
-according to the directive
-.I whence
-as follows:
-.TP
-.B SEEK_SET
-The file offset is set to
-.I offset
-bytes.
-.TP
-.B SEEK_CUR
-The file offset is set to its current location plus
-.I offset
-bytes.
-.TP
-.B SEEK_END
-The file offset is set to the size of the file plus
-.I offset
-bytes.
-.P
-.BR lseek ()
-allows the file offset to be set beyond the end
-of the file (but this does not change the size of the file).
-If data is later written at this point, subsequent reads of the data
-in the gap (a "hole") return null bytes (\[aq]\e0\[aq]) until
-data is actually written into the gap.
-.SS Seeking file data and holes
-Since Linux 3.1, Linux supports the following additional values for
-.IR whence :
-.TP
-.B SEEK_DATA
-Adjust the file offset to the next location
-in the file greater than or equal to
-.I offset
-containing data.
-If
-.I offset
-points to data,
-then the file offset is set to
-.IR offset .
-.TP
-.B SEEK_HOLE
-Adjust the file offset to the next hole in the file
-greater than or equal to
-.IR offset .
-If
-.I offset
-points into the middle of a hole,
-then the file offset is set to
-.IR offset .
-If there is no hole past
-.IR offset ,
-then the file offset is adjusted to the end of the file
-(i.e., there is an implicit hole at the end of any file).
-.P
-In both of the above cases,
-.BR lseek ()
-fails if
-.I offset
-points past the end of the file.
-.P
-These operations allow applications to map holes in a sparsely
-allocated file.
-This can be useful for applications such as file backup tools,
-which can save space when creating backups and preserve holes,
-if they have a mechanism for discovering holes.
-.P
-For the purposes of these operations, a hole is a sequence of zeros that
-(normally) has not been allocated in the underlying file storage.
-However, a filesystem is not obliged to report holes,
-so these operations are not a guaranteed mechanism for
-mapping the storage space actually allocated to a file.
-(Furthermore, a sequence of zeros that actually has been written
-to the underlying storage may not be reported as a hole.)
-In the simplest implementation,
-a filesystem can support the operations by making
-.B SEEK_HOLE
-always return the offset of the end of the file,
-and making
-.B SEEK_DATA
-always return
-.I offset
-(i.e., even if the location referred to by
-.I offset
-is a hole,
-it can be considered to consist of data that is a sequence of zeros).
-.\" https://lkml.org/lkml/2011/4/22/79
-.\" http://lwn.net/Articles/440255/
-.\" http://blogs.oracle.com/bonwick/entry/seek_hole_and_seek_data
-.P
-The
-.B _GNU_SOURCE
-feature test macro must be defined in order to obtain the definitions of
-.B SEEK_DATA
-and
-.B SEEK_HOLE
-from
-.IR <unistd.h> .
-.P
-The
-.B SEEK_HOLE
-and
-.B SEEK_DATA
-operations are supported for the following filesystems:
-.IP \[bu] 3
-Btrfs (since Linux 3.1)
-.IP \[bu]
-OCFS2 (since Linux 3.2)
-.\" commit 93862d5e1ab875664c6cc95254fc365028a48bb1
-.IP \[bu]
-XFS (since Linux 3.5)
-.IP \[bu]
-ext4 (since Linux 3.8)
-.IP \[bu]
-.BR tmpfs (5)
-(since Linux 3.8)
-.IP \[bu]
-NFS (since Linux 3.18)
-.\" commit 1c6dcbe5ceff81c2cf8d929646af675cd59fe7c0
-.\" commit 24bab491220faa446d945624086d838af41d616c
-.IP \[bu]
-FUSE (since Linux 4.5)
-.\" commit 0b5da8db145bfd44266ac964a2636a0cf8d7c286
-.IP \[bu]
-GFS2 (since Linux 4.15)
-.\" commit 3a27411cb4bc3ce31db228e3569ad01b462a4310
-.SH RETURN VALUE
-Upon successful completion,
-.BR lseek ()
-returns the resulting offset location as measured in bytes from the
-beginning of the file.
-On error, the value \fI(off_t)\ \-1\fP is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not an open file descriptor.
-.TP
-.B EINVAL
-.I whence
-is not valid.
-Or: the resulting file offset would be negative,
-or beyond the end of a seekable device.
-.\" Some systems may allow negative offsets for character devices
-.\" and/or for remote filesystems.
-.TP
-.B ENXIO
-.I whence
-is
-.B SEEK_DATA
-or
-.BR SEEK_HOLE ,
-and
-.I offset
-is beyond the end of the file, or
-.I whence
-is
-.B SEEK_DATA
-and
-.I offset
-is within a hole at the end of the file.
-.TP
-.B EOVERFLOW
-.\" HP-UX 11 says EINVAL for this case (but POSIX.1 says EOVERFLOW)
-The resulting file offset cannot be represented in an
-.IR off_t .
-.TP
-.B ESPIPE
-.I fd
-is associated with a pipe, socket, or FIFO.
-.SH VERSIONS
-On Linux, using
-.BR lseek ()
-on a terminal device fails with the error
-.BR ESPIPE .
-.\" Other systems return the number of written characters,
-.\" using SEEK_SET to set the counter. (Of written characters.)
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.P
-.B SEEK_DATA
-and
-.B SEEK_HOLE
-are nonstandard extensions also present in Solaris,
-FreeBSD, and DragonFly BSD;
-they are proposed for inclusion in the next POSIX revision (Issue 8).
-.\" FIXME . Review http://austingroupbugs.net/view.php?id=415 in the future
-.SH NOTES
-See
-.BR open (2)
-for a discussion of the relationship between file descriptors,
-open file descriptions, and files.
-.P
-If the
-.B O_APPEND
-file status flag is set on the open file description,
-then a
-.BR write (2)
-.I always
-moves the file offset to the end of the file, regardless of the use of
-.BR lseek ().
-.P
-Some devices are incapable of seeking and POSIX does not specify which
-devices must support
-.BR lseek ().
-.SH SEE ALSO
-.BR dup (2),
-.BR fallocate (2),
-.BR fork (2),
-.BR open (2),
-.BR fseek (3),
-.BR lseek64 (3),
-.BR posix_fallocate (3)
diff --git a/man2/lsetxattr.2 b/man2/lsetxattr.2
deleted file mode 100644
index dc0780751..000000000
--- a/man2/lsetxattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setxattr.2
diff --git a/man2/lstat.2 b/man2/lstat.2
deleted file mode 100644
index b1a86c195..000000000
--- a/man2/lstat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
diff --git a/man2/lstat64.2 b/man2/lstat64.2
deleted file mode 100644
index 89b1c84d9..000000000
--- a/man2/lstat64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/lstat.2
diff --git a/man2/madvise.2 b/man2/madvise.2
deleted file mode 100644
index 441edfbc1..000000000
--- a/man2/madvise.2
+++ /dev/null
@@ -1,898 +0,0 @@
-.\" Copyright (C) 2001 David Gómez <davidge@jazzfree.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Based on comments from mm/filemap.c. Last modified on 10-06-2001
-.\" Modified, 25 Feb 2002, Michael Kerrisk, <mtk.manpages@gmail.com>
-.\" Added notes on MADV_DONTNEED
-.\" 2010-06-19, mtk, Added documentation of MADV_MERGEABLE and
-.\" MADV_UNMERGEABLE
-.\" 2010-06-15, Andi Kleen, Add documentation of MADV_HWPOISON.
-.\" 2010-06-19, Andi Kleen, Add documentation of MADV_SOFT_OFFLINE.
-.\" 2011-09-18, Doug Goldstein <cardoe@cardoe.com>
-.\" Document MADV_HUGEPAGE and MADV_NOHUGEPAGE
-.\"
-.TH madvise 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-madvise \- give advice about use of memory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/mman.h>
-.P
-.BI "int madvise(void " addr [. length "], size_t " length ", int " advice );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR madvise ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- Up to and including glibc 2.19:
- _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-The
-.BR madvise ()
-system call is used to give advice or directions to the kernel
-about the address range beginning at address
-.I addr
-and with size
-.IR length .
-.BR madvise ()
-only operates on whole pages, therefore
-.I addr
-must be page-aligned.
-The value of
-.I length
-is rounded up to a multiple of page size.
-In most cases,
-the goal of such advice is to improve system or application performance.
-.P
-Initially, the system call supported a set of "conventional"
-.I advice
-values, which are also available on several other implementations.
-(Note, though, that
-.BR madvise ()
-is not specified in POSIX.)
-Subsequently, a number of Linux-specific
-.I advice
-values have been added.
-.\"
-.\" ======================================================================
-.\"
-.SS Conventional advice values
-The
-.I advice
-values listed below
-allow an application to tell the kernel how it expects to use
-some mapped or shared memory areas, so that the kernel can choose
-appropriate read-ahead and caching techniques.
-These
-.I advice
-values do not influence the semantics of the application
-(except in the case of
-.BR MADV_DONTNEED ),
-but may influence its performance.
-All of the
-.I advice
-values listed here have analogs in the POSIX-specified
-.BR posix_madvise (3)
-function, and the values have the same meanings, with the exception of
-.BR MADV_DONTNEED .
-.P
-The advice is indicated in the
-.I advice
-argument, which is one of the following:
-.TP
-.B MADV_NORMAL
-No special treatment.
-This is the default.
-.TP
-.B MADV_RANDOM
-Expect page references in random order.
-(Hence, read ahead may be less useful than normally.)
-.TP
-.B MADV_SEQUENTIAL
-Expect page references in sequential order.
-(Hence, pages in the given range can be aggressively read ahead,
-and may be freed soon after they are accessed.)
-.TP
-.B MADV_WILLNEED
-Expect access in the near future.
-(Hence, it might be a good idea to read some pages ahead.)
-.TP
-.B MADV_DONTNEED
-Do not expect access in the near future.
-(For the time being, the application is finished with the given range,
-so the kernel can free resources associated with it.)
-.IP
-After a successful
-.B MADV_DONTNEED
-operation,
-the semantics of memory access in the specified region are changed:
-subsequent accesses of pages in the range will succeed, but will result
-in either repopulating the memory contents from the
-up-to-date contents of the underlying mapped file
-(for shared file mappings, shared anonymous mappings,
-and shmem-based techniques such as System V shared memory segments)
-or zero-fill-on-demand pages for anonymous private mappings.
-.IP
-Note that, when applied to shared mappings,
-.B MADV_DONTNEED
-might not lead to immediate freeing of the pages in the range.
-The kernel is free to delay freeing the pages until an appropriate moment.
-The resident set size (RSS) of the calling process will be immediately
-reduced however.
-.IP
-.B MADV_DONTNEED
-cannot be applied to locked pages, or
-.B VM_PFNMAP
-pages.
-(Pages marked with the kernel-internal
-.B VM_PFNMAP
-.\" http://lwn.net/Articles/162860/
-flag are special memory areas that are not managed
-by the virtual memory subsystem.
-Such pages are typically created by device drivers that
-map the pages into user space.)
-.IP
-Support for Huge TLB pages was added in Linux v5.18.
-Addresses within a mapping backed by Huge TLB pages must be aligned
-to the underlying Huge TLB page size,
-and the range length is rounded up
-to a multiple of the underlying Huge TLB page size.
-.\"
-.\" ======================================================================
-.\"
-.SS Linux-specific advice values
-The following Linux-specific
-.I advice
-values have no counterparts in the POSIX-specified
-.BR posix_madvise (3),
-and may or may not have counterparts in the
-.BR madvise ()
-interface available on other implementations.
-Note that some of these operations change the semantics of memory accesses.
-.TP
-.BR MADV_REMOVE " (since Linux 2.6.16)"
-.\" commit f6b3ec238d12c8cc6cc71490c6e3127988460349
-Free up a given range of pages
-and its associated backing store.
-This is equivalent to punching a hole in the corresponding
-range of the backing store (see
-.BR fallocate (2)).
-Subsequent accesses in the specified address range will see
-data with a value of zero.
-.\" Databases want to use this feature to drop a section of their
-.\" bufferpool (shared memory segments) - without writing back to
-.\" disk/swap space. This feature is also useful for supporting
-.\" hot-plug memory on UML.
-.IP
-The specified address range must be mapped shared and writable.
-This flag cannot be applied to locked pages, or
-.B VM_PFNMAP
-pages.
-.IP
-In the initial implementation, only
-.BR tmpfs (5)
-supported
-.BR MADV_REMOVE ;
-but since Linux 3.5,
-.\" commit 3f31d07571eeea18a7d34db9af21d2285b807a17
-any filesystem which supports the
-.BR fallocate (2)
-.B FALLOC_FL_PUNCH_HOLE
-mode also supports
-.BR MADV_REMOVE .
-Filesystems which do not support
-.B MADV_REMOVE
-fail with the error
-.BR EOPNOTSUPP .
-.IP
-Support for the Huge TLB filesystem was added in Linux v4.3.
-.TP
-.BR MADV_DONTFORK " (since Linux 2.6.16)"
-.\" commit f822566165dd46ff5de9bf895cfa6c51f53bb0c4
-.\" See http://lwn.net/Articles/171941/
-Do not make the pages in this range available to the child after a
-.BR fork (2).
-This is useful to prevent copy-on-write semantics from changing
-the physical location of a page if the parent writes to it after a
-.BR fork (2).
-(Such page relocations cause problems for hardware that
-DMAs into the page.)
-.\" [PATCH] madvise MADV_DONTFORK/MADV_DOFORK
-.\" Currently, copy-on-write may change the physical address of
-.\" a page even if the user requested that the page is pinned in
-.\" memory (either by mlock or by get_user_pages). This happens
-.\" if the process forks meanwhile, and the parent writes to that
-.\" page. As a result, the page is orphaned: in case of
-.\" get_user_pages, the application will never see any data hardware
-.\" DMA's into this page after the COW. In case of mlock'd memory,
-.\" the parent is not getting the realtime/security benefits of mlock.
-.\"
-.\" In particular, this affects the Infiniband modules which do DMA from
-.\" and into user pages all the time.
-.\"
-.\" This patch adds madvise options to control whether memory range is
-.\" inherited across fork. Useful e.g. for when hardware is doing DMA
-.\" from/into these pages. Could also be useful to an application
-.\" wanting to speed up its forks by cutting large areas out of
-.\" consideration.
-.\"
-.\" SEE ALSO: http://lwn.net/Articles/171941/
-.\" "Tweaks to madvise() and posix_fadvise()", 14 Feb 2006
-.TP
-.BR MADV_DOFORK " (since Linux 2.6.16)"
-Undo the effect of
-.BR MADV_DONTFORK ,
-restoring the default behavior, whereby a mapping is inherited across
-.BR fork (2).
-.TP
-.BR MADV_HWPOISON " (since Linux 2.6.32)"
-.\" commit 9893e49d64a4874ea67849ee2cfbf3f3d6817573
-Poison the pages in the range specified by
-.I addr
-and
-.I length
-and handle subsequent references to those pages
-like a hardware memory corruption.
-This operation is available only for privileged
-.RB ( CAP_SYS_ADMIN )
-processes.
-This operation may result in the calling process receiving a
-.B SIGBUS
-and the page being unmapped.
-.IP
-This feature is intended for testing of memory error-handling code;
-it is available only if the kernel was configured with
-.BR CONFIG_MEMORY_FAILURE .
-.TP
-.BR MADV_MERGEABLE " (since Linux 2.6.32)"
-.\" commit f8af4da3b4c14e7267c4ffb952079af3912c51c5
-Enable Kernel Samepage Merging (KSM) for the pages in the range specified by
-.I addr
-and
-.IR length .
-The kernel regularly scans those areas of user memory that have
-been marked as mergeable,
-looking for pages with identical content.
-These are replaced by a single write-protected page (which is automatically
-copied if a process later wants to update the content of the page).
-KSM merges only private anonymous pages (see
-.BR mmap (2)).
-.IP
-The KSM feature is intended for applications that generate many
-instances of the same data (e.g., virtualization systems such as KVM).
-It can consume a lot of processing power; use with care.
-See the Linux kernel source file
-.I Documentation/admin\-guide/mm/ksm.rst
-for more details.
-.IP
-The
-.B MADV_MERGEABLE
-and
-.B MADV_UNMERGEABLE
-operations are available only if the kernel was configured with
-.BR CONFIG_KSM .
-.TP
-.BR MADV_UNMERGEABLE " (since Linux 2.6.32)"
-Undo the effect of an earlier
-.B MADV_MERGEABLE
-operation on the specified address range;
-KSM unmerges whatever pages it had merged in the address range specified by
-.I addr
-and
-.IR length .
-.TP
-.BR MADV_SOFT_OFFLINE " (since Linux 2.6.33)"
-.\" commit afcf938ee0aac4ef95b1a23bac704c6fbeb26de6
-Soft offline the pages in the range specified by
-.I addr
-and
-.IR length .
-The memory of each page in the specified range is preserved
-(i.e., when next accessed, the same content will be visible,
-but in a new physical page frame),
-and the original page is offlined
-(i.e., no longer used, and taken out of normal memory management).
-The effect of the
-.B MADV_SOFT_OFFLINE
-operation is invisible to (i.e., does not change the semantics of)
-the calling process.
-.IP
-This feature is intended for testing of memory error-handling code;
-it is available only if the kernel was configured with
-.BR CONFIG_MEMORY_FAILURE .
-.TP
-.BR MADV_HUGEPAGE " (since Linux 2.6.38)"
-.\" commit 0af4e98b6b095c74588af04872f83d333c958c32
-.\" http://lwn.net/Articles/358904/
-.\" https://lwn.net/Articles/423584/
-Enable Transparent Huge Pages (THP) for pages in the range specified by
-.I addr
-and
-.IR length .
-The kernel will regularly scan the areas marked as huge page candidates
-to replace them with huge pages.
-The kernel will also allocate huge pages directly when the region is
-naturally aligned to the huge page size (see
-.BR posix_memalign (3)).
-.IP
-This feature is primarily aimed at applications that use large mappings of
-data and access large regions of that memory at a time (e.g., virtualization
-systems such as QEMU).
-It can very easily waste memory (e.g., a 2\ MB mapping that only ever accesses
-1 byte will result in 2\ MB of wired memory instead of one 4\ KB page).
-See the Linux kernel source file
-.I Documentation/admin\-guide/mm/transhuge.rst
-for more details.
-.IP
-Most common kernel configurations provide
-.BR MADV_HUGEPAGE -style
-behavior by default, and thus
-.B MADV_HUGEPAGE
-is normally not necessary.
-It is mostly intended for embedded systems, where
-.BR MADV_HUGEPAGE -style
-behavior may not be enabled by default in the kernel.
-On such systems,
-this flag can be used in order to selectively enable THP.
-Whenever
-.B MADV_HUGEPAGE
-is used, it should always be in regions of memory with
-an access pattern that the developer knows in advance won't risk
-increasing the memory footprint of the application when transparent
-hugepages are enabled.
-.IP
-.\" commit 99cb0dbd47a15d395bf3faa78dc122bc5efe3fc0
-Since Linux 5.4,
-automatic scan of eligible areas and replacement by huge pages works with
-private anonymous pages (see
-.BR mmap (2)),
-shmem pages,
-and file-backed pages.
-For all memory types,
-memory may only be replaced by huge pages on hugepage-aligned boundaries.
-For file-mapped memory
-\[em]including tmpfs (see
-.BR tmpfs (5))\[em]
-the mapping must also be naturally hugepage-aligned within the file.
-Additionally,
-for file-backed,
-non-tmpfs memory,
-the file must not be open for write and the mapping must be executable.
-.IP
-The VMA must not be marked
-.BR VM_NOHUGEPAGE ,
-.BR VM_HUGETLB ,
-.BR VM_IO ,
-.BR VM_DONTEXPAND ,
-.BR VM_MIXEDMAP ,
-or
-.BR VM_PFNMAP ,
-nor can it be stack memory or backed by a DAX-enabled device
-(unless the DAX device is hot-plugged as System RAM).
-The process must also not have
-.B PR_SET_THP_DISABLE
-set (see
-.BR prctl (2)).
-.IP
-The
-.BR MADV_HUGEPAGE ,
-.BR MADV_NOHUGEPAGE ,
-and
-.B MADV_COLLAPSE
-operations are available only if the kernel was configured with
-.B CONFIG_TRANSPARENT_HUGEPAGE
-and file/shmem memory is only supported if the kernel was configured with
-.BR CONFIG_READ_ONLY_THP_FOR_FS .
-.TP
-.BR MADV_NOHUGEPAGE " (since Linux 2.6.38)"
-Ensures that memory in the address range specified by
-.I addr
-and
-.I length
-will not be backed by transparent hugepages.
-.TP
-.BR MADV_COLLAPSE " (since Linux 6.1)"
-.\" commit 7d8faaf155454f8798ec56404faca29a82689c77
-.\" commit 34488399fa08faaf664743fa54b271eb6f9e1321
-Perform a best-effort synchronous collapse of
-the native pages mapped by the memory range
-into Transparent Huge Pages (THPs).
-.B MADV_COLLAPSE
-operates on the current state of memory of the calling process and
-makes no persistent changes or guarantees on how pages will be mapped,
-constructed,
-or faulted in the future.
-.IP
-.B MADV_COLLAPSE
-supports private anonymous pages (see
-.BR mmap (2)),
-shmem pages,
-and file-backed pages.
-See
-.B MADV_HUGEPAGE
-for general information on memory requirements for THP.
-If the range provided spans multiple VMAs,
-the semantics of the collapse over each VMA is independent from the others.
-If collapse of a given huge page-aligned/sized region fails,
-the operation may continue to attempt collapsing
-the remainder of the specified memory.
-.B MADV_COLLAPSE
-will automatically clamp the provided range to be hugepage-aligned.
-.IP
-All non-resident pages covered by the range
-will first be swapped/faulted-in,
-before being copied onto a freshly allocated hugepage.
-If the native pages compose the same PTE-mapped hugepage,
-and are suitably aligned,
-allocation of a new hugepage may be elided and
-collapse may happen in-place.
-Unmapped pages will have their data directly initialized to 0
-in the new hugepage.
-However,
-for every eligible hugepage-aligned/sized region to be collapsed,
-at least one page must currently be backed by physical memory.
-.IP
-.B MADV_COLLAPSE
-is independent of any sysfs
-(see
-.BR sysfs (5))
-setting under
-.IR /sys/kernel/mm/transparent_hugepage ,
-both in terms of determining THP eligibility,
-and allocation semantics.
-See Linux kernel source file
-.I Documentation/admin\-guide/mm/transhuge.rst
-for more information.
-.B MADV_COLLAPSE
-also ignores
-.B huge=
-tmpfs mount when operating on tmpfs files.
-Allocation for the new hugepage may enter direct reclaim and/or compaction,
-regardless of VMA flags
-(though
-.B VM_NOHUGEPAGE
-is still respected).
-.IP
-When the system has multiple NUMA nodes,
-the hugepage will be allocated from
-the node providing the most native pages.
-.IP
-If all hugepage-sized/aligned regions covered by the provided range were
-either successfully collapsed,
-or were already PMD-mapped THPs,
-this operation will be deemed successful.
-Note that this doesn't guarantee anything about
-other possible mappings of the memory.
-In the event multiple hugepage-aligned/sized areas fail to collapse,
-only the most-recently\[en]failed code will be set in
-.IR errno .
-.TP
-.BR MADV_DONTDUMP " (since Linux 3.4)"
-.\" commit 909af768e88867016f427264ae39d27a57b6a8ed
-.\" commit accb61fe7bb0f5c2a4102239e4981650f9048519
-Exclude from a core dump those pages in the range specified by
-.I addr
-and
-.IR length .
-This is useful in applications that have large areas of memory
-that are known not to be useful in a core dump.
-The effect of
-.B MADV_DONTDUMP
-takes precedence over the bit mask that is set via the
-.IR /proc/ pid /coredump_filter
-file (see
-.BR core (5)).
-.TP
-.BR MADV_DODUMP " (since Linux 3.4)"
-Undo the effect of an earlier
-.BR MADV_DONTDUMP .
-.TP
-.BR MADV_FREE " (since Linux 4.5)"
-The application no longer requires the pages in the range specified by
-.I addr
-and
-.IR length .
-The kernel can thus free these pages,
-but the freeing could be delayed until memory pressure occurs.
-For each of the pages that has been marked to be freed
-but has not yet been freed,
-the free operation will be canceled if the caller writes into the page.
-After a successful
-.B MADV_FREE
-operation, any stale data (i.e., dirty, unwritten pages) will be lost
-when the kernel frees the pages.
-However, subsequent writes to pages in the range will succeed
-and then the kernel cannot free those dirtied pages,
-so that the caller can always see just written data.
-If there is no subsequent write,
-the kernel can free the pages at any time.
-Once pages in the range have been freed, the caller will
-see zero-fill-on-demand pages upon subsequent page references.
-.IP
-The
-.B MADV_FREE
-operation
-can be applied only to private anonymous pages (see
-.BR mmap (2)).
-Before Linux 4.12,
-.\" commit 93e06c7a645343d222c9a838834a51042eebbbf7
-when freeing pages on a swapless system,
-the pages in the given range are freed instantly,
-regardless of memory pressure.
-.TP
-.BR MADV_WIPEONFORK " (since Linux 4.14)"
-.\" commit d2cd9ede6e193dd7d88b6d27399e96229a551b19
-Present the child process with zero-filled memory in this range after a
-.BR fork (2).
-This is useful in forking servers in order to ensure
-that sensitive per-process data
-(for example, PRNG seeds, cryptographic secrets, and so on)
-is not handed to child processes.
-.IP
-The
-.B MADV_WIPEONFORK
-operation can be applied only to private anonymous pages (see
-.BR mmap (2)).
-.IP
-Within the child created by
-.BR fork (2),
-the
-.B MADV_WIPEONFORK
-setting remains in place on the specified address range.
-This setting is cleared during
-.BR execve (2).
-.TP
-.BR MADV_KEEPONFORK " (since Linux 4.14)"
-.\" commit d2cd9ede6e193dd7d88b6d27399e96229a551b19
-Undo the effect of an earlier
-.BR MADV_WIPEONFORK .
-.TP
-.BR MADV_COLD " (since Linux 5.4)"
-.\" commit 9c276cc65a58faf98be8e56962745ec99ab87636
-Deactivate a given range of pages.
-This will make the pages a more probable
-reclaim target should there be memory pressure.
-This is a nondestructive operation.
-The advice might be ignored for some pages in the range when it is not
-applicable.
-.TP
-.BR MADV_PAGEOUT " (since Linux 5.4)"
-.\" commit 1a4e58cce84ee88129d5d49c064bd2852b481357
-Reclaim a given range of pages.
-This is done to free up memory occupied by these pages.
-If a page is anonymous, it will be swapped out.
-If a page is file-backed and dirty, it will be written back to the backing
-storage.
-The advice might be ignored for some pages in the range when it is not
-applicable.
-.TP
-.BR MADV_POPULATE_READ " (since Linux 5.14)"
-Populate (prefault) page tables readable,
-faulting in all pages in the range just as if manually reading from each page;
-however,
-avoid the actual memory access that would have been performed after handling
-the fault.
-.IP
-In contrast to
-.BR MAP_POPULATE ,
-.B MADV_POPULATE_READ
-does not hide errors,
-can be applied to (parts of) existing mappings and will always populate
-(prefault) page tables readable.
-One example use case is prefaulting a file mapping,
-reading all file content from disk;
-however,
-pages won't be dirtied and consequently won't have to be written back to disk
-when evicting the pages from memory.
-.IP
-Depending on the underlying mapping,
-map the shared zeropage,
-preallocate memory or read the underlying file;
-files with holes might or might not preallocate blocks.
-If populating fails,
-a
-.B SIGBUS
-signal is not generated; instead, an error is returned.
-.IP
-If
-.B MADV_POPULATE_READ
-succeeds,
-all page tables have been populated (prefaulted) readable once.
-If
-.B MADV_POPULATE_READ
-fails,
-some page tables might have been populated.
-.IP
-.B MADV_POPULATE_READ
-cannot be applied to mappings without read permissions
-and special mappings,
-for example,
-mappings marked with kernel-internal flags such as
-.B VM_PFNMAP
-or
-.BR VM_IO ,
-or secret memory regions created using
-.BR memfd_secret (2).
-.IP
-Note that with
-.BR MADV_POPULATE_READ ,
-the process can be killed at any moment when the system runs out of memory.
-.TP
-.BR MADV_POPULATE_WRITE " (since Linux 5.14)"
-Populate (prefault) page tables writable,
-faulting in all pages in the range just as if manually writing to
-each page;
-however,
-avoid the actual memory access that would have been performed after handling
-the fault.
-.IP
-In contrast to
-.BR MAP_POPULATE ,
-MADV_POPULATE_WRITE does not hide errors,
-can be applied to (parts of) existing mappings and will always populate
-(prefault) page tables writable.
-One example use case is preallocating memory,
-breaking any CoW (Copy on Write).
-.IP
-Depending on the underlying mapping,
-preallocate memory or read the underlying file;
-files with holes will preallocate blocks.
-If populating fails,
-a
-.B SIGBUS
-signal is not generated; instead, an error is returned.
-.IP
-If
-.B MADV_POPULATE_WRITE
-succeeds,
-all page tables have been populated (prefaulted) writable once.
-If
-.B MADV_POPULATE_WRITE
-fails,
-some page tables might have been populated.
-.IP
-.B MADV_POPULATE_WRITE
-cannot be applied to mappings without write permissions
-and special mappings,
-for example,
-mappings marked with kernel-internal flags such as
-.B VM_PFNMAP
-or
-.BR VM_IO ,
-or secret memory regions created using
-.BR memfd_secret (2).
-.IP
-Note that with
-.BR MADV_POPULATE_WRITE ,
-the process can be killed at any moment when the system runs out of memory.
-.SH RETURN VALUE
-On success,
-.BR madvise ()
-returns zero.
-On error, it returns \-1 and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-.I advice
-is
-.BR MADV_REMOVE ,
-but the specified address range is not a shared writable mapping.
-.TP
-.B EAGAIN
-A kernel resource was temporarily unavailable.
-.TP
-.B EBADF
-The map exists, but the area maps something that isn't a file.
-.TP
-.B EBUSY
-(for
-.BR MADV_COLLAPSE )
-Could not charge hugepage to cgroup: cgroup limit exceeded.
-.TP
-.B EFAULT
-.I advice
-is
-.B MADV_POPULATE_READ
-or
-.BR MADV_POPULATE_WRITE ,
-and populating (prefaulting) page tables failed because a
-.B SIGBUS
-would have been generated on actual memory access and the reason is not a
-HW poisoned page
-(HW poisoned pages can,
-for example,
-be created using the
-.B MADV_HWPOISON
-flag described elsewhere in this page).
-.TP
-.B EINVAL
-.I addr
-is not page-aligned or
-.I length
-is negative.
-.\" .I length
-.\" is zero,
-.TP
-.B EINVAL
-.I advice
-is not valid.
-.TP
-.B EINVAL
-.I advice
-is
-.B MADV_COLD
-or
-.B MADV_PAGEOUT
-and the specified address range includes locked, Huge TLB pages, or
-.B VM_PFNMAP
-pages.
-.TP
-.B EINVAL
-.I advice
-is
-.B MADV_DONTNEED
-or
-.B MADV_REMOVE
-and the specified address range includes locked, Huge TLB pages, or
-.B VM_PFNMAP
-pages.
-.TP
-.B EINVAL
-.I advice
-is
-.B MADV_MERGEABLE
-or
-.BR MADV_UNMERGEABLE ,
-but the kernel was not configured with
-.BR CONFIG_KSM .
-.TP
-.B EINVAL
-.I advice
-is
-.B MADV_FREE
-or
-.B MADV_WIPEONFORK
-but the specified address range includes file, Huge TLB,
-.BR MAP_SHARED ,
-or
-.B VM_PFNMAP
-ranges.
-.TP
-.B EINVAL
-.I advice
-is
-.B MADV_POPULATE_READ
-or
-.BR MADV_POPULATE_WRITE ,
-but the specified address range includes ranges with insufficient permissions
-or special mappings,
-for example,
-mappings marked with kernel-internal flags such as
-.B VM_IO
-or
-.BR VM_PFNMAP ,
-or secret memory regions created using
-.BR memfd_secret (2).
-.TP
-.B EIO
-(for
-.BR MADV_WILLNEED )
-Paging in this area would exceed the process's
-maximum resident set size.
-.TP
-.B ENOMEM
-(for
-.BR MADV_WILLNEED )
-Not enough memory: paging in failed.
-.TP
-.B ENOMEM
-(for
-.BR MADV_COLLAPSE )
-Not enough memory: could not allocate hugepage.
-.TP
-.B ENOMEM
-Addresses in the specified range are not currently
-mapped, or are outside the address space of the process.
-.TP
-.B ENOMEM
-.I advice
-is
-.B MADV_POPULATE_READ
-or
-.BR MADV_POPULATE_WRITE ,
-and populating (prefaulting) page tables failed because there was not enough
-memory.
-.TP
-.B EPERM
-.I advice
-is
-.BR MADV_HWPOISON ,
-but the caller does not have the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.B EHWPOISON
-.I advice
-is
-.B MADV_POPULATE_READ
-or
-.BR MADV_POPULATE_WRITE ,
-and populating (prefaulting) page tables failed because a HW poisoned page
-(HW poisoned pages can,
-for example,
-be created using the
-.B MADV_HWPOISON
-flag described elsewhere in this page)
-was encountered.
-.SH VERSIONS
-Versions of this system call, implementing a wide variety of
-.I advice
-values, exist on many other implementations.
-Other implementations typically implement at least the flags listed
-above under
-.IR "Conventional advice values" ,
-albeit with some variation in semantics.
-.P
-POSIX.1-2001 describes
-.BR posix_madvise (3)
-with constants
-.BR POSIX_MADV_NORMAL ,
-.BR POSIX_MADV_RANDOM ,
-.BR POSIX_MADV_SEQUENTIAL ,
-.BR POSIX_MADV_WILLNEED ,
-and
-.BR POSIX_MADV_DONTNEED ,
-and so on, with behavior close to the similarly named flags listed above.
-.SS Linux
-The Linux implementation requires that the address
-.I addr
-be page-aligned, and allows
-.I length
-to be zero.
-If there are some parts of the specified address range
-that are not mapped, the Linux version of
-.BR madvise ()
-ignores them and applies the call to the rest (but returns
-.B ENOMEM
-from the system call, as it should).
-.P
-.I madvise(0,\ 0,\ advice)
-will return zero iff
-.I advice
-is supported by the kernel and can be relied on to probe for support.
-.SH STANDARDS
-None.
-.SH HISTORY
-First appeared in 4.4BSD.
-.P
-Since Linux 3.18,
-.\" commit d3ac21cacc24790eb45d735769f35753f5b56ceb
-support for this system call is optional,
-depending on the setting of the
-.B CONFIG_ADVISE_SYSCALLS
-configuration option.
-.SH SEE ALSO
-.BR getrlimit (2),
-.BR memfd_secret (2),
-.BR mincore (2),
-.BR mmap (2),
-.BR mprotect (2),
-.BR msync (2),
-.BR munmap (2),
-.BR prctl (2),
-.BR process_madvise (2),
-.BR posix_madvise (3),
-.BR core (5)
diff --git a/man2/madvise1.2 b/man2/madvise1.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/madvise1.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/mbind.2 b/man2/mbind.2
deleted file mode 100644
index 96264ce81..000000000
--- a/man2/mbind.2
+++ /dev/null
@@ -1,521 +0,0 @@
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft-var
-.\"
-.\" Copyright 2003,2004 Andi Kleen, SuSE Labs.
-.\" and Copyright 2007 Lee Schermerhorn, Hewlett Packard
-.\"
-.\" 2006-02-03, mtk, substantial wording changes and other improvements
-.\" 2007-08-27, Lee Schermerhorn <Lee.Schermerhorn@hp.com>
-.\" more precise specification of behavior.
-.\"
-.\" FIXME
-.\" Linux 3.8 added MPOL_MF_LAZY, which needs to be documented.
-.\" Does it also apply for move_pages()?
-.\"
-.\" commit b24f53a0bea38b266d219ee651b22dba727c44ae
-.\" Author: Lee Schermerhorn <lee.schermerhorn@hp.com>
-.\" Date: Thu Oct 25 14:16:32 2012 +0200
-.\"
-.TH mbind 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mbind \- set memory policy for a memory range
-.SH LIBRARY
-NUMA (Non-Uniform Memory Access) policy library
-.RI ( libnuma ", " \-lnuma )
-.SH SYNOPSIS
-.nf
-.B "#include <numaif.h>"
-.P
-.BI "long mbind(void " addr [. len "], unsigned long " len ", int " mode ,
-.BI " const unsigned long " nodemask [(. maxnode " + ULONG_WIDTH - 1)"
-.B " / ULONG_WIDTH],"
-.BI " unsigned long " maxnode ", unsigned int " flags );
-.fi
-.SH DESCRIPTION
-.BR mbind ()
-sets the NUMA memory policy,
-which consists of a policy mode and zero or more nodes,
-for the memory range starting with
-.I addr
-and continuing for
-.I len
-bytes.
-The memory policy defines from which node memory is allocated.
-.P
-If the memory range specified by the
-.IR addr " and " len
-arguments includes an "anonymous" region of memory\[em]that is
-a region of memory created using the
-.BR mmap (2)
-system call with the
-.B MAP_ANONYMOUS
-flag\[em]or
-a memory-mapped file, mapped using the
-.BR mmap (2)
-system call with the
-.B MAP_PRIVATE
-flag, pages will be allocated only according to the specified
-policy when the application writes (stores) to the page.
-For anonymous regions, an initial read access will use a shared
-page in the kernel containing all zeros.
-For a file mapped with
-.BR MAP_PRIVATE ,
-an initial read access will allocate pages according to the
-memory policy of the thread that causes the page to be allocated.
-This may not be the thread that called
-.BR mbind ().
-.P
-The specified policy will be ignored for any
-.B MAP_SHARED
-mappings in the specified memory range.
-Rather the pages will be allocated according to the memory policy
-of the thread that caused the page to be allocated.
-Again, this may not be the thread that called
-.BR mbind ().
-.P
-If the specified memory range includes a shared memory region
-created using the
-.BR shmget (2)
-system call and attached using the
-.BR shmat (2)
-system call,
-pages allocated for the anonymous or shared memory region will
-be allocated according to the policy specified, regardless of which
-process attached to the shared memory segment causes the allocation.
-If, however, the shared memory region was created with the
-.B SHM_HUGETLB
-flag,
-the huge pages will be allocated according to the policy specified
-only if the page allocation is caused by the process that calls
-.BR mbind ()
-for that region.
-.P
-By default,
-.BR mbind ()
-has an effect only for new allocations; if the pages inside
-the range have been already touched before setting the policy,
-then the policy has no effect.
-This default behavior may be overridden by the
-.B MPOL_MF_MOVE
-and
-.B MPOL_MF_MOVE_ALL
-flags described below.
-.P
-The
-.I mode
-argument must specify one of
-.BR MPOL_DEFAULT ,
-.BR MPOL_BIND ,
-.BR MPOL_INTERLEAVE ,
-.BR MPOL_WEIGHTED_INTERLEAVE ,
-.BR MPOL_PREFERRED ,
-or
-.B MPOL_LOCAL
-(which are described in detail below).
-All policy modes except
-.B MPOL_DEFAULT
-require the caller to specify the node or nodes to which the mode applies,
-via the
-.I nodemask
-argument.
-.P
-The
-.I mode
-argument may also include an optional
-.IR "mode flag" .
-The supported
-.I "mode flags"
-are:
-.TP
-.BR MPOL_F_NUMA_BALANCING " (since Linux 5.15)"
-.\" commit bda420b985054a3badafef23807c4b4fa38a3dff
-.\" commit 6d2aec9e123bb9c49cb5c7fc654f25f81e688e8c
-When
-.I mode
-is
-.BR MPOL_BIND ,
-enable the kernel NUMA balancing for the task if it is supported by the kernel.
-If the flag isn't supported by the kernel, or is used with
-.I mode
-other than
-.BR MPOL_BIND ,
-\-1 is returned and
-.I errno
-is set to
-.BR EINVAL .
-.TP
-.BR MPOL_F_STATIC_NODES " (since Linux 2.6.26)"
-A nonempty
-.I nodemask
-specifies physical node IDs.
-Linux does not remap the
-.I nodemask
-when the thread moves to a different cpuset context,
-nor when the set of nodes allowed by the thread's
-current cpuset context changes.
-.TP
-.BR MPOL_F_RELATIVE_NODES " (since Linux 2.6.26)"
-A nonempty
-.I nodemask
-specifies node IDs that are relative to the set of
-node IDs allowed by the thread's current cpuset.
-.P
-.I nodemask
-points to a bit mask of nodes containing up to
-.I maxnode
-bits.
-The bit mask size is rounded to the next multiple of
-.IR "sizeof(unsigned long)" ,
-but the kernel will use bits only up to
-.IR maxnode .
-A NULL value of
-.I nodemask
-or a
-.I maxnode
-value of zero specifies the empty set of nodes.
-If the value of
-.I maxnode
-is zero,
-the
-.I nodemask
-argument is ignored.
-Where a
-.I nodemask
-is required, it must contain at least one node that is on-line,
-allowed by the thread's current cpuset context
-(unless the
-.B MPOL_F_STATIC_NODES
-mode flag is specified),
-and contains memory.
-.P
-The
-.I mode
-argument must include one of the following values:
-.TP
-.B MPOL_DEFAULT
-This mode requests that any nondefault policy be removed,
-restoring default behavior.
-When applied to a range of memory via
-.BR mbind (),
-this means to use the thread memory policy,
-which may have been set with
-.BR set_mempolicy (2).
-If the mode of the thread memory policy is also
-.BR MPOL_DEFAULT ,
-the system-wide default policy will be used.
-The system-wide default policy allocates
-pages on the node of the CPU that triggers the allocation.
-For
-.BR MPOL_DEFAULT ,
-the
-.I nodemask
-and
-.I maxnode
-arguments must specify the empty set of nodes.
-.TP
-.B MPOL_BIND
-This mode specifies a strict policy that restricts memory allocation to
-the nodes specified in
-.IR nodemask .
-If
-.I nodemask
-specifies more than one node, page allocations will come from
-the node with sufficient free memory that is closest to
-the node where the allocation takes place.
-Pages will not be allocated from any node not specified in the
-.IR nodemask .
-(Before Linux 2.6.26,
-.\" commit 19770b32609b6bf97a3dece2529089494cbfc549
-page allocations came from
-the node with the lowest numeric node ID first, until that node
-contained no free memory.
-Allocations then came from the node with the next highest
-node ID specified in
-.I nodemask
-and so forth, until none of the specified nodes contained free memory.)
-.TP
-.B MPOL_INTERLEAVE
-This mode specifies that page allocations be interleaved across the
-set of nodes specified in
-.IR nodemask .
-This optimizes for bandwidth instead of latency
-by spreading out pages and memory accesses to those pages across
-multiple nodes.
-To be effective the memory area should be fairly large,
-at least 1\ MB or bigger with a fairly uniform access pattern.
-Accesses to a single page of the area will still be limited to
-the memory bandwidth of a single node.
-.TP
-.BR MPOL_WEIGHTED_INTERLEAVE " (since Linux 6.9)"
-.\" commit fa3bea4e1f8202d787709b7e3654eb0a99aed758
-This mode interleaves page allocations across the nodes specified in
-.I nodemask
-according to the weights in
-.IR /sys/kernel/mm/mempolicy/weighted_interleave .
-For example, if bits 0, 2, and 5 are set in
-.IR nodemask ,
-and the contents of
-.IR /sys/kernel/mm/mempolicy/weighted_interleave/node0 ,
-.IR /sys/ .\|.\|. /node2 ,
-and
-.IR /sys/ .\|.\|. /node5
-are 4, 7, and 9, respectively,
-then pages in this region will be allocated on nodes 0, 2, and 5
-in a 4:7:9 ratio.
-.TP
-.B MPOL_PREFERRED
-This mode sets the preferred node for allocation.
-The kernel will try to allocate pages from this
-node first and fall back to other nodes if the
-preferred node is low on free memory.
-If
-.I nodemask
-specifies more than one node ID, the first node in the
-mask will be selected as the preferred node.
-If the
-.I nodemask
-and
-.I maxnode
-arguments specify the empty set, then the memory is allocated on
-the node of the CPU that triggered the allocation.
-.TP
-.BR MPOL_LOCAL " (since Linux 3.8)"
-.\" commit 479e2802d09f1e18a97262c4c6f8f17ae5884bd8
-.\" commit f2a07f40dbc603c15f8b06e6ec7f768af67b424f
-This mode specifies "local allocation"; the memory is allocated on
-the node of the CPU that triggered the allocation (the "local node").
-The
-.I nodemask
-and
-.I maxnode
-arguments must specify the empty set.
-If the "local node" is low on free memory,
-the kernel will try to allocate memory from other nodes.
-The kernel will allocate memory from the "local node"
-whenever memory for this node is available.
-If the "local node" is not allowed by the thread's current cpuset context,
-the kernel will try to allocate memory from other nodes.
-The kernel will allocate memory from the "local node" whenever
-it becomes allowed by the thread's current cpuset context.
-By contrast,
-.B MPOL_DEFAULT
-reverts to the memory policy of the thread (which may be set via
-.BR set_mempolicy (2));
-that policy may be something other than "local allocation".
-.P
-If
-.B MPOL_MF_STRICT
-is passed in
-.I flags
-and
-.I mode
-is not
-.BR MPOL_DEFAULT ,
-then the call fails with the error
-.B EIO
-if the existing pages in the memory range don't follow the policy.
-.\" According to the kernel code, the following is not true
-.\" --Lee Schermerhorn
-.\" In Linux 2.6.16 or later the kernel will also try to move pages
-.\" to the requested node with this flag.
-.P
-If
-.B MPOL_MF_MOVE
-is specified in
-.IR flags ,
-then the kernel will attempt to move all the existing pages
-in the memory range so that they follow the policy.
-Pages that are shared with other processes will not be moved.
-If
-.B MPOL_MF_STRICT
-is also specified, then the call fails with the error
-.B EIO
-if some pages could not be moved.
-If the
-.B MPOL_INTERLEAVE
-policy was specified,
-pages already residing on the specified nodes
-will not be moved such that they are interleaved.
-.P
-If
-.B MPOL_MF_MOVE_ALL
-is passed in
-.IR flags ,
-then the kernel will attempt to move all existing pages in the memory range
-regardless of whether other processes use the pages.
-The calling thread must be privileged
-.RB ( CAP_SYS_NICE )
-to use this flag.
-If
-.B MPOL_MF_STRICT
-is also specified, then the call fails with the error
-.B EIO
-if some pages could not be moved.
-If the
-.B MPOL_INTERLEAVE
-policy was specified,
-pages already residing on the specified nodes
-will not be moved such that they are interleaved.
-.\" ---------------------------------------------------------------
-.SH RETURN VALUE
-On success,
-.BR mbind ()
-returns 0;
-on error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.\" ---------------------------------------------------------------
-.SH ERRORS
-.\" I think I got all of the error returns. --Lee Schermerhorn
-.TP
-.B EFAULT
-Part or all of the memory range specified by
-.I nodemask
-and
-.I maxnode
-points outside your accessible address space.
-Or, there was an unmapped hole in the memory range specified by
-.I addr
-and
-.IR len .
-.TP
-.B EINVAL
-An invalid value was specified for
-.I flags
-or
-.IR mode ;
-or
-.I addr + len
-was less than
-.IR addr ;
-or
-.I addr
-is not a multiple of the system page size.
-Or,
-.I mode
-is
-.B MPOL_DEFAULT
-and
-.I nodemask
-specified a nonempty set;
-or
-.I mode
-is
-.B MPOL_BIND
-or
-.B MPOL_INTERLEAVE
-and
-.I nodemask
-is empty.
-Or,
-.I maxnode
-exceeds a kernel-imposed limit.
-.\" As at 2.6.23, this limit is "a page worth of bits", e.g.,
-.\" 8 * 4096 bits, assuming a 4kB page size.
-Or,
-.I nodemask
-specifies one or more node IDs that are
-greater than the maximum supported node ID.
-Or, none of the node IDs specified by
-.I nodemask
-are on-line and allowed by the thread's current cpuset context,
-or none of the specified nodes contain memory.
-Or, the
-.I mode
-argument specified both
-.B MPOL_F_STATIC_NODES
-and
-.BR MPOL_F_RELATIVE_NODES .
-.TP
-.B EIO
-.B MPOL_MF_STRICT
-was specified and an existing page was already on a node
-that does not follow the policy;
-or
-.B MPOL_MF_MOVE
-or
-.B MPOL_MF_MOVE_ALL
-was specified and the kernel was unable to move all existing
-pages in the range.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B EPERM
-The
-.I flags
-argument included the
-.B MPOL_MF_MOVE_ALL
-flag and the caller does not have the
-.B CAP_SYS_NICE
-privilege.
-.\" ---------------------------------------------------------------
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.7.
-.P
-Support for huge page policy was added with Linux 2.6.16.
-For interleave policy to be effective on huge page mappings the
-policied memory needs to be tens of megabytes or larger.
-.P
-Before Linux 5.7,
-.\" commit dcf1763546d76c372f3136c8d6b2b6e77f140cf0
-.B MPOL_MF_STRICT
-was ignored on huge page mappings.
-.P
-.B MPOL_MF_MOVE
-and
-.B MPOL_MF_MOVE_ALL
-are available only on Linux 2.6.16 and later.
-.SH NOTES
-For information on library support, see
-.BR numa (7).
-.P
-NUMA policy is not supported on a memory-mapped file range
-that was mapped with the
-.B MAP_SHARED
-flag.
-.P
-The
-.B MPOL_DEFAULT
-mode can have different effects for
-.BR mbind ()
-and
-.BR set_mempolicy (2).
-When
-.B MPOL_DEFAULT
-is specified for
-.BR set_mempolicy (2),
-the thread's memory policy reverts to the system default policy
-or local allocation.
-When
-.B MPOL_DEFAULT
-is specified for a range of memory using
-.BR mbind (),
-any pages subsequently allocated for that range will use
-the thread's memory policy, as set by
-.BR set_mempolicy (2).
-This effectively removes the explicit policy from the
-specified range, "falling back" to a possibly nondefault
-policy.
-To select explicit "local allocation" for a memory range,
-specify a
-.I mode
-of
-.B MPOL_LOCAL
-or
-.B MPOL_PREFERRED
-with an empty set of nodes.
-This method will work for
-.BR set_mempolicy (2),
-as well.
-.SH SEE ALSO
-.BR get_mempolicy (2),
-.BR getcpu (2),
-.BR mmap (2),
-.BR set_mempolicy (2),
-.BR shmat (2),
-.BR shmget (2),
-.BR numa (3),
-.BR cpuset (7),
-.BR numa (7),
-.BR numactl (8)
diff --git a/man2/membarrier.2 b/man2/membarrier.2
deleted file mode 100644
index cd8029dd9..000000000
--- a/man2/membarrier.2
+++ /dev/null
@@ -1,460 +0,0 @@
-'\" t
-.\" Copyright 2015-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH membarrier 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-membarrier \- issue memory barriers on a set of threads
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.P
-.BR "#include <linux/membarrier.h>" \
-" /* Definition of " MEMBARRIER_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_membarrier, int " cmd ", unsigned int " flags \
-", int " cpu_id );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR membarrier (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR membarrier ()
-system call helps reduce the overhead of the memory barrier
-instructions required to order memory accesses on multi-core systems.
-However, this system call is heavier than a memory barrier, so using it
-effectively is
-.I not
-as simple as replacing memory barriers with this
-system call, but requires understanding of the details below.
-.P
-Use of memory barriers needs to be done taking into account that a
-memory barrier always needs to be either matched with its memory barrier
-counterparts, or that the architecture's memory model doesn't require the
-matching barriers.
-.P
-There are cases where one side of the matching barriers (which we will
-refer to as "fast side") is executed much more often than the other
-(which we will refer to as "slow side").
-This is a prime target for the use of
-.BR membarrier ().
-The key idea is to replace, for these matching
-barriers, the fast-side memory barriers by simple compiler barriers,
-for example:
-.P
-.in +4n
-.EX
-asm volatile ("" : : : "memory")
-.EE
-.in
-.P
-and replace the slow-side memory barriers by calls to
-.BR membarrier ().
-.P
-This will add overhead to the slow side, and remove overhead from the
-fast side, thus resulting in an overall performance increase as long as
-the slow side is infrequent enough that the overhead of the
-.BR membarrier ()
-calls does not outweigh the performance gain on the fast side.
-.P
-The
-.I cmd
-argument is one of the following:
-.TP
-.BR MEMBARRIER_CMD_QUERY " (since Linux 4.3)"
-Query the set of supported commands.
-The return value of the call is a bit mask of supported
-commands.
-.BR MEMBARRIER_CMD_QUERY ,
-which has the value 0,
-is not itself included in this bit mask.
-This command is always supported (on kernels where
-.BR membarrier ()
-is provided).
-.TP
-.BR MEMBARRIER_CMD_GLOBAL " (since Linux 4.16)"
-Ensure that all threads from all processes on the system pass through a
-state where all memory accesses to user-space addresses match program
-order between entry to and return from the
-.BR membarrier ()
-system call.
-All threads on the system are targeted by this command.
-.TP
-.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED " (since Linux 4.16)"
-Execute a memory barrier on all running threads of all processes that
-previously registered with
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
-.IP
-Upon return from the system call, the calling thread has a guarantee that all
-running threads have passed through a state where all memory accesses to
-user-space addresses match program order between entry to and return
-from the system call (non-running threads are de facto in such a state).
-This guarantee is provided only for the threads of processes that
-previously registered with
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
-.IP
-Given that registration is about the intent to receive the barriers, it
-is valid to invoke
-.B MEMBARRIER_CMD_GLOBAL_EXPEDITED
-from a process that has not employed
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
-.IP
-The "expedited" commands complete faster than the non-expedited ones;
-they never block, but have the downside of causing extra overhead.
-.TP
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED " (since Linux 4.16)"
-Register the process's intent to receive
-.B MEMBARRIER_CMD_GLOBAL_EXPEDITED
-memory barriers.
-.TP
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED " (since Linux 4.14)"
-Execute a memory barrier on each running thread belonging to the same
-process as the calling thread.
-.IP
-Upon return from the system call, the calling
-thread has a guarantee that all its running thread siblings have passed
-through a state where all memory accesses to user-space addresses match
-program order between entry to and return from the system call
-(non-running threads are de facto in such a state).
-This guarantee is provided only for threads in
-the same process as the calling thread.
-.IP
-The "expedited" commands complete faster than the non-expedited ones;
-they never block, but have the downside of causing extra overhead.
-.IP
-A process must register its intent to use the private
-expedited command prior to using it.
-.TP
-.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED " (since Linux 4.14)"
-Register the process's intent to use
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED .
-.TP
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)"
-In addition to providing the memory ordering guarantees described in
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED ,
-upon return from system call the calling thread has a guarantee that all its
-running thread siblings have executed a core serializing instruction.
-This guarantee is provided only for threads in
-the same process as the calling thread.
-.IP
-The "expedited" commands complete faster than the non-expedited ones;
-they never block, but have the downside of causing extra overhead.
-.IP
-A process must register its intent to use the private expedited sync
-core command prior to using it.
-.TP
-.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)"
-Register the process's intent to use
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE .
-.TP
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ " (since Linux 5.10)"
-Guarantee to the calling thread, upon return from the system call, that all its
-running thread siblings have any currently running rseq critical sections
-restarted if
-.I flags
-parameter is 0; if
-.I flags
-parameter is
-.BR MEMBARRIER_CMD_FLAG_CPU ,
-then this operation is performed only on CPU indicated by
-.IR cpu_id .
-This guarantee is provided only for threads in
-the same process as the calling thread.
-.IP
-RSEQ membarrier is only available in the "private expedited" form.
-.IP
-A process must register its intent to use the private expedited rseq
-command prior to using it.
-.TP
-.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ " (since Linux 5.10)"
-Register the process's intent to use
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ .
-.TP
-.BR MEMBARRIER_CMD_SHARED " (since Linux 4.3)"
-This is an alias for
-.B MEMBARRIER_CMD_GLOBAL
-that exists for header backward compatibility.
-.P
-The
-.I flags
-argument must be specified as 0 unless the command is
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ ,
-in which case
-.I flags
-can be either 0 or
-.BR MEMBARRIER_CMD_FLAG_CPU .
-.P
-The
-.I cpu_id
-argument is ignored unless
-.I flags
-is
-.BR MEMBARRIER_CMD_FLAG_CPU ,
-in which case it must specify the CPU targeted by this membarrier
-command.
-.P
-All memory accesses performed in program order from each targeted thread
-are guaranteed to be ordered with respect to
-.BR membarrier ().
-.P
-If we use the semantic
-.I barrier()
-to represent a compiler barrier forcing memory
-accesses to be performed in program order across the barrier, and
-.I smp_mb()
-to represent explicit memory barriers forcing full memory
-ordering across the barrier, we have the following ordering table for
-each pairing of
-.IR barrier() ,
-.BR membarrier (),
-and
-.IR smp_mb() .
-The pair ordering is detailed as (O: ordered, X: not ordered):
-.P
-.RS
-.TS
-l c c c.
-\& barrier() smp_mb() membarrier()
-barrier() X X O
-smp_mb() X O O
-membarrier() O O O
-.TE
-.RE
-.SH RETURN VALUE
-On success, the
-.B MEMBARRIER_CMD_QUERY
-operation returns a bit mask of supported commands, and the
-.BR MEMBARRIER_CMD_GLOBAL ,
-.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED ,
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED ,
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED ,
-.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED ,
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE ,
-and
-.B MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
-operations return zero.
-On error, \-1 is returned,
-and
-.I errno
-is set to indicate the error.
-.P
-For a given command, with
-.I flags
-set to 0, this system call is
-guaranteed to always return the same value until reboot.
-Further calls with the same arguments will lead to the same result.
-Therefore, with
-.I flags
-set to 0, error handling is required only for the first call to
-.BR membarrier ().
-.SH ERRORS
-.TP
-.B EINVAL
-.I cmd
-is invalid, or
-.I flags
-is nonzero, or the
-.B MEMBARRIER_CMD_GLOBAL
-command is disabled because the
-.I nohz_full
-CPU parameter has been set, or the
-.B MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
-and
-.B MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
-commands are not implemented by the architecture.
-.TP
-.B ENOSYS
-The
-.BR membarrier ()
-system call is not implemented by this kernel.
-.TP
-.B EPERM
-The current process was not registered prior to using private expedited
-commands.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 4.3.
-.P
-Before Linux 5.10, the prototype was:
-.P
-.in +4n
-.EX
-.BI "int membarrier(int " cmd ", int " flags );
-.EE
-.in
-.SH NOTES
-A memory barrier instruction is part of the instruction set of
-architectures with weakly ordered memory models.
-It orders memory
-accesses prior to the barrier and after the barrier with respect to
-matching barriers on other cores.
-For instance, a load fence can order
-loads prior to and following that fence with respect to stores ordered
-by store fences.
-.P
-Program order is the order in which instructions are ordered in the
-program assembly code.
-.P
-Examples where
-.BR membarrier ()
-can be useful include implementations
-of Read-Copy-Update libraries and garbage collectors.
-.SH EXAMPLES
-Assuming a multithreaded application where "fast_path()" is executed
-very frequently, and where "slow_path()" is executed infrequently, the
-following code (x86) can be transformed using
-.BR membarrier ():
-.P
-.in +4n
-.\" SRC BEGIN (membarrier.c)
-.EX
-#include <stdlib.h>
-\&
-static volatile int a, b;
-\&
-static void
-fast_path(int *read_b)
-{
- a = 1;
- asm volatile ("mfence" : : : "memory");
- *read_b = b;
-}
-\&
-static void
-slow_path(int *read_a)
-{
- b = 1;
- asm volatile ("mfence" : : : "memory");
- *read_a = a;
-}
-\&
-int
-main(void)
-{
- int read_a, read_b;
-\&
- /*
- * Real applications would call fast_path() and slow_path()
- * from different threads. Call those from main() to keep
- * this example short.
- */
-\&
- slow_path(&read_a);
- fast_path(&read_b);
-\&
- /*
- * read_b == 0 implies read_a == 1 and
- * read_a == 0 implies read_b == 1.
- */
-\&
- if (read_b == 0 && read_a == 0)
- abort();
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.in
-.P
-The code above transformed to use
-.BR membarrier ()
-becomes:
-.P
-.in +4n
-.EX
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <linux/membarrier.h>
-\&
-static volatile int a, b;
-\&
-static int
-membarrier(int cmd, unsigned int flags, int cpu_id)
-{
- return syscall(__NR_membarrier, cmd, flags, cpu_id);
-}
-\&
-static int
-init_membarrier(void)
-{
- int ret;
-\&
- /* Check that membarrier() is supported. */
-\&
- ret = membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
- if (ret < 0) {
- perror("membarrier");
- return \-1;
- }
-\&
- if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
- fprintf(stderr,
- "membarrier does not support MEMBARRIER_CMD_GLOBAL\en");
- return \-1;
- }
-\&
- return 0;
-}
-\&
-static void
-fast_path(int *read_b)
-{
- a = 1;
- asm volatile ("" : : : "memory");
- *read_b = b;
-}
-\&
-static void
-slow_path(int *read_a)
-{
- b = 1;
- membarrier(MEMBARRIER_CMD_GLOBAL, 0, 0);
- *read_a = a;
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int read_a, read_b;
-\&
- if (init_membarrier())
- exit(EXIT_FAILURE);
-\&
- /*
- * Real applications would call fast_path() and slow_path()
- * from different threads. Call those from main() to keep
- * this example short.
- */
-\&
- slow_path(&read_a);
- fast_path(&read_b);
-\&
- /*
- * read_b == 0 implies read_a == 1 and
- * read_a == 0 implies read_b == 1.
- */
-\&
- if (read_b == 0 && read_a == 0)
- abort();
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.in
-.\" .SH SEE ALSO
-.\" FIXME See if the following syscalls make it into Linux 4.15 or later
-.\" .BR cpu_opv (2),
-.\" .BR rseq (2)
diff --git a/man2/memfd_create.2 b/man2/memfd_create.2
deleted file mode 100644
index 794aa0e54..000000000
--- a/man2/memfd_create.2
+++ /dev/null
@@ -1,549 +0,0 @@
-.\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH memfd_create 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-memfd_create \- create an anonymous file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sys/mman.h>
-.P
-.BI "int memfd_create(const char *" name ", unsigned int " flags ");"
-.fi
-.SH DESCRIPTION
-.BR memfd_create ()
-creates an anonymous file and returns a file descriptor that refers to it.
-The file behaves like a regular file, and so can be modified,
-truncated, memory-mapped, and so on.
-However, unlike a regular file,
-it lives in RAM and has a volatile backing storage.
-Once all references to the file are dropped, it is automatically released.
-Anonymous memory is used for all backing pages of the file.
-Therefore, files created by
-.BR memfd_create ()
-have the same semantics as other anonymous
-.\" David Herrmann:
-.\" memfd uses VM_NORESERVE so each page is accounted on first access.
-.\" This means, the overcommit-limits (see __vm_enough_memory()) and the
-.\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that
-.\" those are accounted on "current" and "current->mm", that is, the
-.\" process doing the first page access.
-memory allocations such as those allocated using
-.BR mmap (2)
-with the
-.B MAP_ANONYMOUS
-flag.
-.P
-The initial size of the file is set to 0.
-Following the call, the file size should be set using
-.BR ftruncate (2).
-(Alternatively, the file may be populated by calls to
-.BR write (2)
-or similar.)
-.P
-The name supplied in
-.I name
-is used as a filename and will be displayed
-as the target of the corresponding symbolic link in the directory
-.IR /proc/self/fd/ .
-The displayed name is always prefixed with
-.I memfd:
-and serves only for debugging purposes.
-Names do not affect the behavior of the file descriptor,
-and as such multiple files can have the same name without any side effects.
-.P
-The following values may be bitwise ORed in
-.I flags
-to change the behavior of
-.BR memfd_create ():
-.TP
-.B MFD_CLOEXEC
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.TP
-.B MFD_ALLOW_SEALING
-Allow sealing operations on this file.
-See the discussion of the
-.B F_ADD_SEALS
-and
-.B F_GET_SEALS
-operations in
-.BR fcntl (2),
-and also NOTES, below.
-The initial set of seals is empty.
-If this flag is not set, the initial set of seals will be
-.BR F_SEAL_SEAL ,
-meaning that no other seals can be set on the file.
-.\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default?
-.\" Is it worth adding some text explaining this?
-.TP
-.BR MFD_HUGETLB " (since Linux 4.14)"
-.\" commit 749df87bd7bee5a79cef073f5d032ddb2b211de8
-The anonymous file will be created in the hugetlbfs filesystem using
-huge pages.
-See the Linux kernel source file
-.I Documentation/admin\-guide/mm/hugetlbpage.rst
-for more information about hugetlbfs.
-.\" commit 47b9012ecdc747f6936395265e677d41e11a31ff
-Specifying both
-.B MFD_HUGETLB
-and
-.B MFD_ALLOW_SEALING
-in
-.I flags
-is supported since Linux 4.16.
-.TP
-.B MFD_HUGE_2MB
-.TQ
-.B MFD_HUGE_1GB
-.TQ
-\&.\|.\|.
-Used in conjunction with
-.B MFD_HUGETLB
-to select alternative hugetlb page sizes (respectively, 2\ MB, 1\ GB, ...)
-on systems that support multiple hugetlb page sizes.
-Definitions for known
-huge page sizes are included in the header file
-.IR <linux/memfd.h> .
-.IP
-For details on encoding huge page sizes not included in the header file,
-see the discussion of the similarly named constants in
-.BR mmap (2).
-.P
-Unused bits in
-.I flags
-must be 0.
-.P
-As its return value,
-.BR memfd_create ()
-returns a new file descriptor that can be used to refer to the file.
-This file descriptor is opened for both reading and writing
-.RB ( O_RDWR )
-and
-.B O_LARGEFILE
-is set for the file descriptor.
-.P
-With respect to
-.BR fork (2)
-and
-.BR execve (2),
-the usual semantics apply for the file descriptor created by
-.BR memfd_create ().
-A copy of the file descriptor is inherited by the child produced by
-.BR fork (2)
-and refers to the same file.
-The file descriptor is preserved across
-.BR execve (2),
-unless the close-on-exec flag has been set.
-.SH RETURN VALUE
-On success,
-.BR memfd_create ()
-returns a new file descriptor.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-The address in
-.I name
-points to invalid memory.
-.TP
-.B EINVAL
-.I flags
-included unknown bits.
-.TP
-.B EINVAL
-.I name
-was too long.
-(The limit is
-.\" NAME_MAX - strlen("memfd:")
-249 bytes, excluding the terminating null byte.)
-.TP
-.B EINVAL
-Both
-.B MFD_HUGETLB
-and
-.B MFD_ALLOW_SEALING
-were specified in
-.IR flags " (before Linux 4.16)."
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOMEM
-There was insufficient memory to create a new anonymous file.
-.TP
-.B EPERM
-The
-.B MFD_HUGETLB
-flag was specified, but the caller was not privileged (did not have the
-.B CAP_IPC_LOCK
-capability)
-and is not a member of the
-.I sysctl_hugetlb_shm_group
-group; see the description of
-.I /proc/sys/vm/sysctl_hugetlb_shm_group
-in
-.BR proc (5).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.17,
-glibc 2.27.
-.SH NOTES
-.\" See also http://lwn.net/Articles/593918/
-.\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/
-The
-.BR memfd_create ()
-system call provides a simple alternative to manually mounting a
-.BR tmpfs (5)
-filesystem and creating and opening a file in that filesystem.
-The primary purpose of
-.BR memfd_create ()
-is to create files and associated file descriptors that are
-used with the file-sealing APIs provided by
-.BR fcntl (2).
-.P
-The
-.BR memfd_create ()
-system call also has uses without file sealing
-(which is why file-sealing is disabled, unless explicitly requested with the
-.B MFD_ALLOW_SEALING
-flag).
-In particular, it can be used as an alternative to creating files in
-.I /tmp
-or as an alternative to using the
-.BR open (2)
-.B O_TMPFILE
-flag in cases where there is no intention to actually link the
-resulting file into the filesystem.
-.SS File sealing
-In the absence of file sealing,
-processes that communicate via shared memory must either trust each other,
-or take measures to deal with the possibility that an untrusted peer
-may manipulate the shared memory region in problematic ways.
-For example, an untrusted peer might modify the contents of the
-shared memory at any time, or shrink the shared memory region.
-The former possibility leaves the local process vulnerable to
-time-of-check-to-time-of-use race conditions
-(typically dealt with by copying data from
-the shared memory region before checking and using it).
-The latter possibility leaves the local process vulnerable to
-.B SIGBUS
-signals when an attempt is made to access a now-nonexistent
-location in the shared memory region.
-(Dealing with this possibility necessitates the use of a handler for the
-.B SIGBUS
-signal.)
-.P
-Dealing with untrusted peers imposes extra complexity on
-code that employs shared memory.
-Memory sealing enables that extra complexity to be eliminated,
-by allowing a process to operate secure in the knowledge that
-its peer can't modify the shared memory in an undesired fashion.
-.P
-An example of the usage of the sealing mechanism is as follows:
-.IP (1) 5
-The first process creates a
-.BR tmpfs (5)
-file using
-.BR memfd_create ().
-The call yields a file descriptor used in subsequent steps.
-.IP (2)
-The first process
-sizes the file created in the previous step using
-.BR ftruncate (2),
-maps it using
-.BR mmap (2),
-and populates the shared memory with the desired data.
-.IP (3)
-The first process uses the
-.BR fcntl (2)
-.B F_ADD_SEALS
-operation to place one or more seals on the file,
-in order to restrict further modifications on the file.
-(If placing the seal
-.BR F_SEAL_WRITE ,
-then it will be necessary to first unmap the shared writable mapping
-created in the previous step.
-Otherwise, behavior similar to
-.B F_SEAL_WRITE
-can be achieved by using
-.BR F_SEAL_FUTURE_WRITE ,
-which will prevent future writes via
-.BR mmap (2)
-and
-.BR write (2)
-from succeeding while keeping existing shared writable mappings).
-.IP (4)
-A second process obtains a file descriptor for the
-.BR tmpfs (5)
-file and maps it.
-Among the possible ways in which this could happen are the following:
-.RS
-.IP \[bu] 3
-The process that called
-.BR memfd_create ()
-could transfer the resulting file descriptor to the second process
-via a UNIX domain socket (see
-.BR unix (7)
-and
-.BR cmsg (3)).
-The second process then maps the file using
-.BR mmap (2).
-.IP \[bu]
-The second process is created via
-.BR fork (2)
-and thus automatically inherits the file descriptor and mapping.
-(Note that in this case and the next,
-there is a natural trust relationship between the two processes,
-since they are running under the same user ID.
-Therefore, file sealing would not normally be necessary.)
-.IP \[bu]
-The second process opens the file
-.IR /proc/ pid /fd/ fd,
-where
-.I <pid>
-is the PID of the first process (the one that called
-.BR memfd_create ()),
-and
-.I <fd>
-is the number of the file descriptor returned by the call to
-.BR memfd_create ()
-in that process.
-The second process then maps the file using
-.BR mmap (2).
-.RE
-.IP (5)
-The second process uses the
-.BR fcntl (2)
-.B F_GET_SEALS
-operation to retrieve the bit mask of seals
-that has been applied to the file.
-This bit mask can be inspected in order to determine
-what kinds of restrictions have been placed on file modifications.
-If desired, the second process can apply further seals
-to impose additional restrictions (so long as the
-.B F_SEAL_SEAL
-seal has not yet been applied).
-.SH EXAMPLES
-Below are shown two example programs that demonstrate the use of
-.BR memfd_create ()
-and the file sealing API.
-.P
-The first program,
-.IR t_memfd_create.c ,
-creates a
-.BR tmpfs (5)
-file using
-.BR memfd_create (),
-sets a size for the file, maps it into memory,
-and optionally places some seals on the file.
-The program accepts up to three command-line arguments,
-of which the first two are required.
-The first argument is the name to associate with the file,
-the second argument is the size to be set for the file,
-and the optional third argument is a string of characters that specify
-seals to be set on the file.
-.P
-The second program,
-.IR t_get_seals.c ,
-can be used to open an existing file that was created via
-.BR memfd_create ()
-and inspect the set of seals that have been applied to that file.
-.P
-The following shell session demonstrates the use of these programs.
-First we create a
-.BR tmpfs (5)
-file and set some seals on it:
-.P
-.in +4n
-.EX
-$ \fB./t_memfd_create my_memfd_file 4096 sw &\fP
-[1] 11775
-PID: 11775; fd: 3; /proc/11775/fd/3
-.EE
-.in
-.P
-At this point, the
-.I t_memfd_create
-program continues to run in the background.
-From another program, we can obtain a file descriptor for the
-file created by
-.BR memfd_create ()
-by opening the
-.IR /proc/ pid /fd
-file that corresponds to the file descriptor opened by
-.BR memfd_create ().
-Using that pathname, we inspect the content of the
-.IR /proc/ pid /fd
-symbolic link, and use our
-.I t_get_seals
-program to view the seals that have been placed on the file:
-.P
-.in +4n
-.EX
-$ \fBreadlink /proc/11775/fd/3\fP
-/memfd:my_memfd_file (deleted)
-$ \fB./t_get_seals /proc/11775/fd/3\fP
-Existing seals: WRITE SHRINK
-.EE
-.in
-.SS Program source: t_memfd_create.c
-\&
-.\" SRC BEGIN (t_memfd_create.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- char *name, *seals_arg;
- ssize_t len;
- unsigned int seals;
-\&
- if (argc < 3) {
- fprintf(stderr, "%s name size [seals]\en", argv[0]);
- fprintf(stderr, "\et\[aq]seals\[aq] can contain any of the "
- "following characters:\en");
- fprintf(stderr, "\et\etg \- F_SEAL_GROW\en");
- fprintf(stderr, "\et\ets \- F_SEAL_SHRINK\en");
- fprintf(stderr, "\et\etw \- F_SEAL_WRITE\en");
- fprintf(stderr, "\et\etW \- F_SEAL_FUTURE_WRITE\en");
- fprintf(stderr, "\et\etS \- F_SEAL_SEAL\en");
- exit(EXIT_FAILURE);
- }
-\&
- name = argv[1];
- len = atoi(argv[2]);
- seals_arg = argv[3];
-\&
- /* Create an anonymous file in tmpfs; allow seals to be
- placed on the file. */
-\&
- fd = memfd_create(name, MFD_ALLOW_SEALING);
- if (fd == \-1)
- err(EXIT_FAILURE, "memfd_create");
-\&
- /* Size the file as specified on the command line. */
-\&
- if (ftruncate(fd, len) == \-1)
- err(EXIT_FAILURE, "truncate");
-\&
- printf("PID: %jd; fd: %d; /proc/%jd/fd/%d\en",
- (intmax_t) getpid(), fd, (intmax_t) getpid(), fd);
-\&
- /* Code to map the file and populate the mapping with data
- omitted. */
-\&
- /* If a \[aq]seals\[aq] command\-line argument was supplied, set some
- seals on the file. */
-\&
- if (seals_arg != NULL) {
- seals = 0;
-\&
- if (strchr(seals_arg, \[aq]g\[aq]) != NULL)
- seals |= F_SEAL_GROW;
- if (strchr(seals_arg, \[aq]s\[aq]) != NULL)
- seals |= F_SEAL_SHRINK;
- if (strchr(seals_arg, \[aq]w\[aq]) != NULL)
- seals |= F_SEAL_WRITE;
- if (strchr(seals_arg, \[aq]W\[aq]) != NULL)
- seals |= F_SEAL_FUTURE_WRITE;
- if (strchr(seals_arg, \[aq]S\[aq]) != NULL)
- seals |= F_SEAL_SEAL;
-\&
- if (fcntl(fd, F_ADD_SEALS, seals) == \-1)
- err(EXIT_FAILURE, "fcntl");
- }
-\&
- /* Keep running, so that the file created by memfd_create()
- continues to exist. */
-\&
- pause();
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SS Program source: t_get_seals.c
-\&
-.\" SRC BEGIN (t_get_seals.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- unsigned int seals;
-\&
- if (argc != 2) {
- fprintf(stderr, "%s /proc/PID/fd/FD\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- fd = open(argv[1], O_RDWR);
- if (fd == \-1)
- err(EXIT_FAILURE, "open");
-\&
- seals = fcntl(fd, F_GET_SEALS);
- if (seals == \-1)
- err(EXIT_FAILURE, "fcntl");
-\&
- printf("Existing seals:");
- if (seals & F_SEAL_SEAL)
- printf(" SEAL");
- if (seals & F_SEAL_GROW)
- printf(" GROW");
- if (seals & F_SEAL_WRITE)
- printf(" WRITE");
- if (seals & F_SEAL_FUTURE_WRITE)
- printf(" FUTURE_WRITE");
- if (seals & F_SEAL_SHRINK)
- printf(" SHRINK");
- printf("\en");
-\&
- /* Code to map the file and access the contents of the
- resulting mapping omitted. */
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR fcntl (2),
-.BR ftruncate (2),
-.BR memfd_secret (2),
-.BR mmap (2),
-.BR shmget (2),
-.BR shm_open (3)
diff --git a/man2/memfd_secret.2 b/man2/memfd_secret.2
deleted file mode 100644
index 5d2436cc5..000000000
--- a/man2/memfd_secret.2
+++ /dev/null
@@ -1,204 +0,0 @@
-.\" Copyright (c) 2021, IBM Corporation.
-.\" Written by Mike Rapoport <rppt@linux.ibm.com>
-.\"
-.\" Based on memfd_create(2) man page
-.\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH memfd_secret 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-memfd_secret \- create an anonymous RAM-based file
-to access secret memory regions
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.P
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_memfd_secret, unsigned int " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR memfd_secret (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR memfd_secret ()
-creates an anonymous RAM-based file and returns a file descriptor
-that refers to it.
-The file provides a way to create and access memory regions
-with stronger protection than usual RAM-based files and
-anonymous memory mappings.
-Once all open references to the file are closed,
-it is automatically released.
-The initial size of the file is set to 0.
-Following the call, the file size should be set using
-.BR ftruncate (2).
-.P
-The memory areas backing the file created with
-.BR memfd_secret ()
-are visible only to the processes that have access to the file descriptor.
-The memory region is removed from the kernel page tables
-and only the page tables of the processes holding the file descriptor
-map the corresponding physical memory.
-(Thus, the pages in the region can't be accessed by the kernel itself,
-so that, for example, pointers to the region can't be passed to
-system calls.)
-.P
-The following values may be bitwise ORed in
-.I flags
-to control the behavior of
-.BR memfd_secret ():
-.TP
-.B FD_CLOEXEC
-Set the close-on-exec flag on the new file descriptor,
-which causes the region to be removed from the process on
-.BR execve (2).
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.P
-As its return value,
-.BR memfd_secret ()
-returns a new file descriptor that refers to an anonymous file.
-This file descriptor is opened for both reading and writing
-.RB ( O_RDWR )
-and
-.B O_LARGEFILE
-is set for the file descriptor.
-.P
-With respect to
-.BR fork (2)
-and
-.BR execve (2),
-the usual semantics apply for the file descriptor created by
-.BR memfd_secret ().
-A copy of the file descriptor is inherited by the child produced by
-.BR fork (2)
-and refers to the same file.
-The file descriptor is preserved across
-.BR execve (2),
-unless the close-on-exec flag has been set.
-.P
-The memory region is locked into memory in the same way as with
-.BR mlock (2),
-so that it will never be written into swap,
-and hibernation is inhibited for as long as any
-.BR memfd_secret ()
-descriptions exist.
-However the implementation of
-.BR memfd_secret ()
-will not try to populate the whole range during the
-.BR mmap (2)
-call that attaches the region into the process's address space;
-instead, the pages are only actually allocated
-as they are faulted in.
-The amount of memory allowed for memory mappings
-of the file descriptor obeys the same rules as
-.BR mlock (2)
-and cannot exceed
-.BR RLIMIT_MEMLOCK .
-.SH RETURN VALUE
-On success,
-.BR memfd_secret ()
-returns a new file descriptor.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I flags
-included unknown bits.
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOMEM
-There was insufficient memory to create a new anonymous file.
-.TP
-.B ENOSYS
-.BR memfd_secret ()
-is not implemented on this architecture,
-or has not been enabled on the kernel command-line with
-.BR secretmem.enable =1.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.14.
-.SH NOTES
-The
-.BR memfd_secret ()
-system call is designed to allow a user-space process
-to create a range of memory that is inaccessible to anybody else -
-kernel included.
-There is no 100% guarantee that the kernel won't be able to access
-memory ranges backed by
-.BR memfd_secret ()
-in any circumstances, but nevertheless,
-it is much harder to exfiltrate data from these regions.
-.P
-.BR memfd_secret ()
-provides the following protections:
-.IP \[bu] 3
-Enhanced protection
-(in conjunction with all the other in-kernel attack prevention systems)
-against ROP attacks.
-Absence of any in-kernel primitive for accessing memory backed by
-.BR memfd_secret ()
-means that a one-gadget ROP attack
-can't work to perform data exfiltration.
-The attacker would need to find enough ROP gadgets
-to reconstruct the missing page table entries,
-which significantly increases the difficulty of the attack,
-especially when other protections like the kernel stack size limit
-and address space layout randomization are in place.
-.IP \[bu]
-Prevent cross-process user-space memory exposures.
-Once a region for a
-.BR memfd_secret ()
-memory mapping is allocated,
-the user can't accidentally pass it into the kernel
-to be transmitted somewhere.
-The memory pages in this region cannot be accessed via the direct map
-and they are disallowed in get_user_pages.
-.IP \[bu]
-Harden against exploited kernel flaws.
-In order to access memory areas backed by
-.BR memfd_secret (),
-a kernel-side attack would need to
-either walk the page tables and create new ones,
-or spawn a new privileged user-space process to perform
-secrets exfiltration using
-.BR ptrace (2).
-.P
-The way
-.BR memfd_secret ()
-allocates and locks the memory may impact overall system performance,
-therefore the system call is disabled by default and only available
-if the system administrator turned it on using
-the "secretmem.enable=y" kernel parameter.
-.P
-To prevent potential data leaks of memory regions backed by
-.BR memfd_secret ()
-from a hibernation image,
-hibernation is prevented when there are active
-.BR memfd_secret ()
-users.
-.SH SEE ALSO
-.BR fcntl (2),
-.BR ftruncate (2),
-.BR mlock (2),
-.BR memfd_create (2),
-.BR mmap (2),
-.BR setrlimit (2)
diff --git a/man2/migrate_pages.2 b/man2/migrate_pages.2
deleted file mode 100644
index 3e96f0322..000000000
--- a/man2/migrate_pages.2
+++ /dev/null
@@ -1,174 +0,0 @@
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft-2-para
-.\"
-.\" Copyright 2009 Intel Corporation
-.\" Author: Andi Kleen
-.\" Based on the move_pages manpage which was
-.\" This manpage is Copyright (C) 2006 Silicon Graphics, Inc.
-.\" Christoph Lameter
-.\"
-.TH migrate_pages 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-migrate_pages \- move all pages in a process to another set of nodes
-.SH LIBRARY
-NUMA (Non-Uniform Memory Access) policy library
-.RI ( libnuma ", " \-lnuma )
-.SH SYNOPSIS
-.nf
-.B #include <numaif.h>
-.P
-.BI "long migrate_pages(int " pid ", unsigned long " maxnode,
-.BI " const unsigned long *" old_nodes,
-.BI " const unsigned long *" new_nodes );
-.fi
-.SH DESCRIPTION
-.BR migrate_pages ()
-attempts to move all pages of the process
-.I pid
-that are in memory nodes
-.I old_nodes
-to the memory nodes in
-.IR new_nodes .
-Pages not located in any node in
-.I old_nodes
-will not be migrated.
-As far as possible,
-the kernel maintains the relative topology relationship inside
-.I old_nodes
-during the migration to
-.IR new_nodes .
-.P
-The
-.I old_nodes
-and
-.I new_nodes
-arguments are pointers to bit masks of node numbers, with up to
-.I maxnode
-bits in each mask.
-These masks are maintained as arrays of unsigned
-.I long
-integers (in the last
-.I long
-integer, the bits beyond those specified by
-.I maxnode
-are ignored).
-The
-.I maxnode
-argument is the maximum node number in the bit mask plus one (this is the same
-as in
-.BR mbind (2),
-but different from
-.BR select (2)).
-.P
-The
-.I pid
-argument is the ID of the process whose pages are to be moved.
-To move pages in another process,
-the caller must be privileged
-.RB ( CAP_SYS_NICE )
-or the real or effective user ID of the calling process must match the
-real or saved-set user ID of the target process.
-If
-.I pid
-is 0, then
-.BR migrate_pages ()
-moves pages of the calling process.
-.P
-Pages shared with another process will be moved only if the initiating
-process has the
-.B CAP_SYS_NICE
-privilege.
-.SH RETURN VALUE
-On success
-.BR migrate_pages ()
-returns the number of pages that could not be moved
-(i.e., a return of zero means that all pages were successfully moved).
-On error, it returns \-1, and sets
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Part or all of the memory range specified by
-.IR old_nodes / new_nodes
-and
-.I maxnode
-points outside your accessible address space.
-.TP
-.B EINVAL
-The value specified by
-.I maxnode
-exceeds a kernel-imposed limit.
-.\" As at 3.5, this limit is "a page worth of bits", e.g.,
-.\" 8 * 4096 bits, assuming a 4kB page size.
-Or,
-.I old_nodes
-or
-.I new_nodes
-specifies one or more node IDs that are
-greater than the maximum supported node ID.
-Or, none of the node IDs specified by
-.I new_nodes
-are on-line and allowed by the process's current cpuset context,
-or none of the specified nodes contain memory.
-.TP
-.B EPERM
-Insufficient privilege
-.RB ( CAP_SYS_NICE )
-to move pages of the process specified by
-.IR pid ,
-or insufficient privilege
-.RB ( CAP_SYS_NICE )
-to access the specified target nodes.
-.TP
-.B ESRCH
-No process matching
-.I pid
-could be found.
-.\" FIXME Document the other errors that can occur for migrate_pages()
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.16.
-.SH NOTES
-For information on library support, see
-.BR numa (7).
-.P
-Use
-.BR get_mempolicy (2)
-with the
-.B MPOL_F_MEMS_ALLOWED
-flag to obtain the set of nodes that are allowed by
-the calling process's cpuset.
-Note that this information is subject to change at any
-time by manual or automatic reconfiguration of the cpuset.
-.P
-Use of
-.BR migrate_pages ()
-may result in pages whose location
-(node) violates the memory policy established for the
-specified addresses (see
-.BR mbind (2))
-and/or the specified process (see
-.BR set_mempolicy (2)).
-That is, memory policy does not constrain the destination
-nodes used by
-.BR migrate_pages ().
-.P
-The
-.I <numaif.h>
-header is not included with glibc, but requires installing
-.I libnuma\-devel
-or a similar package.
-.SH SEE ALSO
-.BR get_mempolicy (2),
-.BR mbind (2),
-.BR set_mempolicy (2),
-.BR numa (3),
-.BR numa_maps (5),
-.BR cpuset (7),
-.BR numa (7),
-.BR migratepages (8),
-.BR numastat (8)
-.P
-.I Documentation/vm/page_migration.rst
-in the Linux kernel source tree
diff --git a/man2/mincore.2 b/man2/mincore.2
deleted file mode 100644
index bc34562dc..000000000
--- a/man2/mincore.2
+++ /dev/null
@@ -1,158 +0,0 @@
-.\" Copyright (C) 2001 Bert Hubert <ahu@ds9a.nl>
-.\" and Copyright (C) 2007 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Created Sun Jun 3 17:23:32 2001 by bert hubert <ahu@ds9a.nl>
-.\" Slightly adapted, following comments by Hugh Dickins, aeb, 2001-06-04.
-.\" Modified, 20 May 2003, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified, 30 Apr 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2005-04-05 mtk, Fixed error descriptions
-.\" after message from <gordon.jin@intel.com>
-.\" 2007-01-08 mtk, rewrote various parts
-.\"
-.TH mincore 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mincore \- determine whether pages are resident in memory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/mman.h>
-.P
-.BI "int mincore(void " addr [. length "], size_t " length ", unsigned char *" vec );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR mincore ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- glibc 2.19 and earlier:
- _BSD_SOURCE || _SVID_SOURCE
-.fi
-.SH DESCRIPTION
-.BR mincore ()
-returns a vector that indicates whether pages
-of the calling process's virtual memory are resident in core (RAM),
-and so will not cause a disk access (page fault) if referenced.
-The kernel returns residency information about the pages
-starting at the address
-.IR addr ,
-and continuing for
-.I length
-bytes.
-.P
-The
-.I addr
-argument must be a multiple of the system page size.
-The
-.I length
-argument need not be a multiple of the page size,
-but since residency information is returned for whole pages,
-.I length
-is effectively rounded up to the next multiple of the page size.
-One may obtain the page size
-.RB ( PAGE_SIZE )
-using
-.IR sysconf(_SC_PAGESIZE) .
-.P
-The
-.I vec
-argument must point to an array containing at least
-.I "(length+PAGE_SIZE\-1) / PAGE_SIZE"
-bytes.
-On return,
-the least significant bit of each byte will be set if
-the corresponding page is currently resident in memory,
-and be clear otherwise.
-(The settings of the other bits in each byte are undefined;
-these bits are reserved for possible later use.)
-Of course the information returned in
-.I vec
-is only a snapshot: pages that are not
-locked in memory can come and go at any moment, and the contents of
-.I vec
-may already be stale by the time this call returns.
-.SH RETURN VALUE
-On success,
-.BR mincore ()
-returns zero.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-The kernel is temporarily out of resources.
-.TP
-.B EFAULT
-.I vec
-points to an invalid address.
-.TP
-.B EINVAL
-.I addr
-is not a multiple of the page size.
-.TP
-.B ENOMEM
-.I length
-is greater than
-.RI ( TASK_SIZE " \- " addr ).
-(This could occur if a negative value is specified for
-.IR length ,
-since that value will be interpreted as a large
-unsigned integer.)
-In Linux 2.6.11 and earlier, the error
-.B EINVAL
-was returned for this condition.
-.TP
-.B ENOMEM
-.I addr
-to
-.I addr
-+
-.I length
-contained unmapped memory.
-.SH STANDARDS
-None.
-.SH HISTORY
-Linux 2.3.99pre1,
-glibc 2.2.
-.P
-First appeared in 4.4BSD.
-.P
-NetBSD, FreeBSD, OpenBSD, Solaris 8,
-AIX 5.1, SunOS 4.1.
-.SH BUGS
-Before Linux 2.6.21,
-.BR mincore ()
-did not return correct information for
-.B MAP_PRIVATE
-mappings, or for nonlinear mappings (established using
-.BR remap_file_pages (2)).
-.\" Linux (up to now, 2.6.5),
-.\" .B mincore
-.\" does not return correct information for MAP_PRIVATE mappings:
-.\" for a MAP_PRIVATE file mapping,
-.\" .B mincore
-.\" returns the residency of the file pages, rather than any
-.\" modified process-private pages that have been copied on write;
-.\" for a MAP_PRIVATE mapping of
-.\" .IR /dev/zero ,
-.\" .B mincore
-.\" always reports pages as nonresident;
-.\" and for a MAP_PRIVATE, MAP_ANONYMOUS mapping,
-.\" .B mincore
-.\" always fails with the error
-.\" .BR ENOMEM .
-.SH SEE ALSO
-.BR fincore (1),
-.BR madvise (2),
-.BR mlock (2),
-.BR mmap (2),
-.BR posix_fadvise (2),
-.BR posix_madvise (3)
diff --git a/man2/mkdir.2 b/man2/mkdir.2
deleted file mode 100644
index 1d9cddbe6..000000000
--- a/man2/mkdir.2
+++ /dev/null
@@ -1,250 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt
-.\" and Copyright (C) 1993,1994 Ian Jackson
-.\" and Copyright (C) 2006, 2014 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.TH mkdir 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mkdir, mkdirat \- create a directory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/stat.h>
-.\" .B #include <unistd.h>
-.P
-.BI "int mkdir(const char *" pathname ", mode_t " mode );
-.P
-.BR "#include <fcntl.h> " "/* Definition of AT_* constants */"
-.B #include <sys/stat.h>
-.P
-.BI "int mkdirat(int " dirfd ", const char *" pathname ", mode_t " mode );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR mkdirat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-.BR mkdir ()
-attempts to create a directory named
-.IR pathname .
-.P
-The argument
-.I mode
-specifies the mode for the new directory (see
-.BR inode (7)).
-It is modified by the process's
-.I umask
-in the usual way: in the absence of a default ACL, the mode of the
-created directory is
-.RI ( mode " & \[ti]" umask " & 0777)."
-Whether other
-.I mode
-bits are honored for the created directory depends on the operating system.
-For Linux, see NOTES below.
-.P
-The newly created directory will be owned by the effective user ID of the
-process.
-If the parent directory has the set-group-ID
-bit set, or if the filesystem is mounted with BSD group semantics
-.RI ( "mount \-o bsdgroups"
-or, synonymously
-.IR "mount \-o grpid" ),
-the new directory will inherit the group ownership from its parent;
-otherwise it will be owned by the effective group ID of the process.
-.P
-If the parent directory has the set-group-ID bit set, then so will the
-newly created directory.
-.\"
-.\"
-.SS mkdirat()
-The
-.BR mkdirat ()
-system call operates in exactly the same way as
-.BR mkdir (),
-except for the differences described here.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR mkdir ()
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR mkdir ()).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR mkdirat ().
-.SH RETURN VALUE
-.BR mkdir ()
-and
-.BR mkdirat ()
-return zero on success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The parent directory does not allow write permission to the process,
-or one of the directories in
-.I pathname
-did not allow search permission.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.RB ( mkdirat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EDQUOT
-The user's quota of disk blocks or inodes on the filesystem has been
-exhausted.
-.TP
-.B EEXIST
-.I pathname
-already exists (not necessarily as a directory).
-This includes the case where
-.I pathname
-is a symbolic link, dangling or not.
-.TP
-.B EFAULT
-.IR pathname " points outside your accessible address space."
-.TP
-.B EINVAL
-The final component ("basename") of the new directory's
-.I pathname
-is invalid
-(e.g., it contains characters not permitted by the underlying filesystem).
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR pathname .
-.TP
-.B EMLINK
-The number of links to the parent directory would exceed
-.BR LINK_MAX .
-.TP
-.B ENAMETOOLONG
-.IR pathname " was too long."
-.TP
-.B ENOENT
-A directory component in
-.I pathname
-does not exist or is a dangling symbolic link.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOSPC
-The device containing
-.I pathname
-has no room for the new directory.
-.TP
-.B ENOSPC
-The new directory cannot be created because the user's disk quota is
-exhausted.
-.TP
-.B ENOTDIR
-A component used as a directory in
-.I pathname
-is not, in fact, a directory.
-.TP
-.B ENOTDIR
-.RB ( mkdirat ())
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.TP
-.B EPERM
-The filesystem containing
-.I pathname
-does not support the creation of directories.
-.TP
-.B EROFS
-.I pathname
-refers to a file on a read-only filesystem.
-.SH VERSIONS
-Under Linux, apart from the permission bits, the
-.B S_ISVTX
-.I mode
-bit is also honored.
-.SS glibc notes
-On older kernels where
-.BR mkdirat ()
-is unavailable, the glibc wrapper function falls back to the use of
-.BR mkdir ().
-When
-.I pathname
-is a relative pathname,
-glibc constructs a pathname based on the symbolic link in
-.I /proc/self/fd
-that corresponds to the
-.I dirfd
-argument.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR mkdir ()
-SVr4, BSD, POSIX.1-2001.
-.\" SVr4 documents additional EIO, EMULTIHOP
-.TP
-.BR mkdirat ()
-Linux 2.6.16,
-glibc 2.4.
-.SH NOTES
-There are many infelicities in the protocol underlying NFS.
-Some of these affect
-.BR mkdir ().
-.SH SEE ALSO
-.BR mkdir (1),
-.BR chmod (2),
-.BR chown (2),
-.BR mknod (2),
-.BR mount (2),
-.BR rmdir (2),
-.BR stat (2),
-.BR umask (2),
-.BR unlink (2),
-.BR acl (5),
-.BR path_resolution (7)
diff --git a/man2/mkdirat.2 b/man2/mkdirat.2
deleted file mode 100644
index 467b98a8b..000000000
--- a/man2/mkdirat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/mkdir.2
diff --git a/man2/mknod.2 b/man2/mknod.2
deleted file mode 100644
index 86b3b8c4f..000000000
--- a/man2/mknod.2
+++ /dev/null
@@ -1,302 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt
-.\" and Copyright (C) 1993,1994 Ian Jackson
-.\" and Copyright (C) 2006, 2014, Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.\" Modified 1996-08-18 by urs
-.\" Modified 2003-04-23 by Michael Kerrisk
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH mknod 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mknod, mknodat \- create a special or ordinary file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/stat.h>
-.P
-.BI "int mknod(const char *" pathname ", mode_t " mode ", dev_t " dev );
-.P
-.BR "#include <fcntl.h> " "/* Definition of AT_* constants */"
-.B #include <sys/stat.h>
-.P
-.BI "int mknodat(int " dirfd ", const char *" pathname ", mode_t " mode \
-", dev_t " dev );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR mknod ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.19: */ _DEFAULT_SOURCE
- || /* glibc <= 2.19: */ _BSD_SOURCE || _SVID_SOURCE
-.fi
-.SH DESCRIPTION
-The system call
-.BR mknod ()
-creates a filesystem node (file, device special file, or
-named pipe) named
-.IR pathname ,
-with attributes specified by
-.I mode
-and
-.IR dev .
-.P
-The
-.I mode
-argument specifies both the file mode to use and the type of node
-to be created.
-It should be a combination (using bitwise OR) of one of the file types
-listed below and zero or more of the file mode bits listed in
-.BR inode (7).
-.P
-The file mode is modified by the process's
-.I umask
-in the usual way: in the absence of a default ACL, the permissions of the
-created node are
-.RI ( mode " & \[ti]" umask ).
-.P
-The file type must be one of
-.BR S_IFREG ,
-.BR S_IFCHR ,
-.BR S_IFBLK ,
-.BR S_IFIFO ,
-or
-.B S_IFSOCK
-.\" (S_IFSOCK since Linux 1.2.4)
-to specify a regular file (which will be created empty), character
-special file, block special file, FIFO (named pipe), or UNIX domain socket,
-respectively.
-(Zero file type is equivalent to type
-.BR S_IFREG .)
-.P
-If the file type is
-.B S_IFCHR
-or
-.BR S_IFBLK ,
-then
-.I dev
-specifies the major and minor numbers of the newly created device
-special file
-.RB ( makedev (3)
-may be useful to build the value for
-.IR dev );
-otherwise it is ignored.
-.P
-If
-.I pathname
-already exists, or is a symbolic link, this call fails with an
-.B EEXIST
-error.
-.P
-The newly created node will be owned by the effective user ID of the
-process.
-If the directory containing the node has the set-group-ID
-bit set, or if the filesystem is mounted with BSD group semantics, the
-new node will inherit the group ownership from its parent directory;
-otherwise it will be owned by the effective group ID of the process.
-.\"
-.\"
-.SS mknodat()
-The
-.BR mknodat ()
-system call operates in exactly the same way as
-.BR mknod (),
-except for the differences described here.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR mknod ()
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR mknod ()).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR mknodat ().
-.SH RETURN VALUE
-.BR mknod ()
-and
-.BR mknodat ()
-return zero on success.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The parent directory does not allow write permission to the process,
-or one of the directories in the path prefix of
-.I pathname
-did not allow search permission.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.RB ( mknodat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EDQUOT
-The user's quota of disk blocks or inodes on the filesystem has been
-exhausted.
-.TP
-.B EEXIST
-.I pathname
-already exists.
-This includes the case where
-.I pathname
-is a symbolic link, dangling or not.
-.TP
-.B EFAULT
-.IR pathname " points outside your accessible address space."
-.TP
-.B EINVAL
-.I mode
-requested creation of something other than a regular file, device
-special file, FIFO or socket.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR pathname .
-.TP
-.B ENAMETOOLONG
-.IR pathname " was too long."
-.TP
-.B ENOENT
-A directory component in
-.I pathname
-does not exist or is a dangling symbolic link.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOSPC
-The device containing
-.I pathname
-has no room for the new node.
-.TP
-.B ENOTDIR
-A component used as a directory in
-.I pathname
-is not, in fact, a directory.
-.TP
-.B ENOTDIR
-.RB ( mknodat ())
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.TP
-.B EPERM
-.I mode
-requested creation of something other than a regular file,
-FIFO (named pipe), or UNIX domain socket, and the caller
-is not privileged (Linux: does not have the
-.B CAP_MKNOD
-capability);
-.\" For UNIX domain sockets and regular files, EPERM is returned only in
-.\" Linux 2.2 and earlier; in Linux 2.4 and later, unprivileged can
-.\" use mknod() to make these files.
-also returned if the filesystem containing
-.I pathname
-does not support the type of node requested.
-.TP
-.B EROFS
-.I pathname
-refers to a file on a read-only filesystem.
-.SH VERSIONS
-POSIX.1-2001 says: "The only portable use of
-.BR mknod ()
-is to create a FIFO-special file.
-If
-.I mode
-is not
-.B S_IFIFO
-or
-.I dev
-is not 0, the behavior of
-.BR mknod ()
-is unspecified."
-However, nowadays one should never use
-.BR mknod ()
-for this purpose; one should use
-.BR mkfifo (3),
-a function especially defined for this purpose.
-.P
-Under Linux,
-.BR mknod ()
-cannot be used to create directories.
-One should make directories with
-.BR mkdir (2).
-.\" and one should make UNIX domain sockets with socket(2) and bind(2).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR mknod ()
-SVr4, 4.4BSD, POSIX.1-2001 (but see VERSIONS).
-.\" The Linux version differs from the SVr4 version in that it
-.\" does not require root permission to create pipes, also in that no
-.\" EMULTIHOP, ENOLINK, or EINTR error is documented.
-.TP
-.BR mknodat ()
-Linux 2.6.16,
-glibc 2.4.
-POSIX.1-2008.
-.SH NOTES
-There are many infelicities in the protocol underlying NFS.
-Some of these affect
-.BR mknod ()
-and
-.BR mknodat ().
-.SH SEE ALSO
-.BR mknod (1),
-.BR chmod (2),
-.BR chown (2),
-.BR fcntl (2),
-.BR mkdir (2),
-.BR mount (2),
-.BR socket (2),
-.BR stat (2),
-.BR umask (2),
-.BR unlink (2),
-.BR makedev (3),
-.BR mkfifo (3),
-.BR acl (5),
-.BR path_resolution (7)
diff --git a/man2/mknodat.2 b/man2/mknodat.2
deleted file mode 100644
index 3db228243..000000000
--- a/man2/mknodat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/mknod.2
diff --git a/man2/mlock.2 b/man2/mlock.2
deleted file mode 100644
index 30f6ac130..000000000
--- a/man2/mlock.2
+++ /dev/null
@@ -1,507 +0,0 @@
-.\" Copyright (C) Michael Kerrisk, 2004
-.\" using some material drawn from earlier man pages
-.\" written by Thomas Kuhn, Copyright 1996
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH mlock 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mlock, mlock2, munlock, mlockall, munlockall \- lock and unlock memory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/mman.h>
-.P
-.BI "int mlock(const void " addr [. len "], size_t " len );
-.BI "int mlock2(const void " addr [. len "], size_t " len ", \
-unsigned int " flags );
-.BI "int munlock(const void " addr [. len "], size_t " len );
-.P
-.BI "int mlockall(int " flags );
-.B int munlockall(void);
-.fi
-.SH DESCRIPTION
-.BR mlock (),
-.BR mlock2 (),
-and
-.BR mlockall ()
-lock part or all of the calling process's virtual address
-space into RAM, preventing that memory from being paged to the
-swap area.
-.P
-.BR munlock ()
-and
-.BR munlockall ()
-perform the converse operation,
-unlocking part or all of the calling process's virtual address space,
-so that pages in the specified virtual address range
-can be swapped out again if required by the kernel memory manager.
-.P
-Memory locking and unlocking are performed in units of whole pages.
-.SS mlock(), mlock2(), and munlock()
-.BR mlock ()
-locks pages in the address range starting at
-.I addr
-and continuing for
-.I len
-bytes.
-All pages that contain a part of the specified address range are
-guaranteed to be resident in RAM when the call returns successfully;
-the pages are guaranteed to stay in RAM until later unlocked.
-.P
-.BR mlock2 ()
-.\" commit a8ca5d0ecbdde5cc3d7accacbd69968b0c98764e
-.\" commit de60f5f10c58d4f34b68622442c0e04180367f3f
-.\" commit b0f205c2a3082dd9081f9a94e50658c5fa906ff1
-also locks pages in the specified range starting at
-.I addr
-and continuing for
-.I len
-bytes.
-However, the state of the pages contained in that range after the call
-returns successfully will depend on the value in the
-.I flags
-argument.
-.P
-The
-.I flags
-argument can be either 0 or the following constant:
-.TP
-.B MLOCK_ONFAULT
-Lock pages that are currently resident and mark the entire range so
-that the remaining nonresident pages are locked when they are populated
-by a page fault.
-.P
-If
-.I flags
-is 0,
-.BR mlock2 ()
-behaves exactly the same as
-.BR mlock ().
-.P
-.BR munlock ()
-unlocks pages in the address range starting at
-.I addr
-and continuing for
-.I len
-bytes.
-After this call, all pages that contain a part of the specified
-memory range can be moved to external swap space again by the kernel.
-.SS mlockall() and munlockall()
-.BR mlockall ()
-locks all pages mapped into the address space of the
-calling process.
-This includes the pages of the code, data, and stack
-segment, as well as shared libraries, user space kernel data, shared
-memory, and memory-mapped files.
-All mapped pages are guaranteed
-to be resident in RAM when the call returns successfully;
-the pages are guaranteed to stay in RAM until later unlocked.
-.P
-The
-.I flags
-argument is constructed as the bitwise OR of one or more of the
-following constants:
-.TP
-.B MCL_CURRENT
-Lock all pages which are currently mapped into the address space of
-the process.
-.TP
-.B MCL_FUTURE
-Lock all pages which will become mapped into the address space of the
-process in the future.
-These could be, for instance, new pages required
-by a growing heap and stack as well as new memory-mapped files or
-shared memory regions.
-.TP
-.BR MCL_ONFAULT " (since Linux 4.4)"
-Used together with
-.BR MCL_CURRENT ,
-.BR MCL_FUTURE ,
-or both.
-Mark all current (with
-.BR MCL_CURRENT )
-or future (with
-.BR MCL_FUTURE )
-mappings to lock pages when they are faulted in.
-When used with
-.BR MCL_CURRENT ,
-all present pages are locked, but
-.BR mlockall ()
-will not fault in non-present pages.
-When used with
-.BR MCL_FUTURE ,
-all future mappings will be marked to lock pages when they are faulted
-in, but they will not be populated by the lock when the mapping is
-created.
-.B MCL_ONFAULT
-must be used with either
-.B MCL_CURRENT
-or
-.B MCL_FUTURE
-or both.
-.P
-If
-.B MCL_FUTURE
-has been specified, then a later system call (e.g.,
-.BR mmap (2),
-.BR sbrk (2),
-.BR malloc (3))
-may fail if it would cause the number of locked bytes to exceed
-the permitted maximum (see below).
-In the same circumstances, stack growth may likewise fail:
-the kernel will deny stack expansion and deliver a
-.B SIGSEGV
-signal to the process.
-.P
-.BR munlockall ()
-unlocks all pages mapped into the address space of the
-calling process.
-.SH RETURN VALUE
-On success, these system calls return 0.
-On error, \-1 is returned,
-.I errno
-is set to indicate the error,
-and no changes are made to any locks in the
-address space of the process.
-.SH ERRORS
-.\"SVr4 documents an additional EAGAIN error code.
-.TP
-.B EAGAIN
-.RB ( mlock (),
-.BR mlock2 (),
-and
-.BR munlock ())
-Some or all of the specified address range could not be locked.
-.TP
-.B EINVAL
-.RB ( mlock (),
-.BR mlock2 (),
-and
-.BR munlock ())
-The result of the addition
-.IR addr + len
-was less than
-.I addr
-(e.g., the addition may have resulted in an overflow).
-.TP
-.B EINVAL
-.RB ( mlock2 ())
-Unknown \fIflags\fP were specified.
-.TP
-.B EINVAL
-.RB ( mlockall ())
-Unknown \fIflags\fP were specified or
-.B MCL_ONFAULT
-was specified without either
-.B MCL_FUTURE
-or
-.BR MCL_CURRENT .
-.TP
-.B EINVAL
-(Not on Linux)
-.I addr
-was not a multiple of the page size.
-.TP
-.B ENOMEM
-.RB ( mlock (),
-.BR mlock2 (),
-and
-.BR munlock ())
-Some of the specified address range does not correspond to mapped
-pages in the address space of the process.
-.TP
-.B ENOMEM
-.RB ( mlock (),
-.BR mlock2 (),
-and
-.BR munlock ())
-Locking or unlocking a region would result in the total number of
-mappings with distinct attributes (e.g., locked versus unlocked)
-exceeding the allowed maximum.
-.\" I.e., the number of VMAs would exceed the 64kB maximum
-(For example, unlocking a range in the middle of a currently locked
-mapping would result in three mappings:
-two locked mappings at each end and an unlocked mapping in the middle.)
-.TP
-.B ENOMEM
-(Linux 2.6.9 and later) the caller had a nonzero
-.B RLIMIT_MEMLOCK
-soft resource limit, but tried to lock more memory than the limit
-permitted.
-This limit is not enforced if the process is privileged
-.RB ( CAP_IPC_LOCK ).
-.TP
-.B ENOMEM
-(Linux 2.4 and earlier) the calling process tried to lock more than
-half of RAM.
-.\" In the case of mlock(), this check is somewhat buggy: it doesn't
-.\" take into account whether the to-be-locked range overlaps with
-.\" already locked pages. Thus, suppose we allocate
-.\" (num_physpages / 4 + 1) of memory, and lock those pages once using
-.\" mlock(), and then lock the *same* page range a second time.
-.\" In this case, the second mlock() call will fail, since the check
-.\" calculates that the process is trying to lock (num_physpages / 2 + 2)
-.\" pages, which of course is not true. (MTK, Nov 04, kernel 2.4.28)
-.TP
-.B EPERM
-The caller is not privileged, but needs privilege
-.RB ( CAP_IPC_LOCK )
-to perform the requested operation.
-.TP
-.B EPERM
-.RB ( munlockall ())
-(Linux 2.6.8 and earlier) The caller was not privileged
-.RB ( CAP_IPC_LOCK ).
-.SH VERSIONS
-.SS Linux
-Under Linux,
-.BR mlock (),
-.BR mlock2 (),
-and
-.BR munlock ()
-automatically round
-.I addr
-down to the nearest page boundary.
-However, the POSIX.1 specification of
-.BR mlock ()
-and
-.BR munlock ()
-allows an implementation to require that
-.I addr
-is page aligned, so portable applications should ensure this.
-.P
-The
-.I VmLck
-field of the Linux-specific
-.IR /proc/ pid /status
-file shows how many kilobytes of memory the process with ID
-.I pid
-has locked using
-.BR mlock (),
-.BR mlock2 (),
-.BR mlockall (),
-and
-.BR mmap (2)
-.BR MAP_LOCKED .
-.SH STANDARDS
-.TP
-.BR mlock ()
-.TQ
-.BR munlock ()
-.TQ
-.BR mlockall ()
-.TQ
-.BR munlockall ()
-POSIX.1-2008.
-.TP
-.BR mlock2 ()
-Linux.
-.P
-On POSIX systems on which
-.BR mlock ()
-and
-.BR munlock ()
-are available,
-.B _POSIX_MEMLOCK_RANGE
-is defined in \fI<unistd.h>\fP and the number of bytes in a page
-can be determined from the constant
-.B PAGESIZE
-(if defined) in \fI<limits.h>\fP or by calling
-.IR sysconf(_SC_PAGESIZE) .
-.P
-On POSIX systems on which
-.BR mlockall ()
-and
-.BR munlockall ()
-are available,
-.B _POSIX_MEMLOCK
-is defined in \fI<unistd.h>\fP to a value greater than 0.
-(See also
-.BR sysconf (3).)
-.\" POSIX.1-2001: It shall be defined to -1 or 0 or 200112L.
-.\" -1: unavailable, 0: ask using sysconf().
-.\" glibc defines it to 1.
-.SH HISTORY
-.TP
-.BR mlock ()
-.TQ
-.BR munlock ()
-.TQ
-.BR mlockall ()
-.TQ
-.BR munlockall ()
-POSIX.1-2001, POSIX.1-2008, SVr4.
-.TP
-.BR mlock2 ()
-Linux 4.4,
-glibc 2.27.
-.SH NOTES
-Memory locking has two main applications: real-time algorithms and
-high-security data processing.
-Real-time applications require
-deterministic timing, and, like scheduling, paging is one major cause
-of unexpected program execution delays.
-Real-time applications will
-usually also switch to a real-time scheduler with
-.BR sched_setscheduler (2).
-Cryptographic security software often handles critical bytes like
-passwords or secret keys as data structures.
-As a result of paging,
-these secrets could be transferred onto a persistent swap store medium,
-where they might be accessible to the enemy long after the security
-software has erased the secrets in RAM and terminated.
-(But be aware that the suspend mode on laptops and some desktop
-computers will save a copy of the system's RAM to disk, regardless
-of memory locks.)
-.P
-Real-time processes that are using
-.BR mlockall ()
-to prevent delays on page faults should reserve enough
-locked stack pages before entering the time-critical section,
-so that no page fault can be caused by function calls.
-This can be achieved by calling a function that allocates a
-sufficiently large automatic variable (an array) and writes to the
-memory occupied by this array in order to touch these stack pages.
-This way, enough pages will be mapped for the stack and can be
-locked into RAM.
-The dummy writes ensure that not even copy-on-write
-page faults can occur in the critical section.
-.P
-Memory locks are not inherited by a child created via
-.BR fork (2)
-and are automatically removed (unlocked) during an
-.BR execve (2)
-or when the process terminates.
-The
-.BR mlockall ()
-.B MCL_FUTURE
-and
-.B MCL_FUTURE | MCL_ONFAULT
-settings are not inherited by a child created via
-.BR fork (2)
-and are cleared during an
-.BR execve (2).
-.P
-Note that
-.BR fork (2)
-will prepare the address space for a copy-on-write operation.
-The consequence is that any write access that follows will cause
-a page fault that in turn may cause high latencies for a real-time process.
-Therefore, it is crucial not to invoke
-.BR fork (2)
-after an
-.BR mlockall ()
-or
-.BR mlock ()
-operation\[em]not even from a thread which runs at a low priority within
-a process which also has a thread running at elevated priority.
-.P
-The memory lock on an address range is automatically removed
-if the address range is unmapped via
-.BR munmap (2).
-.P
-Memory locks do not stack, that is, pages which have been locked several times
-by calls to
-.BR mlock (),
-.BR mlock2 (),
-or
-.BR mlockall ()
-will be unlocked by a single call to
-.BR munlock ()
-for the corresponding range or by
-.BR munlockall ().
-Pages which are mapped to several locations or by several processes stay
-locked into RAM as long as they are locked at least at one location or by
-at least one process.
-.P
-If a call to
-.BR mlockall ()
-which uses the
-.B MCL_FUTURE
-flag is followed by another call that does not specify this flag, the
-changes made by the
-.B MCL_FUTURE
-call will be lost.
-.P
-The
-.BR mlock2 ()
-.B MLOCK_ONFAULT
-flag and the
-.BR mlockall ()
-.B MCL_ONFAULT
-flag allow efficient memory locking for applications that deal with
-large mappings where only a (small) portion of pages in the mapping are touched.
-In such cases, locking all of the pages in a mapping would incur
-a significant penalty for memory locking.
-.SS Limits and permissions
-In Linux 2.6.8 and earlier,
-a process must be privileged
-.RB ( CAP_IPC_LOCK )
-in order to lock memory and the
-.B RLIMIT_MEMLOCK
-soft resource limit defines a limit on how much memory the process may lock.
-.P
-Since Linux 2.6.9, no limits are placed on the amount of memory
-that a privileged process can lock and the
-.B RLIMIT_MEMLOCK
-soft resource limit instead defines a limit on how much memory an
-unprivileged process may lock.
-.SH BUGS
-In Linux 4.8 and earlier,
-a bug in the kernel's accounting of locked memory for unprivileged processes
-(i.e., without
-.BR CAP_IPC_LOCK )
-meant that if the region specified by
-.I addr
-and
-.I len
-overlapped an existing lock,
-then the already locked bytes in the overlapping region were counted twice
-when checking against the limit.
-Such double accounting could incorrectly calculate a "total locked memory"
-value for the process that exceeded the
-.B RLIMIT_MEMLOCK
-limit, with the result that
-.BR mlock ()
-and
-.BR mlock2 ()
-would fail on requests that should have succeeded.
-This bug was fixed
-.\" commit 0cf2f6f6dc605e587d2c1120f295934c77e810e8
-in Linux 4.9.
-.P
-In the Linux 2.4 series of kernels up to and including Linux 2.4.17,
-a bug caused the
-.BR mlockall ()
-.B MCL_FUTURE
-flag to be inherited across a
-.BR fork (2).
-This was rectified in Linux 2.4.18.
-.P
-Since Linux 2.6.9, if a privileged process calls
-.I mlockall(MCL_FUTURE)
-and later drops privileges (loses the
-.B CAP_IPC_LOCK
-capability by, for example,
-setting its effective UID to a nonzero value),
-then subsequent memory allocations (e.g.,
-.BR mmap (2),
-.BR brk (2))
-will fail if the
-.B RLIMIT_MEMLOCK
-resource limit is encountered.
-.\" See the following LKML thread:
-.\" http://marc.theaimsgroup.com/?l=linux-kernel&m=113801392825023&w=2
-.\" "Rationale for RLIMIT_MEMLOCK"
-.\" 23 Jan 2006
-.SH SEE ALSO
-.BR mincore (2),
-.BR mmap (2),
-.BR setrlimit (2),
-.BR shmctl (2),
-.BR sysconf (3),
-.BR proc (5),
-.BR capabilities (7)
diff --git a/man2/mlock2.2 b/man2/mlock2.2
deleted file mode 100644
index 5e5b3c741..000000000
--- a/man2/mlock2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/mlock.2
diff --git a/man2/mlockall.2 b/man2/mlockall.2
deleted file mode 100644
index 5e5b3c741..000000000
--- a/man2/mlockall.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/mlock.2
diff --git a/man2/mmap.2 b/man2/mmap.2
deleted file mode 100644
index 63df5a98a..000000000
--- a/man2/mmap.2
+++ /dev/null
@@ -1,1037 +0,0 @@
-'\" t
-.\" Copyright (C) 1996 Andries Brouwer <aeb@cwi.nl>
-.\" and Copyright (C) 2006, 2007 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2000-03-25 by Jim Van Zandt <jrv@vanzandt.mv.com>
-.\" Modified 2001-10-04 by John Levon <moz@compsoc.man.ac.uk>
-.\" Modified 2003-02-02 by Andi Kleen <ak@muc.de>
-.\" Modified 2003-05-21 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" MAP_LOCKED works from Linux 2.5.37
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2004-09-11 by aeb
-.\" Modified 2004-12-08, from Eric Estievenart <eric.estievenart@free.fr>
-.\" Modified 2004-12-08, mtk, formatting tidy-ups
-.\" Modified 2006-12-04, mtk, various parts rewritten
-.\" 2007-07-10, mtk, Added an example program.
-.\" 2008-11-18, mtk, document MAP_STACK
-.\"
-.TH mmap 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mmap, munmap \- map or unmap files or devices into memory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/mman.h>
-.P
-.BI "void *mmap(void " addr [. length "], size_t " length \
-", int " prot ", int " flags ,
-.BI " int " fd ", off_t " offset );
-.BI "int munmap(void " addr [. length "], size_t " length );
-.fi
-.P
-See NOTES for information on feature test macro requirements.
-.SH DESCRIPTION
-.BR mmap ()
-creates a new mapping in the virtual address space of
-the calling process.
-The starting address for the new mapping is specified in
-.IR addr .
-The
-.I length
-argument specifies the length of the mapping (which must be greater than 0).
-.P
-If
-.I addr
-is NULL,
-then the kernel chooses the (page-aligned) address
-at which to create the mapping;
-this is the most portable method of creating a new mapping.
-If
-.I addr
-is not NULL,
-then the kernel takes it as a hint about where to place the mapping;
-on Linux, the kernel will pick a nearby page boundary (but always above
-or equal to the value specified by
-.IR /proc/sys/vm/mmap_min_addr )
-and attempt to create the mapping there.
-If another mapping already exists there, the kernel picks a new address that
-may or may not depend on the hint.
-.\" Before Linux 2.6.24, the address was rounded up to the next page
-.\" boundary; since Linux 2.6.24, it is rounded down!
-The address of the new mapping is returned as the result of the call.
-.P
-The contents of a file mapping (as opposed to an anonymous mapping; see
-.B MAP_ANONYMOUS
-below), are initialized using
-.I length
-bytes starting at offset
-.I offset
-in the file (or other object) referred to by the file descriptor
-.IR fd .
-.I offset
-must be a multiple of the page size as returned by
-.IR sysconf(_SC_PAGE_SIZE) .
-.P
-After the
-.BR mmap ()
-call has returned, the file descriptor,
-.IR fd ,
-can be closed immediately without invalidating the mapping.
-.P
-The
-.I prot
-argument describes the desired memory protection of the mapping
-(and must not conflict with the open mode of the file).
-It is either
-.B PROT_NONE
-or the bitwise OR of one or more of the following flags:
-.TP 1.1i
-.B PROT_EXEC
-Pages may be executed.
-.TP
-.B PROT_READ
-Pages may be read.
-.TP
-.B PROT_WRITE
-Pages may be written.
-.TP
-.B PROT_NONE
-Pages may not be accessed.
-.\"
-.SS The flags argument
-The
-.I flags
-argument determines whether updates to the mapping
-are visible to other processes mapping the same region,
-and whether updates are carried through to the underlying file.
-This behavior is determined by including exactly one
-of the following values in
-.IR flags :
-.TP
-.B MAP_SHARED
-Share this mapping.
-Updates to the mapping are visible to other processes mapping the same region,
-and (in the case of file-backed mappings)
-are carried through to the underlying file.
-(To precisely control when updates are carried through
-to the underlying file requires the use of
-.BR msync (2).)
-.TP
-.BR MAP_SHARED_VALIDATE " (since Linux 4.15)"
-This flag provides the same behavior as
-.B MAP_SHARED
-except that
-.B MAP_SHARED
-mappings ignore unknown flags in
-.IR flags .
-By contrast, when creating a mapping using
-.BR MAP_SHARED_VALIDATE ,
-the kernel verifies all passed flags are known and fails the
-mapping with the error
-.B EOPNOTSUPP
-for unknown flags.
-This mapping type is also required to be able to use some mapping flags
-(e.g.,
-.BR MAP_SYNC ).
-.TP
-.B MAP_PRIVATE
-Create a private copy-on-write mapping.
-Updates to the mapping are not visible to other processes
-mapping the same file, and are not carried through to
-the underlying file.
-It is unspecified whether changes made to the file after the
-.BR mmap ()
-call are visible in the mapped region.
-.P
-Both
-.B MAP_SHARED
-and
-.B MAP_PRIVATE
-are described in POSIX.1-2001 and POSIX.1-2008.
-.B MAP_SHARED_VALIDATE
-is a Linux extension.
-.P
-In addition, zero or more of the following values can be ORed in
-.IR flags :
-.TP
-.BR MAP_32BIT " (since Linux 2.4.20, 2.6)"
-Put the mapping into the first 2 Gigabytes of the process address space.
-This flag is supported only on x86-64, for 64-bit programs.
-It was added to allow thread stacks to be allocated somewhere
-in the first 2\ GB of memory,
-so as to improve context-switch performance on some early
-64-bit processors.
-.\" See http://lwn.net/Articles/294642 "Tangled up in threads", 19 Aug 08
-Modern x86-64 processors no longer have this performance problem,
-so use of this flag is not required on those systems.
-The
-.B MAP_32BIT
-flag is ignored when
-.B MAP_FIXED
-is set.
-.TP
-.B MAP_ANON
-Synonym for
-.BR MAP_ANONYMOUS ;
-provided for compatibility with other implementations.
-.TP
-.B MAP_ANONYMOUS
-The mapping is not backed by any file;
-its contents are initialized to zero.
-The
-.I fd
-argument is ignored;
-however, some implementations require
-.I fd
-to be \-1 if
-.B MAP_ANONYMOUS
-(or
-.BR MAP_ANON )
-is specified,
-and portable applications should ensure this.
-The
-.I offset
-argument should be zero.
-.\" See the pgoff overflow check in do_mmap().
-.\" See the offset check in sys_mmap in arch/x86/kernel/sys_x86_64.c.
-Support for
-.B MAP_ANONYMOUS
-in conjunction with
-.B MAP_SHARED
-was added in Linux 2.4.
-.TP
-.B MAP_DENYWRITE
-This flag is ignored.
-.\" Introduced in 1.1.36, removed in 1.3.24.
-(Long ago\[em]Linux 2.0 and earlier\[em]it signaled
-that attempts to write to the underlying file should fail with
-.BR ETXTBSY .
-But this was a source of denial-of-service attacks.)
-.TP
-.B MAP_EXECUTABLE
-This flag is ignored.
-.\" Introduced in 1.1.38, removed in 1.3.24. Flag tested in proc_follow_link.
-.\" (Long ago, it signaled that the underlying file is an executable.
-.\" However, that information was not really used anywhere.)
-.\" Linus talked about DOS related to MAP_EXECUTABLE, but he was thinking of
-.\" MAP_DENYWRITE?
-.TP
-.B MAP_FILE
-Compatibility flag.
-Ignored.
-.\" On some systems, this was required as the opposite of
-.\" MAP_ANONYMOUS -- mtk, 1 May 2007
-.TP
-.B MAP_FIXED
-Don't interpret
-.I addr
-as a hint: place the mapping at exactly that address.
-.I addr
-must be suitably aligned: for most architectures a multiple of the page
-size is sufficient; however, some architectures may impose additional
-restrictions.
-If the memory region specified by
-.I addr
-and
-.I length
-overlaps pages of any existing mapping(s), then the overlapped
-part of the existing mapping(s) will be discarded.
-If the specified address cannot be used,
-.BR mmap ()
-will fail.
-.IP
-Software that aspires to be portable should use the
-.B MAP_FIXED
-flag with care,
-keeping in mind that the exact layout of a process's memory mappings
-is allowed to change significantly between Linux versions,
-C library versions, and operating system releases.
-.I Carefully read the discussion of this flag in NOTES!
-.TP
-.BR MAP_FIXED_NOREPLACE " (since Linux 4.17)"
-.\" commit a4ff8e8620d3f4f50ac4b41e8067b7d395056843
-This flag provides behavior that is similar to
-.B MAP_FIXED
-with respect to the
-.I addr
-enforcement, but differs in that
-.B MAP_FIXED_NOREPLACE
-never clobbers a preexisting mapped range.
-If the requested range would collide with an existing mapping,
-then this call fails with the error
-.BR EEXIST .
-This flag can therefore be used as a way to atomically
-(with respect to other threads) attempt to map an address range:
-one thread will succeed; all others will report failure.
-.IP
-Note that older kernels which do not recognize the
-.B MAP_FIXED_NOREPLACE
-flag will typically (upon detecting a collision with a preexisting mapping)
-fall back to a
-.RB \[lq]non- MAP_FIXED \[rq]
-type of behavior:
-they will return an address that is different from the requested address.
-Therefore, backward-compatible software
-should check the returned address against the requested address.
-.TP
-.B MAP_GROWSDOWN
-This flag is used for stacks.
-It indicates to the kernel virtual memory system that the mapping
-should extend downward in memory.
-The return address is one page lower than the memory area that is
-actually created in the process's virtual address space.
-Touching an address in the "guard" page below the mapping will cause
-the mapping to grow by a page.
-This growth can be repeated until the mapping grows to within a
-page of the high end of the next lower mapping,
-at which point touching the "guard" page will result in a
-.B SIGSEGV
-signal.
-.TP
-.BR MAP_HUGETLB " (since Linux 2.6.32)"
-Allocate the mapping using "huge" pages.
-See the Linux kernel source file
-.I Documentation/admin\-guide/mm/hugetlbpage.rst
-for further information, as well as NOTES, below.
-.TP
-.B MAP_HUGE_2MB
-.TQ
-.BR MAP_HUGE_1GB " (since Linux 3.8)"
-.\" See https://lwn.net/Articles/533499/
-Used in conjunction with
-.B MAP_HUGETLB
-to select alternative hugetlb page sizes (respectively, 2\ MB and 1\ GB)
-on systems that support multiple hugetlb page sizes.
-.IP
-More generally, the desired huge page size can be configured by encoding
-the base-2 logarithm of the desired page size in the six bits at the offset
-.BR MAP_HUGE_SHIFT .
-(A value of zero in this bit field provides the default huge page size;
-the default huge page size can be discovered via the
-.I Hugepagesize
-field exposed by
-.IR /proc/meminfo .)
-Thus, the above two constants are defined as:
-.IP
-.in +4n
-.EX
-#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
-#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
-.EE
-.in
-.IP
-The range of huge page sizes that are supported by the system
-can be discovered by listing the subdirectories in
-.IR /sys/kernel/mm/hugepages .
-.TP
-.BR MAP_LOCKED " (since Linux 2.5.37)"
-Mark the mapped region to be locked in the same way as
-.BR mlock (2).
-This implementation will try to populate (prefault) the whole range but the
-.BR mmap ()
-call doesn't fail with
-.B ENOMEM
-if this fails.
-Therefore major faults might happen later on.
-So the semantics are not as strong as those of
-.BR mlock (2).
-One should use
-.BR mmap ()
-plus
-.BR mlock (2)
-when major faults are not acceptable after the initialization of the mapping.
-The
-.B MAP_LOCKED
-flag is ignored in older kernels.
-.\" If set, the mapped pages will not be swapped out.
-.TP
-.BR MAP_NONBLOCK " (since Linux 2.5.46)"
-This flag is meaningful only in conjunction with
-.BR MAP_POPULATE .
-Don't perform read-ahead:
-create page table entries only for pages
-that are already present in RAM.
-Since Linux 2.6.23,
-.\" commit 54cb8821de07f2ffcd28c380ce9b93d5784b40d7
-this flag causes
-.B MAP_POPULATE
-to do nothing.
-One day, the combination of
-.B MAP_POPULATE
-and
-.B MAP_NONBLOCK
-may be reimplemented.
-.TP
-.B MAP_NORESERVE
-Do not reserve swap space for this mapping.
-When swap space is reserved, one has the guarantee
-that it is possible to modify the mapping.
-When swap space is not reserved one might get
-.B SIGSEGV
-upon a write
-if no physical memory is available.
-See also the discussion of the file
-.I /proc/sys/vm/overcommit_memory
-in
-.BR proc (5).
-Before Linux 2.6, this flag had effect only for
-private writable mappings.
-.TP
-.BR MAP_POPULATE " (since Linux 2.5.46)"
-Populate (prefault) page tables for a mapping.
-For a file mapping, this causes read-ahead on the file.
-This will help to reduce blocking on page faults later.
-The
-.BR mmap ()
-call doesn't fail if the mapping cannot be populated (for example, due
-to limitations on the number of mapped huge pages when using
-.BR MAP_HUGETLB ).
-Support for
-.B MAP_POPULATE
-in conjunction with private mappings was added in Linux 2.6.23.
-.TP
-.BR MAP_STACK " (since Linux 2.6.27)"
-Allocate the mapping at an address suitable for a process
-or thread stack.
-.IP
-This flag is currently a no-op on Linux.
-However, by employing this flag, applications can ensure that
-they transparently obtain support if the flag
-is implemented in the future.
-Thus, it is used in the glibc threading implementation to allow for
-the fact that some architectures may (later) require special treatment
-for stack allocations.
-.\" See http://lwn.net/Articles/294642 "Tangled up in threads", 19 Aug 08
-.\" commit cd98a04a59e2f94fa64d5bf1e26498d27427d5e7
-.\" http://thread.gmane.org/gmane.linux.kernel/720412
-.\" "pthread_create() slow for many threads; also time to revisit 64b
-.\" context switch optimization?"
-A further reason to employ this flag is portability:
-.B MAP_STACK
-exists (and has an effect) on some other systems (e.g., some of the BSDs).
-.TP
-.BR MAP_SYNC " (since Linux 4.15)"
-This flag is available only with the
-.B MAP_SHARED_VALIDATE
-mapping type;
-mappings of type
-.B MAP_SHARED
-will silently ignore this flag.
-This flag is supported only for files supporting DAX
-(direct mapping of persistent memory).
-For other files, creating a mapping with this flag results in an
-.B EOPNOTSUPP
-error.
-.IP
-Shared file mappings with this flag provide the guarantee that while
-some memory is mapped writable in the address space of the process,
-it will be visible in the same file at the same offset even after
-the system crashes or is rebooted.
-In conjunction with the use of appropriate CPU instructions,
-this provides users of such mappings with a more efficient way
-of making data modifications persistent.
-.TP
-.BR MAP_UNINITIALIZED " (since Linux 2.6.33)"
-Don't clear anonymous pages.
-This flag is intended to improve performance on embedded devices.
-This flag is honored only if the kernel was configured with the
-.B CONFIG_MMAP_ALLOW_UNINITIALIZED
-option.
-Because of the security implications,
-that option is normally enabled only on embedded devices
-(i.e., devices where one has complete control of the contents of user memory).
-.P
-Of the above flags, only
-.B MAP_FIXED
-is specified in POSIX.1-2001 and POSIX.1-2008.
-However, most systems also support
-.B MAP_ANONYMOUS
-(or its synonym
-.BR MAP_ANON ).
-.\" FIXME . for later review when Issue 8 is one day released...
-.\" POSIX may add MAP_ANON in the future
-.\" http://austingroupbugs.net/tag_view_page.php?tag_id=8
-.\" http://austingroupbugs.net/view.php?id=850
-.SS munmap()
-The
-.BR munmap ()
-system call deletes the mappings for the specified address range, and
-causes further references to addresses within the range to generate
-invalid memory references.
-The region is also automatically unmapped
-when the process is terminated.
-On the other hand, closing the file
-descriptor does not unmap the region.
-.P
-The address
-.I addr
-must be a multiple of the page size (but
-.I length
-need not be).
-All pages containing a part
-of the indicated range are unmapped, and subsequent references
-to these pages will generate
-.BR SIGSEGV .
-It is not an error if the
-indicated range does not contain any mapped pages.
-.SH RETURN VALUE
-On success,
-.BR mmap ()
-returns a pointer to the mapped area.
-On error, the value
-.B MAP_FAILED
-(that is,
-.IR "(void\ *)\ \-1" )
-is returned, and
-.I errno
-is set to indicate the error.
-.P
-On success,
-.BR munmap ()
-returns 0.
-On failure, it returns \-1, and
-.I errno
-is set to indicate the error (probably to
-.BR EINVAL ).
-.SH ERRORS
-.TP
-.B EACCES
-A file descriptor refers to a non-regular file.
-Or a file mapping was requested, but
-.I fd
-is not open for reading.
-Or
-.B MAP_SHARED
-was requested and
-.B PROT_WRITE
-is set, but
-.I fd
-is not open in read/write
-.RB ( O_RDWR )
-mode.
-Or
-.B PROT_WRITE
-is set, but the file is append-only.
-.TP
-.B EAGAIN
-The file has been locked, or too much memory has been locked (see
-.BR setrlimit (2)).
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor (and
-.B MAP_ANONYMOUS
-was not set).
-.TP
-.B EEXIST
-.B MAP_FIXED_NOREPLACE
-was specified in
-.IR flags ,
-and the range covered by
-.I addr
-and
-.I length
-clashes with an existing mapping.
-.TP
-.B EINVAL
-We don't like
-.IR addr ,
-.IR length ,
-or
-.I offset
-(e.g., they are too large, or not aligned on a page boundary).
-.TP
-.B EINVAL
-(since Linux 2.6.12)
-.I length
-was 0.
-.TP
-.B EINVAL
-.I flags
-contained none of
-.BR MAP_PRIVATE ,
-.BR MAP_SHARED ,
-or
-.BR MAP_SHARED_VALIDATE .
-.TP
-.B ENFILE
-.\" This is for shared anonymous segments
-.\" [2.6.7] shmem_zero_setup()-->shmem_file_setup()-->get_empty_filp()
-The system-wide limit on the total number of open files has been reached.
-.\" .TP
-.\" .B ENOEXEC
-.\" A file could not be mapped for reading.
-.TP
-.B ENODEV
-The underlying filesystem of the specified file does not support
-memory mapping.
-.TP
-.B ENOMEM
-No memory is available.
-.TP
-.B ENOMEM
-The process's maximum number of mappings would have been exceeded.
-This error can also occur for
-.BR munmap (),
-when unmapping a region in the middle of an existing mapping,
-since this results in two smaller mappings on either side of
-the region being unmapped.
-.TP
-.B ENOMEM
-(since Linux 4.7)
-The process's
-.B RLIMIT_DATA
-limit, described in
-.BR getrlimit (2),
-would have been exceeded.
-.TP
-.B ENOMEM
-We don't like
-.IR addr ,
-because it exceeds the virtual address space of the CPU.
-.TP
-.B EOVERFLOW
-On 32-bit architectures together with the large file extension
-(i.e., using 64-bit
-.IR off_t ):
-the number of pages used for
-.I length
-plus the number of pages used for
-.I offset
-would overflow
-.I "unsigned long"
-(32 bits).
-.TP
-.B EPERM
-The
-.I prot
-argument asks for
-.B PROT_EXEC
-but the mapped area belongs to a file on a filesystem that
-was mounted no-exec.
-.\" (Since Linux 2.4.25 / Linux 2.6.0.)
-.TP
-.B EPERM
-The operation was prevented by a file seal; see
-.BR fcntl (2).
-.TP
-.B EPERM
-The
-.B MAP_HUGETLB
-flag was specified, but the caller was not privileged (did not have the
-.B CAP_IPC_LOCK
-capability)
-and is not a member of the
-.I sysctl_hugetlb_shm_group
-group; see the description of
-.I /proc/sys/vm/sysctl_hugetlb_shm_group
-in
-.BR proc_sys (5).
-.TP
-.B ETXTBSY
-.B MAP_DENYWRITE
-was set but the object specified by
-.I fd
-is open for writing.
-.P
-Use of a mapped region can result in these signals:
-.TP
-.B SIGSEGV
-Attempted write into a region mapped as read-only.
-.TP
-.B SIGBUS
-Attempted access to a page of the buffer that lies beyond the
-end of the mapped file.
-For an explanation of the treatment of the bytes in the page that
-corresponds to the end of a mapped file that is not a multiple
-of the page size, see NOTES.
-.SH ATTRIBUTES
-For an explanation of the terms used in this section, see
-.BR attributes (7).
-.TS
-allbox;
-lbx lb lb
-l l l.
-Interface Attribute Value
-T{
-.na
-.nh
-.BR mmap (),
-.BR munmap ()
-T} Thread safety MT-Safe
-.TE
-.SH VERSIONS
-On some hardware architectures (e.g., i386),
-.B PROT_WRITE
-implies
-.BR PROT_READ .
-It is architecture dependent whether
-.B PROT_READ
-implies
-.B PROT_EXEC
-or not.
-Portable programs should always set
-.B PROT_EXEC
-if they intend to execute code in the new mapping.
-.P
-The portable way to create a mapping is to specify
-.I addr
-as 0 (NULL), and omit
-.B MAP_FIXED
-from
-.IR flags .
-In this case, the system chooses the address for the mapping;
-the address is chosen so as not to conflict with any existing mapping,
-and will not be 0.
-If the
-.B MAP_FIXED
-flag is specified, and
-.I addr
-is 0 (NULL), then the mapped address will be 0 (NULL).
-.P
-Certain
-.I flags
-constants are defined only if suitable feature test macros are defined
-(possibly by default):
-.B _DEFAULT_SOURCE
-with glibc 2.19 or later;
-or
-.B _BSD_SOURCE
-or
-.B _SVID_SOURCE
-in glibc 2.19 and earlier.
-(Employing
-.B _GNU_SOURCE
-also suffices,
-and requiring that macro specifically would have been more logical,
-since these flags are all Linux-specific.)
-The relevant flags are:
-.BR MAP_32BIT ,
-.B MAP_ANONYMOUS
-(and the synonym
-.BR MAP_ANON ),
-.BR MAP_DENYWRITE ,
-.BR MAP_EXECUTABLE ,
-.BR MAP_FILE ,
-.BR MAP_GROWSDOWN ,
-.BR MAP_HUGETLB ,
-.BR MAP_LOCKED ,
-.BR MAP_NONBLOCK ,
-.BR MAP_NORESERVE ,
-.BR MAP_POPULATE ,
-and
-.BR MAP_STACK .
-.SS C library/kernel differences
-This page describes the interface provided by the glibc
-.BR mmap ()
-wrapper function.
-Originally, this function invoked a system call of the same name.
-Since Linux 2.4, that system call has been superseded by
-.BR mmap2 (2),
-and nowadays
-.\" Since around glibc 2.1/2.2, depending on the platform.
-the glibc
-.BR mmap ()
-wrapper function invokes
-.BR mmap2 (2)
-with a suitably adjusted value for
-.IR offset .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.4BSD.
-.\" SVr4 documents additional error codes ENXIO and ENODEV.
-.\" SUSv2 documents additional error codes EMFILE and EOVERFLOW.
-.P
-On POSIX systems on which
-.BR mmap (),
-.BR msync (2),
-and
-.BR munmap ()
-are available,
-.B _POSIX_MAPPED_FILES
-is defined in \fI<unistd.h>\fP to a value greater than 0.
-(See also
-.BR sysconf (3).)
-.\" POSIX.1-2001: It shall be defined to -1 or 0 or 200112L.
-.\" -1: unavailable, 0: ask using sysconf().
-.\" glibc defines it to 1.
-.SH NOTES
-Memory mapped by
-.BR mmap ()
-is preserved across
-.BR fork (2),
-with the same attributes.
-.P
-A file is mapped in multiples of the page size.
-For a file whose size is not
-a multiple of the page size,
-the remaining bytes in the partial page at the end of the mapping
-are zeroed when mapped,
-and modifications to that region are not written out to the file.
-The effect of
-changing the size of the underlying file of a mapping on the pages that
-correspond to added or removed regions of the file is unspecified.
-.P
-An application can determine which pages of a mapping are
-currently resident in the buffer/page cache using
-.BR mincore (2).
-.\"
-.SS Using MAP_FIXED safely
-The only safe use for
-.B MAP_FIXED
-is where the address range specified by
-.I addr
-and
-.I length
-was previously reserved using another mapping;
-otherwise, the use of
-.B MAP_FIXED
-is hazardous because it forcibly removes preexisting mappings,
-making it easy for a multithreaded process to corrupt its own address space.
-.P
-For example, suppose that thread A looks through
-.IR /proc/ pid /maps
-in order to locate an unused address range that it can map using
-.BR MAP_FIXED ,
-while thread B simultaneously acquires part or all of that same
-address range.
-When thread A subsequently employs
-.BR mmap(MAP_FIXED) ,
-it will effectively clobber the mapping that thread B created.
-In this scenario,
-thread B need not create a mapping directly; simply making a library call
-that, internally, uses
-.BR dlopen (3)
-to load some other shared library, will suffice.
-The
-.BR dlopen (3)
-call will map the library into the process's address space.
-Furthermore, almost any library call may be implemented in a way that
-adds memory mappings to the address space, either with this technique,
-or by simply allocating memory.
-Examples include
-.BR brk (2),
-.BR malloc (3),
-.BR pthread_create (3),
-and the PAM libraries
-.UR http://www.linux-pam.org
-.UE .
-.P
-Since Linux 4.17, a multithreaded program can use the
-.B MAP_FIXED_NOREPLACE
-flag to avoid the hazard described above
-when attempting to create a mapping at a fixed address
-that has not been reserved by a preexisting mapping.
-.\"
-.SS Timestamp changes for file-backed mappings
-For file-backed mappings, the
-.I st_atime
-field for the mapped file may be updated at any time between the
-.BR mmap ()
-and the corresponding unmapping; the first reference to a mapped
-page will update the field if it has not been already.
-.P
-The
-.I st_ctime
-and
-.I st_mtime
-fields for a file mapped with
-.B PROT_WRITE
-and
-.B MAP_SHARED
-will be updated after
-a write to the mapped region, and before a subsequent
-.BR msync (2)
-with the
-.B MS_SYNC
-or
-.B MS_ASYNC
-flag, if one occurs.
-.\"
-.SS Huge page (Huge TLB) mappings
-For mappings that employ huge pages, the requirements for the arguments of
-.BR mmap ()
-and
-.BR munmap ()
-differ somewhat from the requirements for mappings
-that use the native system page size.
-.P
-For
-.BR mmap (),
-.I offset
-must be a multiple of the underlying huge page size.
-The system automatically aligns
-.I length
-to be a multiple of the underlying huge page size.
-.P
-For
-.BR munmap (),
-.I addr
-and
-.I length
-must both be a multiple of the underlying huge page size.
-.\"
-.SH BUGS
-On Linux, there are no guarantees like those suggested above under
-.BR MAP_NORESERVE .
-By default, any process can be killed
-at any moment when the system runs out of memory.
-.P
-Before Linux 2.6.7, the
-.B MAP_POPULATE
-flag had effect only if
-.I prot
-is specified as
-.BR PROT_NONE .
-.P
-SUSv3 specifies that
-.BR mmap ()
-should fail if
-.I length
-is 0.
-However, before Linux 2.6.12,
-.BR mmap ()
-succeeded in this case: no mapping was created and the call returned
-.IR addr .
-Since Linux 2.6.12,
-.BR mmap ()
-fails with the error
-.B EINVAL
-for this case.
-.P
-POSIX specifies that the system shall always
-zero fill any partial page at the end
-of the object and that the system will never write any modification of the
-object beyond its end.
-On Linux, when you write data to such a partial page after the end
-of the object, the data stays in the page cache even after the file
-is closed and unmapped
-and even though the data is never written to the file itself,
-subsequent mappings may see the modified content.
-In some cases, this could be fixed by calling
-.BR msync (2)
-before the unmap takes place;
-however, this doesn't work on
-.BR tmpfs (5)
-(for example, when using the POSIX shared memory interface documented in
-.BR shm_overview (7)).
-.SH EXAMPLES
-.\" FIXME . Add an example here that uses an anonymous shared region for
-.\" IPC between parent and child.
-The following program prints part of the file specified in
-its first command-line argument to standard output.
-The range of bytes to be printed is specified via offset and length
-values in the second and third command-line arguments.
-The program creates a memory mapping of the required
-pages of the file and then uses
-.BR write (2)
-to output the desired bytes.
-.SS Program source
-.\" SRC BEGIN (mmap.c)
-.EX
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <unistd.h>
-\&
-#define handle_error(msg) \e
- do { perror(msg); exit(EXIT_FAILURE); } while (0)
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- char *addr;
- off_t offset, pa_offset;
- size_t length;
- ssize_t s;
- struct stat sb;
-\&
- if (argc < 3 || argc > 4) {
- fprintf(stderr, "%s file offset [length]\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- fd = open(argv[1], O_RDONLY);
- if (fd == \-1)
- handle_error("open");
-\&
- if (fstat(fd, &sb) == \-1) /* To obtain file size */
- handle_error("fstat");
-\&
- offset = atoi(argv[2]);
- pa_offset = offset & \[ti](sysconf(_SC_PAGE_SIZE) \- 1);
- /* offset for mmap() must be page aligned */
-\&
- if (offset >= sb.st_size) {
- fprintf(stderr, "offset is past end of file\en");
- exit(EXIT_FAILURE);
- }
-\&
- if (argc == 4) {
- length = atoi(argv[3]);
- if (offset + length > sb.st_size)
- length = sb.st_size \- offset;
- /* Can\[aq]t display bytes past end of file */
-\&
- } else { /* No length arg ==> display to end of file */
- length = sb.st_size \- offset;
- }
-\&
- addr = mmap(NULL, length + offset \- pa_offset, PROT_READ,
- MAP_PRIVATE, fd, pa_offset);
- if (addr == MAP_FAILED)
- handle_error("mmap");
-\&
- s = write(STDOUT_FILENO, addr + offset \- pa_offset, length);
- if (s != length) {
- if (s == \-1)
- handle_error("write");
-\&
- fprintf(stderr, "partial write");
- exit(EXIT_FAILURE);
- }
-\&
- munmap(addr, length + offset \- pa_offset);
- close(fd);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR ftruncate (2),
-.BR getpagesize (2),
-.BR memfd_create (2),
-.BR mincore (2),
-.BR mlock (2),
-.BR mmap2 (2),
-.BR mprotect (2),
-.BR mremap (2),
-.BR msync (2),
-.BR remap_file_pages (2),
-.BR setrlimit (2),
-.BR shmat (2),
-.BR userfaultfd (2),
-.BR shm_open (3),
-.BR shm_overview (7)
-.P
-The descriptions of the following files in
-.BR proc (5):
-.IR /proc/ pid /maps ,
-.IR /proc/ pid /map_files ,
-and
-.IR /proc/ pid /smaps .
-.P
-B.O. Gallmeister, POSIX.4, O'Reilly, pp. 128\[en]129 and 389\[en]391.
-.\"
-.\" Repeat after me: private read-only mappings are 100% equivalent to
-.\" shared read-only mappings. No ifs, buts, or maybes. -- Linus
diff --git a/man2/mmap2.2 b/man2/mmap2.2
deleted file mode 100644
index 594a207ec..000000000
--- a/man2/mmap2.2
+++ /dev/null
@@ -1,85 +0,0 @@
-.\" Copyright (C) 2002, Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 31 Jan 2002, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added description of mmap2
-.\" Modified, 2004-11-25, mtk -- removed stray #endif in prototype
-.\"
-.TH mmap2 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mmap2 \- map files or devices into memory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/mman.h>" " /* Definition of " MAP_* " and " PROT_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "void *syscall(SYS_mmap2, unsigned long " addr ", unsigned long " length ,
-.BI " unsigned long " prot ", unsigned long " flags ,
-.BI " unsigned long " fd ", unsigned long " pgoffset );
-.fi
-.SH DESCRIPTION
-This is probably not the system call that you are interested in; instead, see
-.BR mmap (2),
-which describes the glibc wrapper function that invokes this system call.
-.P
-The
-.BR mmap2 ()
-system call provides the same interface as
-.BR mmap (2),
-except that the final argument specifies the offset into the
-file in 4096-byte units (instead of bytes, as is done by
-.BR mmap (2)).
-This enables applications that use a 32-bit
-.I off_t
-to map large files (up to 2\[ha]44 bytes).
-.SH RETURN VALUE
-On success,
-.BR mmap2 ()
-returns a pointer to the mapped area.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Problem with getting the data from user space.
-.TP
-.B EINVAL
-(Various platforms where the page size is not 4096 bytes.)
-.I "pgoffset\ *\ 4096"
-is not a multiple of the system page size.
-.P
-.BR mmap2 ()
-can also return any of the errors described in
-.BR mmap (2).
-.SH VERSIONS
-On architectures where this system call is present,
-the glibc
-.BR mmap ()
-wrapper function invokes this system call rather than the
-.BR mmap (2)
-system call.
-.P
-This system call does not exist on x86-64.
-.P
-On ia64, the unit for
-.I pgoffset
-is actually the system page size, rather than 4096 bytes.
-.\" ia64 can have page sizes ranging from 4 kB to 64 kB.
-.\" On cris, it looks like the unit might also be the page size,
-.\" which is 8192 bytes. -- mtk, June 2007
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.3.31.
-.SH SEE ALSO
-.BR getpagesize (2),
-.BR mmap (2),
-.BR mremap (2),
-.BR msync (2),
-.BR shm_open (3)
diff --git a/man2/modify_ldt.2 b/man2/modify_ldt.2
deleted file mode 100644
index 7af91ed59..000000000
--- a/man2/modify_ldt.2
+++ /dev/null
@@ -1,196 +0,0 @@
-.\" Copyright (c) 1995 Michael Chastain (mec@duracef.shout.net), 22 July 1995.
-.\" Copyright (c) 2015 Andrew Lutomirski
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH modify_ldt 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-modify_ldt \- get or set a per-process LDT entry
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <asm/ldt.h>" " /* Definition of " "struct user_desc" " */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_modify_ldt, int " func ", void " ptr [. bytecount ],
-.BI " unsigned long " bytecount );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR modify_ldt (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR modify_ldt ()
-reads or writes the local descriptor table (LDT) for a process.
-The LDT
-is an array of segment descriptors that can be referenced by user code.
-Linux allows processes to configure a per-process (actually per-mm) LDT.
-For more information about the LDT, see the Intel Software Developer's
-Manual or the AMD Architecture Programming Manual.
-.P
-When
-.I func
-is 0,
-.BR modify_ldt ()
-reads the LDT into the memory pointed to by
-.IR ptr .
-The number of bytes read is the smaller of
-.I bytecount
-and the actual size of the LDT, although the kernel may act as though
-the LDT is padded with additional trailing zero bytes.
-On success,
-.BR modify_ldt ()
-will return the number of bytes read.
-.P
-When
-.I func
-is 1 or 0x11,
-.BR modify_ldt ()
-modifies the LDT entry indicated by
-.IR ptr\->entry_number .
-.I ptr
-points to a
-.I user_desc
-structure
-and
-.I bytecount
-must equal the size of this structure.
-.P
-The
-.I user_desc
-structure is defined in \fI<asm/ldt.h>\fP as:
-.P
-.in +4n
-.EX
-struct user_desc {
- unsigned int entry_number;
- unsigned int base_addr;
- unsigned int limit;
- unsigned int seg_32bit:1;
- unsigned int contents:2;
- unsigned int read_exec_only:1;
- unsigned int limit_in_pages:1;
- unsigned int seg_not_present:1;
- unsigned int useable:1;
-};
-.EE
-.in
-.P
-In Linux 2.4 and earlier, this structure was named
-.IR modify_ldt_ldt_s .
-.P
-The
-.I contents
-field is the segment type (data, expand-down data, non-conforming code, or
-conforming code).
-The other fields match their descriptions in the CPU manual, although
-.BR modify_ldt ()
-cannot set the hardware-defined "accessed" bit described in the CPU manual.
-.P
-A
-.I user_desc
-is considered "empty" if
-.I read_exec_only
-and
-.I seg_not_present
-are set to 1 and all of the other fields are 0.
-An LDT entry can be cleared by setting it to an "empty"
-.I user_desc
-or, if
-.I func
-is 1, by setting both
-.I base_addr
-and
-.I limit
-to 0.
-.P
-A conforming code segment (i.e., one with
-.IR contents==3 )
-will be rejected if
-.I
-func
-is 1 or if
-.I seg_not_present
-is 0.
-.P
-When
-.I func
-is 2,
-.BR modify_ldt ()
-will read zeros.
-This appears to be a leftover from Linux 2.4.
-.SH RETURN VALUE
-On success,
-.BR modify_ldt ()
-returns either the actual number of bytes read (for reading)
-or 0 (for writing).
-On failure,
-.BR modify_ldt ()
-returns \-1 and sets
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I ptr
-points outside the address space.
-.TP
-.B EINVAL
-.I ptr
-is 0,
-or
-.I func
-is 1 and
-.I bytecount
-is not equal to the size of the structure
-.IR user_desc ,
-or
-.I func
-is 1 or 0x11 and the new LDT entry has invalid values.
-.TP
-.B ENOSYS
-.I func
-is neither 0, 1, 2, nor 0x11.
-.SH STANDARDS
-Linux.
-.SH NOTES
-.BR modify_ldt ()
-should not be used for thread-local storage, as it slows down context
-switches and only supports a limited number of threads.
-Threading libraries should use
-.BR set_thread_area (2)
-or
-.BR arch_prctl (2)
-instead, except on extremely old kernels that do not support those system
-calls.
-.P
-The normal use for
-.BR modify_ldt ()
-is to run legacy 16-bit or segmented 32-bit code.
-Not all kernels allow 16-bit segments to be installed, however.
-.P
-Even on 64-bit kernels,
-.BR modify_ldt ()
-cannot be used to create a long mode (i.e., 64-bit) code segment.
-The undocumented field "lm" in
-.I user_desc
-is not useful, and, despite its name,
-does not result in a long mode segment.
-.SH BUGS
-On 64-bit kernels before Linux 3.19,
-.\" commit e30ab185c490e9a9381385529e0fd32f0a399495
-setting the "lm" bit in
-.I user_desc
-prevents the descriptor from being considered empty.
-Keep in mind that the
-"lm" bit does not exist in the 32-bit headers, but these buggy kernels
-will still notice the bit even when set in a 32-bit process.
-.SH SEE ALSO
-.BR arch_prctl (2),
-.BR set_thread_area (2),
-.BR vm86 (2)
diff --git a/man2/mount.2 b/man2/mount.2
deleted file mode 100644
index d8a691e13..000000000
--- a/man2/mount.2
+++ /dev/null
@@ -1,971 +0,0 @@
-.\" Copyright (C) 1993 Rickard E. Faith <faith@cs.unc.edu>
-.\" and Copyright (C) 1994 Andries E. Brouwer <aeb@cwi.nl>
-.\" and Copyright (C) 2002, 2005, 2016 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1996-11-04 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-10-13 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added note on historical behavior of MS_NOSUID
-.\" Modified 2002-05-16 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Extensive changes and additions
-.\" Modified 2002-05-27 by aeb
-.\" Modified 2002-06-11 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Enhanced descriptions of MS_MOVE, MS_BIND, and MS_REMOUNT
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2005-05-18, mtk, Added MNT_EXPIRE, plus a few other tidy-ups.
-.\" 2008-10-06, mtk: move umount*() material into separate umount.2 page.
-.\" 2008-10-06, mtk: Add discussion of namespaces.
-.\"
-.TH mount 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mount \- mount filesystem
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B "#include <sys/mount.h>"
-.P
-.BI "int mount(const char *" source ", const char *" target ,
-.BI " const char *" filesystemtype ", unsigned long " mountflags ,
-.BI " const void *_Nullable " data );
-.fi
-.SH DESCRIPTION
-.BR mount ()
-attaches the filesystem specified by
-.I source
-(which is often a pathname referring to a device,
-but can also be the pathname of a directory or file,
-or a dummy string) to the location (a directory or file)
-specified by the pathname in
-.IR target .
-.P
-Appropriate privilege (Linux: the
-.B CAP_SYS_ADMIN
-capability) is required to mount filesystems.
-.P
-Values for the
-.I filesystemtype
-argument supported by the kernel are listed in
-.I /proc/filesystems
-(e.g., "btrfs", "ext4", "jfs", "xfs", "vfat", "fuse",
-"tmpfs", "cgroup", "proc", "mqueue", "nfs", "cifs", "iso9660").
-Further types may become available when the appropriate modules
-are loaded.
-.P
-The
-.I data
-argument is interpreted by the different filesystems.
-Typically it is a string of comma-separated options
-understood by this filesystem.
-See
-.BR mount (8)
-for details of the options available for each filesystem type.
-This argument may be specified as NULL, if there are no options.
-.P
-A call to
-.BR mount ()
-performs one of a number of general types of operation,
-depending on the bits specified in
-.IR mountflags .
-The choice of which operation to perform is determined by
-testing the bits set in
-.IR mountflags ,
-with the tests being conducted in the order listed here:
-.IP \[bu] 3
-Remount an existing mount:
-.I mountflags
-includes
-.BR MS_REMOUNT .
-.IP \[bu]
-Create a bind mount:
-.I mountflags
-includes
-.BR MS_BIND .
-.IP \[bu]
-Change the propagation type of an existing mount:
-.I mountflags
-includes one of
-.BR MS_SHARED ,
-.BR MS_PRIVATE ,
-.BR MS_SLAVE ,
-or
-.BR MS_UNBINDABLE .
-.IP \[bu]
-Move an existing mount to a new location:
-.I mountflags
-includes
-.BR MS_MOVE .
-.IP \[bu]
-Create a new mount:
-.I mountflags
-includes none of the above flags.
-.P
-Each of these operations is detailed later in this page.
-Further flags may be specified in
-.I mountflags
-to modify the behavior of
-.BR mount (),
-as described below.
-.\"
-.SS Additional mount flags
-The list below describes the additional flags that can be specified in
-.IR mountflags .
-Note that some operation types ignore some or all of these flags,
-as described later in this page.
-.\"
-.\" FIXME 2.6.25 Added MS_I_VERSION, which needs to be documented.
-.\" commit 7a224228ed79d587ece2304869000aad1b8e97dd
-.\" (This is a per-superblock flag)
-.\"
-.TP
-.BR MS_DIRSYNC " (since Linux 2.5.19)"
-Make directory changes on this filesystem synchronous.
-(This property can be obtained for individual directories
-or subtrees using
-.BR chattr (1).)
-.TP
-.BR MS_LAZYTIME " (since Linux 4.0)"
-.\" commit 0ae45f63d4ef8d8eeec49c7d8b44a1775fff13e8
-.\" commit fe032c422c5ba562ba9c2d316f55e258e03259c6
-.\" commit a26f49926da938f47561f386be56a83dd37a496d
-Reduce on-disk updates of inode timestamps (atime, mtime, ctime)
-by maintaining these changes only in memory.
-The on-disk timestamps are updated only when:
-.RS
-.IP \[bu] 3
-the inode needs to be updated for some change unrelated to file timestamps;
-.IP \[bu]
-the application employs
-.BR fsync (2),
-.BR syncfs (2),
-or
-.BR sync (2);
-.IP \[bu]
-an undeleted inode is evicted from memory; or
-.IP \[bu]
-more than 24 hours have passed since the inode was written to disk.
-.RE
-.IP
-This mount option significantly reduces writes
-needed to update the inode's timestamps, especially mtime and atime.
-However, in the event of a system crash, the atime and mtime fields
-on disk might be out of date by up to 24 hours.
-.IP
-Examples of workloads where this option could be of significant benefit
-include frequent random writes to preallocated files,
-as well as cases where the
-.B MS_STRICTATIME
-mount option is also enabled.
-(The advantage of combining
-.B MS_STRICTATIME
-and
-.B MS_LAZYTIME
-is that
-.BR stat (2)
-will return the correctly updated atime, but the atime updates
-will be flushed to disk only in the cases listed above.)
-.TP
-.B MS_MANDLOCK
-Permit mandatory locking on files in this filesystem.
-(Mandatory locking must still be enabled on a per-file basis,
-as described in
-.BR fcntl (2).)
-Since Linux 4.5,
-.\" commit 95ace75414f312f9a7b93d873f386987b92a5301
-this mount option requires the
-.B CAP_SYS_ADMIN
-capability and a kernel configured with the
-.B CONFIG_MANDATORY_FILE_LOCKING
-option.
-Mandatory locking has been fully deprecated in Linux 5.15, so
-this flag should be considered deprecated.
-.TP
-.B MS_NOATIME
-Do not update access times for (all types of) files on this filesystem.
-.TP
-.B MS_NODEV
-Do not allow access to devices (special files) on this filesystem.
-.TP
-.B MS_NODIRATIME
-Do not update access times for directories on this filesystem.
-This flag provides a subset of the functionality provided by
-.BR MS_NOATIME ;
-that is,
-.B MS_NOATIME
-implies
-.BR MS_NODIRATIME .
-.TP
-.B MS_NOEXEC
-Do not allow programs to be executed from this filesystem.
-.\" (Possibly useful for a filesystem that contains non-Linux executables.
-.\" Often used as a security feature, e.g., to make sure that restricted
-.\" users cannot execute files uploaded using ftp or so.)
-.TP
-.B MS_NOSUID
-Do not honor set-user-ID and set-group-ID bits or file capabilities
-when executing programs from this filesystem.
-In addition, SELinux domain
-transitions require the permission
-.IR nosuid_transition ,
-which in turn also needs
-the policy capability
-.IR nnp_nosuid_transition .
-.\" (This is a security feature to prevent users executing set-user-ID and
-.\" set-group-ID programs from removable disk devices.)
-.TP
-.B MS_RDONLY
-Mount filesystem read-only.
-.TP
-.BR MS_REC " (since Linux 2.4.11)"
-Used in conjunction with
-.B MS_BIND
-to create a recursive bind mount,
-and in conjunction with the propagation type flags to recursively change
-the propagation type of all of the mounts in a subtree.
-See below for further details.
-.TP
-.BR MS_RELATIME " (since Linux 2.6.20)"
-When a file on this filesystem is accessed,
-update the file's last access time (atime) only if the current value
-of atime is less than or equal to the file's last modification time (mtime)
-or last status change time (ctime).
-This option is useful for programs, such as
-.BR mutt (1),
-that need to know when a file has been read since it was last modified.
-Since Linux 2.6.30, the kernel defaults to the behavior provided
-by this flag (unless
-.B MS_NOATIME
-was specified), and the
-.B MS_STRICTATIME
-flag is required to obtain traditional semantics.
-In addition, since Linux 2.6.30,
-the file's last access time is always updated if it
-is more than 1 day old.
-.\" Matthew Garrett notes in the patch that added this behavior
-.\" that this lets utilities such as tmpreaper (which deletes
-.\" files based on last access time) work correctly.
-.TP
-.BR MS_SILENT " (since Linux 2.6.17)"
-Suppress the display of certain
-.RI ( printk ())
-warning messages in the kernel log.
-This flag supersedes the misnamed and obsolete
-.B MS_VERBOSE
-flag (available since Linux 2.4.12), which has the same meaning.
-.TP
-.BR MS_STRICTATIME " (since Linux 2.6.30)"
-Always update the last access time (atime) when files on this
-filesystem are accessed.
-(This was the default behavior before Linux 2.6.30.)
-Specifying this flag overrides the effect of setting the
-.B MS_NOATIME
-and
-.B MS_RELATIME
-flags.
-.TP
-.B MS_SYNCHRONOUS
-Make writes on this filesystem synchronous (as though
-the
-.B O_SYNC
-flag to
-.BR open (2)
-was specified for all file opens to this filesystem).
-.TP
-.BR MS_NOSYMFOLLOW " (since Linux 5.10)"
-.\" dab741e0e02bd3c4f5e2e97be74b39df2523fc6e
-Do not follow symbolic links when resolving paths.
-Symbolic links can still be created,
-and
-.BR readlink (1),
-.BR readlink (2),
-.BR realpath (1),
-and
-.BR realpath (3)
-all still work properly.
-.P
-From Linux 2.4 onward, some of the above flags are
-settable on a per-mount basis,
-while others apply to the superblock of the mounted filesystem,
-meaning that all mounts of the same filesystem share those flags.
-(Previously, all of the flags were per-superblock.)
-.P
-The per-mount-point flags are as follows:
-.IP \[bu] 3
-Since Linux 2.4:
-.BR MS_NODEV ", " MS_NOEXEC ", and " MS_NOSUID
-flags are settable on a per-mount-point basis.
-.IP \[bu]
-Additionally, since Linux 2.6.16:
-.B MS_NOATIME
-and
-.BR MS_NODIRATIME .
-.IP \[bu]
-Additionally, since Linux 2.6.20:
-.BR MS_RELATIME .
-.P
-The following flags are per-superblock:
-.BR MS_DIRSYNC ,
-.BR MS_LAZYTIME ,
-.BR MS_MANDLOCK ,
-.BR MS_SILENT ,
-and
-.BR MS_SYNCHRONOUS .
-.\" And MS_I_VERSION?
-The initial settings of these flags are determined on the first
-mount of the filesystem, and will be shared by all subsequent mounts
-of the same filesystem.
-Subsequently, the settings of the flags can be changed
-via a remount operation (see below).
-Such changes will be visible via all mounts associated
-with the filesystem.
-.P
-Since Linux 2.6.16,
-.B MS_RDONLY
-can be set or cleared on a per-mount-point basis as well as on
-the underlying filesystem superblock.
-The mounted filesystem will be writable only if neither the filesystem
-nor the mountpoint is flagged as read-only.
-.\"
-.SS Remounting an existing mount
-An existing mount may be remounted by specifying
-.B MS_REMOUNT
-in
-.IR mountflags .
-This allows you to change the
-.I mountflags
-and
-.I data
-of an existing mount without having to unmount and remount the filesystem.
-.I target
-should be the same value specified in the initial
-.BR mount ()
-call.
-.P
-The
-.I source
-and
-.I filesystemtype
-arguments are ignored.
-.P
-The
-.I mountflags
-and
-.I data
-arguments should match the values used in the original
-.BR mount ()
-call, except for those parameters that are being deliberately changed.
-.P
-The following
-.I mountflags
-can be changed:
-.BR MS_LAZYTIME ,
-.\" FIXME
-.\" MS_LAZYTIME seems to be available only on a few filesystems,
-.\" and on ext4, it seems (from experiment) that this flag
-.\" can only be enabled (but not disabled) on a remount.
-.\" The following code in ext4_remount() (kernel 4.17) seems to
-.\" confirm this:
-.\"
-.\" if (*flags & SB_LAZYTIME)
-.\" sb->s_flags |= SB_LAZYTIME;
-.BR MS_MANDLOCK ,
-.BR MS_NOATIME ,
-.BR MS_NODEV ,
-.BR MS_NODIRATIME ,
-.BR MS_NOEXEC ,
-.BR MS_NOSUID ,
-.BR MS_RELATIME ,
-.BR MS_RDONLY ,
-.B MS_STRICTATIME
-(whose effect is to clear the
-.B MS_NOATIME
-and
-.B MS_RELATIME
-flags),
-and
-.BR MS_SYNCHRONOUS .
-Attempts to change the setting of the
-.\" See the definition of MS_RMT_MASK in include/uapi/linux/fs.h,
-.\" which excludes MS_DIRSYNC and MS_SILENT, although SB_DIRSYNC
-.\" and SB_SILENT are split out as per-superblock flags in do_mount()
-.\" (Linux 4.17 source code)
-.B MS_DIRSYNC
-and
-.B MS_SILENT
-flags during a remount are silently ignored.
-Note that changes to per-superblock flags are visible via
-all mounts of the associated filesystem
-(because the per-superblock flags are shared by all mounts).
-.P
-Since Linux 3.17,
-.\" commit ffbc6f0ead47fa5a1dc9642b0331cb75c20a640e
-if none of
-.BR MS_NOATIME ,
-.BR MS_NODIRATIME ,
-.BR MS_RELATIME ,
-or
-.B MS_STRICTATIME
-is specified in
-.IR mountflags ,
-then the remount operation preserves the existing values of these flags
-(rather than defaulting to
-.BR MS_RELATIME ).
-.P
-Since Linux 2.6.26, the
-.B MS_REMOUNT
-flag can be used with
-.B MS_BIND
-to modify only the per-mount-point flags.
-.\" See https://lwn.net/Articles/281157/
-This is particularly useful for setting or clearing the "read-only"
-flag on a mount without changing the underlying filesystem.
-Specifying
-.I mountflags
-as:
-.P
-.in +4n
-.EX
-MS_REMOUNT | MS_BIND | MS_RDONLY
-.EE
-.in
-.P
-will make access through this mountpoint read-only, without affecting
-other mounts.
-.\"
-.SS Creating a bind mount
-If
-.I mountflags
-includes
-.B MS_BIND
-(available since Linux 2.4),
-.\" since Linux 2.4.0-test9
-then perform a bind mount.
-A bind mount makes a file or a directory subtree visible at
-another point within the single directory hierarchy.
-Bind mounts may cross filesystem boundaries and span
-.BR chroot (2)
-jails.
-.P
-The
-.I filesystemtype
-and
-.I data
-arguments are ignored.
-.P
-The remaining bits (other than
-.BR MS_REC ,
-described below) in the
-.I mountflags
-argument are also ignored.
-(The bind mount has the same mount options as
-the underlying mount.)
-However, see the discussion of remounting above,
-for a method of making an existing bind mount read-only.
-.P
-By default, when a directory is bind mounted,
-only that directory is mounted;
-if there are any submounts under the directory tree,
-they are not bind mounted.
-If the
-.B MS_REC
-flag is also specified, then a recursive bind mount operation is performed:
-all submounts under the
-.I source
-subtree (other than unbindable mounts)
-are also bind mounted at the corresponding location in the
-.I target
-subtree.
-.\"
-.SS Changing the propagation type of an existing mount
-If
-.I mountflags
-includes one of
-.BR MS_SHARED ,
-.BR MS_PRIVATE ,
-.BR MS_SLAVE ,
-or
-.B MS_UNBINDABLE
-(all available since Linux 2.6.15),
-then the propagation type of an existing mount is changed.
-If more than one of these flags is specified, an error results.
-.P
-The only other flags that can be specified while changing
-the propagation type are
-.B MS_REC
-(described below) and
-.B MS_SILENT
-(which is ignored).
-.P
-The
-.IR source ,
-.IR filesystemtype ,
-and
-.I data
-arguments are ignored.
-.P
-The meanings of the propagation type flags are as follows:
-.TP
-.B MS_SHARED
-Make this mount shared.
-Mount and unmount events immediately under this mount will propagate
-to the other mounts that are members of this mount's peer group.
-Propagation here means that the same mount or unmount will automatically
-occur under all of the other mounts in the peer group.
-Conversely, mount and unmount events that take place under
-peer mounts will propagate to this mount.
-.TP
-.B MS_PRIVATE
-Make this mount private.
-Mount and unmount events do not propagate into or out of this mount.
-.TP
-.B MS_SLAVE
-If this is a shared mount that is a member of a peer group
-that contains other members, convert it to a slave mount.
-If this is a shared mount that is a member of a peer group
-that contains no other members, convert it to a private mount.
-Otherwise, the propagation type of the mount is left unchanged.
-.IP
-When a mount is a slave,
-mount and unmount events propagate into this mount from
-the (master) shared peer group of which it was formerly a member.
-Mount and unmount events under this mount do not propagate to any peer.
-.IP
-A mount can be the slave of another peer group
-while at the same time sharing mount and unmount events
-with a peer group of which it is a member.
-.TP
-.B MS_UNBINDABLE
-Make this mount unbindable.
-This is like a private mount,
-and in addition this mount can't be bind mounted.
-When a recursive bind mount
-.RB ( mount ()
-with the
-.B MS_BIND
-and
-.B MS_REC
-flags) is performed on a directory subtree,
-any unbindable mounts within the subtree are automatically pruned
-(i.e., not replicated)
-when replicating that subtree to produce the target subtree.
-.P
-By default, changing the propagation type affects only the
-.I target
-mount.
-If the
-.B MS_REC
-flag is also specified in
-.IR mountflags ,
-then the propagation type of all mounts under
-.I target
-is also changed.
-.P
-For further details regarding mount propagation types
-(including the default propagation type assigned to new mounts), see
-.BR mount_namespaces (7).
-.\"
-.SS Moving a mount
-If
-.I mountflags
-contains the flag
-.B MS_MOVE
-(available since Linux 2.4.18),
-then move a subtree:
-.I source
-specifies an existing mount and
-.I target
-specifies the new location to which that mount is to be relocated.
-The move is atomic: at no point is the subtree unmounted.
-.P
-The remaining bits in the
-.I mountflags
-argument are ignored, as are the
-.I filesystemtype
-and
-.I data
-arguments.
-.\"
-.SS Creating a new mount
-If none of
-.BR MS_REMOUNT ,
-.BR MS_BIND ,
-.BR MS_MOVE ,
-.BR MS_SHARED ,
-.BR MS_PRIVATE ,
-.BR MS_SLAVE ,
-or
-.B MS_UNBINDABLE
-is specified in
-.IR mountflags ,
-then
-.BR mount ()
-performs its default action: creating a new mount.
-.I source
-specifies the source for the new mount, and
-.I target
-specifies the directory at which to create the mount point.
-.P
-The
-.I filesystemtype
-and
-.I data
-arguments are employed, and further bits may be specified in
-.I mountflags
-to modify the behavior of the call.
-.\"
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-The error values given below result from filesystem type independent
-errors.
-Each filesystem type may have its own special errors and its
-own special behavior.
-See the Linux kernel source code for details.
-.TP
-.B EACCES
-A component of a path was not searchable.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EACCES
-Mounting a read-only filesystem was attempted without giving the
-.B MS_RDONLY
-flag.
-.IP
-The filesystem may be read-only for various reasons, including:
-it resides on a read-only optical disk;
-it resides on a device with a physical switch that has been set to
-mark the device read-only;
-the filesystem implementation was compiled with only read-only support;
-or errors were detected when initially mounting the filesystem,
-so that it was marked read-only
-and can't be remounted as read-write (until the errors are fixed).
-.IP
-Some filesystems instead return the error
-.B EROFS
-on an attempt to mount a read-only filesystem.
-.TP
-.B EACCES
-The block device
-.I source
-is located on a filesystem mounted with the
-.B MS_NODEV
-option.
-.\" mtk: Probably: write permission is required for MS_BIND, with
-.\" the error EPERM if not present; CAP_DAC_OVERRIDE is required.
-.TP
-.B EBUSY
-An attempt was made to stack a new mount directly on
-top of an existing mount point that was created in this
-mount namespace with the same
-.I source
-and
-.IR target .
-.TP
-.B EBUSY
-.I source
-cannot be remounted read-only,
-because it still holds files open for writing.
-.TP
-.B EFAULT
-One of the pointer arguments points outside the user address space.
-.TP
-.B EINVAL
-.I source
-had an invalid superblock.
-.TP
-.B EINVAL
-A remount operation
-.RB ( MS_REMOUNT )
-was attempted, but
-.I source
-was not already mounted on
-.IR target .
-.TP
-.B EINVAL
-A move operation
-.RB ( MS_MOVE )
-was attempted, but the mount tree under
-.I source
-includes unbindable mounts and
-.I target
-is a mount that has propagation type
-.BR MS_SHARED .
-.TP
-.B EINVAL
-A move operation
-.RB ( MS_MOVE )
-was attempted, but the parent mount of
-.I source
-mount has propagation type
-.BR MS_SHARED .
-.TP
-.B EINVAL
-A move operation
-.RB ( MS_MOVE )
-was attempted, but
-.I source
-was not a mount, or was \[aq]/\[aq].
-.TP
-.B EINVAL
-A bind operation
-.RB ( MS_BIND )
-was requested where
-.I source
-referred to a mount namespace magic link (i.e., a
-.IR /proc/ pid /ns/mnt
-magic link or a bind mount to such a link)
-and the propagation type of the parent mount of
-.I target
-was
-.BR MS_SHARED ,
-.\" See commit 8823c079ba7136dc1948d6f6dcb5f8022bde438e
-but propagation of the requested bind mount could lead to a circular
-dependency that might prevent the mount namespace from ever being freed.
-.TP
-.B EINVAL
-.I mountflags
-includes more than one of
-.BR MS_SHARED ,
-.BR MS_PRIVATE ,
-.BR MS_SLAVE ,
-or
-.BR MS_UNBINDABLE .
-.TP
-.B EINVAL
-.I mountflags
-includes
-.BR MS_SHARED ,
-.BR MS_PRIVATE ,
-.BR MS_SLAVE ,
-or
-.B MS_UNBINDABLE
-and also includes a flag other than
-.B MS_REC
-or
-.BR MS_SILENT .
-.TP
-.B EINVAL
-An attempt was made to bind mount an unbindable mount.
-.TP
-.B EINVAL
-In an unprivileged mount namespace
-(i.e., a mount namespace owned by a user namespace
-that was created by an unprivileged user),
-a bind mount operation
-.RB ( MS_BIND )
-was attempted without specifying
-.BR MS_REC ,
-which would have revealed the filesystem tree underneath one of
-the submounts of the directory being bound.
-.TP
-.B ELOOP
-Too many links encountered during pathname resolution.
-.TP
-.B ELOOP
-A move operation was attempted, and
-.I target
-is a descendant of
-.IR source .
-.TP
-.B EMFILE
-(In case no block device is required:)
-Table of dummy devices is full.
-.TP
-.B ENAMETOOLONG
-A pathname was longer than
-.BR MAXPATHLEN .
-.TP
-.B ENODEV
-.I filesystemtype
-not configured in the kernel.
-.TP
-.B ENOENT
-A pathname was empty or had a nonexistent component.
-.TP
-.B ENOMEM
-The kernel could not allocate a free page to copy filenames or data into.
-.TP
-.B ENOTBLK
-.I source
-is not a block device (and a device was required).
-.TP
-.B ENOTDIR
-.IR target ,
-or a prefix of
-.IR source ,
-is not a directory.
-.TP
-.B ENXIO
-The major number of the block device
-.I source
-is out of range.
-.TP
-.B EPERM
-The caller does not have the required privileges.
-.TP
-.B EPERM
-An attempt was made to modify
-.RB ( MS_REMOUNT )
-the
-.BR MS_RDONLY ,
-.BR MS_NOSUID ,
-or
-.B MS_NOEXEC
-flag, or one of the "atime" flags
-.RB ( MS_NOATIME ,
-.BR MS_NODIRATIME ,
-.BR MS_RELATIME )
-of an existing mount, but the mount is locked; see
-.BR mount_namespaces (7).
-.TP
-.B EROFS
-Mounting a read-only filesystem was attempted without giving the
-.B MS_RDONLY
-flag.
-See
-.BR EACCES ,
-above.
-.\"
-.SH STANDARDS
-Linux.
-.SH HISTORY
-The definitions of
-.BR MS_DIRSYNC ,
-.BR MS_MOVE ,
-.BR MS_PRIVATE ,
-.BR MS_REC ,
-.BR MS_RELATIME ,
-.BR MS_SHARED ,
-.BR MS_SLAVE ,
-.BR MS_STRICTATIME ,
-and
-.B MS_UNBINDABLE
-were added to glibc headers in glibc 2.12.
-.P
-Since Linux 2.4 a single filesystem can be mounted at
-multiple mount points, and multiple mounts can be stacked
-on the same mount point.
-.\" Multiple mounts on same mount point: since Linux 2.3.99pre7.
-.P
-The
-.I mountflags
-argument may have the magic number 0xC0ED (\fBMS_MGC_VAL\fP)
-in the top 16 bits.
-(All of the other flags discussed in DESCRIPTION
-occupy the low order 16 bits of
-.IR mountflags .)
-Specifying
-.B MS_MGC_VAL
-was required before Linux 2.4,
-but since Linux 2.4 is no longer required and is ignored if specified.
-.P
-The original
-.B MS_SYNC
-flag was renamed
-.B MS_SYNCHRONOUS
-in Linux 1.1.69
-when a different
-.B MS_SYNC
-was added to \fI<mman.h>\fP.
-.P
-Before Linux 2.4 an attempt to execute a set-user-ID or set-group-ID program
-on a filesystem mounted with
-.B MS_NOSUID
-would fail with
-.BR EPERM .
-Since Linux 2.4 the set-user-ID and set-group-ID bits are
-just silently ignored in this case.
-.\" The change is in patch-2.4.0-prerelease.
-.\"
-.SH NOTES
-.SS Mount namespaces
-Starting with Linux 2.4.19, Linux provides mount namespaces.
-A mount namespace is the set of filesystem mounts that
-are visible to a process.
-Mount namespaces can be (and usually are)
-shared between multiple processes,
-and changes to the namespace (i.e., mounts and unmounts) by one process
-are visible to all other processes sharing the same namespace.
-(The pre-2.4.19 Linux situation can be considered as one in which
-a single namespace was shared by every process on the system.)
-.P
-A child process created by
-.BR fork (2)
-shares its parent's mount namespace;
-the mount namespace is preserved across an
-.BR execve (2).
-.P
-A process can obtain a private mount namespace if:
-it was created using the
-.BR clone (2)
-.B CLONE_NEWNS
-flag,
-in which case its new namespace is initialized to be a
-.I copy
-of the namespace of the process that called
-.BR clone (2);
-or it calls
-.BR unshare (2)
-with the
-.B CLONE_NEWNS
-flag,
-which causes the caller's mount namespace to obtain a private copy
-of the namespace that it was previously sharing with other processes,
-so that future mounts and unmounts by the caller are invisible
-to other processes (except child processes that the caller
-subsequently creates) and vice versa.
-.P
-For further details on mount namespaces, see
-.BR mount_namespaces (7).
-.\"
-.SS Parental relationship between mounts
-Each mount has a parent mount.
-The overall parental relationship of all mounts defines
-the single directory hierarchy seen by the processes within a mount namespace.
-.P
-The parent of a new mount is defined when the mount is created.
-In the usual case,
-the parent of a new mount is the mount of the filesystem
-containing the directory or file at which the new mount is attached.
-In the case where a new mount is stacked on top of an existing mount,
-the parent of the new mount is the previous mount that was stacked
-at that location.
-.P
-The parental relationship between mounts can be discovered via the
-.IR /proc/ pid /mountinfo
-file (see below).
-.\"
-.SS \fI/proc/\fPpid\fI/mounts\fP and \fI/proc/\fPpid\fI/mountinfo\fP
-The Linux-specific
-.IR /proc/ pid /mounts
-file exposes the list of mounts in the mount
-namespace of the process with the specified ID.
-The
-.IR /proc/ pid /mountinfo
-file exposes even more information about mounts,
-including the propagation type and mount ID information that makes it
-possible to discover the parental relationship between mounts.
-See
-.BR proc (5)
-and
-.BR mount_namespaces (7)
-for details of this file.
-.SH SEE ALSO
-.BR mountpoint (1),
-.BR chroot (2),
-.BR ioctl_iflags (2),
-.BR mount_setattr (2),
-.BR pivot_root (2),
-.BR umount (2),
-.BR mount_namespaces (7),
-.BR path_resolution (7),
-.BR findmnt (8),
-.BR lsblk (8),
-.BR mount (8),
-.BR umount (8)
diff --git a/man2/mount_setattr.2 b/man2/mount_setattr.2
deleted file mode 100644
index f4bbc088b..000000000
--- a/man2/mount_setattr.2
+++ /dev/null
@@ -1,1067 +0,0 @@
-.\" Copyright (c) 2021 by Christian Brauner <christian.brauner@ubuntu.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH mount_setattr 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mount_setattr \- change properties of a mount or mount tree
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/fcntl.h>" " /* Definition of " AT_* " constants */"
-.BR "#include <linux/mount.h>" " /* Definition of " MOUNT_ATTR_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_mount_setattr, int " dirfd ", const char *" pathname ,
-.BI " unsigned int " flags ", struct mount_attr *" attr \
-", size_t " size );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR mount_setattr (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR mount_setattr ()
-system call changes the mount properties of a mount or an entire mount tree.
-If
-.I pathname
-is a relative pathname,
-then it is interpreted relative to
-the directory referred to by the file descriptor
-.IR dirfd .
-If
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to
-the current working directory of the calling process.
-If
-.I pathname
-is the empty string and
-.B AT_EMPTY_PATH
-is specified in
-.IR flags ,
-then the mount properties of the mount identified by
-.I dirfd
-are changed.
-(See
-.BR openat (2)
-for an explanation of why the
-.I dirfd
-argument is useful.)
-.P
-The
-.BR mount_setattr ()
-system call uses an extensible structure
-.RI ( "struct mount_attr" )
-to allow for future extensions.
-Any non-flag extensions to
-.BR mount_setattr ()
-will be implemented as new fields appended to this structure,
-with a zero value in a new field resulting in the kernel behaving
-as though that extension field was not present.
-Therefore,
-the caller
-.I must
-zero-fill this structure on initialization.
-See the "Extensibility" subsection under
-.B NOTES
-for more details.
-.P
-The
-.I size
-argument should usually be specified as
-.IR "sizeof(struct mount_attr)" .
-However, if the caller is using a kernel that supports an extended
-.IR "struct mount_attr" ,
-but the caller does not intend to make use of these features,
-it is possible to pass the size of an earlier
-version of the structure together with the extended structure.
-This allows the kernel to not copy later parts of the structure
-that aren't used anyway.
-With each extension that changes the size of
-.IR "struct mount_attr" ,
-the kernel will expose a definition of the form
-.BI MOUNT_ATTR_SIZE_VER number\c
-\&.
-For example, the macro for the size of the initial version of
-.I struct mount_attr
-is
-.BR MOUNT_ATTR_SIZE_VER0 .
-.P
-The
-.I flags
-argument can be used to alter the pathname resolution behavior.
-The supported values are:
-.TP
-.B AT_EMPTY_PATH
-If
-.I pathname
-is the empty string,
-change the mount properties on
-.I dirfd
-itself.
-.TP
-.B AT_RECURSIVE
-Change the mount properties of the entire mount tree.
-.TP
-.B AT_SYMLINK_NOFOLLOW
-Don't follow trailing symbolic links.
-.TP
-.B AT_NO_AUTOMOUNT
-Don't trigger automounts.
-.P
-The
-.I attr
-argument of
-.BR mount_setattr ()
-is a structure of the following form:
-.P
-.in +4n
-.EX
-struct mount_attr {
- __u64 attr_set; /* Mount properties to set */
- __u64 attr_clr; /* Mount properties to clear */
- __u64 propagation; /* Mount propagation type */
- __u64 userns_fd; /* User namespace file descriptor */
-};
-.EE
-.in
-.P
-The
-.I attr_set
-and
-.I attr_clr
-members are used to specify the mount properties that
-are supposed to be set or cleared for a mount or mount tree.
-Flags set in
-.I attr_set
-enable a property on a mount or mount tree,
-and flags set in
-.I attr_clr
-remove a property from a mount or mount tree.
-.P
-When changing mount properties,
-the kernel will first clear the flags specified
-in the
-.I attr_clr
-field,
-and then set the flags specified in the
-.I attr_set
-field.
-For example, these settings:
-.P
-.in +4n
-.EX
-struct mount_attr attr = {
- .attr_clr = MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV,
- .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
-};
-.EE
-.in
-.P
-are equivalent to the following steps:
-.P
-.in +4n
-.EX
-unsigned int current_mnt_flags = mnt\->mnt_flags;
-\&
-/*
- * Clear all flags set in .attr_clr,
- * clearing MOUNT_ATTR_NOEXEC and MOUNT_ATTR_NODEV.
- */
-current_mnt_flags &= \(tiattr\->attr_clr;
-\&
-/*
- * Now set all flags set in .attr_set,
- * applying MOUNT_ATTR_RDONLY and MOUNT_ATTR_NOSUID.
- */
-current_mnt_flags |= attr\->attr_set;
-\&
-mnt\->mnt_flags = current_mnt_flags;
-.EE
-.in
-.P
-As a result of this change, the mount or mount tree (a) is read-only;
-(b) blocks the execution of set-user-ID and set-group-ID programs;
-(c) allows execution of programs; and (d) allows access to devices.
-.P
-Multiple changes with the same set of flags requested
-in
-.I attr_clr
-and
-.I attr_set
-are guaranteed to be idempotent after the changes have been applied.
-.P
-The following mount attributes can be specified in the
-.I attr_set
-or
-.I attr_clr
-fields:
-.TP
-.B MOUNT_ATTR_RDONLY
-If set in
-.IR attr_set ,
-makes the mount read-only.
-If set in
-.IR attr_clr ,
-removes the read-only setting if set on the mount.
-.TP
-.B MOUNT_ATTR_NOSUID
-If set in
-.IR attr_set ,
-causes the mount not to honor the set-user-ID and set-group-ID mode bits and
-file capabilities when executing programs.
-If set in
-.IR attr_clr ,
-clears the set-user-ID, set-group-ID,
-and file capability restriction if set on this mount.
-.TP
-.B MOUNT_ATTR_NODEV
-If set in
-.IR attr_set ,
-prevents access to devices on this mount.
-If set in
-.IR attr_clr ,
-removes the restriction that prevented accessing devices on this mount.
-.TP
-.B MOUNT_ATTR_NOEXEC
-If set in
-.IR attr_set ,
-prevents executing programs on this mount.
-If set in
-.IR attr_clr ,
-removes the restriction that prevented executing programs on this mount.
-.TP
-.B MOUNT_ATTR_NOSYMFOLLOW
-If set in
-.IR attr_set ,
-prevents following symbolic links on this mount.
-If set in
-.IR attr_clr ,
-removes the restriction that prevented following symbolic links on this mount.
-.TP
-.B MOUNT_ATTR_NODIRATIME
-If set in
-.IR attr_set ,
-prevents updating access time for directories on this mount.
-If set in
-.IR attr_clr ,
-removes the restriction that prevented updating access time for directories.
-Note that
-.B MOUNT_ATTR_NODIRATIME
-can be combined with other access-time settings
-and is implied by the noatime setting.
-All other access-time settings are mutually exclusive.
-.TP
-.BR MOUNT_ATTR__ATIME " - changing access-time settings"
-The access-time values listed below are an enumeration that
-includes the value zero, expressed in the bits defined by the mask
-.BR MOUNT_ATTR__ATIME .
-Even though these bits are an enumeration
-(in contrast to the other mount flags such as
-.BR MOUNT_ATTR_NOEXEC ),
-they are nonetheless passed in
-.I attr_set
-and
-.I attr_clr
-for consistency with
-.BR fsmount (2),
-which introduced this behavior.
-.IP
-Note that,
-since the access-time values are an enumeration rather than bit values,
-a caller wanting to transition to a different access-time setting
-cannot simply specify the access-time setting in
-.IR attr_set ,
-but must also include
-.B MOUNT_ATTR__ATIME
-in the
-.I attr_clr
-field.
-The kernel will verify that
-.B MOUNT_ATTR__ATIME
-isn't partially set in
-.I attr_clr
-(i.e., all bits in the
-.B MOUNT_ATTR__ATIME
-bit field are either set or clear), and that
-.I attr_set
-doesn't have any access-time bits set if
-.B MOUNT_ATTR__ATIME
-isn't set in
-.IR attr_clr .
-.RS
-.TP
-.B MOUNT_ATTR_RELATIME
-When a file is accessed via this mount,
-update the file's last access time (atime)
-only if the current value of atime is less than or equal to
-the file's last modification time (mtime) or last status change time (ctime).
-.IP
-To enable this access-time setting on a mount or mount tree,
-.B MOUNT_ATTR_RELATIME
-must be set in
-.I attr_set
-and
-.B MOUNT_ATTR__ATIME
-must be set in the
-.I attr_clr
-field.
-.TP
-.B MOUNT_ATTR_NOATIME
-Do not update access times for (all types of) files on this mount.
-.IP
-To enable this access-time setting on a mount or mount tree,
-.B MOUNT_ATTR_NOATIME
-must be set in
-.I attr_set
-and
-.B MOUNT_ATTR__ATIME
-must be set in the
-.I attr_clr
-field.
-.TP
-.B MOUNT_ATTR_STRICTATIME
-Always update the last access time (atime)
-when files are accessed on this mount.
-.IP
-To enable this access-time setting on a mount or mount tree,
-.B MOUNT_ATTR_STRICTATIME
-must be set in
-.I attr_set
-and
-.B MOUNT_ATTR__ATIME
-must be set in the
-.I attr_clr
-field.
-.RE
-.TP
-.B MOUNT_ATTR_IDMAP
-If set in
-.IR attr_set ,
-creates an ID-mapped mount.
-The ID mapping is taken from the user namespace specified in
-.I userns_fd
-and attached to the mount.
-.IP
-Since it is not supported to
-change the ID mapping of a mount after it has been ID mapped,
-it is invalid to specify
-.B MOUNT_ATTR_IDMAP
-in
-.IR attr_clr .
-.IP
-For further details, see the subsection "ID-mapped mounts" under NOTES.
-.P
-The
-.I propagation
-field is used to specify the propagation type of the mount or mount tree.
-This field either has the value zero,
-meaning leave the propagation type unchanged, or it has one of
-the following values:
-.TP
-.B MS_PRIVATE
-Turn all mounts into private mounts.
-.TP
-.B MS_SHARED
-Turn all mounts into shared mounts.
-.TP
-.B MS_SLAVE
-Turn all mounts into dependent mounts.
-.TP
-.B MS_UNBINDABLE
-Turn all mounts into unbindable mounts.
-.P
-For further details on the above propagation types, see
-.BR mount_namespaces (7).
-.SH RETURN VALUE
-On success,
-.BR mount_setattr ()
-returns zero.
-On error,
-\-1 is returned and
-.I errno
-is set to indicate the cause of the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EBADF
-.I userns_fd
-is not a valid file descriptor.
-.TP
-.B EBUSY
-The caller tried to change the mount to
-.BR MOUNT_ATTR_RDONLY ,
-but the mount still holds files open for writing.
-.TP
-.B EBUSY
-The caller tried to create an ID-mapped mount raising
-.B MOUNT_ATTR_IDMAP
-and specifying
-.I userns_fd
-but the mount still holds files open for writing.
-.TP
-.B EINVAL
-The pathname specified via the
-.I dirfd
-and
-.I pathname
-arguments to
-.BR mount_setattr ()
-isn't a mount point.
-.TP
-.B EINVAL
-An unsupported value was set in
-.IR flags .
-.TP
-.B EINVAL
-An unsupported value was specified in the
-.I attr_set
-field of
-.IR mount_attr .
-.TP
-.B EINVAL
-An unsupported value was specified in the
-.I attr_clr
-field of
-.IR mount_attr .
-.TP
-.B EINVAL
-An unsupported value was specified in the
-.I propagation
-field of
-.IR mount_attr .
-.TP
-.B EINVAL
-More than one of
-.BR MS_SHARED ,
-.BR MS_SLAVE ,
-.BR MS_PRIVATE ,
-or
-.B MS_UNBINDABLE
-was set in the
-.I propagation
-field of
-.IR mount_attr .
-.TP
-.B EINVAL
-An access-time setting was specified in the
-.I attr_set
-field without
-.B MOUNT_ATTR__ATIME
-being set in the
-.I attr_clr
-field.
-.TP
-.B EINVAL
-.B MOUNT_ATTR_IDMAP
-was specified in
-.IR attr_clr .
-.TP
-.B EINVAL
-A file descriptor value was specified in
-.I userns_fd
-which exceeds
-.BR INT_MAX .
-.TP
-.B EINVAL
-A valid file descriptor value was specified in
-.IR userns_fd ,
-but the file descriptor did not refer to a user namespace.
-.TP
-.B EINVAL
-The underlying filesystem does not support ID-mapped mounts.
-.TP
-.B EINVAL
-The mount that is to be ID mapped is not a detached mount;
-that is, the mount has already been visible in a mount namespace.
-.TP
-.B EINVAL
-A partial access-time setting was specified in
-.I attr_clr
-instead of
-.B MOUNT_ATTR__ATIME
-being set.
-.TP
-.B EINVAL
-The mount is located outside the caller's mount namespace.
-.TP
-.B EINVAL
-The underlying filesystem has been mounted in a mount namespace that is
-owned by a noninitial user namespace.
-.TP
-.B ENOENT
-A pathname was empty or had a nonexistent component.
-.TP
-.B ENOMEM
-When changing mount propagation to
-.BR MS_SHARED ,
-a new peer group ID needs to be allocated for all mounts without a peer group
-ID set.
-This allocation failed because there was not
-enough memory to allocate the relevant internal structures.
-.TP
-.B ENOSPC
-When changing mount propagation to
-.BR MS_SHARED ,
-a new peer group ID needs to be allocated for all mounts without a peer group
-ID set.
-This allocation failed because
-the kernel has run out of IDs.
-.\" Christian Brauner: i.e. someone has somehow managed to
-.\" allocate so many peer groups and managed to keep the kernel running
-.\" (???) that the ida has run out of ids
-.\" Note that technically further error codes are possible that are
-.\" specific to the ID allocation implementation used.
-.TP
-.B EPERM
-One of the mounts had at least one of
-.BR MOUNT_ATTR_NOATIME ,
-.BR MOUNT_ATTR_NODEV ,
-.BR MOUNT_ATTR_NODIRATIME ,
-.BR MOUNT_ATTR_NOEXEC ,
-.BR MOUNT_ATTR_NOSUID ,
-or
-.B MOUNT_ATTR_RDONLY
-set and the flag is locked.
-Mount attributes become locked on a mount if:
-.RS
-.IP \[bu] 3
-A new mount or mount tree is created causing mount propagation across user
-namespaces
-(i.e., propagation to a mount namespace owned by a different user namespace).
-The kernel will lock the aforementioned flags to prevent these sensitive
-properties from being altered.
-.IP \[bu]
-A new mount and user namespace pair is created.
-This happens for example when specifying
-.B CLONE_NEWUSER | CLONE_NEWNS
-in
-.BR unshare (2),
-.BR clone (2),
-or
-.BR clone3 (2).
-The aforementioned flags become locked in the new mount namespace
-to prevent sensitive mount properties from being altered.
-Since the newly created mount namespace will be owned by the
-newly created user namespace,
-a calling process that is privileged in the new
-user namespace would\[em]in the absence of such locking\[em]be
-able to alter sensitive mount properties (e.g., to remount a mount
-that was marked read-only as read-write in the new mount namespace).
-.RE
-.TP
-.B EPERM
-A valid file descriptor value was specified in
-.IR userns_fd ,
-but the file descriptor refers to the initial user namespace.
-.TP
-.B EPERM
-An attempt was made to add an ID mapping to a mount that is already ID mapped.
-.TP
-.B EPERM
-The caller does not have
-.B CAP_SYS_ADMIN
-in the initial user namespace.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.12.
-.\" commit 7d6beb71da3cc033649d641e1e608713b8220290
-.\" commit 2a1867219c7b27f928e2545782b86daaf9ad50bd
-.\" commit 9caccd41541a6f7d6279928d9f971f6642c361af
-.SH NOTES
-.SS ID-mapped mounts
-Creating an ID-mapped mount makes it possible to
-change the ownership of all files located under a mount.
-Thus, ID-mapped mounts make it possible to
-change ownership in a temporary and localized way.
-It is a localized change because the ownership changes are
-visible only via a specific mount.
-All other users and locations where the filesystem is exposed are unaffected.
-It is a temporary change because
-the ownership changes are tied to the lifetime of the mount.
-.P
-Whenever callers interact with the filesystem through an ID-mapped mount,
-the ID mapping of the mount will be applied to
-user and group IDs associated with filesystem objects.
-This encompasses the user and group IDs associated with inodes
-and also the following
-.BR xattr (7)
-keys:
-.IP \[bu] 3
-.IR security.capability ,
-whenever filesystem capabilities
-are stored or returned in the
-.B VFS_CAP_REVISION_3
-format,
-which stores a root user ID alongside the capabilities
-(see
-.BR capabilities (7)).
-.IP \[bu]
-.I system.posix_acl_access
-and
-.IR system.posix_acl_default ,
-whenever user IDs or group IDs are stored in
-.B ACL_USER
-or
-.B ACL_GROUP
-entries.
-.P
-The following conditions must be met in order to create an ID-mapped mount:
-.IP \[bu] 3
-The caller must have the
-.B CAP_SYS_ADMIN
-capability in the user namespace the filesystem was mounted in.
-.\" commit bd303368b776eead1c29e6cdda82bde7128b82a7
-.\" Christian Brauner
-.\" Note, currently no filesystems mountable in non-initial user namespaces
-.\" support ID-mapped mounts.
-.IP \[bu]
-The underlying filesystem must support ID-mapped mounts.
-Currently, the following filesystems support ID-mapped mounts:
-.\" fs_flags = FS_ALLOW_IDMAP in kernel sources
-.RS
-.IP \[bu] 3
-.PD 0
-.BR xfs (5)
-(since Linux 5.12)
-.IP \[bu]
-.BR ext4 (5)
-(since Linux 5.12)
-.IP \[bu]
-.B FAT
-(since Linux 5.12)
-.IP \[bu]
-.BR btrfs (5)
-(since Linux 5.15)
-.\" commit 5b9b26f5d0b88b74001dcfe4ab8a8f2f4e744112
-.IP \[bu]
-.B ntfs3
-(since Linux 5.15)
-.\" commit 82cae269cfa953032fbb8980a7d554d60fb00b17
-.IP \[bu]
-.B f2fs
-(since Linux 5.18)
-.\" commit 984fc4e76d63345499f01c0c198a4b44860cf027
-.IP \[bu]
-.B erofs
-(since Linux 5.19)
-.\" commit 6c459b78d4793afbba6d864c466cc5cd2932459d
-.IP \[bu]
-.B overlayfs
-(ID-mapped lower and upper layers supported since Linux 5.19)
-.IP \[bu]
-.B squashfs
-(since Linux 6.2)
-.IP \[bu]
-.B tmpfs
-(since Linux 6.3)
-.IP \[bu]
-.B cephfs
-(since Linux 6.7)
-.IP \[bu]
-.B hugetlbfs
-(since Linux 6.9)
-.PD
-.RE
-.IP \[bu]
-The mount must not already be ID-mapped.
-This also implies that the ID mapping of a mount cannot be altered.
-.IP \[bu]
-The mount must not have any writers.
-.\" commit 1bbcd277a53e08d619ffeec56c5c9287f2bf42f
-.IP \[bu]
-The mount must be a detached mount;
-that is,
-it must have been created by calling
-.BR open_tree (2)
-with the
-.B OPEN_TREE_CLONE
-flag and it must not already have been visible in a mount namespace.
-(To put things another way:
-the mount must not have been attached to the filesystem hierarchy
-with a system call such as
-.BR move_mount (2).)
-.P
-ID mappings can be created for user IDs, group IDs, and project IDs.
-An ID mapping is essentially a mapping of a range of user or group IDs into
-another or the same range of user or group IDs.
-ID mappings are written to map files as three numbers
-separated by white space.
-The first two numbers specify the starting user or group ID
-in each of the two user namespaces.
-The third number specifies the range of the ID mapping.
-For example,
-a mapping for user IDs such as "1000\ 1001\ 1" would indicate that
-user ID 1000 in the caller's user namespace is mapped to
-user ID 1001 in its ancestor user namespace.
-Since the map range is 1,
-only user ID 1000 is mapped.
-.P
-It is possible to specify up to 340 ID mappings for each ID mapping type.
-If any user IDs or group IDs are not mapped,
-all files owned by that unmapped user or group ID will appear as
-being owned by the overflow user ID or overflow group ID respectively.
-.P
-Further details on setting up ID mappings can be found in
-.BR user_namespaces (7).
-.P
-In the common case, the user namespace passed in
-.I userns_fd
-(together with
-.B MOUNT_ATTR_IDMAP
-in
-.IR attr_set )
-to create an ID-mapped mount will be the user namespace of a container.
-In other scenarios it will be a dedicated user namespace associated with
-a user's login session as is the case for portable home directories in
-.BR systemd-homed.service (8).
-It is also perfectly fine to create a dedicated user namespace
-for the sake of ID mapping a mount.
-.P
-ID-mapped mounts can be useful in the following
-and a variety of other scenarios:
-.IP \[bu] 3
-Sharing files or filesystems
-between multiple users or multiple machines,
-especially in complex scenarios.
-For example,
-ID-mapped mounts are used to implement portable home directories in
-.BR systemd-homed.service (8),
-where they allow users to move their home directory
-to an external storage device
-and use it on multiple computers
-where they are assigned different user IDs and group IDs.
-This effectively makes it possible to
-assign random user IDs and group IDs at login time.
-.IP \[bu]
-Sharing files or filesystems
-from the host with unprivileged containers.
-This allows a user to avoid having to change ownership permanently through
-.BR chown (2).
-.IP \[bu]
-ID mapping a container's root filesystem.
-Users don't need to change ownership permanently through
-.BR chown (2).
-Especially for large root filesystems, using
-.BR chown (2)
-can be prohibitively expensive.
-.IP \[bu]
-Sharing files or filesystems
-between containers with non-overlapping ID mappings.
-.IP \[bu]
-Implementing discretionary access control (DAC) permission checking
-for filesystems lacking a concept of ownership.
-.IP \[bu]
-Efficiently changing ownership on a per-mount basis.
-In contrast to
-.BR chown (2),
-changing ownership of large sets of files is instantaneous with
-ID-mapped mounts.
-This is especially useful when ownership of
-an entire root filesystem of a virtual machine or container
-is to be changed as mentioned above.
-With ID-mapped mounts,
-a single
-.BR mount_setattr ()
-system call will be sufficient to change the ownership of all files.
-.IP \[bu]
-Taking the current ownership into account.
-ID mappings specify precisely
-what a user or group ID is supposed to be mapped to.
-This contrasts with the
-.BR chown (2)
-system call which cannot by itself
-take the current ownership of the files it changes into account.
-It simply changes the ownership to the specified user ID and group ID.
-.IP \[bu]
-Locally and temporarily restricted ownership changes.
-ID-mapped mounts make it possible to change ownership locally,
-restricting the ownership changes to specific mounts,
-and temporarily as the ownership changes only apply as long as the mount exists.
-By contrast,
-changing ownership via the
-.BR chown (2)
-system call changes the ownership globally and permanently.
-.\"
-.SS Extensibility
-In order to allow for future extensibility,
-.BR mount_setattr ()
-requires the user-space application to specify the size of the
-.I mount_attr
-structure that it is passing.
-By providing this information, it is possible for
-.BR mount_setattr ()
-to provide both forwards- and backwards-compatibility, with
-.I size
-acting as an implicit version number.
-(Because new extension fields will always
-be appended, the structure size will always increase.)
-This extensibility design is very similar to other system calls such as
-.BR sched_setattr (2),
-.BR perf_event_open (2),
-.BR clone3 (2)
-and
-.BR openat2 (2).
-.P
-Let
-.I usize
-be the size of the structure as specified by the user-space application,
-and let
-.I ksize
-be the size of the structure which the kernel supports,
-then there are three cases to consider:
-.IP \[bu] 3
-If
-.I ksize
-equals
-.IR usize ,
-then there is no version mismatch and
-.I attr
-can be used verbatim.
-.IP \[bu]
-If
-.I ksize
-is larger than
-.IR usize ,
-then there are some extension fields that the kernel supports
-which the user-space application is unaware of.
-Because a zero value in any added extension field signifies a no-op,
-the kernel treats all of the extension fields
-not provided by the user-space application
-as having zero values.
-This provides backwards-compatibility.
-.IP \[bu]
-If
-.I ksize
-is smaller than
-.IR usize ,
-then there are some extension fields which the user-space application is aware
-of but which the kernel does not support.
-Because any extension field must have its zero values signify a no-op,
-the kernel can safely ignore the unsupported extension fields
-if they are all zero.
-If any unsupported extension fields are non-zero,
-then \-1 is returned and
-.I errno
-is set to
-.BR E2BIG .
-This provides forwards-compatibility.
-.P
-Because the definition of
-.I struct mount_attr
-may change in the future
-(with new fields being added when system headers are updated),
-user-space applications should zero-fill
-.I struct mount_attr
-to ensure that recompiling the program with new headers will not result in
-spurious errors at run time.
-The simplest way is to use a designated initializer:
-.P
-.in +4n
-.EX
-struct mount_attr attr = {
- .attr_set = MOUNT_ATTR_RDONLY,
- .attr_clr = MOUNT_ATTR_NODEV
-};
-.EE
-.in
-.P
-Alternatively, the structure can be zero-filled using
-.BR memset (3)
-or similar functions:
-.P
-.in +4n
-.EX
-struct mount_attr attr;
-memset(&attr, 0, sizeof(attr));
-attr.attr_set = MOUNT_ATTR_RDONLY;
-attr.attr_clr = MOUNT_ATTR_NODEV;
-.EE
-.in
-.P
-A user-space application that wishes to determine which extensions the running
-kernel supports can do so by conducting a binary search on
-.I size
-with a structure which has every byte nonzero
-(to find the largest value which doesn't produce an error of
-.BR E2BIG ).
-.SH EXAMPLES
-.\" SRC BEGIN (mount_setattr.c)
-.EX
-/*
- * This program allows the caller to create a new detached mount
- * and set various properties on it.
- */
-#define _GNU_SOURCE
-#include <err.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <linux/mount.h>
-#include <linux/types.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-static inline int
-mount_setattr(int dirfd, const char *pathname, unsigned int flags,
- struct mount_attr *attr, size_t size)
-{
- return syscall(SYS_mount_setattr, dirfd, pathname, flags,
- attr, size);
-}
-\&
-static inline int
-open_tree(int dirfd, const char *filename, unsigned int flags)
-{
- return syscall(SYS_open_tree, dirfd, filename, flags);
-}
-\&
-static inline int
-move_mount(int from_dirfd, const char *from_pathname,
- int to_dirfd, const char *to_pathname, unsigned int flags)
-{
- return syscall(SYS_move_mount, from_dirfd, from_pathname,
- to_dirfd, to_pathname, flags);
-}
-\&
-static const struct option longopts[] = {
- {"map\-mount", required_argument, NULL, \[aq]a\[aq]},
- {"recursive", no_argument, NULL, \[aq]b\[aq]},
- {"read\-only", no_argument, NULL, \[aq]c\[aq]},
- {"block\-setid", no_argument, NULL, \[aq]d\[aq]},
- {"block\-devices", no_argument, NULL, \[aq]e\[aq]},
- {"block\-exec", no_argument, NULL, \[aq]f\[aq]},
- {"no\-access\-time", no_argument, NULL, \[aq]g\[aq]},
- { NULL, 0, NULL, 0 },
-};
-\&
-int
-main(int argc, char *argv[])
-{
- int fd_userns = \-1;
- int fd_tree;
- int index = 0;
- int ret;
- bool recursive = false;
- const char *source;
- const char *target;
- struct mount_attr *attr = &(struct mount_attr){};
-\&
- while ((ret = getopt_long_only(argc, argv, "",
- longopts, &index)) != \-1) {
- switch (ret) {
- case \[aq]a\[aq]:
- fd_userns = open(optarg, O_RDONLY | O_CLOEXEC);
- if (fd_userns == \-1)
- err(EXIT_FAILURE, "open(%s)", optarg);
- break;
- case \[aq]b\[aq]:
- recursive = true;
- break;
- case \[aq]c\[aq]:
- attr\->attr_set |= MOUNT_ATTR_RDONLY;
- break;
- case \[aq]d\[aq]:
- attr\->attr_set |= MOUNT_ATTR_NOSUID;
- break;
- case \[aq]e\[aq]:
- attr\->attr_set |= MOUNT_ATTR_NODEV;
- break;
- case \[aq]f\[aq]:
- attr\->attr_set |= MOUNT_ATTR_NOEXEC;
- break;
- case \[aq]g\[aq]:
- attr\->attr_set |= MOUNT_ATTR_NOATIME;
- attr\->attr_clr |= MOUNT_ATTR__ATIME;
- break;
- default:
- errx(EXIT_FAILURE, "Invalid argument specified");
- }
- }
-\&
- if ((argc \- optind) < 2)
- errx(EXIT_FAILURE, "Missing source or target mount point");
-\&
- source = argv[optind];
- target = argv[optind + 1];
-\&
- /* In the following, \-1 as the \[aq]dirfd\[aq] argument ensures that
- open_tree() fails if \[aq]source\[aq] is not an absolute pathname. */
-.\" Christian Brauner
-.\" When writing programs I like to never use relative paths with AT_FDCWD,
-.\" because making assumptions about the current working directory
-.\" of the calling process is just too easy to get wrong; especially when
-.\" pivot_root() or chroot() are in play.
-.\" My absolut preference (joke intended) is to open a well-known starting
-.\" point with an absolute path to get a dirfd and then scope all future
-.\" operations beneath that dirfd. This already works with old-style
-.\" openat() and _very_ cautious programming but openat2() and its
-.\" resolve-flag space have made this **chef's kiss**.
-.\" If I can't operate based on a well-known dirfd I use absolute paths
-.\" with a -EBADF dirfd passed to *at() functions.
-\&
- fd_tree = open_tree(\-1, source,
- OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
- AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0));
- if (fd_tree == \-1)
- err(EXIT_FAILURE, "open(%s)", source);
-\&
- if (fd_userns >= 0) {
- attr\->attr_set |= MOUNT_ATTR_IDMAP;
- attr\->userns_fd = fd_userns;
- }
-\&
- ret = mount_setattr(fd_tree, "",
- AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
- attr, sizeof(struct mount_attr));
- if (ret == \-1)
- err(EXIT_FAILURE, "mount_setattr");
-\&
- close(fd_userns);
-\&
- /* In the following, \-1 as the \[aq]to_dirfd\[aq] argument ensures that
- move_mount() fails if \[aq]target\[aq] is not an absolute pathname. */
-\&
- ret = move_mount(fd_tree, "", \-1, target,
- MOVE_MOUNT_F_EMPTY_PATH);
- if (ret == \-1)
- err(EXIT_FAILURE, "move_mount() to %s", target);
-\&
- close(fd_tree);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR newgidmap (1),
-.BR newuidmap (1),
-.BR clone (2),
-.BR mount (2),
-.BR unshare (2),
-.BR proc (5),
-.BR capabilities (7),
-.BR mount_namespaces (7),
-.BR user_namespaces (7),
-.BR xattr (7)
diff --git a/man2/move_pages.2 b/man2/move_pages.2
deleted file mode 100644
index 1b6281bd5..000000000
--- a/man2/move_pages.2
+++ /dev/null
@@ -1,253 +0,0 @@
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" This manpage is Copyright (C) 2006 Silicon Graphics, Inc.
-.\" Christoph Lameter
-.\"
-.\" FIXME Should programs normally be using move_pages() directly, or should
-.\" they rather be using interfaces in the numactl package?
-.\" (e.g., compare with recommendation in mbind(2)).
-.\" Does this page need to give advice on this topic?
-.\"
-.TH move_pages 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-move_pages \- move individual pages of a process to another node
-.SH LIBRARY
-NUMA (Non-Uniform Memory Access) policy library
-.RI ( libnuma ", " \-lnuma )
-.SH SYNOPSIS
-.nf
-.B #include <numaif.h>
-.P
-.BI "long move_pages(int " pid ", unsigned long " count ", \
-void *" pages [. count ],
-.BI " const int " nodes [. count "], int " status [. count "], \
-int " flags );
-.fi
-.SH DESCRIPTION
-.BR move_pages ()
-moves the specified
-.I pages
-of the process
-.I pid
-to the memory nodes specified by
-.IR nodes .
-The result of the move is reflected in
-.IR status .
-The
-.I flags
-indicate constraints on the pages to be moved.
-.P
-.I pid
-is the ID of the process in which pages are to be moved.
-If
-.I pid
-is 0, then
-.BR move_pages ()
-moves pages of the calling process.
-.P
-To move pages in another process requires the following privileges:
-.IP \[bu] 3
-Up to and including Linux 4.12:
-the caller must be privileged
-.RB ( CAP_SYS_NICE )
-or the real or effective user ID of the calling process must match the
-real or saved-set user ID of the target process.
-.IP \[bu]
-The older rules allowed the caller to discover various
-virtual address choices made by the kernel that could lead
-to the defeat of address-space-layout randomization
-for a process owned by the same UID as the caller;
-therefore, the rules were changed starting with Linux 4.13.
-Since Linux 4.13,
-.\" commit 197e7e521384a23b9e585178f3f11c9fa08274b9
-permission is governed by a ptrace access mode
-.B PTRACE_MODE_READ_REALCREDS
-check with respect to the target process; see
-.BR ptrace (2).
-.P
-.I count
-is the number of pages to move.
-It defines the size of the three arrays
-.IR pages ,
-.IR nodes ,
-and
-.IR status .
-.P
-.I pages
-is an array of pointers to the pages that should be moved.
-These are pointers that should be aligned to page boundaries.
-.\" FIXME Describe the result if pointers in the 'pages' array are
-.\" not aligned to page boundaries
-Addresses are specified as seen by the process specified by
-.IR pid .
-.P
-.I nodes
-is an array of integers that specify the desired location for each page.
-Each element in the array is a node number.
-.I nodes
-can also be NULL, in which case
-.BR move_pages ()
-does not move any pages but instead will return the node
-where each page currently resides, in the
-.I status
-array.
-Obtaining the status of each page may be necessary to determine
-pages that need to be moved.
-.P
-.I status
-is an array of integers that return the status of each page.
-The array contains valid values only if
-.BR move_pages ()
-did not return an error.
-Preinitialization of the array to a value
-which cannot represent a real NUMA node or a valid error of the status array
-could help to identify pages that have been migrated.
-.P
-.I flags
-specify what types of pages to move.
-.B MPOL_MF_MOVE
-means that only pages that are in exclusive use by the process
-are to be moved.
-.B MPOL_MF_MOVE_ALL
-means that pages shared between multiple processes can also be moved.
-The process must be privileged
-.RB ( CAP_SYS_NICE )
-to use
-.BR MPOL_MF_MOVE_ALL .
-.SS Page states in the status array
-The following values can be returned in each element of the
-.I status
-array.
-.TP
-.B 0..MAX_NUMNODES
-Identifies the node on which the page resides.
-.TP
-.B \-EACCES
-The page is mapped by multiple processes and can be moved only if
-.B MPOL_MF_MOVE_ALL
-is specified.
-.TP
-.B \-EBUSY
-The page is currently busy and cannot be moved.
-Try again later.
-This occurs if a page is undergoing I/O or another kernel subsystem
-is holding a reference to the page.
-.TP
-.B \-EFAULT
-This is a zero page or the memory area is not mapped by the process.
-.TP
-.B \-EIO
-Unable to write back a page.
-The page has to be written back
-in order to move it since the page is dirty and the filesystem
-does not provide a migration function that would allow the move
-of dirty pages.
-.TP
-.B \-EINVAL
-A dirty page cannot be moved.
-The filesystem does not
-provide a migration function and has no ability to write back pages.
-.TP
-.B \-ENOENT
-The page is not present.
-.TP
-.B \-ENOMEM
-Unable to allocate memory on target node.
-.SH RETURN VALUE
-On success
-.BR move_pages ()
-returns zero.
-.\" FIXME . Is the following quite true: does the wrapper in numactl
-.\" do the right thing?
-On error, it returns \-1, and sets
-.I errno
-to indicate the error.
-If a positive value is returned, it is the number of
-nonmigrated pages.
-.SH ERRORS
-.TP
-.B Positive value
-The number of nonmigrated pages if they were the result of nonfatal
-reasons (since
-.\" commit a49bd4d7163707de377aee062f17befef6da891b
-Linux 4.17).
-.TP
-.B E2BIG
-Too many pages to move.
-Since Linux 2.6.29,
-.\" commit 3140a2273009c01c27d316f35ab76a37e105fdd8
-the kernel no longer generates this error.
-.TP
-.B EACCES
-.\" FIXME Clarify "current cpuset" in the description of the EACCES error.
-.\" Is that the cpuset of the caller or the target?
-One of the target nodes is not allowed by the current cpuset.
-.TP
-.B EFAULT
-Parameter array could not be accessed.
-.TP
-.B EINVAL
-Flags other than
-.B MPOL_MF_MOVE
-and
-.B MPOL_MF_MOVE_ALL
-were specified or an attempt was made to migrate pages of a kernel thread.
-.TP
-.B ENODEV
-One of the target nodes is not online.
-.TP
-.B EPERM
-The caller specified
-.B MPOL_MF_MOVE_ALL
-without sufficient privileges
-.RB ( CAP_SYS_NICE ).
-Or, the caller attempted to move pages of a process belonging
-to another user but did not have privilege to do so
-.RB ( CAP_SYS_NICE ).
-.TP
-.B ESRCH
-Process does not exist.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.18.
-.SH NOTES
-For information on library support, see
-.BR numa (7).
-.P
-Use
-.BR get_mempolicy (2)
-with the
-.B MPOL_F_MEMS_ALLOWED
-flag to obtain the set of nodes that are allowed by
-.\" FIXME Clarify "current cpuset". Is that the cpuset of the caller
-.\" or the target?
-the current cpuset.
-Note that this information is subject to change at any
-time by manual or automatic reconfiguration of the cpuset.
-.P
-Use of this function may result in pages whose location
-(node) violates the memory policy established for the
-specified addresses (see
-.BR mbind (2))
-and/or the specified process (see
-.BR set_mempolicy (2)).
-That is, memory policy does not constrain the destination
-nodes used by
-.BR move_pages ().
-.P
-The
-.I <numaif.h>
-header is not included with glibc, but requires installing
-.I libnuma\-devel
-or a similar package.
-.SH SEE ALSO
-.BR get_mempolicy (2),
-.BR mbind (2),
-.BR set_mempolicy (2),
-.BR numa (3),
-.BR numa_maps (5),
-.BR cpuset (7),
-.BR numa (7),
-.BR migratepages (8),
-.BR numastat (8)
diff --git a/man2/mprotect.2 b/man2/mprotect.2
deleted file mode 100644
index 63781f536..000000000
--- a/man2/mprotect.2
+++ /dev/null
@@ -1,363 +0,0 @@
-.\" Copyright (C) 2007 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (C) 1995 Michael Shields <shields@tembel.org>.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1997-05-31 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2003-08-24 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2004-08-16 by Andi Kleen <ak@muc.de>
-.\" 2007-06-02, mtk: Fairly substantial rewrites and additions, and
-.\" a much improved example program.
-.\"
-.TH mprotect 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mprotect, pkey_mprotect \- set protection on a region of memory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/mman.h>
-.P
-.BI "int mprotect(void " addr [. len "], size_t " len ", int " prot );
-.P
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sys/mman.h>
-.P
-.BI "int pkey_mprotect(void " addr [. len "], size_t " len ", int " prot ", int " pkey ");"
-.fi
-.SH DESCRIPTION
-.BR mprotect ()
-changes the access protections for the calling process's memory pages
-containing any part of the address range in the
-interval [\fIaddr\fP,\ \fIaddr\fP+\fIlen\fP\-1].
-.I addr
-must be aligned to a page boundary.
-.P
-If the calling process tries to access memory in a manner
-that violates the protections, then the kernel generates a
-.B SIGSEGV
-signal for the process.
-.P
-.I prot
-is a combination of the following access flags:
-.B PROT_NONE
-or a bitwise OR of the other values in the following list:
-.TP
-.B PROT_NONE
-The memory cannot be accessed at all.
-.TP
-.B PROT_READ
-The memory can be read.
-.TP
-.B PROT_WRITE
-The memory can be modified.
-.TP
-.B PROT_EXEC
-The memory can be executed.
-.TP
-.BR PROT_SEM " (since Linux 2.5.7)"
-The memory can be used for atomic operations.
-This flag was introduced as part of the
-.BR futex (2)
-implementation (in order to guarantee the ability to perform atomic
-operations required by commands such as
-.BR FUTEX_WAIT ),
-but is not currently used on any architecture.
-.TP
-.BR PROT_SAO " (since Linux 2.6.26)"
-.\" commit aba46c5027cb59d98052231b36efcbbde9c77a1d
-.\" commit ef3d3246a0d06be622867d21af25f997aeeb105f
-The memory should have strong access ordering.
-This feature is specific to
-the PowerPC architecture
-(version 2.06 of the architecture specification adds the SAO CPU feature,
-and it is available on POWER 7 or PowerPC A2, for example).
-.P
-Additionally (since Linux 2.6.0),
-.I prot
-can have one of the following flags set:
-.TP
-.\" mm/mmap.c:
-.\" vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
-.\" mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
-.\" And calc_vm_flag_bits converts only GROWSDOWN/DENYWRITE/LOCKED.
-.B PROT_GROWSUP
-Apply the protection mode up to the end of a mapping
-that grows upwards.
-(Such mappings are created for the stack area on
-architectures\[em]for example, HP-PARISC\[em]that
-have an upwardly growing stack.)
-.\" The VMA is one that was marked with VM_GROWSUP by the kernel
-.\" when the stack was created. Note that (unlike VM_GROWSDOWN),
-.\" there is no mmap() flag (analogous to MAP_GROWSDOWN) for
-.\" creating a VMA that is marked VM_GROWSUP.
-.TP
-.B PROT_GROWSDOWN
-Apply the protection mode down to the beginning of a mapping
-that grows downward
-(which should be a stack segment or a segment mapped with the
-.B MAP_GROWSDOWN
-flag set).
-.P
-Like
-.BR mprotect (),
-.BR pkey_mprotect ()
-changes the protection on the pages specified by
-.I addr
-and
-.IR len .
-The
-.I pkey
-argument specifies the protection key (see
-.BR pkeys (7))
-to assign to the memory.
-The protection key must be allocated with
-.BR pkey_alloc (2)
-before it is passed to
-.BR pkey_mprotect ().
-For an example of the use of this system call, see
-.BR pkeys (7).
-.SH RETURN VALUE
-On success,
-.BR mprotect ()
-and
-.BR pkey_mprotect ()
-return zero.
-On error, these system calls return \-1, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The memory cannot be given the specified access.
-This can happen, for example, if you
-.BR mmap (2)
-a file to which you have read-only access, then ask
-.BR mprotect ()
-to mark it
-.BR PROT_WRITE .
-.TP
-.B EINVAL
-\fIaddr\fP is not a valid pointer,
-or not a multiple of the system page size.
-.TP
-.B EINVAL
-.RB ( pkey_mprotect ())
-\fIpkey\fP has not been allocated with
-.BR pkey_alloc (2).
-.TP
-.B EINVAL
-Both
-.B PROT_GROWSUP
-and
-.B PROT_GROWSDOWN
-were specified in
-.IR prot .
-.TP
-.B EINVAL
-Invalid flags specified in
-.IR prot .
-.TP
-.B EINVAL
-(PowerPC architecture)
-.B PROT_SAO
-was specified in
-.IR prot ,
-but the SAO hardware feature is not available.
-.TP
-.B ENOMEM
-Internal kernel structures could not be allocated.
-.TP
-.B ENOMEM
-Addresses in the range
-.RI [ addr ,
-.IR addr + len \-1]
-are invalid for the address space of the process,
-or specify one or more pages that are not mapped.
-(Before Linux 2.4.19, the error
-.B EFAULT
-was incorrectly produced for these cases.)
-.TP
-.B ENOMEM
-Changing the protection of a memory region would result in the total number of
-mappings with distinct attributes (e.g., read versus read/write protection)
-exceeding the allowed maximum.
-.\" I.e., the number of VMAs would exceed the 64 kB maximum
-(For example, making the protection of a range
-.B PROT_READ
-in the middle of a region currently protected as
-.B PROT_READ|PROT_WRITE
-would result in three mappings:
-two read/write mappings at each end and a read-only mapping in the middle.)
-.SH VERSIONS
-.\" SVr4 defines an additional error
-.\" code EAGAIN. The SVr4 error conditions don't map neatly onto Linux's.
-POSIX says that the behavior of
-.BR mprotect ()
-is unspecified if it is applied to a region of memory that
-was not obtained via
-.BR mmap (2).
-.P
-On Linux, it is always permissible to call
-.BR mprotect ()
-on any address in a process's address space (except for the
-kernel vsyscall area).
-In particular, it can be used
-to change existing code mappings to be writable.
-.P
-Whether
-.B PROT_EXEC
-has any effect different from
-.B PROT_READ
-depends on processor architecture, kernel version, and process state.
-If
-.B READ_IMPLIES_EXEC
-is set in the process's personality flags (see
-.BR personality (2)),
-specifying
-.B PROT_READ
-will implicitly add
-.BR PROT_EXEC .
-.P
-On some hardware architectures (e.g., i386),
-.B PROT_WRITE
-implies
-.BR PROT_READ .
-.P
-POSIX.1 says that an implementation may permit access
-other than that specified in
-.IR prot ,
-but at a minimum can allow write access only if
-.B PROT_WRITE
-has been set, and must not allow any access if
-.B PROT_NONE
-has been set.
-.P
-Applications should be careful when mixing use of
-.BR mprotect ()
-and
-.BR pkey_mprotect ().
-On x86, when
-.BR mprotect ()
-is used with
-.I prot
-set to
-.B PROT_EXEC
-a pkey may be allocated and set on the memory implicitly
-by the kernel, but only when the pkey was 0 previously.
-.P
-On systems that do not support protection keys in hardware,
-.BR pkey_mprotect ()
-may still be used, but
-.I pkey
-must be set to \-1.
-When called this way, the operation of
-.BR pkey_mprotect ()
-is equivalent to
-.BR mprotect ().
-.SH STANDARDS
-.TP
-.BR mprotect ()
-POSIX.1-2008.
-.TP
-.BR pkey_mprotect ()
-Linux.
-.SH HISTORY
-.TP
-.BR mprotect ()
-POSIX.1-2001, SVr4.
-.TP
-.BR pkey_mprotect ()
-Linux 4.9,
-glibc 2.27.
-.SH NOTES
-.SH EXAMPLES
-.\" sigaction.2 refers to this example
-The program below demonstrates the use of
-.BR mprotect ().
-The program allocates four pages of memory, makes the third
-of these pages read-only, and then executes a loop that walks upward
-through the allocated region modifying bytes.
-.P
-An example of what we might see when running the program is the
-following:
-.P
-.in +4n
-.EX
-.RB "$" " ./a.out"
-Start of region: 0x804c000
-Got SIGSEGV at address: 0x804e000
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (mprotect.c)
-.EX
-#include <malloc.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <unistd.h>
-\&
-#define handle_error(msg) \e
- do { perror(msg); exit(EXIT_FAILURE); } while (0)
-\&
-static char *buffer;
-\&
-static void
-handler(int sig, siginfo_t *si, void *unused)
-{
- /* Note: calling printf() from a signal handler is not safe
- (and should not be done in production programs), since
- printf() is not async\-signal\-safe; see signal\-safety(7).
- Nevertheless, we use printf() here as a simple way of
- showing that the handler was called. */
-\&
- printf("Got SIGSEGV at address: %p\en", si\->si_addr);
- exit(EXIT_FAILURE);
-}
-\&
-int
-main(void)
-{
- int pagesize;
- struct sigaction sa;
-\&
- sa.sa_flags = SA_SIGINFO;
- sigemptyset(&sa.sa_mask);
- sa.sa_sigaction = handler;
- if (sigaction(SIGSEGV, &sa, NULL) == \-1)
- handle_error("sigaction");
-\&
- pagesize = sysconf(_SC_PAGE_SIZE);
- if (pagesize == \-1)
- handle_error("sysconf");
-\&
- /* Allocate a buffer aligned on a page boundary;
- initial protection is PROT_READ | PROT_WRITE. */
-\&
- buffer = memalign(pagesize, 4 * pagesize);
- if (buffer == NULL)
- handle_error("memalign");
-\&
- printf("Start of region: %p\en", buffer);
-\&
- if (mprotect(buffer + pagesize * 2, pagesize,
- PROT_READ) == \-1)
- handle_error("mprotect");
-\&
- for (char *p = buffer ; ; )
- *(p++) = \[aq]a\[aq];
-\&
- printf("Loop completed\en"); /* Should never happen */
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR mmap (2),
-.BR sysconf (3),
-.BR pkeys (7)
diff --git a/man2/mpx.2 b/man2/mpx.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/mpx.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/mq_getsetattr.2 b/man2/mq_getsetattr.2
deleted file mode 100644
index 8f08436fd..000000000
--- a/man2/mq_getsetattr.2
+++ /dev/null
@@ -1,33 +0,0 @@
-.\" Copyright (C) 2006 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH mq_getsetattr 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mq_getsetattr \- get/set message queue attributes
-.SH SYNOPSIS
-.nf
-.BR "#include <mqueue.h>" " /* Definition of " "struct mq_attr" " */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_mq_getsetattr, mqd_t " mqdes ,
-.BI " const struct mq_attr *" newattr ", struct mq_attr *" oldattr );
-.fi
-.SH DESCRIPTION
-Do not use this system call.
-.P
-This is the low-level system call used to implement
-.BR mq_getattr (3)
-and
-.BR mq_setattr (3).
-For an explanation of how this system call operates,
-see the description of
-.BR mq_setattr (3).
-.SH STANDARDS
-None.
-.SH NOTES
-Never call it unless you are writing a C library!
-.SH SEE ALSO
-.BR mq_getattr (3),
-.BR mq_overview (7)
diff --git a/man2/mq_notify.2 b/man2/mq_notify.2
deleted file mode 100644
index 505a45e2e..000000000
--- a/man2/mq_notify.2
+++ /dev/null
@@ -1,2 +0,0 @@
-.so man3/mq_notify.3
-.\" Because mq_notify(3) is layered on a system call of the same name
diff --git a/man2/mq_open.2 b/man2/mq_open.2
deleted file mode 100644
index ce82835e6..000000000
--- a/man2/mq_open.2
+++ /dev/null
@@ -1,2 +0,0 @@
-.so man3/mq_open.3
-.\" Because mq_open(3) is layered on a system call of the same name
diff --git a/man2/mq_timedreceive.2 b/man2/mq_timedreceive.2
deleted file mode 100644
index b4184f8c2..000000000
--- a/man2/mq_timedreceive.2
+++ /dev/null
@@ -1,2 +0,0 @@
-.so man3/mq_timedreceive.3
-.\" Because mq_timedreceive(3) is layered on a system call of the same name
diff --git a/man2/mq_timedsend.2 b/man2/mq_timedsend.2
deleted file mode 100644
index db95863ad..000000000
--- a/man2/mq_timedsend.2
+++ /dev/null
@@ -1,2 +0,0 @@
-.so man3/mq_timedsend.3
-.\" Because mq_timedsend(3) is layered on a system call of the same name
diff --git a/man2/mq_unlink.2 b/man2/mq_unlink.2
deleted file mode 100644
index c5f276826..000000000
--- a/man2/mq_unlink.2
+++ /dev/null
@@ -1,2 +0,0 @@
-.so man3/mq_unlink.3
-.\" Because mq_unlink(3) is layered on a system call of the same name
diff --git a/man2/mremap.2 b/man2/mremap.2
deleted file mode 100644
index 336aae676..000000000
--- a/man2/mremap.2
+++ /dev/null
@@ -1,357 +0,0 @@
-.\" Copyright (c) 1996 Tom Bjorkholm <tomb@mydata.se>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 1996-04-11 Tom Bjorkholm <tomb@mydata.se>
-.\" First version written (1.3.86)
-.\" 1996-04-12 Tom Bjorkholm <tomb@mydata.se>
-.\" Update for Linux 1.3.87 and later
-.\" 2005-10-11 mtk: Added NOTES for MREMAP_FIXED; revised EINVAL text.
-.\"
-.TH mremap 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-mremap \- remap a virtual memory address
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sys/mman.h>
-.P
-.BI "void *mremap(void " old_address [. old_size "], size_t " old_size ,
-.BI " size_t " new_size ", int " flags ", ... /* void *" new_address " */);"
-.fi
-.SH DESCRIPTION
-.BR mremap ()
-expands (or shrinks) an existing memory mapping, potentially
-moving it at the same time (controlled by the \fIflags\fP argument and
-the available virtual address space).
-.P
-\fIold_address\fP is the old address of the virtual memory block that you
-want to expand (or shrink).
-Note that \fIold_address\fP has to be page
-aligned.
-\fIold_size\fP is the old size of the
-virtual memory block.
-\fInew_size\fP is the requested size of the
-virtual memory block after the resize.
-An optional fifth argument,
-.IR new_address ,
-may be provided; see the description of
-.B MREMAP_FIXED
-below.
-.P
-If the value of \fIold_size\fP is zero, and \fIold_address\fP refers to
-a shareable mapping
-(see the description of
-.B MAP_SHARED
-in
-.BR mmap (2)),
-then
-.BR mremap ()
-will create a new mapping of the same pages.
-\fInew_size\fP
-will be the size of the new mapping and the location of the new mapping
-may be specified with \fInew_address\fP; see the description of
-.B MREMAP_FIXED
-below.
-If a new mapping is requested via this method, then the
-.B MREMAP_MAYMOVE
-flag must also be specified.
-.P
-The \fIflags\fP bit-mask argument may be 0, or include the following flags:
-.TP
-.B MREMAP_MAYMOVE
-By default, if there is not sufficient space to expand a mapping
-at its current location, then
-.BR mremap ()
-fails.
-If this flag is specified, then the kernel is permitted to
-relocate the mapping to a new virtual address, if necessary.
-If the mapping is relocated,
-then absolute pointers into the old mapping location
-become invalid (offsets relative to the starting address of
-the mapping should be employed).
-.TP
-.BR MREMAP_FIXED " (since Linux 2.3.31)"
-This flag serves a similar purpose to the
-.B MAP_FIXED
-flag of
-.BR mmap (2).
-If this flag is specified, then
-.BR mremap ()
-accepts a fifth argument,
-.IR "void\ *new_address" ,
-which specifies a page-aligned address to which the mapping must
-be moved.
-Any previous mapping at the address range specified by
-.I new_address
-and
-.I new_size
-is unmapped.
-.IP
-If
-.B MREMAP_FIXED
-is specified, then
-.B MREMAP_MAYMOVE
-must also be specified.
-.TP
-.BR MREMAP_DONTUNMAP " (since Linux 5.7)"
-.\" commit e346b3813067d4b17383f975f197a9aa28a3b077
-This flag, which must be used in conjunction with
-.BR MREMAP_MAYMOVE ,
-remaps a mapping to a new address but does not unmap the mapping at
-.IR old_address .
-.IP
-The
-.B MREMAP_DONTUNMAP
-flag can be used only with private anonymous mappings
-(see the description of
-.B MAP_PRIVATE
-and
-.B MAP_ANONYMOUS
-in
-.BR mmap (2)).
-.IP
-After completion,
-any access to the range specified by
-.I old_address
-and
-.I old_size
-will result in a page fault.
-The page fault will be handled by a
-.BR userfaultfd (2)
-handler
-if the address is in a range previously registered with
-.BR userfaultfd (2).
-Otherwise, the kernel allocates a zero-filled page to handle the fault.
-.IP
-The
-.B MREMAP_DONTUNMAP
-flag may be used to atomically move a mapping while leaving the source
-mapped.
-See NOTES for some possible applications of
-.BR MREMAP_DONTUNMAP .
-.P
-If the memory segment specified by
-.I old_address
-and
-.I old_size
-is locked (using
-.BR mlock (2)
-or similar), then this lock is maintained when the segment is
-resized and/or relocated.
-As a consequence, the amount of memory locked by the process may change.
-.SH RETURN VALUE
-On success
-.BR mremap ()
-returns a pointer to the new virtual memory area.
-On error, the value
-.B MAP_FAILED
-(that is, \fI(void\ *)\ \-1\fP) is returned,
-and \fIerrno\fP is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-The caller tried to expand a memory segment that is locked,
-but this was not possible without exceeding the
-.B RLIMIT_MEMLOCK
-resource limit.
-.TP
-.B EFAULT
-Some address in the range
-\fIold_address\fP to \fIold_address\fP+\fIold_size\fP is an invalid
-virtual memory address for this process.
-You can also get
-.B EFAULT
-even if there exist mappings that cover the
-whole address space requested, but those mappings are of different types.
-.TP
-.B EINVAL
-An invalid argument was given.
-Possible causes are:
-.RS
-.IP \[bu] 3
-\fIold_address\fP was not
-page aligned;
-.IP \[bu]
-a value other than
-.B MREMAP_MAYMOVE
-or
-.B MREMAP_FIXED
-or
-.B MREMAP_DONTUNMAP
-was specified in
-.IR flags ;
-.IP \[bu]
-.I new_size
-was zero;
-.IP \[bu]
-.I new_size
-or
-.I new_address
-was invalid;
-.IP \[bu]
-the new address range specified by
-.I new_address
-and
-.I new_size
-overlapped the old address range specified by
-.I old_address
-and
-.IR old_size ;
-.IP \[bu]
-.B MREMAP_FIXED
-or
-.B MREMAP_DONTUNMAP
-was specified without also specifying
-.BR MREMAP_MAYMOVE ;
-.IP \[bu]
-.B MREMAP_DONTUNMAP
-was specified, but one or more pages in the range specified by
-.I old_address
-and
-.I old_size
-were not private anonymous;
-.IP \[bu]
-.B MREMAP_DONTUNMAP
-was specified and
-.I old_size
-was not equal to
-.IR new_size ;
-.IP \[bu]
-\fIold_size\fP was zero and \fIold_address\fP did not refer to a
-shareable mapping (but see BUGS);
-.IP \[bu]
-\fIold_size\fP was zero and the
-.B MREMAP_MAYMOVE
-flag was not specified.
-.RE
-.TP
-.B ENOMEM
-Not enough memory was available to complete the operation.
-Possible causes are:
-.RS
-.IP \[bu] 3
-The memory area cannot be expanded at the current virtual address, and the
-.B MREMAP_MAYMOVE
-flag is not set in \fIflags\fP.
-Or, there is not enough (virtual) memory available.
-.IP \[bu]
-.B MREMAP_DONTUNMAP
-was used causing a new mapping to be created that would exceed the
-(virtual) memory available.
-Or, it would exceed the maximum number of allowed mappings.
-.RE
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.\" 4.2BSD had a (never actually implemented)
-.\" .BR mremap (2)
-.\" call with completely different semantics.
-.\" .P
-Prior to glibc 2.4, glibc did not expose the definition of
-.BR MREMAP_FIXED ,
-and the prototype for
-.BR mremap ()
-did not allow for the
-.I new_address
-argument.
-.SH NOTES
-.BR mremap ()
-changes the
-mapping between virtual addresses and memory pages.
-This can be used to implement a very efficient
-.BR realloc (3).
-.P
-In Linux, memory is divided into pages.
-A process has (one or)
-several linear virtual memory segments.
-Each virtual memory segment has one
-or more mappings to real memory pages (in the page table).
-Each virtual memory segment has its own
-protection (access rights), which may cause
-a segmentation violation
-.RB ( SIGSEGV )
-if the memory is accessed incorrectly (e.g.,
-writing to a read-only segment).
-Accessing virtual memory outside of the
-segments will also cause a segmentation violation.
-.P
-If
-.BR mremap ()
-is used to move or expand an area locked with
-.BR mlock (2)
-or equivalent, the
-.BR mremap ()
-call will make a best effort to populate the new area but will not fail
-with
-.B ENOMEM
-if the area cannot be populated.
-.\"
-.SS MREMAP_DONTUNMAP use cases
-Possible applications for
-.B MREMAP_DONTUNMAP
-include:
-.IP \[bu] 3
-Non-cooperative
-.BR userfaultfd (2):
-an application can yank out a virtual address range using
-.B MREMAP_DONTUNMAP
-and then employ a
-.BR userfaultfd (2)
-handler to handle the page faults that subsequently occur
-as other threads in the process touch pages in the yanked range.
-.IP \[bu]
-Garbage collection:
-.B MREMAP_DONTUNMAP
-can be used in conjunction with
-.BR userfaultfd (2)
-to implement garbage collection algorithms (e.g., in a Java virtual machine).
-Such an implementation can be cheaper (and simpler)
-than conventional garbage collection techniques that involve
-marking pages with protection
-.B PROT_NONE
-in conjunction with the use of a
-.B SIGSEGV
-handler to catch accesses to those pages.
-.SH BUGS
-Before Linux 4.14,
-if
-.I old_size
-was zero and the mapping referred to by
-.I old_address
-was a private mapping
-(see the description of
-.B MAP_PRIVATE
-in
-.BR mmap (2)),
-.BR mremap ()
-created a new private mapping unrelated to the original mapping.
-This behavior was unintended
-and probably unexpected in user-space applications
-(since the intention of
-.BR mremap ()
-is to create a new mapping based on the original mapping).
-Since Linux 4.14,
-.\" commit dba58d3b8c5045ad89c1c95d33d01451e3964db7
-.BR mremap ()
-fails with the error
-.B EINVAL
-in this scenario.
-.SH SEE ALSO
-.BR brk (2),
-.BR getpagesize (2),
-.BR getrlimit (2),
-.BR mlock (2),
-.BR mmap (2),
-.BR sbrk (2),
-.BR malloc (3),
-.BR realloc (3)
-.P
-Your favorite text book on operating systems
-for more information on paged memory
-(e.g., \fIModern Operating Systems\fP by Andrew S.\& Tanenbaum,
-\fIInside Linux\fP by Randolph Bentson,
-\fIThe Design of the UNIX Operating System\fP by Maurice J.\& Bach).
diff --git a/man2/msgctl.2 b/man2/msgctl.2
deleted file mode 100644
index 70f4d1cf8..000000000
--- a/man2/msgctl.2
+++ /dev/null
@@ -1,424 +0,0 @@
-'\" t
-.\" Copyright 1993 Giorgio Ciucci (giorgio@crcc.it)
-.\" and Copyright 2004, 2005 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Tue Oct 22 08:11:14 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Sun Feb 18 01:59:29 2001 by Andries E. Brouwer <aeb@cwi.nl>
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on CAP_IPC_OWNER requirement
-.\" Modified, 17 Jun 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on CAP_SYS_ADMIN requirement for IPC_SET and IPC_RMID
-.\" Modified, 11 Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Added msqid_ds and ipc_perm structure definitions
-.\" 2005-08-02, mtk: Added IPC_INFO, MSG_INFO, MSG_STAT descriptions
-.\" 2018-03-20, dbueso: Added MSG_STAT_ANY description.
-.\"
-.TH msgctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-msgctl \- System V message control operations
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/msg.h>
-.P
-.BI "int msgctl(int " msqid ", int " op ", struct msqid_ds *" buf );
-.fi
-.SH DESCRIPTION
-.BR msgctl ()
-performs the control operation specified by
-.I op
-on the System\ V message queue with identifier
-.IR msqid .
-.P
-The
-.I msqid_ds
-data structure is defined in \fI<sys/msg.h>\fP as follows:
-.P
-.in +4n
-.EX
-struct msqid_ds {
- struct ipc_perm msg_perm; /* Ownership and permissions */
- time_t msg_stime; /* Time of last msgsnd(2) */
- time_t msg_rtime; /* Time of last msgrcv(2) */
- time_t msg_ctime; /* Time of creation or last
- modification by msgctl() */
- unsigned long msg_cbytes; /* # of bytes in queue */
- msgqnum_t msg_qnum; /* # of messages in queue */
- msglen_t msg_qbytes; /* Maximum # of bytes in queue */
- pid_t msg_lspid; /* PID of last msgsnd(2) */
- pid_t msg_lrpid; /* PID of last msgrcv(2) */
-};
-.EE
-.in
-.P
-The fields of the
-.I msqid_ds
-structure are as follows:
-.TP 11
-.I msg_perm
-This is an
-.I ipc_perm
-structure (see below) that specifies the access permissions on the message
-queue.
-.TP
-.I msg_stime
-Time of the last
-.BR msgsnd (2)
-system call.
-.TP
-.I msg_rtime
-Time of the last
-.BR msgrcv (2)
-system call.
-.TP
-.I msg_ctime
-Time of creation of queue or time of last
-.BR msgctl ()
-.B IPC_SET
-operation.
-.TP
-.I msg_cbytes
-Number of bytes in all messages currently on the message queue.
-This is a nonstandard Linux extension that is not specified in POSIX.
-.TP
-.I msg_qnum
-Number of messages currently on the message queue.
-.TP
-.I msg_qbytes
-Maximum number of bytes of message text allowed on the message
-queue.
-.TP
-.I msg_lspid
-ID of the process that performed the last
-.BR msgsnd (2)
-system call.
-.TP
-.I msg_lrpid
-ID of the process that performed the last
-.BR msgrcv (2)
-system call.
-.P
-The
-.I ipc_perm
-structure is defined as follows
-(the highlighted fields are settable using
-.BR IPC_SET ):
-.P
-.in +4n
-.EX
-struct ipc_perm {
- key_t __key; /* Key supplied to msgget(2) */
- uid_t \fBuid\fP; /* Effective UID of owner */
- gid_t \fBgid\fP; /* Effective GID of owner */
- uid_t cuid; /* Effective UID of creator */
- gid_t cgid; /* Effective GID of creator */
- unsigned short \fBmode\fP; /* Permissions */
- unsigned short __seq; /* Sequence number */
-};
-.EE
-.in
-.P
-The least significant 9 bits of the
-.I mode
-field of the
-.I ipc_perm
-structure define the access permissions for the message queue.
-The permission bits are as follows:
-.TS
-l l.
-0400 Read by user
-0200 Write by user
-0040 Read by group
-0020 Write by group
-0004 Read by others
-0002 Write by others
-.TE
-.P
-Bits 0100, 0010, and 0001 (the execute bits) are unused by the system.
-.P
-Valid values for
-.I op
-are:
-.TP
-.B IPC_STAT
-Copy information from the kernel data structure associated with
-.I msqid
-into the
-.I msqid_ds
-structure pointed to by
-.IR buf .
-The caller must have read permission on the message queue.
-.TP
-.B IPC_SET
-Write the values of some members of the
-.I msqid_ds
-structure pointed to by
-.I buf
-to the kernel data structure associated with this message queue,
-updating also its
-.I msg_ctime
-member.
-.IP
-The following members of the structure are updated:
-.IR msg_qbytes ,
-.IR msg_perm.uid ,
-.IR msg_perm.gid ,
-and (the least significant 9 bits of)
-.IR msg_perm.mode .
-.IP
-The effective UID of the calling process must match the owner
-.RI ( msg_perm.uid )
-or creator
-.RI ( msg_perm.cuid )
-of the message queue, or the caller must be privileged.
-Appropriate privilege (Linux: the
-.B CAP_SYS_RESOURCE
-capability) is required to raise the
-.I msg_qbytes
-value beyond the system parameter
-.BR MSGMNB .
-.TP
-.B IPC_RMID
-Immediately remove the message queue,
-awakening all waiting reader and writer processes (with an error
-return and
-.I errno
-set to
-.BR EIDRM ).
-The calling process must have appropriate privileges
-or its effective user ID must be either that of the creator or owner
-of the message queue.
-The third argument to
-.BR msgctl ()
-is ignored in this case.
-.TP
-.BR IPC_INFO " (Linux-specific)"
-Return information about system-wide message queue limits and
-parameters in the structure pointed to by
-.IR buf .
-This structure is of type
-.I msginfo
-(thus, a cast is required),
-defined in
-.I <sys/msg.h>
-if the
-.B _GNU_SOURCE
-feature test macro is defined:
-.IP
-.in +4n
-.EX
-struct msginfo {
- int msgpool; /* Size in kibibytes of buffer pool
- used to hold message data;
- unused within kernel */
- int msgmap; /* Maximum number of entries in message
- map; unused within kernel */
- int msgmax; /* Maximum number of bytes that can be
- written in a single message */
- int msgmnb; /* Maximum number of bytes that can be
- written to queue; used to initialize
- msg_qbytes during queue creation
- (msgget(2)) */
- int msgmni; /* Maximum number of message queues */
- int msgssz; /* Message segment size;
- unused within kernel */
- int msgtql; /* Maximum number of messages on all queues
- in system; unused within kernel */
- unsigned short msgseg;
- /* Maximum number of segments;
- unused within kernel */
-};
-.EE
-.in
-.IP
-The
-.IR msgmni ,
-.IR msgmax ,
-and
-.I msgmnb
-settings can be changed via
-.I /proc
-files of the same name; see
-.BR proc (5)
-for details.
-.TP
-.BR MSG_INFO " (Linux-specific)"
-Return a
-.I msginfo
-structure containing the same information as for
-.BR IPC_INFO ,
-except that the following fields are returned with information
-about system resources consumed by message queues: the
-.I msgpool
-field returns the number of message queues that currently exist
-on the system; the
-.I msgmap
-field returns the total number of messages in all queues
-on the system; and the
-.I msgtql
-field returns the total number of bytes in all messages
-in all queues on the system.
-.TP
-.BR MSG_STAT " (Linux-specific)"
-Return a
-.I msqid_ds
-structure as for
-.BR IPC_STAT .
-However, the
-.I msqid
-argument is not a queue identifier, but instead an index into
-the kernel's internal array that maintains information about
-all message queues on the system.
-.TP
-.BR MSG_STAT_ANY " (Linux-specific, since Linux 4.17)"
-Return a
-.I msqid_ds
-structure as for
-.BR MSG_STAT .
-However,
-.I msg_perm.mode
-is not checked for read access for
-.I msqid
-meaning that any user can employ this operation (just as any user may read
-.I /proc/sysvipc/msg
-to obtain the same information).
-.SH RETURN VALUE
-On success,
-.BR IPC_STAT ,
-.BR IPC_SET ,
-and
-.B IPC_RMID
-return 0.
-A successful
-.B IPC_INFO
-or
-.B MSG_INFO
-operation returns the index of the highest used entry in the
-kernel's internal array recording information about all
-message queues.
-(This information can be used with repeated
-.B MSG_STAT
-or
-.B MSG_STAT_ANY
-operations to obtain information about all queues on the system.)
-A successful
-.B MSG_STAT
-or
-.B MSG_STAT_ANY
-operation returns the identifier of the queue whose index was given in
-.IR msqid .
-.P
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The argument
-.I op
-is equal to
-.B IPC_STAT
-or
-.BR MSG_STAT ,
-but the calling process does not have read permission on the message queue
-.IR msqid ,
-and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EFAULT
-The argument
-.I op
-has the value
-.B IPC_SET
-or
-.BR IPC_STAT ,
-but the address pointed to by
-.I buf
-isn't accessible.
-.TP
-.B EIDRM
-The message queue was removed.
-.TP
-.B EINVAL
-Invalid value for
-.I op
-or
-.IR msqid .
-Or: for a
-.B MSG_STAT
-operation, the index value specified in
-.I msqid
-referred to an array slot that is currently unused.
-.TP
-.B EPERM
-The argument
-.I op
-has the value
-.B IPC_SET
-or
-.BR IPC_RMID ,
-but the effective user ID of the calling process is not the creator
-(as found in
-.IR msg_perm.cuid )
-or the owner
-(as found in
-.IR msg_perm.uid )
-of the message queue,
-and the caller is not privileged (Linux: does not have the
-.B CAP_SYS_ADMIN
-capability).
-.TP
-.B EPERM
-An attempt
-.RB ( IPC_SET )
-was made to increase
-.I msg_qbytes
-beyond the system parameter
-.BR MSGMNB ,
-but the caller is not privileged (Linux: does not have the
-.B CAP_SYS_RESOURCE
-capability).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.\" SVID does not document the EIDRM error condition.
-.P
-Various fields in the \fIstruct msqid_ds\fP were
-typed as
-.I short
-under Linux 2.2
-and have become
-.I long
-under Linux 2.4.
-To take advantage of this,
-a recompilation under glibc-2.1.91 or later should suffice.
-(The kernel distinguishes old and new calls by an
-.B IPC_64
-flag in
-.IR op .)
-.SH NOTES
-The
-.BR IPC_INFO ,
-.BR MSG_STAT ,
-and
-.B MSG_INFO
-operations are used by the
-.BR ipcs (1)
-program to provide information on allocated resources.
-In the future these may be modified or moved to a
-.I /proc
-filesystem interface.
-.SH SEE ALSO
-.BR msgget (2),
-.BR msgrcv (2),
-.BR msgsnd (2),
-.BR capabilities (7),
-.BR mq_overview (7),
-.BR sysvipc (7)
diff --git a/man2/msgget.2 b/man2/msgget.2
deleted file mode 100644
index b7105e7f5..000000000
--- a/man2/msgget.2
+++ /dev/null
@@ -1,217 +0,0 @@
-.\" Copyright 1993 Giorgio Ciucci <giorgio@crcc.it>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Added correction due to Nick Duffek <nsd@bbc.com>, aeb, 960426
-.\" Modified Wed Nov 6 04:00:31 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified, 8 Jan 2003, Michael Kerrisk, <mtk.manpages@gmail.com>
-.\" Removed EIDRM from errors - that can't happen...
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\" Modified, 11 Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Added notes on /proc files
-.\"
-.TH msgget 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-msgget \- get a System V message queue identifier
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/msg.h>
-.P
-.BI "int msgget(key_t " key ", int " msgflg );
-.fi
-.SH DESCRIPTION
-The
-.BR msgget ()
-system call returns the System\ V message queue identifier associated
-with the value of the
-.I key
-argument.
-It may be used either to obtain the identifier of a previously created
-message queue (when
-.I msgflg
-is zero and
-.I key
-does not have the value
-.BR IPC_PRIVATE ),
-or to create a new message queue.
-.P
-A new message queue is created if
-.I key
-has the value
-.B IPC_PRIVATE
-or
-.I key
-isn't
-.BR IPC_PRIVATE ,
-no message queue with the given key
-.I key
-exists, and
-.B IPC_CREAT
-is specified in
-.IR msgflg .
-.P
-If
-.I msgflg
-specifies both
-.B IPC_CREAT
-and
-.B IPC_EXCL
-and a message queue already exists for
-.IR key ,
-then
-.BR msgget ()
-fails with
-.I errno
-set to
-.BR EEXIST .
-(This is analogous to the effect of the combination
-.B O_CREAT | O_EXCL
-for
-.BR open (2).)
-.P
-Upon creation, the least significant bits of the argument
-.I msgflg
-define the permissions of the message queue.
-These permission bits have the same format and semantics
-as the permissions specified for the
-.I mode
-argument of
-.BR open (2).
-(The execute permissions are not used.)
-.P
-If a new message queue is created,
-then its associated data structure
-.I msqid_ds
-(see
-.BR msgctl (2))
-is initialized as follows:
-.IP \[bu] 3
-.I msg_perm.cuid
-and
-.I msg_perm.uid
-are set to the effective user ID of the calling process.
-.IP \[bu]
-.I msg_perm.cgid
-and
-.I msg_perm.gid
-are set to the effective group ID of the calling process.
-.IP \[bu]
-The least significant 9 bits of
-.I msg_perm.mode
-are set to the least significant 9 bits of
-.IR msgflg .
-.IP \[bu]
-.IR msg_qnum ,
-.IR msg_lspid ,
-.IR msg_lrpid ,
-.IR msg_stime ,
-and
-.I msg_rtime
-are set to 0.
-.IP \[bu]
-.I msg_ctime
-is set to the current time.
-.IP \[bu]
-.I msg_qbytes
-is set to the system limit
-.BR MSGMNB .
-.P
-If the message queue already exists the permissions are
-verified, and a check is made to see if it is marked for
-destruction.
-.SH RETURN VALUE
-On success,
-.BR msgget ()
-returns the message queue identifier (a nonnegative integer).
-On failure, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-A message queue exists for
-.IR key ,
-but the calling process does not have permission to access the queue,
-and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EEXIST
-.B IPC_CREAT
-and
-.B IPC_EXCL
-were specified in
-.IR msgflg ,
-but a message queue already exists for
-.IR key .
-.TP
-.B ENOENT
-No message queue exists for
-.I key
-and
-.I msgflg
-did not specify
-.BR IPC_CREAT .
-.TP
-.B ENOMEM
-A message queue has to be created but the system does not have enough
-memory for the new data structure.
-.TP
-.B ENOSPC
-A message queue has to be created but the system limit for the maximum
-number of message queues
-.RB ( MSGMNI )
-would be exceeded.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.SS Linux
-Until Linux 2.3.20, Linux would return
-.B EIDRM
-for a
-.BR msgget ()
-on a message queue scheduled for deletion.
-.SH NOTES
-.B IPC_PRIVATE
-isn't a flag field but a
-.I key_t
-type.
-If this special value is used for
-.IR key ,
-the system call ignores everything but the least significant 9 bits of
-.I msgflg
-and creates a new message queue (on success).
-.P
-The following is a system limit on message queue resources affecting a
-.BR msgget ()
-call:
-.TP
-.B MSGMNI
-System-wide limit on the number of message queues.
-Before Linux 3.19,
-.\" commit 0050ee059f7fc86b1df2527aaa14ed5dc72f9973
-the default value for this limit was calculated using a formula
-based on available system memory.
-Since Linux 3.19, the default value is 32,000.
-On Linux, this limit can be read and modified via
-.IR /proc/sys/kernel/msgmni .
-.SH BUGS
-The name choice
-.B IPC_PRIVATE
-was perhaps unfortunate;
-.B IPC_NEW
-would more clearly show its function.
-.SH SEE ALSO
-.BR msgctl (2),
-.BR msgrcv (2),
-.BR msgsnd (2),
-.BR ftok (3),
-.BR capabilities (7),
-.BR mq_overview (7),
-.BR sysvipc (7)
diff --git a/man2/msgop.2 b/man2/msgop.2
deleted file mode 100644
index 6ffd76b2f..000000000
--- a/man2/msgop.2
+++ /dev/null
@@ -1,684 +0,0 @@
-.\" Copyright 1993 Giorgio Ciucci <giorgio@crcc.it>
-.\" and Copyright 2015 Bill Pemberton <wfp5p@worldbroken.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Tue Oct 22 16:40:11 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Mon Jul 10 21:09:59 2000 by aeb
-.\" Modified 1 Jun 2002, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language clean-ups.
-.\" Enhanced and corrected information on msg_qbytes, MSGMNB and MSGMAX
-.\" Added note on restart behavior of msgsnd() and msgrcv()
-.\" Formatting clean-ups (argument and field names marked as .I
-.\" instead of .B)
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\" Modified, 11 Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Added notes on /proc files
-.\"
-.TH MSGOP 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-msgrcv, msgsnd \- System V message queue operations
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/msg.h>
-.P
-.BI "int msgsnd(int " msqid ", const void " msgp [. msgsz "], size_t " msgsz ,
-.BI " int " msgflg );
-.P
-.BI "ssize_t msgrcv(int " msqid ", void " msgp [. msgsz "], size_t " msgsz \
-", long " msgtyp ,
-.BI " int " msgflg );
-.fi
-.SH DESCRIPTION
-The
-.BR msgsnd ()
-and
-.BR msgrcv ()
-system calls are used to send messages to,
-and receive messages from, a System\ V message queue.
-The calling process must have write permission on the message queue
-in order to send a message, and read permission to receive a message.
-.P
-The
-.I msgp
-argument is a pointer to a caller-defined structure
-of the following general form:
-.P
-.in +4n
-.EX
-struct msgbuf {
- long mtype; /* message type, must be > 0 */
- char mtext[1]; /* message data */
-};
-.EE
-.in
-.P
-The
-.I mtext
-field is an array (or other structure) whose size is specified by
-.IR msgsz ,
-a nonnegative integer value.
-Messages of zero length (i.e., no
-.I mtext
-field) are permitted.
-The
-.I mtype
-field must have a strictly positive integer value.
-This value can be
-used by the receiving process for message selection
-(see the description of
-.BR msgrcv ()
-below).
-.SS msgsnd()
-The
-.BR msgsnd ()
-system call appends a copy of the message pointed to by
-.I msgp
-to the message queue whose identifier is specified
-by
-.IR msqid .
-.P
-If sufficient space is available in the queue,
-.BR msgsnd ()
-succeeds immediately.
-The queue capacity is governed by the
-.I msg_qbytes
-field in the associated data structure for the message queue.
-During queue creation this field is initialized to
-.B MSGMNB
-bytes, but this limit can be modified using
-.BR msgctl (2).
-A message queue is considered to be full if either of the following
-conditions is true:
-.IP \[bu] 3
-Adding a new message to the queue would cause the total number of bytes
-in the queue to exceed the queue's maximum size (the
-.I msg_qbytes
-field).
-.IP \[bu]
-Adding another message to the queue would cause the total number of messages
-in the queue to exceed the queue's maximum size (the
-.I msg_qbytes
-field).
-This check is necessary to prevent an unlimited number of zero-length
-messages being placed on the queue.
-Although such messages contain no data,
-they nevertheless consume (locked) kernel memory.
-.P
-If insufficient space is available in the queue, then the default
-behavior of
-.BR msgsnd ()
-is to block until space becomes available.
-If
-.B IPC_NOWAIT
-is specified in
-.IR msgflg ,
-then the call instead fails with the error
-.BR EAGAIN .
-.P
-A blocked
-.BR msgsnd ()
-call may also fail if:
-.IP \[bu] 3
-the queue is removed,
-in which case the system call fails with
-.I errno
-set to
-.BR EIDRM ;
-or
-.IP \[bu]
-a signal is caught, in which case the system call fails
-with
-.I errno
-set to
-.BR EINTR "; see"
-.BR signal (7).
-.RB ( msgsnd ()
-is never automatically restarted after being interrupted by a
-signal handler, regardless of the setting of the
-.B SA_RESTART
-flag when establishing a signal handler.)
-.P
-Upon successful completion the message queue data structure is updated
-as follows:
-.IP \[bu] 3
-.I msg_lspid
-is set to the process ID of the calling process.
-.IP \[bu]
-.I msg_qnum
-is incremented by 1.
-.IP \[bu]
-.I msg_stime
-is set to the current time.
-.SS msgrcv()
-The
-.BR msgrcv ()
-system call removes a message from the queue specified by
-.I msqid
-and places it in the buffer
-pointed to by
-.IR msgp .
-.P
-The argument
-.I msgsz
-specifies the maximum size in bytes for the member
-.I mtext
-of the structure pointed to by the
-.I msgp
-argument.
-If the message text has length greater than
-.IR msgsz ,
-then the behavior depends on whether
-.B MSG_NOERROR
-is specified in
-.IR msgflg .
-If
-.B MSG_NOERROR
-is specified, then
-the message text will be truncated (and the truncated part will be
-lost); if
-.B MSG_NOERROR
-is not specified, then
-the message isn't removed from the queue and
-the system call fails returning \-1 with
-.I errno
-set to
-.BR E2BIG .
-.P
-Unless
-.B MSG_COPY
-is specified in
-.I msgflg
-(see below),
-the
-.I msgtyp
-argument specifies the type of message requested, as follows:
-.IP \[bu] 3
-If
-.I msgtyp
-is 0,
-then the first message in the queue is read.
-.IP \[bu]
-If
-.I msgtyp
-is greater than 0,
-then the first message in the queue of type
-.I msgtyp
-is read, unless
-.B MSG_EXCEPT
-was specified in
-.IR msgflg ,
-in which case
-the first message in the queue of type not equal to
-.I msgtyp
-will be read.
-.IP \[bu]
-If
-.I msgtyp
-is less than 0,
-then the first message in the queue with the lowest type less than or
-equal to the absolute value of
-.I msgtyp
-will be read.
-.P
-The
-.I msgflg
-argument is a bit mask constructed by ORing together zero or more
-of the following flags:
-.TP
-.B IPC_NOWAIT
-Return immediately if no message of the requested type is in the queue.
-The system call fails with
-.I errno
-set to
-.BR ENOMSG .
-.TP
-.BR MSG_COPY " (since Linux 3.8)"
-.\" commit 4a674f34ba04a002244edaf891b5da7fc1473ae8
-Nondestructively fetch a copy of the message at the ordinal position
-in the queue specified by
-.I msgtyp
-(messages are considered to be numbered starting at 0).
-.IP
-This flag must be specified in conjunction with
-.BR IPC_NOWAIT ,
-with the result that, if there is no message available at the given position,
-the call fails immediately with the error
-.BR ENOMSG .
-Because they alter the meaning of
-.I msgtyp
-in orthogonal ways,
-.B MSG_COPY
-and
-.B MSG_EXCEPT
-may not both be specified in
-.IR msgflg .
-.IP
-The
-.B MSG_COPY
-flag was added for the implementation of
-the kernel checkpoint-restore facility and
-is available only if the kernel was built with the
-.B CONFIG_CHECKPOINT_RESTORE
-option.
-.TP
-.B MSG_EXCEPT
-Used with
-.I msgtyp
-greater than 0
-to read the first message in the queue with message type that differs
-from
-.IR msgtyp .
-.TP
-.B MSG_NOERROR
-Truncate the message text if it is longer than
-.I msgsz
-bytes.
-.P
-If no message of the requested type is available and
-.B IPC_NOWAIT
-isn't specified in
-.IR msgflg ,
-the calling process is blocked until one of the following conditions occurs:
-.IP \[bu] 3
-A message of the desired type is placed in the queue.
-.IP \[bu]
-The message queue is removed from the system.
-In this case, the system call fails with
-.I errno
-set to
-.BR EIDRM .
-.IP \[bu]
-The calling process catches a signal.
-In this case, the system call fails with
-.I errno
-set to
-.BR EINTR .
-.RB ( msgrcv ()
-is never automatically restarted after being interrupted by a
-signal handler, regardless of the setting of the
-.B SA_RESTART
-flag when establishing a signal handler.)
-.P
-Upon successful completion the message queue data structure is updated
-as follows:
-.IP \[bu] 3
-.I msg_lrpid
-is set to the process ID of the calling process.
-.IP \[bu]
-.I msg_qnum
-is decremented by 1.
-.IP \[bu]
-.I msg_rtime
-is set to the current time.
-.SH RETURN VALUE
-On success,
-.BR msgsnd ()
-returns 0
-and
-.BR msgrcv ()
-returns the number of bytes actually copied into the
-.I mtext
-array.
-On failure, both functions return \-1, and set
-.I errno
-to indicate the error.
-.SH ERRORS
-.BR msgsnd ()
-can fail with the following errors:
-.TP
-.B EACCES
-The calling process does not have write permission on the message queue,
-and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EAGAIN
-The message can't be sent due to the
-.I msg_qbytes
-limit for the queue and
-.B IPC_NOWAIT
-was specified in
-.IR msgflg .
-.TP
-.B EFAULT
-The address pointed to by
-.I msgp
-isn't accessible.
-.TP
-.B EIDRM
-The message queue was removed.
-.TP
-.B EINTR
-While sleeping on a full message queue, the process caught a signal.
-.TP
-.B EINVAL
-Invalid
-.I msqid
-value, or nonpositive
-.I mtype
-value, or
-invalid
-.I msgsz
-value (less than 0 or greater than the system value
-.BR MSGMAX ).
-.TP
-.B ENOMEM
-The system does not have enough memory to make a copy of the
-message pointed to by
-.IR msgp .
-.P
-.BR msgrcv ()
-can fail with the following errors:
-.TP
-.B E2BIG
-The message text length is greater than
-.I msgsz
-and
-.B MSG_NOERROR
-isn't specified in
-.IR msgflg .
-.TP
-.B EACCES
-The calling process does not have read permission on the message queue,
-and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EFAULT
-The address pointed to by
-.I msgp
-isn't accessible.
-.TP
-.B EIDRM
-While the process was sleeping to receive a message,
-the message queue was removed.
-.TP
-.B EINTR
-While the process was sleeping to receive a message,
-the process caught a signal; see
-.BR signal (7).
-.TP
-.B EINVAL
-.I msqid
-was invalid, or
-.I msgsz
-was less than 0.
-.TP
-.BR EINVAL " (since Linux 3.14)"
-.I msgflg
-specified
-.BR MSG_COPY ,
-but not
-.BR IPC_NOWAIT .
-.TP
-.BR EINVAL " (since Linux 3.14)"
-.I msgflg
-specified both
-.B MSG_COPY
-and
-.BR MSG_EXCEPT .
-.TP
-.B ENOMSG
-.B IPC_NOWAIT
-was specified in
-.I msgflg
-and no message of the requested type existed on the message queue.
-.TP
-.B ENOMSG
-.B IPC_NOWAIT
-and
-.B MSG_COPY
-were specified in
-.I msgflg
-and the queue contains fewer than
-.I msgtyp
-messages.
-.TP
-.BR ENOSYS " (since Linux 3.8)"
-Both
-.B MSG_COPY
-and
-.B IPC_NOWAIT
-were specified in
-.IR msgflg ,
-and this kernel was configured without
-.BR CONFIG_CHECKPOINT_RESTORE .
-.SH STANDARDS
-POSIX.1-2008.
-.P
-The
-.B MSG_EXCEPT
-and
-.B MSG_COPY
-flags are Linux-specific;
-their definitions can be obtained by defining the
-.B _GNU_SOURCE
-.\" MSG_COPY since glibc 2.18
-feature test macro.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.P
-The
-.I msgp
-argument is declared as \fIstruct msgbuf\ *\fP in
-glibc 2.0 and 2.1.
-It is declared as \fIvoid\ *\fP
-in glibc 2.2 and later, as required by SUSv2 and SUSv3.
-.SH NOTES
-The following limits on message queue resources affect the
-.BR msgsnd ()
-call:
-.TP
-.B MSGMAX
-Maximum size of a message text, in bytes (default value: 8192 bytes).
-On Linux, this limit can be read and modified via
-.IR /proc/sys/kernel/msgmax .
-.TP
-.B MSGMNB
-Maximum number of bytes that can be held in a message queue
-(default value: 16384 bytes).
-On Linux, this limit can be read and modified via
-.IR /proc/sys/kernel/msgmnb .
-A privileged process
-(Linux: a process with the
-.B CAP_SYS_RESOURCE
-capability)
-can increase the size of a message queue beyond
-.B MSGMNB
-using the
-.BR msgctl (2)
-.B IPC_SET
-operation.
-.P
-The implementation has no intrinsic system-wide limits on the
-number of message headers
-.RB ( MSGTQL )
-and the number of bytes in the message pool
-.RB ( MSGPOOL ).
-.SH BUGS
-In Linux 3.13 and earlier,
-if
-.BR msgrcv ()
-was called with the
-.B MSG_COPY
-flag, but without
-.BR IPC_NOWAIT ,
-and the message queue contained fewer than
-.I msgtyp
-messages, then the call would block until the next message is written
-to the queue.
-.\" http://marc.info/?l=linux-kernel&m=139048542803605&w=2
-At that point, the call would return a copy of the message,
-.I regardless
-of whether that message was at the ordinal position
-.IR msgtyp .
-This bug is fixed
-.\" commit 4f87dac386cc43d5525da7a939d4b4e7edbea22c
-in Linux 3.14.
-.P
-Specifying both
-.B MSG_COPY
-and
-.B MSG_EXCEPT
-in
-.I msgflg
-is a logical error (since these flags impose different interpretations on
-.IR msgtyp ).
-In Linux 3.13 and earlier,
-.\" http://marc.info/?l=linux-kernel&m=139048542803605&w=2
-this error was not diagnosed by
-.BR msgrcv ().
-This bug is fixed
-.\" commit 4f87dac386cc43d5525da7a939d4b4e7edbea22c
-in Linux 3.14.
-.SH EXAMPLES
-The program below demonstrates the use of
-.BR msgsnd ()
-and
-.BR msgrcv ().
-.P
-The example program is first run with the \fB\-s\fP option to send a
-message and then run again with the \fB\-r\fP option to receive a
-message.
-.P
-The following shell session shows a sample run of the program:
-.P
-.in +4n
-.EX
-.RB "$" " ./a.out \-s"
-sent: a message at Wed Mar 4 16:25:45 2015
-\&
-.RB "$" " ./a.out \-r"
-message received: a message at Wed Mar 4 16:25:45 2015
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (msgop.c)
-.EX
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ipc.h>
-#include <sys/msg.h>
-#include <time.h>
-#include <unistd.h>
-\&
-struct msgbuf {
- long mtype;
- char mtext[80];
-};
-\&
-static void
-usage(char *prog_name, char *msg)
-{
- if (msg != NULL)
- fputs(msg, stderr);
-\&
- fprintf(stderr, "Usage: %s [options]\en", prog_name);
- fprintf(stderr, "Options are:\en");
- fprintf(stderr, "\-s send message using msgsnd()\en");
- fprintf(stderr, "\-r read message using msgrcv()\en");
- fprintf(stderr, "\-t message type (default is 1)\en");
- fprintf(stderr, "\-k message queue key (default is 1234)\en");
- exit(EXIT_FAILURE);
-}
-\&
-static void
-send_msg(int qid, int msgtype)
-{
- time_t t;
- struct msgbuf msg;
-\&
- msg.mtype = msgtype;
-\&
- time(&t);
- snprintf(msg.mtext, sizeof(msg.mtext), "a message at %s",
- ctime(&t));
-\&
- if (msgsnd(qid, &msg, sizeof(msg.mtext),
- IPC_NOWAIT) == \-1)
- {
- perror("msgsnd error");
- exit(EXIT_FAILURE);
- }
- printf("sent: %s\en", msg.mtext);
-}
-\&
-static void
-get_msg(int qid, int msgtype)
-{
- struct msgbuf msg;
-\&
- if (msgrcv(qid, &msg, sizeof(msg.mtext), msgtype,
- MSG_NOERROR | IPC_NOWAIT) == \-1) {
- if (errno != ENOMSG) {
- perror("msgrcv");
- exit(EXIT_FAILURE);
- }
- printf("No message available for msgrcv()\en");
- } else {
- printf("message received: %s\en", msg.mtext);
- }
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int qid, opt;
- int mode = 0; /* 1 = send, 2 = receive */
- int msgtype = 1;
- int msgkey = 1234;
-\&
- while ((opt = getopt(argc, argv, "srt:k:")) != \-1) {
- switch (opt) {
- case \[aq]s\[aq]:
- mode = 1;
- break;
- case \[aq]r\[aq]:
- mode = 2;
- break;
- case \[aq]t\[aq]:
- msgtype = atoi(optarg);
- if (msgtype <= 0)
- usage(argv[0], "\-t option must be greater than 0\en");
- break;
- case \[aq]k\[aq]:
- msgkey = atoi(optarg);
- break;
- default:
- usage(argv[0], "Unrecognized option\en");
- }
- }
-\&
- if (mode == 0)
- usage(argv[0], "must use either \-s or \-r option\en");
-\&
- qid = msgget(msgkey, IPC_CREAT | 0666);
-\&
- if (qid == \-1) {
- perror("msgget");
- exit(EXIT_FAILURE);
- }
-\&
- if (mode == 2)
- get_msg(qid, msgtype);
- else
- send_msg(qid, msgtype);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR msgctl (2),
-.BR msgget (2),
-.BR capabilities (7),
-.BR mq_overview (7),
-.BR sysvipc (7)
diff --git a/man2/msgrcv.2 b/man2/msgrcv.2
deleted file mode 100644
index b34869edf..000000000
--- a/man2/msgrcv.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/msgop.2
diff --git a/man2/msgsnd.2 b/man2/msgsnd.2
deleted file mode 100644
index b34869edf..000000000
--- a/man2/msgsnd.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/msgop.2
diff --git a/man2/msync.2 b/man2/msync.2
deleted file mode 100644
index f46b0ff90..000000000
--- a/man2/msync.2
+++ /dev/null
@@ -1,140 +0,0 @@
-.\" Copyright (C) 1996 Andries Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH msync 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-msync \- synchronize a file with a memory map
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/mman.h>
-.P
-.BI "int msync(void " addr [. length "], size_t " length ", int " flags );
-.fi
-.SH DESCRIPTION
-.BR msync ()
-flushes changes made to the in-core copy of a file that was mapped
-into memory using
-.BR mmap (2)
-back to the filesystem.
-Without use of this call,
-there is no guarantee that changes are written back before
-.BR munmap (2)
-is called.
-To be more precise, the part of the file that
-corresponds to the memory area starting at
-.I addr
-and having length
-.I length
-is updated.
-.P
-The
-.I flags
-argument should specify exactly one of
-.B MS_ASYNC
-and
-.BR MS_SYNC ,
-and may additionally include the
-.B MS_INVALIDATE
-bit.
-These bits have the following meanings:
-.TP
-.B MS_ASYNC
-Specifies that an update be scheduled, but the call returns immediately.
-.TP
-.B MS_SYNC
-Requests an update and waits for it to complete.
-.TP
-.B MS_INVALIDATE
-.\" Since Linux 2.4, this seems to be a no-op (other than the
-.\" EBUSY check for VM_LOCKED).
-Asks to invalidate other mappings of the same file
-(so that they can be updated with the fresh values just written).
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBUSY
-.B MS_INVALIDATE
-was specified in
-.IR flags ,
-and a memory lock exists for the specified address range.
-.TP
-.B EINVAL
-.I addr
-is not a multiple of PAGESIZE; or any bit other than
-.BR MS_ASYNC " | " MS_INVALIDATE " | " MS_SYNC
-is set in
-.IR flags ;
-or both
-.B MS_SYNC
-and
-.B MS_ASYNC
-are set in
-.IR flags .
-.TP
-.B ENOMEM
-The indicated memory (or part of it) was not mapped.
-.SH VERSIONS
-According to POSIX, either
-.B MS_SYNC
-or
-.B MS_ASYNC
-must be specified in
-.IR flags ,
-and indeed failure to include one of these flags will cause
-.BR msync ()
-to fail on some systems.
-However, Linux permits a call to
-.BR msync ()
-that specifies neither of these flags,
-with semantics that are (currently) equivalent to specifying
-.BR MS_ASYNC .
-(Since Linux 2.6.19,
-.\" commit 204ec841fbea3e5138168edbc3a76d46747cc987
-.B MS_ASYNC
-is in fact a no-op, since the kernel properly tracks dirty
-pages and flushes them to storage as necessary.)
-Notwithstanding the Linux behavior,
-portable, future-proof applications should ensure that they specify either
-.B MS_SYNC
-or
-.B MS_ASYNC
-in
-.IR flags .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.P
-This call was introduced in Linux 1.3.21, and then used
-.B EFAULT
-instead of
-.BR ENOMEM .
-In Linux 2.4.19, this was changed to the POSIX value
-.BR ENOMEM .
-.P
-On POSIX systems on which
-.BR msync ()
-is available, both
-.B _POSIX_MAPPED_FILES
-and
-.B _POSIX_SYNCHRONIZED_IO
-are defined in
-.I <unistd.h>
-to a value greater than 0.
-(See also
-.BR sysconf (3).)
-.\" POSIX.1-2001: It shall be defined to -1 or 0 or 200112L.
-.\" -1: unavailable, 0: ask using sysconf().
-.\" glibc defines them to 1.
-.SH SEE ALSO
-.BR mmap (2)
-.P
-B.O. Gallmeister, POSIX.4, O'Reilly, pp. 128\[en]129 and 389\[en]391.
diff --git a/man2/munlock.2 b/man2/munlock.2
deleted file mode 100644
index 5e5b3c741..000000000
--- a/man2/munlock.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/mlock.2
diff --git a/man2/munlockall.2 b/man2/munlockall.2
deleted file mode 100644
index 5e5b3c741..000000000
--- a/man2/munlockall.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/mlock.2
diff --git a/man2/munmap.2 b/man2/munmap.2
deleted file mode 100644
index 8902d1b35..000000000
--- a/man2/munmap.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/mmap.2
diff --git a/man2/name_to_handle_at.2 b/man2/name_to_handle_at.2
deleted file mode 100644
index 090521c86..000000000
--- a/man2/name_to_handle_at.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/open_by_handle_at.2
diff --git a/man2/nanosleep.2 b/man2/nanosleep.2
deleted file mode 100644
index e7132ee32..000000000
--- a/man2/nanosleep.2
+++ /dev/null
@@ -1,221 +0,0 @@
-.\" Copyright (C) Markus Kuhn, 1996
-.\" and Copyright (C) Linux Foundation, 2008, written by Michael Kerrisk
-.\" <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 1996-04-10 Markus Kuhn <mskuhn@cip.informatik.uni-erlangen.de>
-.\" First version written
-.\" Modified, 2004-10-24, aeb
-.\" 2008-06-24, mtk
-.\" Minor rewrites of some parts.
-.\" NOTES: describe case where clock_nanosleep() can be preferable.
-.\" NOTES: describe CLOCK_REALTIME versus CLOCK_NANOSLEEP
-.\" Replace crufty discussion of HZ with a pointer to time(7).
-.TH nanosleep 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-nanosleep \- high-resolution sleep
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <time.h>
-.P
-.BI "int nanosleep(const struct timespec *" duration ,
-.BI " struct timespec *_Nullable " rem );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR nanosleep ():
-.nf
- _POSIX_C_SOURCE >= 199309L
-.fi
-.SH DESCRIPTION
-.BR nanosleep ()
-suspends the execution of the calling thread
-until either at least the time specified in
-.I *duration
-has elapsed, or the delivery of a signal
-that triggers the invocation of a handler in the calling thread or
-that terminates the process.
-.P
-If the call is interrupted by a signal handler,
-.BR nanosleep ()
-returns \-1, sets
-.I errno
-to
-.BR EINTR ,
-and writes the remaining time into the structure pointed to by
-.I rem
-unless
-.I rem
-is NULL.
-The value of
-.I *rem
-can then be used to call
-.BR nanosleep ()
-again and complete the specified pause (but see NOTES).
-.P
-The
-.BR timespec (3)
-structure
-is used to specify intervals of time with nanosecond precision.
-.P
-The value of the nanoseconds field must be in the range [0, 999999999].
-.P
-Compared to
-.BR sleep (3)
-and
-.BR usleep (3),
-.BR nanosleep ()
-has the following advantages:
-it provides a higher resolution for specifying the sleep interval;
-POSIX.1 explicitly specifies that it
-does not interact with signals;
-and it makes the task of resuming a sleep that has been
-interrupted by a signal handler easier.
-.SH RETURN VALUE
-On successfully sleeping for the requested duration,
-.BR nanosleep ()
-returns 0.
-If the call is interrupted by a signal handler or encounters an error,
-then it returns \-1, with
-.I errno
-set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Problem with copying information from user space.
-.TP
-.B EINTR
-The pause has been interrupted by a signal that was
-delivered to the thread (see
-.BR signal (7)).
-The remaining sleep time has been written
-into
-.I *rem
-so that the thread can easily call
-.BR nanosleep ()
-again and continue with the pause.
-.TP
-.B EINVAL
-The value in the
-.I tv_nsec
-field was not in the range [0, 999999999] or
-.I tv_sec
-was negative.
-.SH VERSIONS
-POSIX.1 specifies that
-.BR nanosleep ()
-should measure time against the
-.B CLOCK_REALTIME
-clock.
-However, Linux measures the time using the
-.B CLOCK_MONOTONIC
-clock.
-.\" See also http://thread.gmane.org/gmane.linux.kernel/696854/
-.\" Subject: nanosleep() uses CLOCK_MONOTONIC, should be CLOCK_REALTIME?
-.\" Date: 2008-06-22 07:35:41 GMT
-This probably does not matter, since the POSIX.1 specification for
-.BR clock_settime (2)
-says that discontinuous changes in
-.B CLOCK_REALTIME
-should not affect
-.BR nanosleep ():
-.RS
-.P
-Setting the value of the
-.B CLOCK_REALTIME
-clock via
-.BR clock_settime (2)
-shall
-have no effect on threads that are blocked waiting for a relative time
-service based upon this clock, including the
-.BR nanosleep ()
-function; ...
-Consequently,
-these time services shall expire when the requested duration elapses,
-independently of the new or old value of the clock.
-.RE
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.P
-In order to support applications requiring much more precise pauses
-(e.g., in order to control some time-critical hardware),
-.BR nanosleep ()
-would handle pauses of up to 2 milliseconds by busy waiting with microsecond
-precision when called from a thread scheduled under a real-time policy
-like
-.B SCHED_FIFO
-or
-.BR SCHED_RR .
-This special extension was removed in Linux 2.5.39,
-and is thus not available in Linux 2.6.0 and later kernels.
-.SH NOTES
-If the
-.I duration
-is not an exact multiple of the granularity of the underlying clock (see
-.BR time (7)),
-then the interval will be rounded up to the next multiple.
-Furthermore, after the sleep completes, there may still be a delay before
-the CPU becomes free to once again execute the calling thread.
-.P
-The fact that
-.BR nanosleep ()
-sleeps for a relative interval can be problematic if the call
-is repeatedly restarted after being interrupted by signals,
-since the time between the interruptions and restarts of the call
-will lead to drift in the time when the sleep finally completes.
-This problem can be avoided by using
-.BR clock_nanosleep (2)
-with an absolute time value.
-.SH BUGS
-If a program that catches signals and uses
-.BR nanosleep ()
-receives signals at a very high rate,
-then scheduling delays and rounding errors in the kernel's
-calculation of the sleep interval and the returned
-.I *rem
-value mean that the
-.I *rem
-value may steadily
-.I increase
-on successive restarts of the
-.BR nanosleep ()
-call.
-To avoid such problems, use
-.BR clock_nanosleep (2)
-with the
-.B TIMER_ABSTIME
-flag to sleep to an absolute deadline.
-.P
-In Linux 2.4, if
-.BR nanosleep ()
-is stopped by a signal (e.g.,
-.BR SIGTSTP ),
-then the call fails with the error
-.B EINTR
-after the thread is resumed by a
-.B SIGCONT
-signal.
-If the system call is subsequently restarted,
-then the time that the thread spent in the stopped state is
-.I not
-counted against the sleep interval.
-This problem is fixed in Linux 2.6.0 and later kernels.
-.SH SEE ALSO
-.BR clock_nanosleep (2),
-.BR restart_syscall (2),
-.BR sched_setscheduler (2),
-.BR timer_create (2),
-.BR sleep (3),
-.BR timespec (3),
-.BR usleep (3),
-.BR time (7)
diff --git a/man2/newfstatat.2 b/man2/newfstatat.2
deleted file mode 100644
index 7791269e6..000000000
--- a/man2/newfstatat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/fstatat.2
diff --git a/man2/nfsservctl.2 b/man2/nfsservctl.2
deleted file mode 100644
index 462fda2ac..000000000
--- a/man2/nfsservctl.2
+++ /dev/null
@@ -1,70 +0,0 @@
-.\" %%%LICENSE_START(PUBLIC_DOMAIN)
-.\" This text is in the public domain.
-.\" %%%LICENSE_END
-.\"
-.TH nfsservctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-nfsservctl \- syscall interface to kernel nfs daemon
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <linux/nfsd/syscall.h>
-.P
-.BI "long nfsservctl(int " cmd ", struct nfsctl_arg *" argp ,
-.BI " union nfsctl_res *" resp );
-.fi
-.SH DESCRIPTION
-.IR Note :
-Since Linux 3.1, this system call no longer exists.
-It has been replaced by a set of files in the
-.I nfsd
-filesystem; see
-.BR nfsd (7).
-.P
-.in +4n
-.EX
-/*
- * These are the commands understood by nfsctl().
- */
-#define NFSCTL_SVC 0 /* This is a server process. */
-#define NFSCTL_ADDCLIENT 1 /* Add an NFS client. */
-#define NFSCTL_DELCLIENT 2 /* Remove an NFS client. */
-#define NFSCTL_EXPORT 3 /* Export a filesystem. */
-#define NFSCTL_UNEXPORT 4 /* Unexport a filesystem. */
-#define NFSCTL_UGIDUPDATE 5 /* Update a client\[aq]s UID/GID map
- (only in Linux 2.4.x and earlier). */
-#define NFSCTL_GETFH 6 /* Get a file handle (used by mountd(8))
- (only in Linux 2.4.x and earlier). */
-\&
-struct nfsctl_arg {
- int ca_version; /* safeguard */
- union {
- struct nfsctl_svc u_svc;
- struct nfsctl_client u_client;
- struct nfsctl_export u_export;
- struct nfsctl_uidmap u_umap;
- struct nfsctl_fhparm u_getfh;
- unsigned int u_debug;
- } u;
-};
-\&
-union nfsctl_res {
- struct knfs_fh cr_getfh;
- unsigned int cr_debug;
-};
-.EE
-.in
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Removed in Linux 3.1.
-Removed in glibc 2.28.
-.SH SEE ALSO
-.BR nfsd (7)
diff --git a/man2/nice.2 b/man2/nice.2
deleted file mode 100644
index 20d8d8ec1..000000000
--- a/man2/nice.2
+++ /dev/null
@@ -1,118 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-11-04 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-06-04 by aeb
-.\" Modified 2004-05-27 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH nice 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-nice \- change process priority
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int nice(int " inc );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR nice ():
-.nf
- _XOPEN_SOURCE
- || /* Since glibc 2.19: */ _DEFAULT_SOURCE
- || /* glibc <= 2.19: */ _BSD_SOURCE || _SVID_SOURCE
-.fi
-.SH DESCRIPTION
-.BR nice ()
-adds
-.I inc
-to the nice value for the calling thread.
-(A higher nice value means a lower priority.)
-.P
-The range of the nice value is +19 (low priority) to \-20 (high priority).
-Attempts to set a nice value outside the range are clamped to the range.
-.P
-Traditionally, only a privileged process could lower the nice value
-(i.e., set a higher priority).
-However, since Linux 2.6.12, an unprivileged process can decrease
-the nice value of a target process that has a suitable
-.B RLIMIT_NICE
-soft limit; see
-.BR getrlimit (2)
-for details.
-.SH RETURN VALUE
-On success, the new nice value is returned (but see VERSIONS below).
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-A successful call can legitimately return \-1.
-To detect an error, set
-.I errno
-to 0 before the call, and check whether it is nonzero after
-.BR nice ()
-returns \-1.
-.SH ERRORS
-.TP
-.B EPERM
-The calling process attempted to increase its priority by
-supplying a negative
-.I inc
-but has insufficient privileges.
-Under Linux, the
-.B CAP_SYS_NICE
-capability is required.
-(But see the discussion of the
-.B RLIMIT_NICE
-resource limit in
-.BR setrlimit (2).)
-.SH VERSIONS
-.SS C library/kernel differences
-POSIX.1 specifies that
-.BR nice ()
-should return the new nice value.
-However, the raw Linux system call returns 0 on success.
-Likewise, the
-.BR nice ()
-wrapper function provided in glibc 2.2.3 and earlier returns 0 on success.
-.P
-Since glibc 2.2.4, the
-.BR nice ()
-wrapper function provided by glibc provides conformance to POSIX.1 by calling
-.BR getpriority (2)
-to obtain the new nice value, which is then returned to the caller.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.\" SVr4 documents an additional
-.\" .B EINVAL
-.\" error code.
-.SH NOTES
-For further details on the nice value, see
-.BR sched (7).
-.P
-.IR Note :
-the addition of the "autogroup" feature in Linux 2.6.38 means that
-the nice value no longer has its traditional effect in many circumstances.
-For details, see
-.BR sched (7).
-.SH SEE ALSO
-.BR nice (1),
-.BR renice (1),
-.BR fork (2),
-.BR getpriority (2),
-.BR getrlimit (2),
-.BR setpriority (2),
-.BR capabilities (7),
-.BR sched (7)
diff --git a/man2/oldfstat.2 b/man2/oldfstat.2
deleted file mode 100644
index b1a86c195..000000000
--- a/man2/oldfstat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
diff --git a/man2/oldlstat.2 b/man2/oldlstat.2
deleted file mode 100644
index b1a86c195..000000000
--- a/man2/oldlstat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
diff --git a/man2/oldolduname.2 b/man2/oldolduname.2
deleted file mode 100644
index 450f7b1ca..000000000
--- a/man2/oldolduname.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/uname.2
diff --git a/man2/oldstat.2 b/man2/oldstat.2
deleted file mode 100644
index b1a86c195..000000000
--- a/man2/oldstat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
diff --git a/man2/olduname.2 b/man2/olduname.2
deleted file mode 100644
index 450f7b1ca..000000000
--- a/man2/olduname.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/uname.2
diff --git a/man2/open.2 b/man2/open.2
deleted file mode 100644
index 1e98a1df4..000000000
--- a/man2/open.2
+++ /dev/null
@@ -1,1941 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\" and Copyright (C) 2008 Greg Banks
-.\" and Copyright (C) 2006, 2008, 2013, 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-21 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1994-08-21 by Michael Haardt
-.\" Modified 1996-04-13 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 1996-05-13 by Thomas Koenig
-.\" Modified 1996-12-20 by Michael Haardt
-.\" Modified 1999-02-19 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 1998-11-28 by Joseph S. Myers <jsm28@hermes.cam.ac.uk>
-.\" Modified 1999-06-03 by Michael Haardt
-.\" Modified 2002-05-07 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2004-12-08, mtk, reordered flags list alphabetically
-.\" 2004-12-08, Martin Pool <mbp@sourcefrog.net> (& mtk), added O_NOATIME
-.\" 2007-09-18, mtk, Added description of O_CLOEXEC + other minor edits
-.\" 2008-01-03, mtk, with input from Trond Myklebust
-.\" <trond.myklebust@fys.uio.no> and Timo Sirainen <tss@iki.fi>
-.\" Rewrite description of O_EXCL.
-.\" 2008-01-11, Greg Banks <gnb@melbourne.sgi.com>: add more detail
-.\" on O_DIRECT.
-.\" 2008-02-26, Michael Haardt: Reorganized text for O_CREAT and mode
-.\"
-.\" FIXME . Apr 08: The next POSIX revision has O_EXEC, O_SEARCH, and
-.\" O_TTYINIT. Eventually these may need to be documented. --mtk
-.\"
-.TH open 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-open, openat, creat \- open and possibly create a file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <fcntl.h>
-.P
-.BI "int open(const char *" pathname ", int " flags ", ..."
-.BI " \fR/*\fP mode_t " mode " \fR*/\fP );"
-.P
-.BI "int creat(const char *" pathname ", mode_t " mode );
-.P
-.BI "int openat(int " dirfd ", const char *" pathname ", int " flags ", ..."
-.BI " \fR/*\fP mode_t " mode " \fR*/\fP );"
-.P
-/* Documented separately, in \c
-.BR openat2 (2):\c
-\& */
-.BI "int openat2(int " dirfd ", const char *" pathname ,
-.BI " const struct open_how *" how ", size_t " size );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR openat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-The
-.BR open ()
-system call opens the file specified by
-.IR pathname .
-If the specified file does not exist,
-it may optionally (if
-.B O_CREAT
-is specified in
-.IR flags )
-be created by
-.BR open ().
-.P
-The return value of
-.BR open ()
-is a file descriptor, a small, nonnegative integer that is an index
-to an entry in the process's table of open file descriptors.
-The file descriptor is used
-in subsequent system calls
-(\c
-.BR read (2),
-.BR write (2),
-.BR lseek (2),
-.BR fcntl (2),
-etc.)
-to refer to the open file.
-The file descriptor returned by a successful call will be
-the lowest-numbered file descriptor not currently open for the process.
-.P
-By default, the new file descriptor is set to remain open across an
-.BR execve (2)
-(i.e., the
-.B FD_CLOEXEC
-file descriptor flag described in
-.BR fcntl (2)
-is initially disabled); the
-.B O_CLOEXEC
-flag, described below, can be used to change this default.
-The file offset is set to the beginning of the file (see
-.BR lseek (2)).
-.P
-A call to
-.BR open ()
-creates a new
-.IR "open file description" ,
-an entry in the system-wide table of open files.
-The open file description records the file offset and the file status flags
-(see below).
-A file descriptor is a reference to an open file description;
-this reference is unaffected if
-.I pathname
-is subsequently removed or modified to refer to a different file.
-For further details on open file descriptions, see NOTES.
-.P
-The argument
-.I flags
-must include one of the following
-.IR "access modes" :
-.BR O_RDONLY ", " O_WRONLY ", or " O_RDWR .
-These request opening the file read-only, write-only, or read/write,
-respectively.
-.P
-In addition, zero or more file creation flags and file status flags
-can be
-bitwise ORed
-in
-.IR flags .
-The
-.I file creation flags
-are
-.BR O_CLOEXEC ,
-.BR O_CREAT ,
-.BR O_DIRECTORY ,
-.BR O_EXCL ,
-.BR O_NOCTTY ,
-.BR O_NOFOLLOW ,
-.BR O_TMPFILE ,
-and
-.BR O_TRUNC .
-The
-.I file status flags
-are all of the remaining flags listed below.
-.\" SUSv4 divides the flags into:
-.\" * Access mode
-.\" * File creation
-.\" * File status
-.\" * Other (O_CLOEXEC, O_DIRECTORY, O_NOFOLLOW)
-.\" though it's not clear what the difference between "other" and
-.\" "File creation" flags is. I raised an Aardvark to see if this
-.\" can be clarified in SUSv4; 10 Oct 2008.
-.\" http://thread.gmane.org/gmane.comp.standards.posix.austin.general/64/focus=67
-.\" TC1 (balloted in 2013), resolved this, so that those three constants
-.\" are also categorized as file status flags.
-.\"
-The distinction between these two groups of flags is that
-the file creation flags affect the semantics of the open operation itself,
-while the file status flags affect the semantics of subsequent I/O operations.
-The file status flags can be retrieved and (in some cases)
-modified; see
-.BR fcntl (2)
-for details.
-.P
-The full list of file creation flags and file status flags is as follows:
-.TP
-.B O_APPEND
-The file is opened in append mode.
-Before each
-.BR write (2),
-the file offset is positioned at the end of the file,
-as if with
-.BR lseek (2).
-The modification of the file offset and the write operation
-are performed as a single atomic step.
-.IP
-.B O_APPEND
-may lead to corrupted files on NFS filesystems if more than one process
-appends data to a file at once.
-.\" For more background, see
-.\" http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=453946
-.\" http://nfs.sourceforge.net/
-This is because NFS does not support
-appending to a file, so the client kernel has to simulate it, which
-can't be done without a race condition.
-.TP
-.B O_ASYNC
-Enable signal-driven I/O:
-generate a signal
-.RB ( SIGIO
-by default, but this can be changed via
-.BR fcntl (2))
-when input or output becomes possible on this file descriptor.
-This feature is available only for terminals, pseudoterminals,
-sockets, and (since Linux 2.6) pipes and FIFOs.
-See
-.BR fcntl (2)
-for further details.
-See also BUGS, below.
-.TP
-.BR O_CLOEXEC " (since Linux 2.6.23)"
-.\" NOTE! several other man pages refer to this text
-Enable the close-on-exec flag for the new file descriptor.
-.\" FIXME . for later review when Issue 8 is one day released...
-.\" POSIX proposes to fix many APIs that provide hidden FDs
-.\" http://austingroupbugs.net/tag_view_page.php?tag_id=8
-.\" http://austingroupbugs.net/view.php?id=368
-Specifying this flag permits a program to avoid additional
-.BR fcntl (2)
-.B F_SETFD
-operations to set the
-.B FD_CLOEXEC
-flag.
-.IP
-Note that the use of this flag is essential in some multithreaded programs,
-because using a separate
-.BR fcntl (2)
-.B F_SETFD
-operation to set the
-.B FD_CLOEXEC
-flag does not suffice to avoid race conditions
-where one thread opens a file descriptor and
-attempts to set its close-on-exec flag using
-.BR fcntl (2)
-at the same time as another thread does a
-.BR fork (2)
-plus
-.BR execve (2).
-Depending on the order of execution,
-the race may lead to the file descriptor returned by
-.BR open ()
-being unintentionally leaked to the program executed by the child process
-created by
-.BR fork (2).
-(This kind of race is in principle possible for any system call
-that creates a file descriptor whose close-on-exec flag should be set,
-and various other Linux system calls provide an equivalent of the
-.B O_CLOEXEC
-flag to deal with this problem.)
-.\" This flag fixes only one form of the race condition;
-.\" The race can also occur with, for example, file descriptors
-.\" returned by accept(), pipe(), etc.
-.TP
-.B O_CREAT
-If
-.I pathname
-does not exist, create it as a regular file.
-.IP
-The owner (user ID) of the new file is set to the effective user ID
-of the process.
-.IP
-The group ownership (group ID) of the new file is set either to
-the effective group ID of the process (System V semantics)
-or to the group ID of the parent directory (BSD semantics).
-On Linux, the behavior depends on whether the
-set-group-ID mode bit is set on the parent directory:
-if that bit is set, then BSD semantics apply;
-otherwise, System V semantics apply.
-For some filesystems, the behavior also depends on the
-.I bsdgroups
-and
-.I sysvgroups
-mount options described in
-.BR mount (8).
-.\" As at Linux 2.6.25, bsdgroups is supported by ext2, ext3, ext4, and
-.\" XFS (since Linux 2.6.14).
-.IP
-The
-.I mode
-argument specifies the file mode bits to be applied when a new file is created.
-If neither
-.B O_CREAT
-nor
-.B O_TMPFILE
-is specified in
-.IR flags ,
-then
-.I mode
-is ignored (and can thus be specified as 0, or simply omitted).
-The
-.I mode
-argument
-.B must
-be supplied if
-.B O_CREAT
-or
-.B O_TMPFILE
-is specified in
-.IR flags ;
-if it is not supplied,
-some arbitrary bytes from the stack will be applied as the file mode.
-.IP
-The effective mode is modified by the process's
-.I umask
-in the usual way: in the absence of a default ACL, the mode of the
-created file is
-.IR "(mode\ &\ \[ti]umask)" .
-.IP
-Note that
-.I mode
-applies only to future accesses of the
-newly created file; the
-.BR open ()
-call that creates a read-only file may well return a read/write
-file descriptor.
-.IP
-The following symbolic constants are provided for
-.IR mode :
-.RS
-.TP 9
-.B S_IRWXU
-00700 user (file owner) has read, write, and execute permission
-.TP
-.B S_IRUSR
-00400 user has read permission
-.TP
-.B S_IWUSR
-00200 user has write permission
-.TP
-.B S_IXUSR
-00100 user has execute permission
-.TP
-.B S_IRWXG
-00070 group has read, write, and execute permission
-.TP
-.B S_IRGRP
-00040 group has read permission
-.TP
-.B S_IWGRP
-00020 group has write permission
-.TP
-.B S_IXGRP
-00010 group has execute permission
-.TP
-.B S_IRWXO
-00007 others have read, write, and execute permission
-.TP
-.B S_IROTH
-00004 others have read permission
-.TP
-.B S_IWOTH
-00002 others have write permission
-.TP
-.B S_IXOTH
-00001 others have execute permission
-.RE
-.IP
-According to POSIX, the effect when other bits are set in
-.I mode
-is unspecified.
-On Linux, the following bits are also honored in
-.IR mode :
-.RS
-.TP 9
-.B S_ISUID
-0004000 set-user-ID bit
-.TP
-.B S_ISGID
-0002000 set-group-ID bit (see
-.BR inode (7)).
-.TP
-.B S_ISVTX
-0001000 sticky bit (see
-.BR inode (7)).
-.RE
-.TP
-.BR O_DIRECT " (since Linux 2.4.10)"
-Try to minimize cache effects of the I/O to and from this file.
-In general this will degrade performance, but it is useful in
-special situations, such as when applications do their own caching.
-File I/O is done directly to/from user-space buffers.
-The
-.B O_DIRECT
-flag on its own makes an effort to transfer data synchronously,
-but does not give the guarantees of the
-.B O_SYNC
-flag that data and necessary metadata are transferred.
-To guarantee synchronous I/O,
-.B O_SYNC
-must be used in addition to
-.BR O_DIRECT .
-See NOTES below for further discussion.
-.IP
-A semantically similar (but deprecated) interface for block devices
-is described in
-.BR raw (8).
-.TP
-.B O_DIRECTORY
-If \fIpathname\fP is not a directory, cause the open to fail.
-.\" But see the following and its replies:
-.\" http://marc.theaimsgroup.com/?t=112748702800001&r=1&w=2
-.\" [PATCH] open: O_DIRECTORY and O_CREAT together should fail
-.\" O_DIRECTORY | O_CREAT causes O_DIRECTORY to be ignored.
-This flag was added in Linux 2.1.126, to
-avoid denial-of-service problems if
-.BR opendir (3)
-is called on a
-FIFO or tape device.
-.TP
-.B O_DSYNC
-Write operations on the file will complete according to the requirements of
-synchronized I/O
-.I data
-integrity completion.
-.IP
-By the time
-.BR write (2)
-(and similar)
-return, the output data
-has been transferred to the underlying hardware,
-along with any file metadata that would be required to retrieve that data
-(i.e., as though each
-.BR write (2)
-was followed by a call to
-.BR fdatasync (2)).
-.IR "See NOTES below" .
-.TP
-.B O_EXCL
-Ensure that this call creates the file:
-if this flag is specified in conjunction with
-.BR O_CREAT ,
-and
-.I pathname
-already exists, then
-.BR open ()
-fails with the error
-.BR EEXIST .
-.IP
-When these two flags are specified, symbolic links are not followed:
-.\" POSIX.1-2001 explicitly requires this behavior.
-if
-.I pathname
-is a symbolic link, then
-.BR open ()
-fails regardless of where the symbolic link points.
-.IP
-In general, the behavior of
-.B O_EXCL
-is undefined if it is used without
-.BR O_CREAT .
-There is one exception: on Linux 2.6 and later,
-.B O_EXCL
-can be used without
-.B O_CREAT
-if
-.I pathname
-refers to a block device.
-If the block device is in use by the system (e.g., mounted),
-.BR open ()
-fails with the error
-.BR EBUSY .
-.IP
-On NFS,
-.B O_EXCL
-is supported only when using NFSv3 or later on kernel 2.6 or later.
-In NFS environments where
-.B O_EXCL
-support is not provided, programs that rely on it
-for performing locking tasks will contain a race condition.
-Portable programs that want to perform atomic file locking using a lockfile,
-and need to avoid reliance on NFS support for
-.BR O_EXCL ,
-can create a unique file on
-the same filesystem (e.g., incorporating hostname and PID), and use
-.BR link (2)
-to make a link to the lockfile.
-If
-.BR link (2)
-returns 0, the lock is successful.
-Otherwise, use
-.BR stat (2)
-on the unique file to check if its link count has increased to 2,
-in which case the lock is also successful.
-.TP
-.B O_LARGEFILE
-(LFS)
-Allow files whose sizes cannot be represented in an
-.I off_t
-(but can be represented in an
-.IR off64_t )
-to be opened.
-The
-.B _LARGEFILE64_SOURCE
-macro must be defined
-(before including
-.I any
-header files)
-in order to obtain this definition.
-Setting the
-.B _FILE_OFFSET_BITS
-feature test macro to 64 (rather than using
-.BR O_LARGEFILE )
-is the preferred
-method of accessing large files on 32-bit systems (see
-.BR feature_test_macros (7)).
-.TP
-.BR O_NOATIME " (since Linux 2.6.8)"
-Do not update the file last access time
-.RI ( st_atime
-in the inode)
-when the file is
-.BR read (2).
-.IP
-This flag can be employed only if one of the following conditions is true:
-.RS
-.IP \[bu] 3
-The effective UID of the process
-.\" Strictly speaking: the filesystem UID
-matches the owner UID of the file.
-.IP \[bu]
-The calling process has the
-.B CAP_FOWNER
-capability in its user namespace and
-the owner UID of the file has a mapping in the namespace.
-.RE
-.IP
-This flag is intended for use by indexing or backup programs,
-where its use can significantly reduce the amount of disk activity.
-This flag may not be effective on all filesystems.
-One example is NFS, where the server maintains the access time.
-.\" The O_NOATIME flag also affects the treatment of st_atime
-.\" by mmap() and readdir(2), MTK, Dec 04.
-.TP
-.B O_NOCTTY
-If
-.I pathname
-refers to a terminal device\[em]see
-.BR tty (4)\[em]it
-will not become the process's controlling terminal even if the
-process does not have one.
-.TP
-.B O_NOFOLLOW
-If the trailing component (i.e., basename) of
-.I pathname
-is a symbolic link, then the open fails, with the error
-.BR ELOOP .
-Symbolic links in earlier components of the pathname will still be
-followed.
-(Note that the
-.B ELOOP
-error that can occur in this case is indistinguishable from the case where
-an open fails because there are too many symbolic links found
-while resolving components in the prefix part of the pathname.)
-.IP
-This flag is a FreeBSD extension, which was added in Linux 2.1.126,
-and has subsequently been standardized in POSIX.1-2008.
-.IP
-See also
-.B O_PATH
-below.
-.\" The headers from glibc 2.0.100 and later include a
-.\" definition of this flag; \fIkernels before Linux 2.1.126 will ignore it if
-.\" used\fP.
-.TP
-.BR O_NONBLOCK " or " O_NDELAY
-When possible, the file is opened in nonblocking mode.
-Neither the
-.BR open ()
-nor any subsequent I/O operations on the file descriptor which is
-returned will cause the calling process to wait.
-.IP
-Note that the setting of this flag has no effect on the operation of
-.BR poll (2),
-.BR select (2),
-.BR epoll (7),
-and similar,
-since those interfaces merely inform the caller about whether
-a file descriptor is "ready",
-meaning that an I/O operation performed on
-the file descriptor with the
-.B O_NONBLOCK
-flag
-.I clear
-would not block.
-.IP
-Note that this flag has no effect for regular files and block devices;
-that is, I/O operations will (briefly) block when device activity
-is required, regardless of whether
-.B O_NONBLOCK
-is set.
-Since
-.B O_NONBLOCK
-semantics might eventually be implemented,
-applications should not depend upon blocking behavior
-when specifying this flag for regular files and block devices.
-.IP
-For the handling of FIFOs (named pipes), see also
-.BR fifo (7).
-For a discussion of the effect of
-.B O_NONBLOCK
-in conjunction with mandatory file locks and with file leases, see
-.BR fcntl (2).
-.TP
-.BR O_PATH " (since Linux 2.6.39)"
-.\" commit 1abf0c718f15a56a0a435588d1b104c7a37dc9bd
-.\" commit 326be7b484843988afe57566b627fb7a70beac56
-.\" commit 65cfc6722361570bfe255698d9cd4dccaf47570d
-.\"
-.\" http://thread.gmane.org/gmane.linux.man/2790/focus=3496
-.\" Subject: Re: [PATCH] open(2): document O_PATH
-.\" Newsgroups: gmane.linux.man, gmane.linux.kernel
-.\"
-Obtain a file descriptor that can be used for two purposes:
-to indicate a location in the filesystem tree and
-to perform operations that act purely at the file descriptor level.
-The file itself is not opened, and other file operations (e.g.,
-.BR read (2),
-.BR write (2),
-.BR fchmod (2),
-.BR fchown (2),
-.BR fgetxattr (2),
-.BR ioctl (2),
-.BR mmap (2))
-fail with the error
-.BR EBADF .
-.IP
-The following operations
-.I can
-be performed on the resulting file descriptor:
-.RS
-.IP \[bu] 3
-.BR close (2).
-.IP \[bu]
-.BR fchdir (2),
-if the file descriptor refers to a directory
-(since Linux 3.5).
-.\" commit 332a2e1244bd08b9e3ecd378028513396a004a24
-.IP \[bu]
-.BR fstat (2)
-(since Linux 3.6).
-.IP \[bu]
-.\" fstat(): commit 55815f70147dcfa3ead5738fd56d3574e2e3c1c2
-.BR fstatfs (2)
-(since Linux 3.12).
-.\" fstatfs(): commit 9d05746e7b16d8565dddbe3200faa1e669d23bbf
-.IP \[bu]
-Duplicating the file descriptor
-.RB ( dup (2),
-.BR fcntl (2)
-.BR F_DUPFD ,
-etc.).
-.IP \[bu]
-Getting and setting file descriptor flags
-.RB ( fcntl (2)
-.B F_GETFD
-and
-.BR F_SETFD ).
-.IP \[bu]
-Retrieving open file status flags using the
-.BR fcntl (2)
-.B F_GETFL
-operation: the returned flags will include the bit
-.BR O_PATH .
-.IP \[bu]
-Passing the file descriptor as the
-.I dirfd
-argument of
-.BR openat ()
-and the other "*at()" system calls.
-This includes
-.BR linkat (2)
-with
-.B AT_EMPTY_PATH
-(or via procfs using
-.BR AT_SYMLINK_FOLLOW )
-even if the file is not a directory.
-.IP \[bu]
-Passing the file descriptor to another process via a UNIX domain socket
-(see
-.B SCM_RIGHTS
-in
-.BR unix (7)).
-.RE
-.IP
-When
-.B O_PATH
-is specified in
-.IR flags ,
-flag bits other than
-.BR O_CLOEXEC ,
-.BR O_DIRECTORY ,
-and
-.B O_NOFOLLOW
-are ignored.
-.IP
-Opening a file or directory with the
-.B O_PATH
-flag requires no permissions on the object itself
-(but does require execute permission on the directories in the path prefix).
-Depending on the subsequent operation,
-a check for suitable file permissions may be performed (e.g.,
-.BR fchdir (2)
-requires execute permission on the directory referred to
-by its file descriptor argument).
-By contrast,
-obtaining a reference to a filesystem object by opening it with the
-.B O_RDONLY
-flag requires that the caller have read permission on the object,
-even when the subsequent operation (e.g.,
-.BR fchdir (2),
-.BR fstat (2))
-does not require read permission on the object.
-.IP
-If
-.I pathname
-is a symbolic link and the
-.B O_NOFOLLOW
-flag is also specified,
-then the call returns a file descriptor referring to the symbolic link.
-This file descriptor can be used as the
-.I dirfd
-argument in calls to
-.BR fchownat (2),
-.BR fstatat (2),
-.BR linkat (2),
-and
-.BR readlinkat (2)
-with an empty pathname to have the calls operate on the symbolic link.
-.IP
-If
-.I pathname
-refers to an automount point that has not yet been triggered, so no
-other filesystem is mounted on it, then the call returns a file
-descriptor referring to the automount directory without triggering a mount.
-.BR fstatfs (2)
-can then be used to determine if it is, in fact, an untriggered
-automount point
-.RB ( ".f_type == AUTOFS_SUPER_MAGIC" ).
-.IP
-One use of
-.B O_PATH
-for regular files is to provide the equivalent of POSIX.1's
-.B O_EXEC
-functionality.
-This permits us to open a file for which we have execute
-permission but not read permission, and then execute that file,
-with steps something like the following:
-.IP
-.in +4n
-.EX
-char buf[PATH_MAX];
-fd = open("some_prog", O_PATH);
-snprintf(buf, PATH_MAX, "/proc/self/fd/%d", fd);
-execl(buf, "some_prog", (char *) NULL);
-.EE
-.in
-.IP
-An
-.B O_PATH
-file descriptor can also be passed as the argument of
-.BR fexecve (3).
-.TP
-.B O_SYNC
-Write operations on the file will complete according to the requirements of
-synchronized I/O
-.I file
-integrity completion
-(by contrast with the
-synchronized I/O
-.I data
-integrity completion
-provided by
-.BR O_DSYNC ).
-.IP
-By the time
-.BR write (2)
-(or similar)
-returns, the output data and associated file metadata
-have been transferred to the underlying hardware
-(i.e., as though each
-.BR write (2)
-was followed by a call to
-.BR fsync (2)).
-.IR "See VERSIONS below" .
-.TP
-.BR O_TMPFILE " (since Linux 3.11)"
-.\" commit 60545d0d4610b02e55f65d141c95b18ccf855b6e
-.\" commit f4e0c30c191f87851c4a53454abb55ee276f4a7e
-.\" commit bb458c644a59dbba3a1fe59b27106c5e68e1c4bd
-Create an unnamed temporary regular file.
-The
-.I pathname
-argument specifies a directory;
-an unnamed inode will be created in that directory's filesystem.
-Anything written to the resulting file will be lost when
-the last file descriptor is closed, unless the file is given a name.
-.IP
-.B O_TMPFILE
-must be specified with one of
-.B O_RDWR
-or
-.B O_WRONLY
-and, optionally,
-.BR O_EXCL .
-If
-.B O_EXCL
-is not specified, then
-.BR linkat (2)
-can be used to link the temporary file into the filesystem, making it
-permanent, using code like the following:
-.IP
-.in +4n
-.EX
-char path[PATH_MAX];
-fd = open("/path/to/dir", O_TMPFILE | O_RDWR,
- S_IRUSR | S_IWUSR);
-\&
-/* File I/O on \[aq]fd\[aq]... */
-\&
-linkat(fd, "", AT_FDCWD, "/path/for/file", AT_EMPTY_PATH);
-\&
-/* If the caller doesn\[aq]t have the CAP_DAC_READ_SEARCH
- capability (needed to use AT_EMPTY_PATH with linkat(2)),
- and there is a proc(5) filesystem mounted, then the
- linkat(2) call above can be replaced with:
-\&
-snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd);
-linkat(AT_FDCWD, path, AT_FDCWD, "/path/for/file",
- AT_SYMLINK_FOLLOW);
-*/
-.EE
-.in
-.IP
-In this case,
-the
-.BR open ()
-.I mode
-argument determines the file permission mode, as with
-.BR O_CREAT .
-.IP
-Specifying
-.B O_EXCL
-in conjunction with
-.B O_TMPFILE
-prevents a temporary file from being linked into the filesystem
-in the above manner.
-(Note that the meaning of
-.B O_EXCL
-in this case is different from the meaning of
-.B O_EXCL
-otherwise.)
-.IP
-There are two main use cases for
-.\" Inspired by http://lwn.net/Articles/559147/
-.BR O_TMPFILE :
-.RS
-.IP \[bu] 3
-Improved
-.BR tmpfile (3)
-functionality: race-free creation of temporary files that
-(1) are automatically deleted when closed;
-(2) can never be reached via any pathname;
-(3) are not subject to symlink attacks; and
-(4) do not require the caller to devise unique names.
-.IP \[bu]
-Creating a file that is initially invisible, which is then populated
-with data and adjusted to have appropriate filesystem attributes
-.RB ( fchown (2),
-.BR fchmod (2),
-.BR fsetxattr (2),
-etc.)
-before being atomically linked into the filesystem
-in a fully formed state (using
-.BR linkat (2)
-as described above).
-.RE
-.IP
-.B O_TMPFILE
-requires support by the underlying filesystem;
-only a subset of Linux filesystems provide that support.
-In the initial implementation, support was provided in
-the ext2, ext3, ext4, UDF, Minix, and tmpfs filesystems.
-.\" To check for support, grep for "tmpfile" in kernel sources
-Support for other filesystems has subsequently been added as follows:
-XFS (Linux 3.15);
-.\" commit 99b6436bc29e4f10e4388c27a3e4810191cc4788
-.\" commit ab29743117f9f4c22ac44c13c1647fb24fb2bafe
-Btrfs (Linux 3.16);
-.\" commit ef3b9af50bfa6a1f02cd7b3f5124b712b1ba3e3c
-F2FS (Linux 3.16);
-.\" commit 50732df02eefb39ab414ef655979c2c9b64ad21c
-and ubifs (Linux 4.9).
-.TP
-.B O_TRUNC
-If the file already exists and is a regular file and the access mode allows
-writing (i.e., is
-.B O_RDWR
-or
-.BR O_WRONLY )
-it will be truncated to length 0.
-If the file is a FIFO or terminal device file, the
-.B O_TRUNC
-flag is ignored.
-Otherwise, the effect of
-.B O_TRUNC
-is unspecified.
-.SS creat()
-A call to
-.BR creat ()
-is equivalent to calling
-.BR open ()
-with
-.I flags
-equal to
-.BR O_CREAT|O_WRONLY|O_TRUNC .
-.SS openat()
-The
-.BR openat ()
-system call operates in exactly the same way as
-.BR open (),
-except for the differences described here.
-.P
-The
-.I dirfd
-argument is used in conjunction with the
-.I pathname
-argument as follows:
-.IP \[bu] 3
-If the pathname given in
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.IP \[bu]
-If the pathname given in
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR open ()).
-.IP \[bu]
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR open ()
-for a relative pathname).
-In this case,
-.I dirfd
-must be a directory that was opened for reading
-.RB ( O_RDONLY )
-or using the
-.B O_PATH
-flag.
-.P
-If the pathname given in
-.I pathname
-is relative, and
-.I dirfd
-is not a valid file descriptor, an error
-.RB ( EBADF )
-results.
-(Specifying an invalid file descriptor number in
-.I dirfd
-can be used as a means to ensure that
-.I pathname
-is absolute.)
-.\"
-.SS openat2(2)
-The
-.BR openat2 (2)
-system call is an extension of
-.BR openat (),
-and provides a superset of the features of
-.BR openat ().
-It is documented separately, in
-.BR openat2 (2).
-.SH RETURN VALUE
-On success,
-.BR open (),
-.BR openat (),
-and
-.BR creat ()
-return the new file descriptor (a nonnegative integer).
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.BR open (),
-.BR openat (),
-and
-.BR creat ()
-can fail with the following errors:
-.TP
-.B EACCES
-The requested access to the file is not allowed, or search permission
-is denied for one of the directories in the path prefix of
-.IR pathname ,
-or the file did not exist yet and write access to the parent directory
-is not allowed.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EACCES
-.\" commit 30aba6656f61ed44cba445a3c0d38b296fa9e8f5
-Where
-.B O_CREAT
-is specified, the
-.I protected_fifos
-or
-.I protected_regular
-sysctl is enabled, the file already exists and is a FIFO or regular file, the
-owner of the file is neither the current user nor the owner of the
-containing directory, and the containing directory is both world- or
-group-writable and sticky.
-For details, see the descriptions of
-.I /proc/sys/fs/protected_fifos
-and
-.I /proc/sys/fs/protected_regular
-in
-.BR proc_sys_fs (5).
-.TP
-.B EBADF
-.RB ( openat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EBUSY
-.B O_EXCL
-was specified in
-.I flags
-and
-.I pathname
-refers to a block device that is in use by the system (e.g., it is mounted).
-.TP
-.B EDQUOT
-Where
-.B O_CREAT
-is specified, the file does not exist, and the user's quota of disk
-blocks or inodes on the filesystem has been exhausted.
-.TP
-.B EEXIST
-.I pathname
-already exists and
-.BR O_CREAT " and " O_EXCL
-were used.
-.TP
-.B EFAULT
-.I pathname
-points outside your accessible address space.
-.TP
-.B EFBIG
-See
-.BR EOVERFLOW .
-.TP
-.B EINTR
-While blocked waiting to complete an open of a slow device
-(e.g., a FIFO; see
-.BR fifo (7)),
-the call was interrupted by a signal handler; see
-.BR signal (7).
-.TP
-.B EINVAL
-The filesystem does not support the
-.B O_DIRECT
-flag.
-See
-.B NOTES
-for more information.
-.TP
-.B EINVAL
-Invalid value in
-.\" In particular, __O_TMPFILE instead of O_TMPFILE
-.IR flags .
-.TP
-.B EINVAL
-.B O_TMPFILE
-was specified in
-.IR flags ,
-but neither
-.B O_WRONLY
-nor
-.B O_RDWR
-was specified.
-.TP
-.B EINVAL
-.B O_CREAT
-was specified in
-.I flags
-and the final component ("basename") of the new file's
-.I pathname
-is invalid
-(e.g., it contains characters not permitted by the underlying filesystem).
-.TP
-.B EINVAL
-The final component ("basename") of
-.I pathname
-is invalid
-(e.g., it contains characters not permitted by the underlying filesystem).
-.TP
-.B EISDIR
-.I pathname
-refers to a directory and the access requested involved writing
-(that is,
-.B O_WRONLY
-or
-.B O_RDWR
-is set).
-.TP
-.B EISDIR
-.I pathname
-refers to an existing directory,
-.B O_TMPFILE
-and one of
-.B O_WRONLY
-or
-.B O_RDWR
-were specified in
-.IR flags ,
-but this kernel version does not provide the
-.B O_TMPFILE
-functionality.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR pathname .
-.TP
-.B ELOOP
-.I pathname
-was a symbolic link, and
-.I flags
-specified
-.B O_NOFOLLOW
-but not
-.BR O_PATH .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached
-(see the description of
-.B RLIMIT_NOFILE
-in
-.BR getrlimit (2)).
-.TP
-.B ENAMETOOLONG
-.I pathname
-was too long.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENODEV
-.I pathname
-refers to a device special file and no corresponding device exists.
-(This is a Linux kernel bug; in this situation
-.B ENXIO
-must be returned.)
-.TP
-.B ENOENT
-.B O_CREAT
-is not set and the named file does not exist.
-.TP
-.B ENOENT
-A directory component in
-.I pathname
-does not exist or is a dangling symbolic link.
-.TP
-.B ENOENT
-.I pathname
-refers to a nonexistent directory,
-.B O_TMPFILE
-and one of
-.B O_WRONLY
-or
-.B O_RDWR
-were specified in
-.IR flags ,
-but this kernel version does not provide the
-.B O_TMPFILE
-functionality.
-.TP
-.B ENOMEM
-The named file is a FIFO,
-but memory for the FIFO buffer can't be allocated because
-the per-user hard limit on memory allocation for pipes has been reached
-and the caller is not privileged; see
-.BR pipe (7).
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOSPC
-.I pathname
-was to be created but the device containing
-.I pathname
-has no room for the new file.
-.TP
-.B ENOTDIR
-A component used as a directory in
-.I pathname
-is not, in fact, a directory, or \fBO_DIRECTORY\fP was specified and
-.I pathname
-was not a directory.
-.TP
-.B ENOTDIR
-.RB ( openat ())
-.I pathname
-is a relative pathname and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.TP
-.B ENXIO
-.BR O_NONBLOCK " | " O_WRONLY
-is set, the named file is a FIFO, and
-no process has the FIFO open for reading.
-.TP
-.B ENXIO
-The file is a device special file and no corresponding device exists.
-.TP
-.B ENXIO
-The file is a UNIX domain socket.
-.TP
-.B EOPNOTSUPP
-The filesystem containing
-.I pathname
-does not support
-.BR O_TMPFILE .
-.TP
-.B EOVERFLOW
-.I pathname
-refers to a regular file that is too large to be opened.
-The usual scenario here is that an application compiled
-on a 32-bit platform without
-.I \-D_FILE_OFFSET_BITS=64
-tried to open a file whose size exceeds
-.I (1<<31)\-1
-bytes;
-see also
-.B O_LARGEFILE
-above.
-This is the error specified by POSIX.1;
-before Linux 2.6.24, Linux gave the error
-.B EFBIG
-for this case.
-.\" See http://bugzilla.kernel.org/show_bug.cgi?id=7253
-.\" "Open of a large file on 32-bit fails with EFBIG, should be EOVERFLOW"
-.\" Reported 2006-10-03
-.TP
-.B EPERM
-The
-.B O_NOATIME
-flag was specified, but the effective user ID of the caller
-.\" Strictly speaking, it's the filesystem UID... (MTK)
-did not match the owner of the file and the caller was not privileged.
-.TP
-.B EPERM
-The operation was prevented by a file seal; see
-.BR fcntl (2).
-.TP
-.B EROFS
-.I pathname
-refers to a file on a read-only filesystem and write access was
-requested.
-.TP
-.B ETXTBSY
-.I pathname
-refers to an executable image which is currently being executed and
-write access was requested.
-.TP
-.B ETXTBSY
-.I pathname
-refers to a file that is currently in use as a swap file, and the
-.B O_TRUNC
-flag was specified.
-.TP
-.B ETXTBSY
-.I pathname
-refers to a file that is currently being read by the kernel (e.g., for
-module/firmware loading), and write access was requested.
-.TP
-.B EWOULDBLOCK
-The
-.B O_NONBLOCK
-flag was specified, and an incompatible lease was held on the file
-(see
-.BR fcntl (2)).
-.SH VERSIONS
-The (undefined) effect of
-.B O_RDONLY | O_TRUNC
-varies among implementations.
-On many systems the file is actually truncated.
-.\" Linux 2.0, 2.5: truncate
-.\" Solaris 5.7, 5.8: truncate
-.\" Irix 6.5: truncate
-.\" Tru64 5.1B: truncate
-.\" HP-UX 11.22: truncate
-.\" FreeBSD 4.7: truncate
-.SS Synchronized I/O
-The POSIX.1-2008 "synchronized I/O" option
-specifies different variants of synchronized I/O,
-and specifies the
-.BR open ()
-flags
-.BR O_SYNC ,
-.BR O_DSYNC ,
-and
-.B O_RSYNC
-for controlling the behavior.
-Regardless of whether an implementation supports this option,
-it must at least support the use of
-.B O_SYNC
-for regular files.
-.P
-Linux implements
-.B O_SYNC
-and
-.BR O_DSYNC ,
-but not
-.BR O_RSYNC .
-Somewhat incorrectly, glibc defines
-.B O_RSYNC
-to have the same value as
-.BR O_SYNC .
-.RB ( O_RSYNC
-is defined in the Linux header file
-.I <asm/fcntl.h>
-on HP PA-RISC, but it is not used.)
-.P
-.B O_SYNC
-provides synchronized I/O
-.I file
-integrity completion,
-meaning write operations will flush data and all associated metadata
-to the underlying hardware.
-.B O_DSYNC
-provides synchronized I/O
-.I data
-integrity completion,
-meaning write operations will flush data
-to the underlying hardware,
-but will only flush metadata updates that are required
-to allow a subsequent read operation to complete successfully.
-Data integrity completion can reduce the number of disk operations
-that are required for applications that don't need the guarantees
-of file integrity completion.
-.P
-To understand the difference between the two types of completion,
-consider two pieces of file metadata:
-the file last modification timestamp
-.RI ( st_mtime )
-and the file length.
-All write operations will update the last file modification timestamp,
-but only writes that add data to the end of the
-file will change the file length.
-The last modification timestamp is not needed to ensure that
-a read completes successfully, but the file length is.
-Thus,
-.B O_DSYNC
-would only guarantee to flush updates to the file length metadata
-(whereas
-.B O_SYNC
-would also always flush the last modification timestamp metadata).
-.P
-Before Linux 2.6.33, Linux implemented only the
-.B O_SYNC
-flag for
-.BR open ().
-However, when that flag was specified,
-most filesystems actually provided the equivalent of synchronized I/O
-.I data
-integrity completion (i.e.,
-.B O_SYNC
-was actually implemented as the equivalent of
-.BR O_DSYNC ).
-.P
-Since Linux 2.6.33, proper
-.B O_SYNC
-support is provided.
-However, to ensure backward binary compatibility,
-.B O_DSYNC
-was defined with the same value as the historical
-.BR O_SYNC ,
-and
-.B O_SYNC
-was defined as a new (two-bit) flag value that includes the
-.B O_DSYNC
-flag value.
-This ensures that applications compiled against
-new headers get at least
-.B O_DSYNC
-semantics when run on kernels before Linux 2.6.33.
-.\"
-.SS C library/kernel differences
-Since glibc 2.26,
-the glibc wrapper function for
-.BR open ()
-employs the
-.BR openat ()
-system call, rather than the kernel's
-.BR open ()
-system call.
-For certain architectures, this is also true before glibc 2.26.
-.\"
-.SH STANDARDS
-.TP
-.BR open ()
-.TQ
-.BR creat ()
-.TQ
-.BR openat ()
-POSIX.1-2008.
-.TP
-.BR openat2 (2)
-Linux.
-.P
-The
-.BR O_DIRECT ,
-.BR O_NOATIME ,
-.BR O_PATH ,
-and
-.B O_TMPFILE
-flags are Linux-specific.
-One must define
-.B _GNU_SOURCE
-to obtain their definitions.
-.P
-The
-.BR O_CLOEXEC ,
-.BR O_DIRECTORY ,
-and
-.B O_NOFOLLOW
-flags are not specified in POSIX.1-2001,
-but are specified in POSIX.1-2008.
-Since glibc 2.12, one can obtain their definitions by defining either
-.B _POSIX_C_SOURCE
-with a value greater than or equal to 200809L or
-.B _XOPEN_SOURCE
-with a value greater than or equal to 700.
-In glibc 2.11 and earlier, one obtains the definitions by defining
-.BR _GNU_SOURCE .
-.SH HISTORY
-.TP
-.BR open ()
-.TQ
-.BR creat ()
-SVr4, 4.3BSD, POSIX.1-2001.
-.TP
-.BR openat ()
-POSIX.1-2008.
-Linux 2.6.16,
-glibc 2.4.
-.SH NOTES
-Under Linux, the
-.B O_NONBLOCK
-flag is sometimes used in cases where one wants to open
-but does not necessarily have the intention to read or write.
-For example,
-this may be used to open a device in order to get a file descriptor
-for use with
-.BR ioctl (2).
-.P
-Note that
-.BR open ()
-can open device special files, but
-.BR creat ()
-cannot create them; use
-.BR mknod (2)
-instead.
-.P
-If the file is newly created, its
-.IR st_atime ,
-.IR st_ctime ", and"
-.I st_mtime
-fields
-(respectively, time of last access, time of last status change, and
-time of last modification; see
-.BR stat (2))
-are set
-to the current time, and so are the
-.I st_ctime
-and
-.I st_mtime
-fields of the
-parent directory.
-Otherwise, if the file is modified because of the
-.B O_TRUNC
-flag, its
-.I st_ctime
-and
-.I st_mtime
-fields are set to the current time.
-.P
-The files in the
-.IR /proc/ pid /fd
-directory show the open file descriptors of the process with the PID
-.IR pid .
-The files in the
-.IR /proc/ pid /fdinfo
-directory show even more information about these file descriptors.
-See
-.BR proc (5)
-for further details of both of these directories.
-.P
-The Linux header file
-.I <asm/fcntl.h>
-doesn't define
-.BR O_ASYNC ;
-the (BSD-derived)
-.B FASYNC
-synonym is defined instead.
-.\"
-.\"
-.SS Open file descriptions
-The term open file description is the one used by POSIX to refer to the
-entries in the system-wide table of open files.
-In other contexts, this object is
-variously also called an "open file object",
-a "file handle", an "open file table entry",
-or\[em]in kernel-developer parlance\[em]a
-.IR "struct file" .
-.P
-When a file descriptor is duplicated (using
-.BR dup (2)
-or similar),
-the duplicate refers to the same open file description
-as the original file descriptor,
-and the two file descriptors consequently share
-the file offset and file status flags.
-Such sharing can also occur between processes:
-a child process created via
-.BR fork (2)
-inherits duplicates of its parent's file descriptors,
-and those duplicates refer to the same open file descriptions.
-.P
-Each
-.BR open ()
-of a file creates a new open file description;
-thus, there may be multiple open file descriptions
-corresponding to a file inode.
-.P
-On Linux, one can use the
-.BR kcmp (2)
-.B KCMP_FILE
-operation to test whether two file descriptors
-(in the same process or in two different processes)
-refer to the same open file description.
-.\"
-.SS NFS
-There are many infelicities in the protocol underlying NFS, affecting
-amongst others
-.BR O_SYNC " and " O_NDELAY .
-.P
-On NFS filesystems with UID mapping enabled,
-.BR open ()
-may
-return a file descriptor but, for example,
-.BR read (2)
-requests are denied
-with
-.BR EACCES .
-This is because the client performs
-.BR open ()
-by checking the
-permissions, but UID mapping is performed by the server upon
-read and write requests.
-.\"
-.\"
-.SS FIFOs
-Opening the read or write end of a FIFO blocks until the other
-end is also opened (by another process or thread).
-See
-.BR fifo (7)
-for further details.
-.\"
-.\"
-.SS File access mode
-Unlike the other values that can be specified in
-.IR flags ,
-the
-.I "access mode"
-values
-.BR O_RDONLY ", " O_WRONLY ", and " O_RDWR
-do not specify individual bits.
-Rather, they define the low order two bits of
-.IR flags ,
-and are defined respectively as 0, 1, and 2.
-In other words, the combination
-.B "O_RDONLY | O_WRONLY"
-is a logical error, and certainly does not have the same meaning as
-.BR O_RDWR .
-.P
-Linux reserves the special, nonstandard access mode 3 (binary 11) in
-.I flags
-to mean:
-check for read and write permission on the file and return a file descriptor
-that can't be used for reading or writing.
-This nonstandard access mode is used by some Linux drivers to return a
-file descriptor that is to be used only for device-specific
-.BR ioctl (2)
-operations.
-.\" See for example util-linux's disk-utils/setfdprm.c
-.\" For some background on access mode 3, see
-.\" http://thread.gmane.org/gmane.linux.kernel/653123
-.\" "[RFC] correct flags to f_mode conversion in __dentry_open"
-.\" LKML, 12 Mar 2008
-.\"
-.\"
-.SS Rationale for openat() and other "directory file descriptor" APIs
-.BR openat ()
-and the other system calls and library functions that take
-a directory file descriptor argument
-(i.e.,
-.BR execveat (2),
-.BR faccessat (2),
-.BR fanotify_mark (2),
-.BR fchmodat (2),
-.BR fchownat (2),
-.BR fspick (2),
-.BR fstatat (2),
-.BR futimesat (2),
-.BR linkat (2),
-.BR mkdirat (2),
-.BR mknodat (2),
-.BR mount_setattr (2),
-.BR move_mount (2),
-.BR name_to_handle_at (2),
-.BR open_tree (2),
-.BR openat2 (2),
-.BR readlinkat (2),
-.BR renameat (2),
-.BR renameat2 (2),
-.BR statx (2),
-.BR symlinkat (2),
-.BR unlinkat (2),
-.BR utimensat (2),
-.BR mkfifoat (3),
-and
-.BR scandirat (3))
-address two problems with the older interfaces that preceded them.
-Here, the explanation is in terms of the
-.BR openat ()
-call, but the rationale is analogous for the other interfaces.
-.P
-First,
-.BR openat ()
-allows an application to avoid race conditions that could
-occur when using
-.BR open ()
-to open files in directories other than the current working directory.
-These race conditions result from the fact that some component
-of the directory prefix given to
-.BR open ()
-could be changed in parallel with the call to
-.BR open ().
-Suppose, for example, that we wish to create the file
-.I dir1/dir2/xxx.dep
-if the file
-.I dir1/dir2/xxx
-exists.
-The problem is that between the existence check and the file-creation step,
-.I dir1
-or
-.I dir2
-(which might be symbolic links)
-could be modified to point to a different location.
-Such races can be avoided by
-opening a file descriptor for the target directory,
-and then specifying that file descriptor as the
-.I dirfd
-argument of (say)
-.BR fstatat (2)
-and
-.BR openat ().
-The use of the
-.I dirfd
-file descriptor also has other benefits:
-.IP \[bu] 3
-the file descriptor is a stable reference to the directory,
-even if the directory is renamed; and
-.IP \[bu]
-the open file descriptor prevents the underlying filesystem from
-being dismounted,
-just as when a process has a current working directory on a filesystem.
-.P
-Second,
-.BR openat ()
-allows the implementation of a per-thread "current working
-directory", via file descriptor(s) maintained by the application.
-(This functionality can also be obtained by tricks based
-on the use of
-.IR /proc/self/fd/ dirfd,
-but less efficiently.)
-.P
-The
-.I dirfd
-argument for these APIs can be obtained by using
-.BR open ()
-or
-.BR openat ()
-to open a directory (with either the
-.B O_RDONLY
-or the
-.B O_PATH
-flag).
-Alternatively, such a file descriptor can be obtained by applying
-.BR dirfd (3)
-to a directory stream created using
-.BR opendir (3).
-.P
-When these APIs are given a
-.I dirfd
-argument of
-.B AT_FDCWD
-or the specified pathname is absolute,
-then they handle their pathname argument in the same way as
-the corresponding conventional APIs.
-However, in this case, several of the APIs have a
-.I flags
-argument that provides access to functionality that is not available with
-the corresponding conventional APIs.
-.\"
-.\"
-.SS O_DIRECT
-The
-.B O_DIRECT
-flag may impose alignment restrictions on the length and address
-of user-space buffers and the file offset of I/Os.
-In Linux, alignment
-restrictions vary by filesystem and kernel version and might be
-absent entirely.
-The handling of misaligned
-.B O_DIRECT
-I/Os also varies;
-they can either fail with
-.B EINVAL
-or fall back to buffered I/O.
-.P
-Since Linux 6.1,
-.B O_DIRECT
-support and alignment restrictions for a file can be queried using
-.BR statx (2),
-using the
-.B STATX_DIOALIGN
-flag.
-Support for
-.B STATX_DIOALIGN
-varies by filesystem;
-see
-.BR statx (2).
-.P
-Some filesystems provide their own interfaces for querying
-.B O_DIRECT
-alignment restrictions,
-for example the
-.B XFS_IOC_DIOINFO
-operation in
-.BR xfsctl (3).
-.B STATX_DIOALIGN
-should be used instead when it is available.
-.P
-If none of the above is available,
-then direct I/O support and alignment restrictions
-can only be assumed from known characteristics of the filesystem,
-the individual file,
-the underlying storage device(s),
-and the kernel version.
-In Linux 2.4,
-most filesystems based on block devices require that
-the file offset and the length and memory address of all I/O segments
-be multiples of the filesystem block size
-(typically 4096 bytes).
-In Linux 2.6.0,
-this was relaxed to the logical block size of the block device
-(typically 512 bytes).
-A block device's logical block size can be determined using the
-.BR ioctl (2)
-.B BLKSSZGET
-operation or from the shell using the command:
-.P
-.in +4n
-.EX
-blockdev \-\-getss
-.EE
-.in
-.P
-.B O_DIRECT
-I/Os should never be run concurrently with the
-.BR fork (2)
-system call,
-if the memory buffer is a private mapping
-(i.e., any mapping created with the
-.BR mmap (2)
-.B MAP_PRIVATE
-flag;
-this includes memory allocated on the heap and statically allocated buffers).
-Any such I/Os, whether submitted via an asynchronous I/O interface or from
-another thread in the process,
-should be completed before
-.BR fork (2)
-is called.
-Failure to do so can result in data corruption and undefined behavior in
-parent and child processes.
-This restriction does not apply when the memory buffer for the
-.B O_DIRECT
-I/Os was created using
-.BR shmat (2)
-or
-.BR mmap (2)
-with the
-.B MAP_SHARED
-flag.
-Nor does this restriction apply when the memory buffer has been advised as
-.B MADV_DONTFORK
-with
-.BR madvise (2),
-ensuring that it will not be available
-to the child after
-.BR fork (2).
-.P
-The
-.B O_DIRECT
-flag was introduced in SGI IRIX, where it has alignment
-restrictions similar to those of Linux 2.4.
-IRIX also has a
-.BR fcntl (2)
-call to query appropriate alignments and sizes.
-FreeBSD 4.x introduced
-a flag of the same name, but without alignment restrictions.
-.P
-.B O_DIRECT
-support was added in Linux 2.4.10.
-Older Linux kernels simply ignore this flag.
-Some filesystems may not implement the flag, in which case
-.BR open ()
-fails with the error
-.B EINVAL
-if it is used.
-.P
-Applications should avoid mixing
-.B O_DIRECT
-and normal I/O to the same file,
-and especially to overlapping byte regions in the same file.
-Even when the filesystem correctly handles the coherency issues in
-this situation, overall I/O throughput is likely to be slower than
-using either mode alone.
-Likewise, applications should avoid mixing
-.BR mmap (2)
-of files with direct I/O to the same files.
-.P
-The behavior of
-.B O_DIRECT
-with NFS will differ from local filesystems.
-Older kernels, or
-kernels configured in certain ways, may not support this combination.
-The NFS protocol does not support passing the flag to the server, so
-.B O_DIRECT
-I/O will bypass the page cache only on the client; the server may
-still cache the I/O.
-The client asks the server to make the I/O
-synchronous to preserve the synchronous semantics of
-.BR O_DIRECT .
-Some servers will perform poorly under these circumstances, especially
-if the I/O size is small.
-Some servers may also be configured to
-lie to clients about the I/O having reached stable storage; this
-will avoid the performance penalty at some risk to data integrity
-in the event of server power failure.
-The Linux NFS client places no alignment restrictions on
-.B O_DIRECT
-I/O.
-.P
-In summary,
-.B O_DIRECT
-is a potentially powerful tool that should be used with caution.
-It is recommended that applications treat use of
-.B O_DIRECT
-as a performance option which is disabled by default.
-.SH BUGS
-Currently, it is not possible to enable signal-driven
-I/O by specifying
-.B O_ASYNC
-when calling
-.BR open ();
-use
-.BR fcntl (2)
-to enable this flag.
-.\" FIXME . Check bugzilla report on open(O_ASYNC)
-.\" See http://bugzilla.kernel.org/show_bug.cgi?id=5993
-.P
-One must check for two different error codes,
-.B EISDIR
-and
-.BR ENOENT ,
-when trying to determine whether the kernel supports
-.B O_TMPFILE
-functionality.
-.P
-When both
-.B O_CREAT
-and
-.B O_DIRECTORY
-are specified in
-.I flags
-and the file specified by
-.I pathname
-does not exist,
-.BR open ()
-will create a regular file (i.e.,
-.B O_DIRECTORY
-is ignored).
-.SH SEE ALSO
-.BR chmod (2),
-.BR chown (2),
-.BR close (2),
-.BR dup (2),
-.BR fcntl (2),
-.BR link (2),
-.BR lseek (2),
-.BR mknod (2),
-.BR mmap (2),
-.BR mount (2),
-.BR open_by_handle_at (2),
-.BR openat2 (2),
-.BR read (2),
-.BR socket (2),
-.BR stat (2),
-.BR umask (2),
-.BR unlink (2),
-.BR write (2),
-.BR fopen (3),
-.BR acl (5),
-.BR fifo (7),
-.BR inode (7),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/open_by_handle_at.2 b/man2/open_by_handle_at.2
deleted file mode 100644
index a4050d664..000000000
--- a/man2/open_by_handle_at.2
+++ /dev/null
@@ -1,787 +0,0 @@
-.\" Copyright (c) 2014 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH open_by_handle_at 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-name_to_handle_at, open_by_handle_at \- obtain handle
-for a pathname and open file via a handle
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <fcntl.h>
-.P
-.BI "int name_to_handle_at(int " dirfd ", const char *" pathname ,
-.BI " struct file_handle *" handle ,
-.BI " int *" mount_id ", int " flags );
-.BI "int open_by_handle_at(int " mount_fd ", struct file_handle *" handle ,
-.BI " int " flags );
-.fi
-.SH DESCRIPTION
-The
-.BR name_to_handle_at ()
-and
-.BR open_by_handle_at ()
-system calls split the functionality of
-.BR openat (2)
-into two parts:
-.BR name_to_handle_at ()
-returns an opaque handle that corresponds to a specified file;
-.BR open_by_handle_at ()
-opens the file corresponding to a handle returned by a previous call to
-.BR name_to_handle_at ()
-and returns an open file descriptor.
-.\"
-.\"
-.SS name_to_handle_at()
-The
-.BR name_to_handle_at ()
-system call returns a file handle and a mount ID corresponding to
-the file specified by the
-.I dirfd
-and
-.I pathname
-arguments.
-The file handle is returned via the argument
-.IR handle ,
-which is a pointer to a structure of the following form:
-.P
-.in +4n
-.EX
-struct file_handle {
- unsigned int handle_bytes; /* Size of f_handle [in, out] */
- int handle_type; /* Handle type [out] */
- unsigned char f_handle[0]; /* File identifier (sized by
- caller) [out] */
-};
-.EE
-.in
-.P
-It is the caller's responsibility to allocate the structure
-with a size large enough to hold the handle returned in
-.IR f_handle .
-Before the call, the
-.I handle_bytes
-field should be initialized to contain the allocated size for
-.IR f_handle .
-(The constant
-.BR MAX_HANDLE_SZ ,
-defined in
-.IR <fcntl.h> ,
-specifies the maximum expected size for a file handle.
-It is not a
-guaranteed upper limit as future filesystems may require more space.)
-Upon successful return, the
-.I handle_bytes
-field is updated to contain the number of bytes actually written to
-.IR f_handle .
-.P
-The caller can discover the required size for the
-.I file_handle
-structure by making a call in which
-.I handle\->handle_bytes
-is zero;
-in this case, the call fails with the error
-.B EOVERFLOW
-and
-.I handle\->handle_bytes
-is set to indicate the required size;
-the caller can then use this information to allocate a structure
-of the correct size (see EXAMPLES below).
-Some care is needed here as
-.B EOVERFLOW
-can also indicate that no file handle is available for this particular
-name in a filesystem which does normally support file-handle lookup.
-This case can be detected when the
-.B EOVERFLOW
-error is returned without
-.I handle_bytes
-being increased.
-.P
-Other than the use of the
-.I handle_bytes
-field, the caller should treat the
-.I file_handle
-structure as an opaque data type: the
-.I handle_type
-and
-.I f_handle
-fields can be used in a subsequent call to
-.BR open_by_handle_at ().
-The caller can also use the opaque
-.I file_handle
-to compare the identity of filesystem objects
-that were queried at different times and possibly
-at different paths.
-The
-.BR fanotify (7)
-subsystem can report events
-with an information record containing a
-.I file_handle
-to identify the filesystem object.
-.P
-The
-.I flags
-argument is a bit mask constructed by ORing together zero or more of
-.BR AT_HANDLE_FID ,
-.BR AT_EMPTY_PATH ,
-and
-.BR AT_SYMLINK_FOLLOW ,
-described below.
-.P
-When
-.I flags
-contains the
-.BR AT_HANDLE_FID " (since Linux 6.5)"
-.\" commit 96b2b072ee62be8ae68c8ecf14854c4d0505a8f8
-flag, the caller indicates that the returned
-.I file_handle
-is needed to identify the filesystem object,
-and not for opening the file later,
-so it should be expected that a subsequent call to
-.BR open_by_handle_at ()
-with the returned
-.I file_handle
-may fail.
-.P
-Together, the
-.I pathname
-and
-.I dirfd
-arguments identify the file for which a handle is to be obtained.
-There are four distinct cases:
-.IP \[bu] 3
-If
-.I pathname
-is a nonempty string containing an absolute pathname,
-then a handle is returned for the file referred to by that pathname.
-In this case,
-.I dirfd
-is ignored.
-.IP \[bu]
-If
-.I pathname
-is a nonempty string containing a relative pathname and
-.I dirfd
-has the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working directory of the caller,
-and a handle is returned for the file to which it refers.
-.IP \[bu]
-If
-.I pathname
-is a nonempty string containing a relative pathname and
-.I dirfd
-is a file descriptor referring to a directory, then
-.I pathname
-is interpreted relative to the directory referred to by
-.IR dirfd ,
-and a handle is returned for the file to which it refers.
-(See
-.BR openat (2)
-for an explanation of why "directory file descriptors" are useful.)
-.IP \[bu]
-If
-.I pathname
-is an empty string and
-.I flags
-specifies the value
-.BR AT_EMPTY_PATH ,
-then
-.I dirfd
-can be an open file descriptor referring to any type of file,
-or
-.BR AT_FDCWD ,
-meaning the current working directory,
-and a handle is returned for the file to which it refers.
-.P
-The
-.I mount_id
-argument returns an identifier for the filesystem
-mount that corresponds to
-.IR pathname .
-This corresponds to the first field in one of the records in
-.IR /proc/self/mountinfo .
-Opening the pathname in the fifth field of that record yields a file
-descriptor for the mount point;
-that file descriptor can be used in a subsequent call to
-.BR open_by_handle_at ().
-.I mount_id
-is returned both for a successful call and for a call that results
-in the error
-.BR EOVERFLOW .
-.P
-By default,
-.BR name_to_handle_at ()
-does not dereference
-.I pathname
-if it is a symbolic link, and thus returns a handle for the link itself.
-If
-.B AT_SYMLINK_FOLLOW
-is specified in
-.IR flags ,
-.I pathname
-is dereferenced if it is a symbolic link
-(so that the call returns a handle for the file referred to by the link).
-.P
-.BR name_to_handle_at ()
-does not trigger a mount when the final component of the pathname is an
-automount point.
-When a filesystem supports both file handles and
-automount points, a
-.BR name_to_handle_at ()
-call on an automount point will return with error
-.B EOVERFLOW
-without having increased
-.IR handle_bytes .
-This can happen since Linux 4.13
-.\" commit 20fa19027286983ab2734b5910c4a687436e0c31
-with NFS when accessing a directory
-which is on a separate filesystem on the server.
-In this case, the automount can be triggered by adding a "/" to the end
-of the pathname.
-.SS open_by_handle_at()
-The
-.BR open_by_handle_at ()
-system call opens the file referred to by
-.IR handle ,
-a file handle returned by a previous call to
-.BR name_to_handle_at ().
-.P
-The
-.I mount_fd
-argument is a file descriptor for any object (file, directory, etc.)
-in the mounted filesystem with respect to which
-.I handle
-should be interpreted.
-The special value
-.B AT_FDCWD
-can be specified, meaning the current working directory of the caller.
-.P
-The
-.I flags
-argument
-is as for
-.BR open (2).
-If
-.I handle
-refers to a symbolic link, the caller must specify the
-.B O_PATH
-flag, and the symbolic link is not dereferenced; the
-.B O_NOFOLLOW
-flag, if specified, is ignored.
-.P
-The caller must have the
-.B CAP_DAC_READ_SEARCH
-capability to invoke
-.BR open_by_handle_at ().
-.SH RETURN VALUE
-On success,
-.BR name_to_handle_at ()
-returns 0,
-and
-.BR open_by_handle_at ()
-returns a file descriptor (a nonnegative integer).
-.P
-In the event of an error, both system calls return \-1 and set
-.I errno
-to indicate the error.
-.SH ERRORS
-.BR name_to_handle_at ()
-and
-.BR open_by_handle_at ()
-can fail for the same errors as
-.BR openat (2).
-In addition, they can fail with the errors noted below.
-.P
-.BR name_to_handle_at ()
-can fail with the following errors:
-.TP
-.B EFAULT
-.IR pathname ,
-.IR mount_id ,
-or
-.I handle
-points outside your accessible address space.
-.TP
-.B EINVAL
-.I flags
-includes an invalid bit value.
-.TP
-.B EINVAL
-.I handle\->handle_bytes
-is greater than
-.BR MAX_HANDLE_SZ .
-.TP
-.B ENOENT
-.I pathname
-is an empty string, but
-.B AT_EMPTY_PATH
-was not specified in
-.IR flags .
-.TP
-.B ENOTDIR
-The file descriptor supplied in
-.I dirfd
-does not refer to a directory,
-and it is not the case that both
-.I flags
-includes
-.B AT_EMPTY_PATH
-and
-.I pathname
-is an empty string.
-.TP
-.B EOPNOTSUPP
-The filesystem does not support decoding of a pathname to a file handle.
-.TP
-.B EOVERFLOW
-The
-.I handle\->handle_bytes
-value passed into the call was too small.
-When this error occurs,
-.I handle\->handle_bytes
-is updated to indicate the required size for the handle.
-.\"
-.\"
-.P
-.BR open_by_handle_at ()
-can fail with the following errors:
-.TP
-.B EBADF
-.I mount_fd
-is not an open file descriptor.
-.TP
-.B EBADF
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EFAULT
-.I handle
-points outside your accessible address space.
-.TP
-.B EINVAL
-.I handle\->handle_bytes
-is greater than
-.B MAX_HANDLE_SZ
-or is equal to zero.
-.TP
-.B ELOOP
-.I handle
-refers to a symbolic link, but
-.B O_PATH
-was not specified in
-.IR flags .
-.TP
-.B EPERM
-The caller does not have the
-.B CAP_DAC_READ_SEARCH
-capability.
-.TP
-.B ESTALE
-The specified
-.I handle
-is not valid for opening a file.
-This error will occur if, for example, the file has been deleted.
-This error can also occur if the
-.I handle
-was acquired using the
-.B AT_HANDLE_FID
-flag and the filesystem does not support
-.BR open_by_handle_at ().
-.SH VERSIONS
-FreeBSD has a broadly similar pair of system calls in the form of
-.BR getfh ()
-and
-.BR openfh ().
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.39,
-glibc 2.14.
-.SH NOTES
-A file handle can be generated in one process using
-.BR name_to_handle_at ()
-and later used in a different process that calls
-.BR open_by_handle_at ().
-.P
-Some filesystems don't support the translation of pathnames to
-file handles, for example,
-.IR /proc ,
-.IR /sys ,
-and various network filesystems.
-Some filesystems support the translation of pathnames to
-file handles, but do not support using those file handles in
-.BR open_by_handle_at ().
-.P
-A file handle may become invalid ("stale") if a file is deleted,
-or for other filesystem-specific reasons.
-Invalid handles are reported by an
-.B ESTALE
-error from
-.BR open_by_handle_at ().
-.P
-These system calls are designed for use by user-space file servers.
-For example, a user-space NFS server might generate a file handle
-and pass it to an NFS client.
-Later, when the client wants to open the file,
-it could pass the handle back to the server.
-.\" https://lwn.net/Articles/375888/
-.\" "Open by handle" - Jonathan Corbet, 2010-02-23
-This sort of functionality allows a user-space file server to operate in
-a stateless fashion with respect to the files it serves.
-.P
-If
-.I pathname
-refers to a symbolic link and
-.I flags
-does not specify
-.BR AT_SYMLINK_FOLLOW ,
-then
-.BR name_to_handle_at ()
-returns a handle for the link (rather than the file to which it refers).
-.\" commit bcda76524cd1fa32af748536f27f674a13e56700
-The process receiving the handle can later perform operations
-on the symbolic link by converting the handle to a file descriptor using
-.BR open_by_handle_at ()
-with the
-.B O_PATH
-flag, and then passing the file descriptor as the
-.I dirfd
-argument in system calls such as
-.BR readlinkat (2)
-and
-.BR fchownat (2).
-.SS Obtaining a persistent filesystem ID
-The mount IDs in
-.I /proc/self/mountinfo
-can be reused as filesystems are unmounted and mounted.
-Therefore, the mount ID returned by
-.BR name_to_handle_at ()
-(in
-.IR *mount_id )
-should not be treated as a persistent identifier
-for the corresponding mounted filesystem.
-However, an application can use the information in the
-.I mountinfo
-record that corresponds to the mount ID
-to derive a persistent identifier.
-.P
-For example, one can use the device name in the fifth field of the
-.I mountinfo
-record to search for the corresponding device UUID via the symbolic links in
-.IR /dev/disk/by\-uuid .
-(A more comfortable way of obtaining the UUID is to use the
-.\" e.g., http://stackoverflow.com/questions/6748429/using-libblkid-to-find-uuid-of-a-partition
-.BR libblkid (3)
-library.)
-That process can then be reversed,
-using the UUID to look up the device name,
-and then obtaining the corresponding mount point,
-in order to produce the
-.I mount_fd
-argument used by
-.BR open_by_handle_at ().
-.SH EXAMPLES
-The two programs below demonstrate the use of
-.BR name_to_handle_at ()
-and
-.BR open_by_handle_at ().
-The first program
-.RI ( t_name_to_handle_at.c )
-uses
-.BR name_to_handle_at ()
-to obtain the file handle and mount ID
-for the file specified in its command-line argument;
-the handle and mount ID are written to standard output.
-.P
-The second program
-.RI ( t_open_by_handle_at.c )
-reads a mount ID and file handle from standard input.
-The program then employs
-.BR open_by_handle_at ()
-to open the file using that handle.
-If an optional command-line argument is supplied, then the
-.I mount_fd
-argument for
-.BR open_by_handle_at ()
-is obtained by opening the directory named in that argument.
-Otherwise,
-.I mount_fd
-is obtained by scanning
-.I /proc/self/mountinfo
-to find a record whose mount ID matches the mount ID
-read from standard input,
-and the mount directory specified in that record is opened.
-(These programs do not deal with the fact that mount IDs are not persistent.)
-.P
-The following shell session demonstrates the use of these two programs:
-.P
-.in +4n
-.EX
-$ \fBecho \[aq]Can you please think about it?\[aq] > cecilia.txt\fP
-$ \fB./t_name_to_handle_at cecilia.txt > fh\fP
-$ \fB./t_open_by_handle_at < fh\fP
-open_by_handle_at: Operation not permitted
-$ \fBsudo ./t_open_by_handle_at < fh\fP # Need CAP_DAC_READ_SEARCH
-Read 31 bytes
-$ \fBrm cecilia.txt\fP
-.EE
-.in
-.P
-Now we delete and (quickly) re-create the file so that
-it has the same content and (by chance) the same inode.
-Nevertheless,
-.BR open_by_handle_at ()
-.\" Christoph Hellwig: That's why the file handles contain a generation
-.\" counter that gets incremented in this case.
-recognizes that the original file referred to by the file handle
-no longer exists.
-.P
-.in +4n
-.EX
-$ \fBstat \-\-printf="%i\en" cecilia.txt\fP # Display inode number
-4072121
-$ \fBrm cecilia.txt\fP
-$ \fBecho \[aq]Can you please think about it?\[aq] > cecilia.txt\fP
-$ \fBstat \-\-printf="%i\en" cecilia.txt\fP # Check inode number
-4072121
-$ \fBsudo ./t_open_by_handle_at < fh\fP
-open_by_handle_at: Stale NFS file handle
-.EE
-.in
-.SS Program source: t_name_to_handle_at.c
-\&
-.\" SRC BEGIN (t_name_to_handle_at.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int mount_id, fhsize, flags, dirfd;
- char *pathname;
- struct file_handle *fhp;
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s pathname\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- pathname = argv[1];
-\&
- /* Allocate file_handle structure. */
-\&
- fhsize = sizeof(*fhp);
- fhp = malloc(fhsize);
- if (fhp == NULL)
- err(EXIT_FAILURE, "malloc");
-\&
- /* Make an initial call to name_to_handle_at() to discover
- the size required for file handle. */
-\&
- dirfd = AT_FDCWD; /* For name_to_handle_at() calls */
- flags = 0; /* For name_to_handle_at() calls */
- fhp\->handle_bytes = 0;
- if (name_to_handle_at(dirfd, pathname, fhp,
- &mount_id, flags) != \-1
- || errno != EOVERFLOW)
- {
- fprintf(stderr, "Unexpected result from name_to_handle_at()\en");
- exit(EXIT_FAILURE);
- }
-\&
- /* Reallocate file_handle structure with correct size. */
-\&
- fhsize = sizeof(*fhp) + fhp\->handle_bytes;
- fhp = realloc(fhp, fhsize); /* Copies fhp\->handle_bytes */
- if (fhp == NULL)
- err(EXIT_FAILURE, "realloc");
-\&
- /* Get file handle from pathname supplied on command line. */
-\&
- if (name_to_handle_at(dirfd, pathname, fhp, &mount_id, flags) == \-1)
- err(EXIT_FAILURE, "name_to_handle_at");
-\&
- /* Write mount ID, file handle size, and file handle to stdout,
- for later reuse by t_open_by_handle_at.c. */
-\&
- printf("%d\en", mount_id);
- printf("%u %d ", fhp\->handle_bytes, fhp\->handle_type);
- for (size_t j = 0; j < fhp\->handle_bytes; j++)
- printf(" %02x", fhp\->f_handle[j]);
- printf("\en");
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SS Program source: t_open_by_handle_at.c
-\&
-.\" SRC BEGIN (t_open_by_handle_at.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-\&
-/* Scan /proc/self/mountinfo to find the line whose mount ID matches
- \[aq]mount_id\[aq]. (An easier way to do this is to install and use the
- \[aq]libmount\[aq] library provided by the \[aq]util\-linux\[aq] project.)
- Open the corresponding mount path and return the resulting file
- descriptor. */
-\&
-static int
-open_mount_path_by_id(int mount_id)
-{
- int mi_mount_id, found;
- char mount_path[PATH_MAX];
- char *linep;
- FILE *fp;
- size_t lsize;
- ssize_t nread;
-\&
- fp = fopen("/proc/self/mountinfo", "r");
- if (fp == NULL)
- err(EXIT_FAILURE, "fopen");
-\&
- found = 0;
- linep = NULL;
- while (!found) {
- nread = getline(&linep, &lsize, fp);
- if (nread == \-1)
- break;
-\&
- nread = sscanf(linep, "%d %*d %*s %*s %s",
- &mi_mount_id, mount_path);
- if (nread != 2) {
- fprintf(stderr, "Bad sscanf()\en");
- exit(EXIT_FAILURE);
- }
-\&
- if (mi_mount_id == mount_id)
- found = 1;
- }
- free(linep);
-\&
- fclose(fp);
-\&
- if (!found) {
- fprintf(stderr, "Could not find mount point\en");
- exit(EXIT_FAILURE);
- }
-\&
- return open(mount_path, O_RDONLY);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int mount_id, fd, mount_fd, handle_bytes;
- char buf[1000];
-#define LINE_SIZE 100
- char line1[LINE_SIZE], line2[LINE_SIZE];
- char *nextp;
- ssize_t nread;
- struct file_handle *fhp;
-\&
- if ((argc > 1 && strcmp(argv[1], "\-\-help") == 0) || argc > 2) {
- fprintf(stderr, "Usage: %s [mount\-path]\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- /* Standard input contains mount ID and file handle information:
-\&
- Line 1: <mount_id>
- Line 2: <handle_bytes> <handle_type> <bytes of handle in hex>
- */
-\&
- if (fgets(line1, sizeof(line1), stdin) == NULL ||
- fgets(line2, sizeof(line2), stdin) == NULL)
- {
- fprintf(stderr, "Missing mount_id / file handle\en");
- exit(EXIT_FAILURE);
- }
-\&
- mount_id = atoi(line1);
-\&
- handle_bytes = strtoul(line2, &nextp, 0);
-\&
- /* Given handle_bytes, we can now allocate file_handle structure. */
-\&
- fhp = malloc(sizeof(*fhp) + handle_bytes);
- if (fhp == NULL)
- err(EXIT_FAILURE, "malloc");
-\&
- fhp\->handle_bytes = handle_bytes;
-\&
- fhp\->handle_type = strtoul(nextp, &nextp, 0);
-\&
- for (size_t j = 0; j < fhp\->handle_bytes; j++)
- fhp\->f_handle[j] = strtoul(nextp, &nextp, 16);
-\&
- /* Obtain file descriptor for mount point, either by opening
- the pathname specified on the command line, or by scanning
- /proc/self/mountinfo to find a mount that matches the \[aq]mount_id\[aq]
- that we received from stdin. */
-\&
- if (argc > 1)
- mount_fd = open(argv[1], O_RDONLY);
- else
- mount_fd = open_mount_path_by_id(mount_id);
-\&
- if (mount_fd == \-1)
- err(EXIT_FAILURE, "opening mount fd");
-\&
- /* Open file using handle and mount point. */
-\&
- fd = open_by_handle_at(mount_fd, fhp, O_RDONLY);
- if (fd == \-1)
- err(EXIT_FAILURE, "open_by_handle_at");
-\&
- /* Try reading a few bytes from the file. */
-\&
- nread = read(fd, buf, sizeof(buf));
- if (nread == \-1)
- err(EXIT_FAILURE, "read");
-\&
- printf("Read %zd bytes\en", nread);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR open (2),
-.BR libblkid (3),
-.BR blkid (8),
-.BR findfs (8),
-.BR mount (8)
-.P
-The
-.I libblkid
-and
-.I libmount
-documentation in the latest
-.I util\-linux
-release at
-.UR https://www.kernel.org/pub/linux/utils/util\-linux/
-.UE
diff --git a/man2/openat.2 b/man2/openat.2
deleted file mode 100644
index 604e1213a..000000000
--- a/man2/openat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/open.2
diff --git a/man2/openat2.2 b/man2/openat2.2
deleted file mode 100644
index e2a11162d..000000000
--- a/man2/openat2.2
+++ /dev/null
@@ -1,582 +0,0 @@
-.\" Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.TH openat2 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-openat2 \- open and possibly create a file (extended)
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <fcntl.h>" \
-" /* Definition of " O_* " and " S_* " constants */"
-.BR "#include <linux/openat2.h>" " /* Definition of " RESOLVE_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "long syscall(SYS_openat2, int " dirfd ", const char *" pathname ,
-.BI " struct open_how *" how ", size_t " size );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR openat2 (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR openat2 ()
-system call is an extension of
-.BR openat (2)
-and provides a superset of its functionality.
-.P
-The
-.BR openat2 ()
-system call opens the file specified by
-.IR pathname .
-If the specified file does not exist, it may optionally (if
-.B O_CREAT
-is specified in
-.IR how.flags )
-be created.
-.P
-As with
-.BR openat (2),
-if
-.I pathname
-is a relative pathname, then it is interpreted relative to the
-directory referred to by the file descriptor
-.I dirfd
-(or the current working directory of the calling process, if
-.I dirfd
-is the special value
-.BR AT_FDCWD ).
-If
-.I pathname
-is an absolute pathname, then
-.I dirfd
-is ignored (unless
-.I how.resolve
-contains
-.BR RESOLVE_IN_ROOT ,
-in which case
-.I pathname
-is resolved relative to
-.IR dirfd ).
-.P
-Rather than taking a single
-.I flags
-argument, an extensible structure (\fIhow\fP) is passed to allow for
-future extensions.
-The
-.I size
-argument must be specified as
-.IR "sizeof(struct open_how)" .
-.\"
-.SS The open_how structure
-The
-.I how
-argument specifies how
-.I pathname
-should be opened, and acts as a superset of the
-.I flags
-and
-.I mode
-arguments to
-.BR openat (2).
-This argument is a pointer to an
-.I open_how
-structure,
-described in
-.BR open_how (2type).
-.P
-Any future extensions to
-.BR openat2 ()
-will be implemented as new fields appended to the
-.I open_how
-structure,
-with a zero value in a new field resulting in the kernel behaving
-as though that extension field was not present.
-Therefore, the caller
-.I must
-zero-fill this structure on
-initialization.
-(See the "Extensibility" section of the
-.B NOTES
-for more detail on why this is necessary.)
-.P
-The fields of the
-.I open_how
-structure are as follows:
-.TP
-.I flags
-This field specifies
-the file creation and file status flags to use when opening the file.
-All of the
-.B O_*
-flags defined for
-.BR openat (2)
-are valid
-.BR openat2 ()
-flag values.
-.IP
-Whereas
-.BR openat (2)
-ignores unknown bits in its
-.I flags
-argument,
-.BR openat2 ()
-returns an error if unknown or conflicting flags are specified in
-.IR how.flags .
-.TP
-.I mode
-This field specifies the
-mode for the new file, with identical semantics to the
-.I mode
-argument of
-.BR openat (2).
-.IP
-Whereas
-.BR openat (2)
-ignores bits other than those in the range
-.I 07777
-in its
-.I mode
-argument,
-.BR openat2 ()
-returns an error if
-.I how.mode
-contains bits other than
-.IR 07777 .
-Similarly, an error is returned if
-.BR openat2 ()
-is called with a nonzero
-.I how.mode
-and
-.I how.flags
-does not contain
-.B O_CREAT
-or
-.BR O_TMPFILE .
-.TP
-.I resolve
-This is a bit-mask of flags that modify the way in which
-.B all
-components of
-.I pathname
-will be resolved.
-(See
-.BR path_resolution (7)
-for background information.)
-.IP
-The primary use case for these flags is to allow trusted programs to restrict
-how untrusted paths (or paths inside untrusted directories) are resolved.
-The full list of
-.I resolve
-flags is as follows:
-.RS
-.TP
-.B RESOLVE_BENEATH
-.\" commit adb21d2b526f7f196b2f3fdca97d80ba05dd14a0
-Do not permit the path resolution to succeed if any component of the resolution
-is not a descendant of the directory indicated by
-.IR dirfd .
-This causes absolute symbolic links (and absolute values of
-.IR pathname )
-to be rejected.
-.IP
-Currently, this flag also disables magic-link resolution (see below).
-However, this may change in the future.
-Therefore, to ensure that magic links are not resolved,
-the caller should explicitly specify
-.BR RESOLVE_NO_MAGICLINKS .
-.TP
-.B RESOLVE_IN_ROOT
-.\" commit 8db52c7e7ee1bd861b6096fcafc0fe7d0f24a994
-Treat the directory referred to by
-.I dirfd
-as the root directory while resolving
-.IR pathname .
-Absolute symbolic links are interpreted relative to
-.IR dirfd .
-If a prefix component of
-.I pathname
-equates to
-.IR dirfd ,
-then an immediately following
-.I ..\&
-component likewise equates to
-.I dirfd
-(just as
-.I /..\&
-is traditionally equivalent to
-.IR / ).
-If
-.I pathname
-is an absolute path, it is also interpreted relative to
-.IR dirfd .
-.IP
-The effect of this flag is as though the calling process had used
-.BR chroot (2)
-to (temporarily) modify its root directory (to the directory
-referred to by
-.IR dirfd ).
-However, unlike
-.BR chroot (2)
-(which changes the filesystem root permanently for a process),
-.B RESOLVE_IN_ROOT
-allows a program to efficiently restrict path resolution on a per-open basis.
-.IP
-Currently, this flag also disables magic-link resolution.
-However, this may change in the future.
-Therefore, to ensure that magic links are not resolved,
-the caller should explicitly specify
-.BR RESOLVE_NO_MAGICLINKS .
-.TP
-.B RESOLVE_NO_MAGICLINKS
-.\" commit 278121417a72d87fb29dd8c48801f80821e8f75a
-Disallow all magic-link resolution during path resolution.
-.IP
-Magic links are symbolic link-like objects that are most notably found in
-.BR proc (5);
-examples include
-.IR /proc/ pid /exe
-and
-.IR /proc/ pid /fd/* .
-(See
-.BR symlink (7)
-for more details.)
-.IP
-Unknowingly opening magic links can be risky for some applications.
-Examples of such risks include the following:
-.RS
-.IP \[bu] 3
-If the process opening a pathname is a controlling process that
-currently has no controlling terminal (see
-.BR credentials (7)),
-then opening a magic link inside
-.IR /proc/ pid /fd
-that happens to refer to a terminal
-would cause the process to acquire a controlling terminal.
-.IP \[bu]
-.\" From https://lwn.net/Articles/796868/:
-.\" The presence of this flag will prevent a path lookup operation
-.\" from traversing through one of these magic links, thus blocking
-.\" (for example) attempts to escape from a container via a /proc
-.\" entry for an open file descriptor.
-In a containerized environment,
-a magic link inside
-.I /proc
-may refer to an object outside the container,
-and thus may provide a means to escape from the container.
-.RE
-.IP
-Because of such risks,
-an application may prefer to disable magic link resolution using the
-.B RESOLVE_NO_MAGICLINKS
-flag.
-.IP
-If the trailing component (i.e., basename) of
-.I pathname
-is a magic link,
-.I how.resolve
-contains
-.BR RESOLVE_NO_MAGICLINKS ,
-and
-.I how.flags
-contains both
-.B O_PATH
-and
-.BR O_NOFOLLOW ,
-then an
-.B O_PATH
-file descriptor referencing the magic link will be returned.
-.TP
-.B RESOLVE_NO_SYMLINKS
-.\" commit 278121417a72d87fb29dd8c48801f80821e8f75a
-Disallow resolution of symbolic links during path resolution.
-This option implies
-.BR RESOLVE_NO_MAGICLINKS .
-.IP
-If the trailing component (i.e., basename) of
-.I pathname
-is a symbolic link,
-.I how.resolve
-contains
-.BR RESOLVE_NO_SYMLINKS ,
-and
-.I how.flags
-contains both
-.B O_PATH
-and
-.BR O_NOFOLLOW ,
-then an
-.B O_PATH
-file descriptor referencing the symbolic link will be returned.
-.IP
-Note that the effect of the
-.B RESOLVE_NO_SYMLINKS
-flag,
-which affects the treatment of symbolic links in all of the components of
-.IR pathname ,
-differs from the effect of the
-.B O_NOFOLLOW
-file creation flag (in
-.IR how.flags ),
-which affects the handling of symbolic links only in the final component of
-.IR pathname .
-.IP
-Applications that employ the
-.B RESOLVE_NO_SYMLINKS
-flag are encouraged to make its use configurable
-(unless it is used for a specific security purpose),
-as symbolic links are very widely used by end-users.
-Setting this flag indiscriminately\[em]i.e.,
-for purposes not specifically related to security\[em]for all uses of
-.BR openat2 ()
-may result in spurious errors on previously functional systems.
-This may occur if, for example,
-a system pathname that is used by an application is modified
-(e.g., in a new distribution release)
-so that a pathname component (now) contains a symbolic link.
-.TP
-.B RESOLVE_NO_XDEV
-.\" commit 72ba29297e1439efaa54d9125b866ae9d15df339
-Disallow traversal of mount points during path resolution (including all bind
-mounts).
-Consequently,
-.I pathname
-must either be on the same mount as the directory referred to by
-.IR dirfd ,
-or on the same mount as the current working directory if
-.I dirfd
-is specified as
-.BR AT_FDCWD .
-.IP
-Applications that employ the
-.B RESOLVE_NO_XDEV
-flag are encouraged to make its use configurable (unless it is
-used for a specific security purpose),
-as bind mounts are widely used by end-users.
-Setting this flag indiscriminately\[em]i.e.,
-for purposes not specifically related to security\[em]for all uses of
-.BR openat2 ()
-may result in spurious errors on previously functional systems.
-This may occur if, for example,
-a system pathname that is used by an application is modified
-(e.g., in a new distribution release)
-so that a pathname component (now) contains a bind mount.
-.TP
-.B RESOLVE_CACHED
-Make the open operation fail unless all path components are already present
-in the kernel's lookup cache.
-If any kind of revalidation or I/O is needed to satisfy the lookup,
-.BR openat2 ()
-fails with the error
-.BR EAGAIN .
-This is useful in providing a fast-path open that can be performed without
-resorting to thread offload, or other mechanisms that an application might
-use to offload slower operations.
-.RE
-.IP
-If any bits other than those listed above are set in
-.IR how.resolve ,
-an error is returned.
-.SH RETURN VALUE
-On success, a new file descriptor is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-The set of errors returned by
-.BR openat2 ()
-includes all of the errors returned by
-.BR openat (2),
-as well as the following additional errors:
-.TP
-.B E2BIG
-An extension that this kernel does not support was specified in
-.IR how .
-(See the "Extensibility" section of
-.B NOTES
-for more detail on how extensions are handled.)
-.TP
-.B EAGAIN
-.I how.resolve
-contains either
-.B RESOLVE_IN_ROOT
-or
-.BR RESOLVE_BENEATH ,
-and the kernel could not ensure that a ".." component didn't escape (due to a
-race condition or potential attack).
-The caller may choose to retry the
-.BR openat2 ()
-call.
-.TP
-.B EAGAIN
-.B RESOLVE_CACHED
-was set, and the open operation cannot be performed using only cached
-information.
-The caller should retry without
-.B RESOLVE_CACHED
-set in
-.IR how.resolve .
-.TP
-.B EINVAL
-An unknown flag or invalid value was specified in
-.IR how .
-.TP
-.B EINVAL
-.I how.mode
-is nonzero, but
-.I how.flags
-does not contain
-.B O_CREAT
-or
-.BR O_TMPFILE .
-.TP
-.B EINVAL
-.I size
-was smaller than any known version of
-.IR "struct open_how" .
-.TP
-.B ELOOP
-.I how.resolve
-contains
-.BR RESOLVE_NO_SYMLINKS ,
-and one of the path components was a symbolic link (or magic link).
-.TP
-.B ELOOP
-.I how.resolve
-contains
-.BR RESOLVE_NO_MAGICLINKS ,
-and one of the path components was a magic link.
-.TP
-.B EXDEV
-.I how.resolve
-contains either
-.B RESOLVE_IN_ROOT
-or
-.BR RESOLVE_BENEATH ,
-and an escape from the root during path resolution was detected.
-.TP
-.B EXDEV
-.I how.resolve
-contains
-.BR RESOLVE_NO_XDEV ,
-and a path component crosses a mount point.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.6.
-.\" commit fddb5d430ad9fa91b49b1d34d0202ffe2fa0e179
-.P
-The semantics of
-.B RESOLVE_BENEATH
-were modeled after FreeBSD's
-.BR O_BENEATH .
-.SH NOTES
-.SS Extensibility
-In order to allow for future extensibility,
-.BR openat2 ()
-requires the user-space application to specify the size of the
-.I open_how
-structure that it is passing.
-By providing this information, it is possible for
-.BR openat2 ()
-to provide both forwards- and backwards-compatibility, with
-.I size
-acting as an implicit version number.
-(Because new extension fields will always
-be appended, the structure size will always increase.)
-This extensibility design is very similar to other system calls such as
-.BR sched_setattr (2),
-.BR perf_event_open (2),
-and
-.BR clone3 (2).
-.P
-If we let
-.I usize
-be the size of the structure as specified by the user-space application, and
-.I ksize
-be the size of the structure which the kernel supports, then there are
-three cases to consider:
-.IP \[bu] 3
-If
-.I ksize
-equals
-.IR usize ,
-then there is no version mismatch and
-.I how
-can be used verbatim.
-.IP \[bu]
-If
-.I ksize
-is larger than
-.IR usize ,
-then there are some extension fields that the kernel supports
-which the user-space application
-is unaware of.
-Because a zero value in any added extension field signifies a no-op,
-the kernel
-treats all of the extension fields not provided by the user-space application
-as having zero values.
-This provides backwards-compatibility.
-.IP \[bu]
-If
-.I ksize
-is smaller than
-.IR usize ,
-then there are some extension fields which the user-space application
-is aware of but which the kernel does not support.
-Because any extension field must have its zero values signify a no-op,
-the kernel can
-safely ignore the unsupported extension fields if they are all-zero.
-If any unsupported extension fields are nonzero, then \-1 is returned and
-.I errno
-is set to
-.BR E2BIG .
-This provides forwards-compatibility.
-.P
-Because the definition of
-.I struct open_how
-may change in the future (with new fields being added when system headers are
-updated), user-space applications should zero-fill
-.I struct open_how
-to ensure that recompiling the program with new headers will not result in
-spurious errors at run time.
-The simplest way is to use a designated
-initializer:
-.P
-.in +4n
-.EX
-struct open_how how = { .flags = O_RDWR,
- .resolve = RESOLVE_IN_ROOT };
-.EE
-.in
-.P
-or explicitly using
-.BR memset (3)
-or similar:
-.P
-.in +4n
-.EX
-struct open_how how;
-memset(&how, 0, sizeof(how));
-how.flags = O_RDWR;
-how.resolve = RESOLVE_IN_ROOT;
-.EE
-.in
-.P
-A user-space application that wishes to determine which extensions
-the running kernel supports can do so by conducting a binary search on
-.I size
-with a structure which has every byte nonzero (to find the largest value
-which doesn't produce an error of
-.BR E2BIG ).
-.SH SEE ALSO
-.BR openat (2),
-.BR open_how (2type),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/outb.2 b/man2/outb.2
deleted file mode 100644
index 6f76d7725..000000000
--- a/man2/outb.2
+++ /dev/null
@@ -1,84 +0,0 @@
-.\" Copyright (c) 1995 Paul Gortmaker
-.\" (gpg109@rsphy1.anu.edu.au)
-.\" Wed Nov 29 10:58:54 EST 1995
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH outb 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-outb, outw, outl, outsb, outsw, outsl,
-inb, inw, inl, insb, insw, insl,
-outb_p, outw_p, outl_p, inb_p, inw_p, inl_p \- port I/O
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/io.h>
-.P
-.BI "unsigned char inb(unsigned short " port );
-.BI "unsigned char inb_p(unsigned short " port );
-.BI "unsigned short inw(unsigned short " port );
-.BI "unsigned short inw_p(unsigned short " port );
-.BI "unsigned int inl(unsigned short " port );
-.BI "unsigned int inl_p(unsigned short " port );
-.P
-.BI "void outb(unsigned char " value ", unsigned short " port );
-.BI "void outb_p(unsigned char " value ", unsigned short " port );
-.BI "void outw(unsigned short " value ", unsigned short " port );
-.BI "void outw_p(unsigned short " value ", unsigned short " port );
-.BI "void outl(unsigned int " value ", unsigned short " port );
-.BI "void outl_p(unsigned int " value ", unsigned short " port );
-.P
-.BI "void insb(unsigned short " port ", void " addr [. count ],
-.BI " unsigned long " count );
-.BI "void insw(unsigned short " port ", void " addr [. count ],
-.BI " unsigned long " count );
-.BI "void insl(unsigned short " port ", void " addr [. count ],
-.BI " unsigned long " count );
-.BI "void outsb(unsigned short " port ", const void " addr [. count ],
-.BI " unsigned long " count );
-.BI "void outsw(unsigned short " port ", const void " addr [. count ],
-.BI " unsigned long " count );
-.BI "void outsl(unsigned short " port ", const void " addr [. count ],
-.BI " unsigned long " count );
-.fi
-.SH DESCRIPTION
-This family of functions is used to do low-level port input and output.
-The out* functions do port output, the in* functions do port input;
-the b-suffix functions are byte-width and the w-suffix functions
-word-width; the _p-suffix functions pause until the I/O completes.
-.P
-They are primarily designed for internal kernel use,
-but can be used from user space.
-.\" , given the following information
-.\" in addition to that given in
-.\" .BR outb (9).
-.P
-You must compile with \fB\-O\fP or \fB\-O2\fP or similar.
-The functions
-are defined as inline macros, and will not be substituted in without
-optimization enabled, causing unresolved references at link time.
-.P
-You use
-.BR ioperm (2)
-or alternatively
-.BR iopl (2)
-to tell the kernel to allow the user space application to access the
-I/O ports in question.
-Failure to do this will cause the application
-to receive a segmentation fault.
-.SH VERSIONS
-.BR outb ()
-and friends are hardware-specific.
-The
-.I value
-argument is passed first and the
-.I port
-argument is passed second,
-which is the opposite order from most DOS implementations.
-.SH STANDARDS
-None.
-.SH SEE ALSO
-.BR ioperm (2),
-.BR iopl (2)
diff --git a/man2/outb_p.2 b/man2/outb_p.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/outb_p.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/outl.2 b/man2/outl.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/outl.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/outl_p.2 b/man2/outl_p.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/outl_p.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/outsb.2 b/man2/outsb.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/outsb.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/outsl.2 b/man2/outsl.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/outsl.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/outsw.2 b/man2/outsw.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/outsw.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/outw.2 b/man2/outw.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/outw.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/outw_p.2 b/man2/outw_p.2
deleted file mode 100644
index 2c63c7549..000000000
--- a/man2/outw_p.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/outb.2
diff --git a/man2/pause.2 b/man2/pause.2
deleted file mode 100644
index f8f07f8ea..000000000
--- a/man2/pause.2
+++ /dev/null
@@ -1,50 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt (michael@moria.de)
-.\" Modified Sat Jul 24 14:48:00 1993 by Rik Faith (faith@cs.unc.edu)
-.\" Modified 1995 by Mike Battersby (mib@deakin.edu.au)
-.\" Modified 2000 by aeb, following Michael Kerrisk
-.\"
-.TH pause 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pause \- wait for signal
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B int pause(void);
-.fi
-.SH DESCRIPTION
-.BR pause ()
-causes the calling process (or thread) to sleep
-until a signal is delivered that either terminates the process or causes
-the invocation of a signal-catching function.
-.SH RETURN VALUE
-.BR pause ()
-returns only when a signal was caught and the
-signal-catching function returned.
-In this case,
-.BR pause ()
-returns \-1, and
-.I errno
-is set to
-.\" .BR ERESTARTNOHAND .
-.BR EINTR .
-.SH ERRORS
-.TP
-.B EINTR
-a signal was caught and the signal-catching function returned.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.SH SEE ALSO
-.BR kill (2),
-.BR select (2),
-.BR signal (2),
-.BR sigsuspend (2)
diff --git a/man2/pciconfig_iobase.2 b/man2/pciconfig_iobase.2
deleted file mode 100644
index 5ab299588..000000000
--- a/man2/pciconfig_iobase.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/pciconfig_read.2
diff --git a/man2/pciconfig_read.2 b/man2/pciconfig_read.2
deleted file mode 100644
index 06da7a6a1..000000000
--- a/man2/pciconfig_read.2
+++ /dev/null
@@ -1,122 +0,0 @@
-.\" Contributed by Niki A. Rahimi, LTC Security Development
-.\" narahimi@us.ibm.com
-.\"
-.\" %%%LICENSE_START(FREELY_REDISTRIBUTABLE)
-.\" May be freely distributed and modified.
-.\" %%%LICENSE_END
-.\"
-.TH pciconfig_read 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pciconfig_read, pciconfig_write, pciconfig_iobase \-
-pci device information handling
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <pci.h>
-.P
-.BI "int pciconfig_read(unsigned long " bus ", unsigned long " dfn ,
-.BI " unsigned long " off ", unsigned long " len ,
-.BI " unsigned char *" buf );
-.BI "int pciconfig_write(unsigned long " bus ", unsigned long " dfn ,
-.BI " unsigned long " off ", unsigned long " len ,
-.BI " unsigned char *" buf );
-.BI "int pciconfig_iobase(int " which ", unsigned long " bus ,
-.BI " unsigned long " devfn );
-.fi
-.SH DESCRIPTION
-Most of the interaction with PCI devices is already handled by the
-kernel PCI layer,
-and thus these calls should not normally need to be accessed from user space.
-.TP
-.BR pciconfig_read ()
-Reads to
-.I buf
-from device
-.I dev
-at offset
-.I off
-value.
-.TP
-.BR pciconfig_write ()
-Writes from
-.I buf
-to device
-.I dev
-at offset
-.I off
-value.
-.TP
-.BR pciconfig_iobase ()
-You pass it a bus/devfn pair and get a physical address for either the
-memory offset (for things like prep, this is 0xc0000000),
-the IO base for PIO cycles, or the ISA holes if any.
-.SH RETURN VALUE
-.TP
-.BR pciconfig_read ()
-On success, zero is returned.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.TP
-.BR pciconfig_write ()
-On success, zero is returned.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.TP
-.BR pciconfig_iobase ()
-Returns information on locations of various I/O
-regions in physical memory according to the
-.I which
-value.
-Values for
-.I which
-are:
-.BR IOBASE_BRIDGE_NUMBER ,
-.BR IOBASE_MEMORY ,
-.BR IOBASE_IO ,
-.BR IOBASE_ISA_IO ,
-.BR IOBASE_ISA_MEM .
-.SH ERRORS
-.TP
-.B EINVAL
-.I len
-value is invalid.
-This does not apply to
-.BR pciconfig_iobase ().
-.TP
-.B EIO
-I/O error.
-.TP
-.B ENODEV
-For
-.BR pciconfig_iobase (),
-"hose" value is NULL.
-For the other calls, could not find a slot.
-.TP
-.B ENOSYS
-The system has not implemented these calls
-.RB ( CONFIG_PCI
-not defined).
-.TP
-.B EOPNOTSUPP
-This return value is valid only for
-.BR pciconfig_iobase ().
-It is returned if the value for
-.I which
-is invalid.
-.TP
-.B EPERM
-User does not have the
-.B CAP_SYS_ADMIN
-capability.
-This does not apply to
-.BR pciconfig_iobase ().
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.0.26/2.1.11.
-.SH SEE ALSO
-.BR capabilities (7)
diff --git a/man2/pciconfig_write.2 b/man2/pciconfig_write.2
deleted file mode 100644
index 5ab299588..000000000
--- a/man2/pciconfig_write.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/pciconfig_read.2
diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
deleted file mode 100644
index 5f68f5204..000000000
--- a/man2/perf_event_open.2
+++ /dev/null
@@ -1,4036 +0,0 @@
-.\" Copyright (c) 2012, Vincent Weaver
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" This document is based on the perf_event.h header file, the
-.\" tools/perf/design.txt file, and a lot of bitter experience.
-.\"
-.TH perf_event_open 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-perf_event_open \- set up performance monitoring
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/perf_event.h>" " /* Definition of " PERF_* " constants */"
-.BR "#include <linux/hw_breakpoint.h>" " /* Definition of " HW_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_perf_event_open, struct perf_event_attr *" attr ,
-.BI " pid_t " pid ", int " cpu ", int " group_fd \
-", unsigned long " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR perf_event_open (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-Given a list of parameters,
-.BR perf_event_open ()
-returns a file descriptor, for use in subsequent system calls
-(\c
-.BR read (2),
-.BR mmap (2),
-.BR prctl (2),
-.BR fcntl (2),
-etc.).
-.P
-A call to
-.BR perf_event_open ()
-creates a file descriptor that allows measuring performance
-information.
-Each file descriptor corresponds to one
-event that is measured; these can be grouped together
-to measure multiple events simultaneously.
-.P
-Events can be enabled and disabled in two ways: via
-.BR ioctl (2)
-and via
-.BR prctl (2).
-When an event is disabled it does not count or generate overflows but does
-continue to exist and maintain its count value.
-.P
-Events come in two flavors: counting and sampling.
-A
-.I counting
-event is one that is used for counting the aggregate number of events
-that occur.
-In general, counting event results are gathered with a
-.BR read (2)
-call.
-A
-.I sampling
-event periodically writes measurements to a buffer that can then
-be accessed via
-.BR mmap (2).
-.SS Arguments
-The
-.I pid
-and
-.I cpu
-arguments allow specifying which process and CPU to monitor:
-.TP
-.BR "pid == 0" " and " "cpu == \-1"
-This measures the calling process/thread on any CPU.
-.TP
-.BR "pid == 0" " and " "cpu >= 0"
-This measures the calling process/thread only
-when running on the specified CPU.
-.TP
-.BR "pid > 0" " and " "cpu == \-1"
-This measures the specified process/thread on any CPU.
-.TP
-.BR "pid > 0" " and " "cpu >= 0"
-This measures the specified process/thread only
-when running on the specified CPU.
-.TP
-.BR "pid == \-1" " and " "cpu >= 0"
-This measures all processes/threads on the specified CPU.
-This requires
-.B CAP_PERFMON
-(since Linux 5.8) or
-.B CAP_SYS_ADMIN
-capability or a
-.I /proc/sys/kernel/perf_event_paranoid
-value of less than 1.
-.TP
-.BR "pid == \-1" " and " "cpu == \-1"
-This setting is invalid and will return an error.
-.P
-When
-.I pid
-is greater than zero, permission to perform this system call
-is governed by
-.B CAP_PERFMON
-(since Linux 5.9) and a ptrace access mode
-.B PTRACE_MODE_READ_REALCREDS
-check on older Linux versions; see
-.BR ptrace (2).
-.P
-The
-.I group_fd
-argument allows event groups to be created.
-An event group has one event which is the group leader.
-The leader is created first, with
-.IR group_fd " = \-1."
-The rest of the group members are created with subsequent
-.BR perf_event_open ()
-calls with
-.I group_fd
-being set to the file descriptor of the group leader.
-(A single event on its own is created with
-.IR group_fd " = \-1"
-and is considered to be a group with only 1 member.)
-An event group is scheduled onto the CPU as a unit:
-it will be put onto the CPU
-only if all of the events in the group can be put onto the CPU.
-This means that the values of the member events can be meaningfully compared
-\[em]added, divided (to get ratios), and so on\[em]
-with each other,
-since they have counted events for the same set of executed instructions.
-.P
-The
-.I flags
-argument is formed by ORing together zero or more of the following values:
-.TP
-.BR PERF_FLAG_FD_CLOEXEC " (since Linux 3.14)"
-.\" commit a21b0b354d4ac39be691f51c53562e2c24443d9e
-This flag enables the close-on-exec flag for the created
-event file descriptor,
-so that the file descriptor is automatically closed on
-.BR execve (2).
-Setting the close-on-exec flag at creation time, rather than later with
-.BR fcntl (2),
-avoids potential race conditions where the calling thread invokes
-.BR perf_event_open ()
-and
-.BR fcntl (2)
-at the same time as another thread calls
-.BR fork (2)
-then
-.BR execve (2).
-.TP
-.B PERF_FLAG_FD_NO_GROUP
-This flag tells the event to ignore the
-.I group_fd
-parameter except for the purpose of setting up output redirection
-using the
-.B PERF_FLAG_FD_OUTPUT
-flag.
-.TP
-.BR PERF_FLAG_FD_OUTPUT " (broken since Linux 2.6.35)"
-.\" commit ac9721f3f54b27a16c7e1afb2481e7ee95a70318
-This flag re-routes the event's sampled output to instead
-be included in the mmap buffer of the event specified by
-.IR group_fd .
-.TP
-.BR PERF_FLAG_PID_CGROUP " (since Linux 2.6.39)"
-.\" commit e5d1367f17ba6a6fed5fd8b74e4d5720923e0c25
-This flag activates per-container system-wide monitoring.
-A container
-is an abstraction that isolates a set of resources for finer-grained
-control (CPUs, memory, etc.).
-In this mode, the event is measured
-only if the thread running on the monitored CPU belongs to the designated
-container (cgroup).
-The cgroup is identified by passing a file descriptor
-opened on its directory in the cgroupfs filesystem.
-For instance, if the
-cgroup to monitor is called
-.IR test ,
-then a file descriptor opened on
-.I /dev/cgroup/test
-(assuming cgroupfs is mounted on
-.IR /dev/cgroup )
-must be passed as the
-.I pid
-parameter.
-cgroup monitoring is available only
-for system-wide events and may therefore require extra permissions.
-.P
-The
-.I perf_event_attr
-structure provides detailed configuration information
-for the event being created.
-.P
-.in +4n
-.EX
-struct perf_event_attr {
- __u32 type; /* Type of event */
- __u32 size; /* Size of attribute structure */
- __u64 config; /* Type\-specific configuration */
-\&
- union {
- __u64 sample_period; /* Period of sampling */
- __u64 sample_freq; /* Frequency of sampling */
- };
-\&
- __u64 sample_type; /* Specifies values included in sample */
- __u64 read_format; /* Specifies values returned in read */
-\&
- __u64 disabled : 1, /* off by default */
- inherit : 1, /* children inherit it */
- pinned : 1, /* must always be on PMU */
- exclusive : 1, /* only group on PMU */
- exclude_user : 1, /* don\[aq]t count user */
- exclude_kernel : 1, /* don\[aq]t count kernel */
- exclude_hv : 1, /* don\[aq]t count hypervisor */
- exclude_idle : 1, /* don\[aq]t count when idle */
- mmap : 1, /* include mmap data */
- comm : 1, /* include comm data */
- freq : 1, /* use freq, not period */
- inherit_stat : 1, /* per task counts */
- enable_on_exec : 1, /* next exec enables */
- task : 1, /* trace fork/exit */
- watermark : 1, /* wakeup_watermark */
- precise_ip : 2, /* skid constraint */
- mmap_data : 1, /* non\-exec mmap data */
- sample_id_all : 1, /* sample_type all events */
- exclude_host : 1, /* don\[aq]t count in host */
- exclude_guest : 1, /* don\[aq]t count in guest */
- exclude_callchain_kernel : 1,
- /* exclude kernel callchains */
- exclude_callchain_user : 1,
- /* exclude user callchains */
- mmap2 : 1, /* include mmap with inode data */
- comm_exec : 1, /* flag comm events that are
- due to exec */
- use_clockid : 1, /* use clockid for time fields */
- context_switch : 1, /* context switch data */
- write_backward : 1, /* Write ring buffer from end
- to beginning */
- namespaces : 1, /* include namespaces data */
- ksymbol : 1, /* include ksymbol events */
- bpf_event : 1, /* include bpf events */
- aux_output : 1, /* generate AUX records
- instead of events */
- cgroup : 1, /* include cgroup events */
- text_poke : 1, /* include text poke events */
- build_id : 1, /* use build id in mmap2 events */
- inherit_thread : 1, /* children only inherit */
- /* if cloned with CLONE_THREAD */
- remove_on_exec : 1, /* event is removed from task
- on exec */
- sigtrap : 1, /* send synchronous SIGTRAP
- on event */
-\&
- __reserved_1 : 26;
-\&
- union {
- __u32 wakeup_events; /* wakeup every n events */
- __u32 wakeup_watermark; /* bytes before wakeup */
- };
-\&
- __u32 bp_type; /* breakpoint type */
-\&
- union {
- __u64 bp_addr; /* breakpoint address */
- __u64 kprobe_func; /* for perf_kprobe */
- __u64 uprobe_path; /* for perf_uprobe */
- __u64 config1; /* extension of config */
- };
-\&
- union {
- __u64 bp_len; /* breakpoint length */
- __u64 kprobe_addr; /* with kprobe_func == NULL */
- __u64 probe_offset; /* for perf_[k,u]probe */
- __u64 config2; /* extension of config1 */
- };
- __u64 branch_sample_type; /* enum perf_branch_sample_type */
- __u64 sample_regs_user; /* user regs to dump on samples */
- __u32 sample_stack_user; /* size of stack to dump on
- samples */
- __s32 clockid; /* clock to use for time fields */
- __u64 sample_regs_intr; /* regs to dump on samples */
- __u32 aux_watermark; /* aux bytes before wakeup */
- __u16 sample_max_stack; /* max frames in callchain */
- __u16 __reserved_2; /* align to u64 */
- __u32 aux_sample_size; /* max aux sample size */
- __u32 __reserved_3; /* align to u64 */
- __u64 sig_data; /* user data for sigtrap */
-\&
-};
-.EE
-.in
-.P
-The fields of the
-.I perf_event_attr
-structure are described in more detail below:
-.TP
-.I type
-This field specifies the overall event type.
-It has one of the following values:
-.RS
-.TP
-.B PERF_TYPE_HARDWARE
-This indicates one of the "generalized" hardware events provided
-by the kernel.
-See the
-.I config
-field definition for more details.
-.TP
-.B PERF_TYPE_SOFTWARE
-This indicates one of the software-defined events provided by the kernel
-(even if no hardware support is available).
-.TP
-.B PERF_TYPE_TRACEPOINT
-This indicates a tracepoint
-provided by the kernel tracepoint infrastructure.
-.TP
-.B PERF_TYPE_HW_CACHE
-This indicates a hardware cache event.
-This has a special encoding, described in the
-.I config
-field definition.
-.TP
-.B PERF_TYPE_RAW
-This indicates a "raw" implementation-specific event in the
-.IR config " field."
-.TP
-.BR PERF_TYPE_BREAKPOINT " (since Linux 2.6.33)"
-.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
-This indicates a hardware breakpoint as provided by the CPU.
-Breakpoints can be read/write accesses to an address as well as
-execution of an instruction address.
-.TP
-dynamic PMU
-Since Linux 2.6.38,
-.\" commit 2e80a82a49c4c7eca4e35734380f28298ba5db19
-.BR perf_event_open ()
-can support multiple PMUs.
-To enable this, a value exported by the kernel can be used in the
-.I type
-field to indicate which PMU to use.
-The value to use can be found in the sysfs filesystem:
-there is a subdirectory per PMU instance under
-.IR /sys/bus/event_source/devices .
-In each subdirectory there is a
-.I type
-file whose content is an integer that can be used in the
-.I type
-field.
-For instance,
-.I /sys/bus/event_source/devices/cpu/type
-contains the value for the core CPU PMU, which is usually 4.
-.TP
-.BR kprobe " and " uprobe " (since Linux 4.17)"
-.\" commit 65074d43fc77bcae32776724b7fa2696923c78e4
-.\" commit e12f03d7031a977356e3d7b75a68c2185ff8d155
-.\" commit 33ea4b24277b06dbc55d7f5772a46f029600255e
-These two dynamic PMUs create a kprobe/uprobe and attach it to the
-file descriptor generated by perf_event_open.
-The kprobe/uprobe will be destroyed on the destruction of the file descriptor.
-See fields
-.IR kprobe_func ,
-.IR uprobe_path ,
-.IR kprobe_addr ,
-and
-.I probe_offset
-for more details.
-.RE
-.TP
-.I "size"
-The size of the
-.I perf_event_attr
-structure for forward/backward compatibility.
-Set this using
-.I sizeof(struct perf_event_attr)
-to allow the kernel to see
-the struct size at the time of compilation.
-.IP
-The related define
-.B PERF_ATTR_SIZE_VER0
-is set to 64; this was the size of the first published struct.
-.B PERF_ATTR_SIZE_VER1
-is 72, corresponding to the addition of breakpoints in Linux 2.6.33.
-.\" commit cb5d76999029ae7a517cb07dfa732c1b5a934fc2
-.\" this was added much later when PERF_ATTR_SIZE_VER2 happened
-.\" but the actual attr_size had increased in Linux 2.6.33
-.B PERF_ATTR_SIZE_VER2
-is 80 corresponding to the addition of branch sampling in Linux 3.4.
-.\" commit cb5d76999029ae7a517cb07dfa732c1b5a934fc2
-.B PERF_ATTR_SIZE_VER3
-is 96 corresponding to the addition
-of
-.I sample_regs_user
-and
-.I sample_stack_user
-in Linux 3.7.
-.\" commit 1659d129ed014b715b0b2120e6fd929bdd33ed03
-.B PERF_ATTR_SIZE_VER4
-is 104 corresponding to the addition of
-.I sample_regs_intr
-in Linux 3.19.
-.\" commit 60e2364e60e86e81bc6377f49779779e6120977f
-.B PERF_ATTR_SIZE_VER5
-is 112 corresponding to the addition of
-.I aux_watermark
-in Linux 4.1.
-.\" commit 1a5941312414c71dece6717da9a0fa1303127afa
-.TP
-.I "config"
-This specifies which event you want, in conjunction with
-the
-.I type
-field.
-The
-.I config1
-and
-.I config2
-fields are also taken into account in cases where 64 bits is not
-enough to fully specify the event.
-The encoding of these fields is event dependent.
-.IP
-There are various ways to set the
-.I config
-field that are dependent on the value of the previously
-described
-.I type
-field.
-What follows are various possible settings for
-.I config
-separated out by
-.IR type .
-.IP
-If
-.I type
-is
-.BR PERF_TYPE_HARDWARE ,
-we are measuring one of the generalized hardware CPU events.
-Not all of these are available on all platforms.
-Set
-.I config
-to one of the following:
-.RS 12
-.TP
-.B PERF_COUNT_HW_CPU_CYCLES
-Total cycles.
-Be wary of what happens during CPU frequency scaling.
-.TP
-.B PERF_COUNT_HW_INSTRUCTIONS
-Retired instructions.
-Be careful, these can be affected by various
-issues, most notably hardware interrupt counts.
-.TP
-.B PERF_COUNT_HW_CACHE_REFERENCES
-Cache accesses.
-Usually this indicates Last Level Cache accesses but this may
-vary depending on your CPU.
-This may include prefetches and coherency messages; again this
-depends on the design of your CPU.
-.TP
-.B PERF_COUNT_HW_CACHE_MISSES
-Cache misses.
-Usually this indicates Last Level Cache misses; this is intended to be
-used in conjunction with the
-.B PERF_COUNT_HW_CACHE_REFERENCES
-event to calculate cache miss rates.
-.TP
-.B PERF_COUNT_HW_BRANCH_INSTRUCTIONS
-Retired branch instructions.
-Prior to Linux 2.6.35, this used
-the wrong event on AMD processors.
-.\" commit f287d332ce835f77a4f5077d2c0ef1e3f9ea42d2
-.TP
-.B PERF_COUNT_HW_BRANCH_MISSES
-Mispredicted branch instructions.
-.TP
-.B PERF_COUNT_HW_BUS_CYCLES
-Bus cycles, which can be different from total cycles.
-.TP
-.BR PERF_COUNT_HW_STALLED_CYCLES_FRONTEND " (since Linux 3.0)"
-.\" commit 8f62242246351b5a4bc0c1f00c0c7003edea128a
-Stalled cycles during issue.
-.TP
-.BR PERF_COUNT_HW_STALLED_CYCLES_BACKEND " (since Linux 3.0)"
-.\" commit 8f62242246351b5a4bc0c1f00c0c7003edea128a
-Stalled cycles during retirement.
-.TP
-.BR PERF_COUNT_HW_REF_CPU_CYCLES " (since Linux 3.3)"
-.\" commit c37e17497e01fc0f5d2d6feb5723b210b3ab8890
-Total cycles; not affected by CPU frequency scaling.
-.RE
-.IP
-If
-.I type
-is
-.BR PERF_TYPE_SOFTWARE ,
-we are measuring software events provided by the kernel.
-Set
-.I config
-to one of the following:
-.RS 12
-.TP
-.B PERF_COUNT_SW_CPU_CLOCK
-This reports the CPU clock, a high-resolution per-CPU timer.
-.TP
-.B PERF_COUNT_SW_TASK_CLOCK
-This reports a clock count specific to the task that is running.
-.TP
-.B PERF_COUNT_SW_PAGE_FAULTS
-This reports the number of page faults.
-.TP
-.B PERF_COUNT_SW_CONTEXT_SWITCHES
-This counts context switches.
-Until Linux 2.6.34, these were all reported as user-space
-events; after that, they are reported as happening in the kernel.
-.\" commit e49a5bd38159dfb1928fd25b173bc9de4bbadb21
-.TP
-.B PERF_COUNT_SW_CPU_MIGRATIONS
-This reports the number of times the process
-has migrated to a new CPU.
-.TP
-.B PERF_COUNT_SW_PAGE_FAULTS_MIN
-This counts the number of minor page faults.
-These did not require disk I/O to handle.
-.TP
-.B PERF_COUNT_SW_PAGE_FAULTS_MAJ
-This counts the number of major page faults.
-These required disk I/O to handle.
-.TP
-.BR PERF_COUNT_SW_ALIGNMENT_FAULTS " (since Linux 2.6.33)"
-.\" commit f7d7986060b2890fc26db6ab5203efbd33aa2497
-This counts the number of alignment faults.
-These happen when unaligned memory accesses happen; the kernel
-can handle these but it reduces performance.
-This happens only on some architectures (never on x86).
-.TP
-.BR PERF_COUNT_SW_EMULATION_FAULTS " (since Linux 2.6.33)"
-.\" commit f7d7986060b2890fc26db6ab5203efbd33aa2497
-This counts the number of emulation faults.
-The kernel sometimes traps on unimplemented instructions
-and emulates them for user space.
-This can negatively impact performance.
-.TP
-.BR PERF_COUNT_SW_DUMMY " (since Linux 3.12)"
-.\" commit fa0097ee690693006ab1aea6c01ad3c851b65c77
-This is a placeholder event that counts nothing.
-Informational sample record types such as mmap or comm
-must be associated with an active event.
-This dummy event allows gathering such records without requiring
-a counting event.
-.TP
-.BR PERF_COUNT_SW_BPF_OUTPUT " (since Linux 4.4)"
-.\" commit a43eec304259a6c637f4014a6d4767159b6a3aa3
-This is used to generate raw sample data from BPF.
-BPF programs can write to this event using
-.B bpf_perf_event_output
-helper.
-.TP
-.BR PERF_COUNT_SW_CGROUP_SWITCHES " (since Linux 5.13)"
-.\" commit d0d1dd628527c77db2391ce0293c1ed344b2365f
-This counts context switches to a task in a different cgroup.
-In other words, if the next task is in the same cgroup,
-it won't count the switch.
-.RE
-.P
-.RS
-If
-.I type
-is
-.BR PERF_TYPE_TRACEPOINT ,
-then we are measuring kernel tracepoints.
-The value to use in
-.I config
-can be obtained from under debugfs
-.I tracing/events/*/*/id
-if ftrace is enabled in the kernel.
-.RE
-.P
-.RS
-If
-.I type
-is
-.BR PERF_TYPE_HW_CACHE ,
-then we are measuring a hardware CPU cache event.
-To calculate the appropriate
-.I config
-value, use the following equation:
-.RS 4
-.P
-.in +4n
-.EX
-config = (perf_hw_cache_id) |
- (perf_hw_cache_op_id << 8) |
- (perf_hw_cache_op_result_id << 16);
-.EE
-.in
-.P
-where
-.I perf_hw_cache_id
-is one of:
-.RS 4
-.TP
-.B PERF_COUNT_HW_CACHE_L1D
-for measuring Level 1 Data Cache
-.TP
-.B PERF_COUNT_HW_CACHE_L1I
-for measuring Level 1 Instruction Cache
-.TP
-.B PERF_COUNT_HW_CACHE_LL
-for measuring Last-Level Cache
-.TP
-.B PERF_COUNT_HW_CACHE_DTLB
-for measuring the Data TLB
-.TP
-.B PERF_COUNT_HW_CACHE_ITLB
-for measuring the Instruction TLB
-.TP
-.B PERF_COUNT_HW_CACHE_BPU
-for measuring the branch prediction unit
-.TP
-.BR PERF_COUNT_HW_CACHE_NODE " (since Linux 3.1)"
-.\" commit 89d6c0b5bdbb1927775584dcf532d98b3efe1477
-for measuring local memory accesses
-.RE
-.P
-and
-.I perf_hw_cache_op_id
-is one of:
-.RS 4
-.TP
-.B PERF_COUNT_HW_CACHE_OP_READ
-for read accesses
-.TP
-.B PERF_COUNT_HW_CACHE_OP_WRITE
-for write accesses
-.TP
-.B PERF_COUNT_HW_CACHE_OP_PREFETCH
-for prefetch accesses
-.RE
-.P
-and
-.I perf_hw_cache_op_result_id
-is one of:
-.RS 4
-.TP
-.B PERF_COUNT_HW_CACHE_RESULT_ACCESS
-to measure accesses
-.TP
-.B PERF_COUNT_HW_CACHE_RESULT_MISS
-to measure misses
-.RE
-.RE
-.P
-If
-.I type
-is
-.BR PERF_TYPE_RAW ,
-then a custom "raw"
-.I config
-value is needed.
-Most CPUs support events that are not covered by the "generalized" events.
-These are implementation defined; see your CPU manual (for example
-the Intel Volume 3B documentation or the AMD BIOS and Kernel Developer
-Guide).
-The libpfm4 library can be used to translate from the name in the
-architectural manuals to the raw hex value
-.BR perf_event_open ()
-expects in this field.
-.P
-If
-.I type
-is
-.BR PERF_TYPE_BREAKPOINT ,
-then leave
-.I config
-set to zero.
-Its parameters are set in other places.
-.P
-If
-.I type
-is
-.B kprobe
-or
-.BR uprobe ,
-set
-.I retprobe
-(bit 0 of
-.IR config ,
-see
-.IR /sys/bus/event_source/devices/[k,u]probe/format/retprobe )
-for kretprobe/uretprobe.
-See fields
-.IR kprobe_func ,
-.IR uprobe_path ,
-.IR kprobe_addr ,
-and
-.I probe_offset
-for more details.
-.RE
-.TP
-.I kprobe_func
-.TQ
-.I uprobe_path
-.TQ
-.I kprobe_addr
-.TQ
-.I probe_offset
-These fields describe the kprobe/uprobe for dynamic PMUs
-.B kprobe
-and
-.BR uprobe .
-For
-.BR kprobe :
-use
-.I kprobe_func
-and
-.IR probe_offset ,
-or use
-.I kprobe_addr
-and leave
-.I kprobe_func
-as NULL.
-For
-.BR uprobe :
-use
-.I uprobe_path
-and
-.IR probe_offset .
-.TP
-.I sample_period
-.TQ
-.I sample_freq
-A "sampling" event is one that generates an overflow notification
-every N events, where N is given by
-.IR sample_period .
-A sampling event has
-.IR sample_period " > 0."
-When an overflow occurs, requested data is recorded
-in the mmap buffer.
-The
-.I sample_type
-field controls what data is recorded on each overflow.
-.IP
-.I sample_freq
-can be used if you wish to use frequency rather than period.
-In this case, you set the
-.I freq
-flag.
-The kernel will adjust the sampling period
-to try and achieve the desired rate.
-The rate of adjustment is a
-timer tick.
-.TP
-.I sample_type
-The various bits in this field specify which values to include
-in the sample.
-They will be recorded in a ring-buffer,
-which is available to user space using
-.BR mmap (2).
-The order in which the values are saved in the
-sample is documented in the MMAP Layout subsection below;
-it is not the
-.I "enum perf_event_sample_format"
-order.
-.RS
-.TP
-.B PERF_SAMPLE_IP
-Records instruction pointer.
-.TP
-.B PERF_SAMPLE_TID
-Records the process and thread IDs.
-.TP
-.B PERF_SAMPLE_TIME
-Records a timestamp.
-.TP
-.B PERF_SAMPLE_ADDR
-Records an address, if applicable.
-.TP
-.B PERF_SAMPLE_READ
-Records counter values for all events in a group, not just the group leader.
-.TP
-.B PERF_SAMPLE_CALLCHAIN
-Records the callchain (stack backtrace).
-.TP
-.B PERF_SAMPLE_ID
-Records a unique ID for the opened event's group leader.
-.TP
-.B PERF_SAMPLE_CPU
-Records CPU number.
-.TP
-.B PERF_SAMPLE_PERIOD
-Records the current sampling period.
-.TP
-.B PERF_SAMPLE_STREAM_ID
-Records a unique ID for the opened event.
-Unlike
-.B PERF_SAMPLE_ID
-the actual ID is returned, not the group leader.
-This ID is the same as the one returned by
-.BR PERF_FORMAT_ID .
-.TP
-.B PERF_SAMPLE_RAW
-Records additional data, if applicable.
-Usually returned by tracepoint events.
-.TP
-.BR PERF_SAMPLE_BRANCH_STACK " (since Linux 3.4)"
-.\" commit bce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e
-This provides a record of recent branches, as provided
-by CPU branch sampling hardware (such as Intel Last Branch Record).
-Not all hardware supports this feature.
-.IP
-See the
-.I branch_sample_type
-field for how to filter which branches are reported.
-.TP
-.BR PERF_SAMPLE_REGS_USER " (since Linux 3.7)"
-.\" commit 4018994f3d8785275ef0e7391b75c3462c029e56
-Records the current user-level CPU register state
-(the values in the process before the kernel was called).
-.TP
-.BR PERF_SAMPLE_STACK_USER " (since Linux 3.7)"
-.\" commit c5ebcedb566ef17bda7b02686e0d658a7bb42ee7
-Records the user level stack, allowing stack unwinding.
-.TP
-.BR PERF_SAMPLE_WEIGHT " (since Linux 3.10)"
-.\" commit c3feedf2aaf9ac8bad6f19f5d21e4ee0b4b87e9c
-Records a hardware provided weight value that expresses how
-costly the sampled event was.
-This allows the hardware to highlight expensive events in
-a profile.
-.TP
-.BR PERF_SAMPLE_DATA_SRC " (since Linux 3.10)"
-.\" commit d6be9ad6c960f43800a6f118932bc8a5a4eadcd1
-Records the data source: where in the memory hierarchy
-the data associated with the sampled instruction came from.
-This is available only if the underlying hardware
-supports this feature.
-.TP
-.BR PERF_SAMPLE_IDENTIFIER " (since Linux 3.12)"
-.\" commit ff3d527cebc1fa3707c617bfe9e74f53fcfb0955
-Places the
-.B SAMPLE_ID
-value in a fixed position in the record,
-either at the beginning (for sample events) or at the end
-(if a non-sample event).
-.IP
-This was necessary because a sample stream may have
-records from various different event sources with different
-.I sample_type
-settings.
-Parsing the event stream properly was not possible because the
-format of the record was needed to find
-.BR SAMPLE_ID ,
-but
-the format could not be found without knowing what
-event the sample belonged to (causing a circular
-dependency).
-.IP
-The
-.B PERF_SAMPLE_IDENTIFIER
-setting makes the event stream always parsable
-by putting
-.B SAMPLE_ID
-in a fixed location, even though
-it means having duplicate
-.B SAMPLE_ID
-values in records.
-.TP
-.BR PERF_SAMPLE_TRANSACTION " (since Linux 3.13)"
-.\" commit fdfbbd07e91f8fe387140776f3fd94605f0c89e5
-Records reasons for transactional memory abort events
-(for example, from Intel TSX transactional memory support).
-.IP
-The
-.I precise_ip
-setting must be greater than 0 and a transactional memory abort
-event must be measured or no values will be recorded.
-Also note that some perf_event measurements, such as sampled
-cycle counting, may cause extraneous aborts (by causing an
-interrupt during a transaction).
-.TP
-.BR PERF_SAMPLE_REGS_INTR " (since Linux 3.19)"
-.\" commit 60e2364e60e86e81bc6377f49779779e6120977f
-Records a subset of the current CPU register state
-as specified by
-.IR sample_regs_intr .
-Unlike
-.B PERF_SAMPLE_REGS_USER
-the register values will return kernel register
-state if the overflow happened while kernel
-code is running.
-If the CPU supports hardware sampling of
-register state (i.e., PEBS on Intel x86) and
-.I precise_ip
-is set higher than zero then the register
-values returned are those captured by
-hardware at the time of the sampled
-instruction's retirement.
-.TP
-.BR PERF_SAMPLE_PHYS_ADDR " (since Linux 4.13)"
-.\" commit fc7ce9c74c3ad232b084d80148654f926d01ece7
-Records physical address of data like in
-.BR PERF_SAMPLE_ADDR .
-.TP
-.BR PERF_SAMPLE_CGROUP " (since Linux 5.7)"
-.\" commit 96aaab686505c449e24d76e76507290dcc30e008
-Records (perf_event) cgroup ID of the process.
-This corresponds to the
-.I id
-field in the
-.B PERF_RECORD_CGROUP
-event.
-.TP
-.BR PERF_SAMPLE_DATA_PAGE_SIZE " (since Linux 5.11)"
-.\" commit 8d97e71811aaafe4abf611dc24822fd6e73df1a1
-Records page size of data like in
-.BR PERF_SAMPLE_ADDR .
-.TP
-.BR PERF_SAMPLE_CODE_PAGE_SIZE " (since Linux 5.11)"
-.\" commit 995f088efebe1eba0282a6ffa12411b37f8990c2
-Records page size of ip like in
-.BR PERF_SAMPLE_IP .
-.TP
-.BR PERF_SAMPLE_WEIGHT_STRUCT " (since Linux 5.12)"
-.\" commit 2a6c6b7d7ad346f0679d0963cb19b3f0ea7ef32c
-Records hardware provided weight values like in
-.BR PERF_SAMPLE_WEIGHT ,
-but it can represent multiple values in a struct.
-This shares the same space as
-.BR PERF_SAMPLE_WEIGHT ,
-so users can apply either of those,
-not both.
-It has the following format and
-the meaning of each field is
-dependent on the hardware implementation.
-.IP
-.in +4n
-.EX
-union perf_sample_weight {
- u64 full; /* PERF_SAMPLE_WEIGHT */
- struct { /* PERF_SAMPLE_WEIGHT_STRUCT */
- u32 var1_dw;
- u16 var2_w;
- u16 var3_w;
- };
-};
-.EE
-.in
-.RE
-.TP
-.I read_format
-This field specifies the format of the data returned by
-.BR read (2)
-on a
-.BR perf_event_open ()
-file descriptor.
-.RS
-.TP
-.B PERF_FORMAT_TOTAL_TIME_ENABLED
-Adds the 64-bit
-.I time_enabled
-field.
-This can be used to calculate estimated totals if
-the PMU is overcommitted and multiplexing is happening.
-.TP
-.B PERF_FORMAT_TOTAL_TIME_RUNNING
-Adds the 64-bit
-.I time_running
-field.
-This can be used to calculate estimated totals if
-the PMU is overcommitted and multiplexing is happening.
-.TP
-.B PERF_FORMAT_ID
-Adds a 64-bit unique value that corresponds to the event group.
-.TP
-.B PERF_FORMAT_GROUP
-Allows all counter values in an event group to be read with one read.
-.TP
-.BR PERF_FORMAT_LOST " (since Linux 6.0)"
-.\" commit 119a784c81270eb88e573174ed2209225d646656
-Adds a 64-bit value that is the number of lost samples for this event.
-This would be only meaningful when
-.I sample_period
-or
-.I sample_freq
-is set.
-.RE
-.TP
-.I disabled
-The
-.I disabled
-bit specifies whether the counter starts out disabled or enabled.
-If disabled, the event can later be enabled by
-.BR ioctl (2),
-.BR prctl (2),
-or
-.IR enable_on_exec .
-.IP
-When creating an event group, typically the group leader is initialized
-with
-.I disabled
-set to 1 and any child events are initialized with
-.I disabled
-set to 0.
-Despite
-.I disabled
-being 0, the child events will not start until the group leader
-is enabled.
-.TP
-.I inherit
-The
-.I inherit
-bit specifies that this counter should count events of child
-tasks as well as the task specified.
-This applies only to new children, not to any existing children at
-the time the counter is created (nor to any new children of
-existing children).
-.IP
-Inherit does not work for some combinations of
-.I read_format
-values, such as
-.BR PERF_FORMAT_GROUP .
-.TP
-.I pinned
-The
-.I pinned
-bit specifies that the counter should always be on the CPU if at all
-possible.
-It applies only to hardware counters and only to group leaders.
-If a pinned counter cannot be put onto the CPU (e.g., because there are
-not enough hardware counters or because of a conflict with some other
-event), then the counter goes into an 'error' state, where reads
-return end-of-file (i.e.,
-.BR read (2)
-returns 0) until the counter is subsequently enabled or disabled.
-.TP
-.I exclusive
-The
-.I exclusive
-bit specifies that when this counter's group is on the CPU,
-it should be the only group using the CPU's counters.
-In the future this may allow monitoring programs to
-support PMU features that need to run alone so that they do not
-disrupt other hardware counters.
-.IP
-Note that many unexpected situations may prevent events with the
-.I exclusive
-bit set from ever running.
-This includes any users running a system-wide
-measurement as well as any kernel use of the performance counters
-(including the commonly enabled NMI Watchdog Timer interface).
-.TP
-.I exclude_user
-If this bit is set, the count excludes events that happen in user space.
-.TP
-.I exclude_kernel
-If this bit is set, the count excludes events that happen in kernel space.
-.TP
-.I exclude_hv
-If this bit is set, the count excludes events that happen in the
-hypervisor.
-This is mainly for PMUs that have built-in support for handling this
-(such as POWER).
-Extra support is needed for handling hypervisor measurements on most
-machines.
-.TP
-.I exclude_idle
-If set, don't count when the CPU is running the idle task.
-While you can currently enable this for any event type, it is ignored
-for all but software events.
-.TP
-.I mmap
-The
-.I mmap
-bit enables generation of
-.B PERF_RECORD_MMAP
-samples for every
-.BR mmap (2)
-call that has
-.B PROT_EXEC
-set.
-This allows tools to notice new executable code being mapped into
-a program (dynamic shared libraries for example)
-so that addresses can be mapped back to the original code.
-.TP
-.I comm
-The
-.I comm
-bit enables tracking of process command name as modified by the
-.BR execve (2)
-and
-.BR prctl (PR_SET_NAME)
-system calls as well as writing to
-.IR /proc/self/comm .
-If the
-.I comm_exec
-flag is also successfully set (possible since Linux 3.16),
-.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
-then the misc flag
-.B PERF_RECORD_MISC_COMM_EXEC
-can be used to differentiate the
-.BR execve (2)
-case from the others.
-.TP
-.I freq
-If this bit is set, then
-.I sample_freq
-not
-.I sample_period
-is used when setting up the sampling interval.
-.TP
-.I inherit_stat
-This bit enables saving of event counts on context switch for
-inherited tasks.
-This is meaningful only if the
-.I inherit
-field is set.
-.TP
-.I enable_on_exec
-If this bit is set, a counter is automatically
-enabled after a call to
-.BR execve (2).
-.TP
-.I task
-If this bit is set, then
-fork/exit notifications are included in the ring buffer.
-.TP
-.I watermark
-If set, have an overflow notification happen when we cross the
-.I wakeup_watermark
-boundary.
-Otherwise, overflow notifications happen after
-.I wakeup_events
-samples.
-.TP
-.IR precise_ip " (since Linux 2.6.35)"
-.\" commit ab608344bcbde4f55ec4cd911b686b0ce3eae076
-This controls the amount of skid.
-Skid is how many instructions
-execute between an event of interest happening and the kernel
-being able to stop and record the event.
-Smaller skid is
-better and allows more accurate reporting of which events
-correspond to which instructions, but hardware is often limited
-with how small this can be.
-.IP
-The possible values of this field are the following:
-.RS
-.TP
-.B 0
-.B SAMPLE_IP
-can have arbitrary skid.
-.TP
-.B 1
-.B SAMPLE_IP
-must have constant skid.
-.TP
-.B 2
-.B SAMPLE_IP
-requested to have 0 skid.
-.TP
-.B 3
-.B SAMPLE_IP
-must have 0 skid.
-See also the description of
-.BR PERF_RECORD_MISC_EXACT_IP .
-.RE
-.TP
-.IR mmap_data " (since Linux 2.6.36)"
-.\" commit 3af9e859281bda7eb7c20b51879cf43aa788ac2e
-This is the counterpart of the
-.I mmap
-field.
-This enables generation of
-.B PERF_RECORD_MMAP
-samples for
-.BR mmap (2)
-calls that do not have
-.B PROT_EXEC
-set (for example data and SysV shared memory).
-.TP
-.IR sample_id_all " (since Linux 2.6.38)"
-.\" commit c980d1091810df13f21aabbce545fd98f545bbf7
-If set, then TID, TIME, ID, STREAM_ID, and CPU can
-additionally be included in
-.RB non- PERF_RECORD_SAMPLE s
-if the corresponding
-.I sample_type
-is selected.
-.IP
-If
-.B PERF_SAMPLE_IDENTIFIER
-is specified, then an additional ID value is included
-as the last value to ease parsing the record stream.
-This may lead to the
-.I id
-value appearing twice.
-.IP
-The layout is described by this pseudo-structure:
-.IP
-.in +4n
-.EX
-struct sample_id {
- { u32 pid, tid; } /* if PERF_SAMPLE_TID set */
- { u64 time; } /* if PERF_SAMPLE_TIME set */
- { u64 id; } /* if PERF_SAMPLE_ID set */
- { u64 stream_id;} /* if PERF_SAMPLE_STREAM_ID set */
- { u32 cpu, res; } /* if PERF_SAMPLE_CPU set */
- { u64 id; } /* if PERF_SAMPLE_IDENTIFIER set */
-};
-.EE
-.in
-.TP
-.IR exclude_host " (since Linux 3.2)"
-.\" commit a240f76165e6255384d4bdb8139895fac7988799
-When conducting measurements that include processes running
-VM instances (i.e., have executed a
-.B KVM_RUN
-.BR ioctl (2)),
-only measure events happening inside a guest instance.
-This is only meaningful outside the guests; this setting does
-not change counts gathered inside of a guest.
-Currently, this functionality is x86 only.
-.TP
-.IR exclude_guest " (since Linux 3.2)"
-.\" commit a240f76165e6255384d4bdb8139895fac7988799
-When conducting measurements that include processes running
-VM instances (i.e., have executed a
-.B KVM_RUN
-.BR ioctl (2)),
-do not measure events happening inside guest instances.
-This is only meaningful outside the guests; this setting does
-not change counts gathered inside of a guest.
-Currently, this functionality is x86 only.
-.TP
-.IR exclude_callchain_kernel " (since Linux 3.7)"
-.\" commit d077526485d5c9b12fe85d0b2b3b7041e6bc5f91
-Do not include kernel callchains.
-.TP
-.IR exclude_callchain_user " (since Linux 3.7)"
-.\" commit d077526485d5c9b12fe85d0b2b3b7041e6bc5f91
-Do not include user callchains.
-.TP
-.IR mmap2 " (since Linux 3.16)"
-.\" commit 13d7a2410fa637f450a29ecb515ac318ee40c741
-.\" This is tricky; was committed during 3.12 development
-.\" but right before release was disabled.
-.\" So while you could select mmap2 starting with Linux 3.12
-.\" it did not work until Linux 3.16
-.\" commit a5a5ba72843dd05f991184d6cb9a4471acce1005
-Generate an extended executable mmap record that contains enough
-additional information to uniquely identify shared mappings.
-The
-.I mmap
-flag must also be set for this to work.
-.TP
-.IR comm_exec " (since Linux 3.16)"
-.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
-This is purely a feature-detection flag; it does not change
-kernel behavior.
-If this flag can successfully be set, then, when
-.I comm
-is enabled, the
-.B PERF_RECORD_MISC_COMM_EXEC
-flag will be set in the
-.I misc
-field of a comm record header if the rename event being
-reported was caused by a call to
-.BR execve (2).
-This allows tools to distinguish between the various
-types of process renaming.
-.TP
-.IR use_clockid " (since Linux 4.1)"
-.\" commit 34f439278cef7b1177f8ce24f9fc81dfc6221d3b
-This allows selecting which internal Linux clock to use
-when generating timestamps via the
-.I clockid
-field.
-This can make it easier to correlate perf sample times with
-timestamps generated by other tools.
-.TP
-.IR context_switch " (since Linux 4.3)"
-.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4
-This enables the generation of
-.B PERF_RECORD_SWITCH
-records when a context switch occurs.
-It also enables the generation of
-.B PERF_RECORD_SWITCH_CPU_WIDE
-records when sampling in CPU-wide mode.
-This functionality is in addition to existing tracepoint and
-software events for measuring context switches.
-The advantage of this method is that it will give full
-information even with strict
-.I perf_event_paranoid
-settings.
-.TP
-.IR write_backward " (since Linux 4.6)"
-.\" commit 9ecda41acb971ebd07c8fb35faf24005c0baea12
-This causes the ring buffer to be written from the end to the beginning.
-This is to support reading from an overwritable ring buffer.
-.TP
-.IR namespaces " (since Linux 4.11)"
-.\" commit e422267322cd319e2695a535e47c5b1feeac45eb
-This enables the generation of
-.B PERF_RECORD_NAMESPACES
-records when a task enters a new namespace.
-Each namespace has a combination of device and inode numbers.
-.TP
-.IR ksymbol " (since Linux 5.0)"
-.\" commit 76193a94522f1d4edf2447a536f3f796ce56343b
-This enables the generation of
-.B PERF_RECORD_KSYMBOL
-records when new kernel symbols are registered or unregistered.
-This is useful for analyzing dynamic kernel functions like eBPF.
-.TP
-.IR bpf_event " (since Linux 5.0)"
-.\" commit 6ee52e2a3fe4ea35520720736e6791df1fb67106
-This enables the generation of
-.B PERF_RECORD_BPF_EVENT
-records when an eBPF program is loaded or unloaded.
-.TP
-.IR aux_output " (since Linux 5.4)"
-.\" commit ab43762ef010967e4ccd53627f70a2eecbeafefb
-This allows normal (non-AUX) events to generate data for AUX events
-if the hardware supports it.
-.TP
-.IR cgroup " (since Linux 5.7)"
-.\" commit 96aaab686505c449e24d76e76507290dcc30e008
-This enables the generation of
-.B PERF_RECORD_CGROUP
-records when a new cgroup is created (and activated).
-.TP
-.IR text_poke " (since Linux 5.8)"
-.\" commit e17d43b93e544f5016c0251d2074c15568d5d963
-This enables the generation of
-.B PERF_RECORD_TEXT_POKE
-records when there's a change to the kernel text
-(i.e., self-modifying code).
-.TP
-.IR build_id " (since Linux 5.12)"
-.\" commit 88a16a1309333e43d328621ece3e9fa37027e8eb
-This changes the contents in the
-.B PERF_RECORD_MMAP2
-to have a build-id instead of device and inode numbers.
-.TP
-.IR inherit_thread " (since Linux 5.13)"
-.\" commit 2b26f0aa004995f49f7b6f4100dd0e4c39a9ed5f
-This disables the inheritance of the event to a child process.
-Only new threads in the same process
-(which are cloned with
-.BR CLONE_THREAD )
-will inherit the event.
-.TP
-.IR remove_on_exec " (since Linux 5.13)"
-.\" commit 2e498d0a74e5b88a6689ae1b811f247f91ff188e
-This closes the event when it starts a new process image by
-.BR execve (2).
-.TP
-.IR sigtrap " (since Linux 5.13)"
-.\" commit 97ba62b278674293762c3d91f724f1bb922f04e0
-This enables synchronous signal delivery of
-.B SIGTRAP
-on event overflow.
-.TP
-.I wakeup_events
-.TQ
-.I wakeup_watermark
-This union sets how many samples
-.RI ( wakeup_events )
-or bytes
-.RI ( wakeup_watermark )
-happen before an overflow notification happens.
-Which one is used is selected by the
-.I watermark
-bit flag.
-.IP
-.I wakeup_events
-counts only
-.B PERF_RECORD_SAMPLE
-record types.
-To receive overflow notification for all
-.B PERF_RECORD
-types choose watermark and set
-.I wakeup_watermark
-to 1.
-.IP
-Prior to Linux 3.0, setting
-.\" commit f506b3dc0ec454a16d40cab9ee5d75435b39dc50
-.I wakeup_events
-to 0 resulted in no overflow notifications;
-more recent kernels treat 0 the same as 1.
-.TP
-.IR bp_type " (since Linux 2.6.33)"
-.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
-This chooses the breakpoint type.
-It is one of:
-.RS
-.TP
-.B HW_BREAKPOINT_EMPTY
-No breakpoint.
-.TP
-.B HW_BREAKPOINT_R
-Count when we read the memory location.
-.TP
-.B HW_BREAKPOINT_W
-Count when we write the memory location.
-.TP
-.B HW_BREAKPOINT_RW
-Count when we read or write the memory location.
-.TP
-.B HW_BREAKPOINT_X
-Count when we execute code at the memory location.
-.P
-The values can be combined via a bitwise or, but the
-combination of
-.B HW_BREAKPOINT_R
-or
-.B HW_BREAKPOINT_W
-with
-.B HW_BREAKPOINT_X
-is not allowed.
-.RE
-.TP
-.IR bp_addr " (since Linux 2.6.33)"
-.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
-This is the address of the breakpoint.
-For execution breakpoints, this is the memory address of the instruction
-of interest; for read and write breakpoints, it is the memory address
-of the memory location of interest.
-.TP
-.IR config1 " (since Linux 2.6.39)"
-.\" commit a7e3ed1e470116c9d12c2f778431a481a6be8ab6
-.I config1
-is used for setting events that need an extra register or otherwise
-do not fit in the regular config field.
-Raw OFFCORE_EVENTS on Nehalem/Westmere/SandyBridge use this field
-on Linux 3.3 and later kernels.
-.TP
-.IR bp_len " (since Linux 2.6.33)"
-.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
-.I bp_len
-is the length of the breakpoint being measured if
-.I type
-is
-.BR PERF_TYPE_BREAKPOINT .
-Options are
-.BR HW_BREAKPOINT_LEN_1 ,
-.BR HW_BREAKPOINT_LEN_2 ,
-.BR HW_BREAKPOINT_LEN_4 ,
-and
-.BR HW_BREAKPOINT_LEN_8 .
-For an execution breakpoint, set this to
-.IR sizeof(long) .
-.TP
-.IR config2 " (since Linux 2.6.39)"
-.\" commit a7e3ed1e470116c9d12c2f778431a481a6be8ab6
-.I config2
-is a further extension of the
-.I config1
-field.
-.TP
-.IR branch_sample_type " (since Linux 3.4)"
-.\" commit bce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e
-If
-.B PERF_SAMPLE_BRANCH_STACK
-is enabled, then this specifies what branches to include
-in the branch record.
-.IP
-The first part of the value is the privilege level, which
-is a combination of one or more of the values listed below.
-If the user does not set privilege level explicitly, the kernel
-will use the event's privilege level.
-Event and branch privilege levels do not have to match.
-.RS
-.TP
-.B PERF_SAMPLE_BRANCH_USER
-Branch target is in user space.
-.TP
-.B PERF_SAMPLE_BRANCH_KERNEL
-Branch target is in kernel space.
-.TP
-.B PERF_SAMPLE_BRANCH_HV
-Branch target is in hypervisor.
-.TP
-.B PERF_SAMPLE_BRANCH_PLM_ALL
-A convenience value that is the three preceding values ORed together.
-.P
-In addition to the privilege value, at least one of the
-following bits must be set.
-.TP
-.B PERF_SAMPLE_BRANCH_ANY
-Any branch type.
-.TP
-.B PERF_SAMPLE_BRANCH_ANY_CALL
-Any call branch (includes direct calls, indirect calls, and far jumps).
-.TP
-.B PERF_SAMPLE_BRANCH_IND_CALL
-Indirect calls.
-.TP
-.BR PERF_SAMPLE_BRANCH_CALL " (since Linux 4.4)"
-.\" commit c229bf9dc179d2023e185c0f705bdf68484c1e73
-Direct calls.
-.TP
-.B PERF_SAMPLE_BRANCH_ANY_RETURN
-Any return branch.
-.TP
-.BR PERF_SAMPLE_BRANCH_IND_JUMP " (since Linux 4.2)"
-.\" commit c9fdfa14c3792c0160849c484e83aa57afd80ccc
-Indirect jumps.
-.TP
-.BR PERF_SAMPLE_BRANCH_COND " (since Linux 3.16)"
-.\" commit bac52139f0b7ab31330e98fd87fc5a2664951050
-Conditional branches.
-.TP
-.BR PERF_SAMPLE_BRANCH_ABORT_TX " (since Linux 3.11)"
-.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
-Transactional memory aborts.
-.TP
-.BR PERF_SAMPLE_BRANCH_IN_TX " (since Linux 3.11)"
-.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
-Branch in transactional memory transaction.
-.TP
-.BR PERF_SAMPLE_BRANCH_NO_TX " (since Linux 3.11)"
-.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
-Branch not in transactional memory transaction.
-.TP
-.BR PERF_SAMPLE_BRANCH_CALL_STACK " (since Linux 4.1)"
-.\" commit 2c44b1936bb3b135a3fac8b3493394d42e51cf70
-Branch is part of a hardware-generated call stack.
-This requires hardware support, currently only found
-on Intel x86 Haswell or newer.
-.RE
-.TP
-.IR sample_regs_user " (since Linux 3.7)"
-.\" commit 4018994f3d8785275ef0e7391b75c3462c029e56
-This bit mask defines the set of user CPU registers to dump on samples.
-The layout of the register mask is architecture-specific and
-is described in the kernel header file
-.IR arch/ARCH/include/uapi/asm/perf_regs.h .
-.TP
-.IR sample_stack_user " (since Linux 3.7)"
-.\" commit c5ebcedb566ef17bda7b02686e0d658a7bb42ee7
-This defines the size of the user stack to dump if
-.B PERF_SAMPLE_STACK_USER
-is specified.
-.TP
-.IR clockid " (since Linux 4.1)"
-.\" commit 34f439278cef7b1177f8ce24f9fc81dfc6221d3b
-If
-.I use_clockid
-is set, then this field selects which internal Linux timer to
-use for timestamps.
-The available timers are defined in
-.IR linux/time.h ,
-with
-.BR CLOCK_MONOTONIC ,
-.BR CLOCK_MONOTONIC_RAW ,
-.BR CLOCK_REALTIME ,
-.BR CLOCK_BOOTTIME ,
-and
-.B CLOCK_TAI
-currently supported.
-.TP
-.IR aux_watermark " (since Linux 4.1)"
-.\" commit 1a5941312414c71dece6717da9a0fa1303127afa
-This specifies how much data is required to trigger a
-.B PERF_RECORD_AUX
-sample.
-.TP
-.IR sample_max_stack " (since Linux 4.8)"
-.\" commit 97c79a38cd454602645f0470ffb444b3b75ce574
-When
-.I sample_type
-includes
-.BR PERF_SAMPLE_CALLCHAIN ,
-this field specifies how many stack frames to report when
-generating the callchain.
-.TP
-.IR aux_sample_size " (since Linux 5.5)"
-.\" commit a4faf00d994c40e64f656805ac375c65e324eefb
-When
-.B PERF_SAMPLE_AUX
-flag is set,
-specify the desired size of AUX data.
-Note that the data returned may be smaller than the specified size.
-.TP
-.IR sig_data " (since Linux 5.13)"
-.\" commit 97ba62b278674293762c3d91f724f1bb922f04e0
-This data will be copied to the user's signal handler
-(through
-.I si_perf
-in the
-.IR siginfo_t )
-to disambiguate which event triggered the signal.
-.SS Reading results
-Once a
-.BR perf_event_open ()
-file descriptor has been opened, the values
-of the events can be read from the file descriptor.
-The values that are there are specified by the
-.I read_format
-field in the
-.I attr
-structure at open time.
-.P
-If you attempt to read into a buffer that is not big enough to hold the
-data, the error
-.B ENOSPC
-results.
-.P
-Here is the layout of the data returned by a read:
-.IP \[bu] 3
-If
-.B PERF_FORMAT_GROUP
-was specified to allow reading all events in a group at once:
-.IP
-.in +4n
-.EX
-struct read_format {
- u64 nr; /* The number of events */
- u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
- u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
- struct {
- u64 value; /* The value of the event */
- u64 id; /* if PERF_FORMAT_ID */
- u64 lost; /* if PERF_FORMAT_LOST */
- } values[nr];
-};
-.EE
-.in
-.IP \[bu]
-If
-.B PERF_FORMAT_GROUP
-was
-.I not
-specified:
-.IP
-.in +4n
-.EX
-struct read_format {
- u64 value; /* The value of the event */
- u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
- u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
- u64 id; /* if PERF_FORMAT_ID */
- u64 lost; /* if PERF_FORMAT_LOST */
-};
-.EE
-.in
-.P
-The values read are as follows:
-.TP
-.I nr
-The number of events in this file descriptor.
-Available only if
-.B PERF_FORMAT_GROUP
-was specified.
-.TP
-.I time_enabled
-.TQ
-.I time_running
-Total time the event was enabled and running.
-Normally these values are the same.
-Multiplexing happens if the number of events is more than the
-number of available PMU counter slots.
-In that case the events run only part of the time and the
-.I time_enabled
-and
-.I time_running
-values can be used to scale an estimated value for the count.
-.TP
-.I value
-An unsigned 64-bit value containing the counter result.
-.TP
-.I id
-A globally unique value for this particular event; only present if
-.B PERF_FORMAT_ID
-was specified in
-.IR read_format .
-.TP
-.I lost
-The number of lost samples of this event;
-only present if
-.B PERF_FORMAT_LOST
-was specified in
-.IR read_format .
-.SS MMAP layout
-When using
-.BR perf_event_open ()
-in sampled mode, asynchronous events
-(like counter overflow or
-.B PROT_EXEC
-mmap tracking)
-are logged into a ring-buffer.
-This ring-buffer is created and accessed through
-.BR mmap (2).
-.P
-The mmap size should be 1+2\[ha]n pages, where the first page is a
-metadata page
-.RI ( "struct perf_event_mmap_page" )
-that contains various
-bits of information such as where the ring-buffer head is.
-.P
-Before Linux 2.6.39, there is a bug that means you must allocate an mmap
-ring buffer when sampling even if you do not plan to access it.
-.P
-The structure of the first metadata mmap page is as follows:
-.P
-.in +4n
-.EX
-struct perf_event_mmap_page {
- __u32 version; /* version number of this structure */
- __u32 compat_version; /* lowest version this is compat with */
- __u32 lock; /* seqlock for synchronization */
- __u32 index; /* hardware counter identifier */
- __s64 offset; /* add to hardware counter value */
- __u64 time_enabled; /* time event active */
- __u64 time_running; /* time event on CPU */
- union {
- __u64 capabilities;
- struct {
- __u64 cap_usr_time / cap_usr_rdpmc / cap_bit0 : 1,
- cap_bit0_is_deprecated : 1,
- cap_user_rdpmc : 1,
- cap_user_time : 1,
- cap_user_time_zero : 1,
- };
- };
- __u16 pmc_width;
- __u16 time_shift;
- __u32 time_mult;
- __u64 time_offset;
- __u64 __reserved[120]; /* Pad to 1 k */
- __u64 data_head; /* head in the data section */
- __u64 data_tail; /* user\-space written tail */
- __u64 data_offset; /* where the buffer starts */
- __u64 data_size; /* data buffer size */
- __u64 aux_head;
- __u64 aux_tail;
- __u64 aux_offset;
- __u64 aux_size;
-\&
-};
-.EE
-.in
-.P
-The following list describes the fields in the
-.I perf_event_mmap_page
-structure in more detail:
-.TP
-.I version
-Version number of this structure.
-.TP
-.I compat_version
-The lowest version this is compatible with.
-.TP
-.I lock
-A seqlock for synchronization.
-.TP
-.I index
-A unique hardware counter identifier.
-.TP
-.I offset
-When using rdpmc for reads this offset value
-must be added to the one returned by rdpmc to get
-the current total event count.
-.TP
-.I time_enabled
-Time the event was active.
-.TP
-.I time_running
-Time the event was running.
-.TP
-.IR cap_usr_time " / " cap_usr_rdpmc " / " cap_bit0 " (since Linux 3.4)"
-.\" commit c7206205d00ab375839bd6c7ddb247d600693c09
-There was a bug in the definition of
-.I cap_usr_time
-and
-.I cap_usr_rdpmc
-from Linux 3.4 until Linux 3.11.
-Both bits were defined to point to the same location, so it was
-impossible to know if
-.I cap_usr_time
-or
-.I cap_usr_rdpmc
-were actually set.
-.IP
-Starting with Linux 3.12, these are renamed to
-.\" commit fa7315871046b9a4c48627905691dbde57e51033
-.I cap_bit0
-and you should use the
-.I cap_user_time
-and
-.I cap_user_rdpmc
-fields instead.
-.TP
-.IR cap_bit0_is_deprecated " (since Linux 3.12)"
-.\" commit fa7315871046b9a4c48627905691dbde57e51033
-If set, this bit indicates that the kernel supports
-the properly separated
-.I cap_user_time
-and
-.I cap_user_rdpmc
-bits.
-.IP
-If not set, it indicates an older kernel where
-.I cap_usr_time
-and
-.I cap_usr_rdpmc
-map to the same bit and thus both features should
-be used with caution.
-.TP
-.IR cap_user_rdpmc " (since Linux 3.12)"
-.\" commit fa7315871046b9a4c48627905691dbde57e51033
-If the hardware supports user-space read of performance counters
-without syscall (this is the "rdpmc" instruction on x86), then
-the following code can be used to do a read:
-.IP
-.in +4n
-.EX
-u32 seq, time_mult, time_shift, idx, width;
-u64 count, enabled, running;
-u64 cyc, time_offset;
-\&
-do {
- seq = pc\->lock;
- barrier();
- enabled = pc\->time_enabled;
- running = pc\->time_running;
-\&
- if (pc\->cap_user_time && enabled != running) {
- cyc = rdtsc();
- time_offset = pc\->time_offset;
- time_mult = pc\->time_mult;
- time_shift = pc\->time_shift;
- }
-\&
- idx = pc\->index;
- count = pc\->offset;
-\&
- if (pc\->cap_user_rdpmc && idx) {
- width = pc\->pmc_width;
- count += rdpmc(idx \- 1);
- }
-\&
- barrier();
-} while (pc\->lock != seq);
-.EE
-.in
-.TP
-.IR cap_user_time " (since Linux 3.12)"
-.\" commit fa7315871046b9a4c48627905691dbde57e51033
-This bit indicates the hardware has a constant, nonstop
-timestamp counter (TSC on x86).
-.TP
-.IR cap_user_time_zero " (since Linux 3.12)"
-.\" commit fa7315871046b9a4c48627905691dbde57e51033
-Indicates the presence of
-.I time_zero
-which allows mapping timestamp values to
-the hardware clock.
-.TP
-.I pmc_width
-If
-.IR cap_user_rdpmc ,
-this field provides the bit-width of the value
-read using the rdpmc or equivalent instruction.
-This can be used to sign extend the result like:
-.IP
-.in +4n
-.EX
-pmc <<= 64 \- pmc_width;
-pmc >>= 64 \- pmc_width; // signed shift right
-count += pmc;
-.EE
-.in
-.TP
-.I time_shift
-.TQ
-.I time_mult
-.TQ
-.I time_offset
-.IP
-If
-.IR cap_user_time ,
-these fields can be used to compute the time
-delta since
-.I time_enabled
-(in nanoseconds) using rdtsc or similar.
-.IP
-.in +4n
-.EX
-u64 quot, rem;
-u64 delta;
-\&
-quot = cyc >> time_shift;
-rem = cyc & (((u64)1 << time_shift) \- 1);
-delta = time_offset + quot * time_mult +
- ((rem * time_mult) >> time_shift);
-.EE
-.in
-.IP
-Where
-.IR time_offset ,
-.IR time_mult ,
-.IR time_shift ,
-and
-.I cyc
-are read in the
-seqcount loop described above.
-This delta can then be added to
-enabled and possibly running (if idx), improving the scaling:
-.IP
-.in +4n
-.EX
-enabled += delta;
-if (idx)
- running += delta;
-quot = count / running;
-rem = count % running;
-count = quot * enabled + (rem * enabled) / running;
-.EE
-.in
-.TP
-.IR time_zero " (since Linux 3.12)"
-.\" commit fa7315871046b9a4c48627905691dbde57e51033
-.IP
-If
-.I cap_user_time_zero
-is set, then the hardware clock (the TSC timestamp counter on x86)
-can be calculated from the
-.IR time_zero ,
-.IR time_mult ,
-and
-.I time_shift
-values:
-.IP
-.in +4n
-.EX
-time = timestamp \- time_zero;
-quot = time / time_mult;
-rem = time % time_mult;
-cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
-.EE
-.in
-.IP
-And vice versa:
-.IP
-.in +4n
-.EX
-quot = cyc >> time_shift;
-rem = cyc & (((u64)1 << time_shift) \- 1);
-timestamp = time_zero + quot * time_mult +
- ((rem * time_mult) >> time_shift);
-.EE
-.in
-.TP
-.I data_head
-This points to the head of the data section.
-The value continuously increases; it does not wrap.
-The value needs to be manually wrapped by the size of the mmap buffer
-before accessing the samples.
-.IP
-On SMP-capable platforms, after reading the
-.I data_head
-value,
-user space should issue an rmb().
-.TP
-.I data_tail
-When the mapping is
-.BR PROT_WRITE ,
-the
-.I data_tail
-value should be written by user space to reflect the last read data.
-In this case, the kernel will not overwrite unread data.
-.TP
-.IR data_offset " (since Linux 4.1)"
-.\" commit e8c6deac69629c0cb97c3d3272f8631ef17f8f0f
-Contains the offset of the location in the mmap buffer
-where perf sample data begins.
-.TP
-.IR data_size " (since Linux 4.1)"
-.\" commit e8c6deac69629c0cb97c3d3272f8631ef17f8f0f
-Contains the size of the perf sample region within
-the mmap buffer.
-.TP
-.I aux_head
-.TQ
-.I aux_tail
-.TQ
-.I aux_offset
-.TQ
-.IR aux_size " (since Linux 4.1)"
-.\" commit 45bfb2e50471abbbfd83d40d28c986078b0d24ff
-The AUX region allows
-.BR mmap (2)-ing
-a separate sample buffer for
-high-bandwidth data streams (separate from the main perf sample buffer).
-An example of a high-bandwidth stream is instruction tracing support,
-as is found in newer Intel processors.
-.IP
-To set up an AUX area, first
-.I aux_offset
-needs to be set with an offset greater than
-.IR data_offset + data_size
-and
-.I aux_size
-needs to be set to the desired buffer size.
-The desired offset and size must be page aligned, and the size
-must be a power of two.
-These values are then passed to mmap in order to map the AUX buffer.
-Pages in the AUX buffer are included as part of the
-.B RLIMIT_MEMLOCK
-resource limit (see
-.BR setrlimit (2)),
-and also as part of the
-.I perf_event_mlock_kb
-allowance.
-.IP
-By default, the AUX buffer will be truncated if it will not fit
-in the available space in the ring buffer.
-If the AUX buffer is mapped as a read only buffer, then it will
-operate in ring buffer mode where old data will be overwritten
-by new.
-In overwrite mode, it might not be possible to infer where the
-new data began, and it is the consumer's job to disable
-measurement while reading to avoid possible data races.
-.IP
-The
-.I aux_head
-and
-.I aux_tail
-ring buffer pointers have the same behavior and ordering
-rules as the previously described
-.I data_head
-and
-.IR data_tail .
-.P
-The following 2^n ring-buffer pages have the layout described below.
-.P
-If
-.I perf_event_attr.sample_id_all
-is set, then all event types will
-have the sample_type selected fields related to where/when (identity)
-an event took place (TID, TIME, ID, CPU, STREAM_ID) described in
-.B PERF_RECORD_SAMPLE
-below; these fields will be stashed just after the
-.I perf_event_header
-and the fields already present for the existing
-fields, that is, at the end of the payload.
-This allows a newer perf.data
-file to be supported by older perf tools, with the new optional
-fields being ignored.
-.P
-The mmap values start with a header:
-.P
-.in +4n
-.EX
-struct perf_event_header {
- __u32 type;
- __u16 misc;
- __u16 size;
-};
-.EE
-.in
-.P
-Below, we describe the
-.I perf_event_header
-fields in more detail.
-For ease of reading,
-the fields with shorter descriptions are presented first.
-.TP
-.I size
-This indicates the size of the record.
-.TP
-.I misc
-The
-.I misc
-field contains additional information about the sample.
-.IP
-The CPU mode can be determined from this value by masking with
-.B PERF_RECORD_MISC_CPUMODE_MASK
-and looking for one of the following (note these are not
-bit masks, only one can be set at a time):
-.RS
-.TP
-.B PERF_RECORD_MISC_CPUMODE_UNKNOWN
-Unknown CPU mode.
-.TP
-.B PERF_RECORD_MISC_KERNEL
-Sample happened in the kernel.
-.TP
-.B PERF_RECORD_MISC_USER
-Sample happened in user code.
-.TP
-.B PERF_RECORD_MISC_HYPERVISOR
-Sample happened in the hypervisor.
-.TP
-.BR PERF_RECORD_MISC_GUEST_KERNEL " (since Linux 2.6.35)"
-.\" commit 39447b386c846bbf1c56f6403c5282837486200f
-Sample happened in the guest kernel.
-.TP
-.BR PERF_RECORD_MISC_GUEST_USER " (since Linux 2.6.35)"
-.\" commit 39447b386c846bbf1c56f6403c5282837486200f
-Sample happened in guest user code.
-.RE
-.P
-.RS
-Since the following three statuses are generated by
-different record types, they alias to the same bit:
-.TP
-.BR PERF_RECORD_MISC_MMAP_DATA " (since Linux 3.10)"
-.\" commit 2fe85427e3bf65d791700d065132772fc26e4d75
-This is set when the mapping is not executable;
-otherwise the mapping is executable.
-.TP
-.BR PERF_RECORD_MISC_COMM_EXEC " (since Linux 3.16)"
-.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
-This is set for a
-.B PERF_RECORD_COMM
-record on kernels more recent than Linux 3.16
-if a process name change was caused by an
-.BR execve (2)
-system call.
-.TP
-.BR PERF_RECORD_MISC_SWITCH_OUT " (since Linux 4.3)"
-.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4
-When a
-.B PERF_RECORD_SWITCH
-or
-.B PERF_RECORD_SWITCH_CPU_WIDE
-record is generated, this bit indicates that the
-context switch is away from the current process
-(instead of into the current process).
-.RE
-.P
-.RS
-In addition, the following bits can be set:
-.TP
-.B PERF_RECORD_MISC_EXACT_IP
-This indicates that the content of
-.B PERF_SAMPLE_IP
-points
-to the actual instruction that triggered the event.
-See also
-.IR perf_event_attr.precise_ip .
-.TP
-.BR PERF_RECORD_MISC_SWITCH_OUT_PREEMPT " (since Linux 4.17)"
-.\" commit 101592b4904ecf6b8ed2a4784d41d180319d95a1
-When a
-.B PERF_RECORD_SWITCH
-or
-.B PERF_RECORD_SWITCH_CPU_WIDE
-record is generated,
-this indicates the context switch was a preemption.
-.TP
-.BR PERF_RECORD_MISC_MMAP_BUILD_ID " (since Linux 5.12)"
-.\" commit 88a16a1309333e43d328621ece3e9fa37027e8eb
-This indicates that the content of
-.B PERF_RECORD_MMAP2
-contains build-ID data instead of device major and minor numbers
-as well as the inode number.
-.TP
-.BR PERF_RECORD_MISC_EXT_RESERVED " (since Linux 2.6.35)"
-.\" commit 1676b8a077c352085d52578fb4f29350b58b6e74
-This indicates there is extended data available (currently not used).
-.TP
-.B PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT
-.\" commit 930e6fcd2bcce9bcd9d4aa7e755678d33f3fe6f4
-This bit is not set by the kernel.
-It is reserved for the user-space perf utility to indicate that
-.IR /proc/ pid /maps
-parsing was taking too long and was stopped, and thus the mmap
-records may be truncated.
-.RE
-.TP
-.I type
-The
-.I type
-value is one of the below.
-The values in the corresponding record (that follows the header)
-depend on the
-.I type
-selected as shown.
-.RS
-.TP 4
-.B PERF_RECORD_MMAP
-The MMAP events record the
-.B PROT_EXEC
-mappings so that we can correlate
-user-space IPs to code.
-They have the following structure:
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 pid, tid;
- u64 addr;
- u64 len;
- u64 pgoff;
- char filename[];
-};
-.EE
-.in
-.RS
-.TP
-.I pid
-is the process ID.
-.TP
-.I tid
-is the thread ID.
-.TP
-.I addr
-is the address of the allocated memory.
-.TP
-.I len
-is the length of the allocated memory.
-.TP
-.I pgoff
-is the page offset of the allocated memory.
-.TP
-.I filename
-is a string describing the backing of the allocated memory.
-.RE
-.TP
-.B PERF_RECORD_LOST
-This record indicates when events are lost.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u64 id;
- u64 lost;
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I id
-is the unique event ID for the samples that were lost.
-.TP
-.I lost
-is the number of events that were lost.
-.RE
-.TP
-.B PERF_RECORD_COMM
-This record indicates a change in the process name.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 pid;
- u32 tid;
- char comm[];
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I pid
-is the process ID.
-.TP
-.I tid
-is the thread ID.
-.TP
-.I comm
-is a string containing the new name of the process.
-.RE
-.TP
-.B PERF_RECORD_EXIT
-This record indicates a process exit event.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 pid, ppid;
- u32 tid, ptid;
- u64 time;
- struct sample_id sample_id;
-};
-.EE
-.in
-.TP
-.B PERF_RECORD_THROTTLE
-.TQ
-.B PERF_RECORD_UNTHROTTLE
-This record indicates a throttle/unthrottle event.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u64 time;
- u64 id;
- u64 stream_id;
- struct sample_id sample_id;
-};
-.EE
-.in
-.TP
-.B PERF_RECORD_FORK
-This record indicates a fork event.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 pid, ppid;
- u32 tid, ptid;
- u64 time;
- struct sample_id sample_id;
-};
-.EE
-.in
-.TP
-.B PERF_RECORD_READ
-This record indicates a read event.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 pid, tid;
- struct read_format values;
- struct sample_id sample_id;
-};
-.EE
-.in
-.TP
-.B PERF_RECORD_SAMPLE
-This record indicates a sample.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u64 sample_id; /* if PERF_SAMPLE_IDENTIFIER */
- u64 ip; /* if PERF_SAMPLE_IP */
- u32 pid, tid; /* if PERF_SAMPLE_TID */
- u64 time; /* if PERF_SAMPLE_TIME */
- u64 addr; /* if PERF_SAMPLE_ADDR */
- u64 id; /* if PERF_SAMPLE_ID */
- u64 stream_id; /* if PERF_SAMPLE_STREAM_ID */
- u32 cpu, res; /* if PERF_SAMPLE_CPU */
- u64 period; /* if PERF_SAMPLE_PERIOD */
- struct read_format v;
- /* if PERF_SAMPLE_READ */
- u64 nr; /* if PERF_SAMPLE_CALLCHAIN */
- u64 ips[nr]; /* if PERF_SAMPLE_CALLCHAIN */
- u32 size; /* if PERF_SAMPLE_RAW */
- char data[size]; /* if PERF_SAMPLE_RAW */
- u64 bnr; /* if PERF_SAMPLE_BRANCH_STACK */
- struct perf_branch_entry lbr[bnr];
- /* if PERF_SAMPLE_BRANCH_STACK */
- u64 abi; /* if PERF_SAMPLE_REGS_USER */
- u64 regs[weight(mask)];
- /* if PERF_SAMPLE_REGS_USER */
- u64 size; /* if PERF_SAMPLE_STACK_USER */
- char data[size]; /* if PERF_SAMPLE_STACK_USER */
- u64 dyn_size; /* if PERF_SAMPLE_STACK_USER &&
- size != 0 */
- union perf_sample_weight weight;
- /* if PERF_SAMPLE_WEIGHT */
- /* || PERF_SAMPLE_WEIGHT_STRUCT */
- u64 data_src; /* if PERF_SAMPLE_DATA_SRC */
- u64 transaction; /* if PERF_SAMPLE_TRANSACTION */
- u64 abi; /* if PERF_SAMPLE_REGS_INTR */
- u64 regs[weight(mask)];
- /* if PERF_SAMPLE_REGS_INTR */
- u64 phys_addr; /* if PERF_SAMPLE_PHYS_ADDR */
- u64 cgroup; /* if PERF_SAMPLE_CGROUP */
- u64 data_page_size;
- /* if PERF_SAMPLE_DATA_PAGE_SIZE */
- u64 code_page_size;
- /* if PERF_SAMPLE_CODE_PAGE_SIZE */
- u64 size; /* if PERF_SAMPLE_AUX */
- char data[size]; /* if PERF_SAMPLE_AUX */
-};
-.EE
-.in
-.RS 4
-.TP 4
-.I sample_id
-If
-.B PERF_SAMPLE_IDENTIFIER
-is enabled, a 64-bit unique ID is included.
-This is a duplication of the
-.B PERF_SAMPLE_ID
-.I id
-value, but included at the beginning of the sample
-so parsers can easily obtain the value.
-.TP
-.I ip
-If
-.B PERF_SAMPLE_IP
-is enabled, then a 64-bit instruction
-pointer value is included.
-.TP
-.I pid
-.TQ
-.I tid
-If
-.B PERF_SAMPLE_TID
-is enabled, then a 32-bit process ID
-and 32-bit thread ID are included.
-.TP
-.I time
-If
-.B PERF_SAMPLE_TIME
-is enabled, then a 64-bit timestamp
-is included.
-This is obtained via local_clock() which is a hardware timestamp
-if available and the jiffies value if not.
-.TP
-.I addr
-If
-.B PERF_SAMPLE_ADDR
-is enabled, then a 64-bit address is included.
-This is usually the address of a tracepoint,
-breakpoint, or software event; otherwise the value is 0.
-.TP
-.I id
-If
-.B PERF_SAMPLE_ID
-is enabled, a 64-bit unique ID is included.
-If the event is a member of an event group, the group leader ID is returned.
-This ID is the same as the one returned by
-.BR PERF_FORMAT_ID .
-.TP
-.I stream_id
-If
-.B PERF_SAMPLE_STREAM_ID
-is enabled, a 64-bit unique ID is included.
-Unlike
-.B PERF_SAMPLE_ID
-the actual ID is returned, not the group leader.
-This ID is the same as the one returned by
-.BR PERF_FORMAT_ID .
-.TP
-.I cpu
-.TQ
-.I res
-If
-.B PERF_SAMPLE_CPU
-is enabled, this is a 32-bit value indicating
-which CPU was being used, in addition to a reserved (unused)
-32-bit value.
-.TP
-.I period
-If
-.B PERF_SAMPLE_PERIOD
-is enabled, a 64-bit value indicating
-the current sampling period is written.
-.TP
-.I v
-If
-.B PERF_SAMPLE_READ
-is enabled, a structure of type read_format
-is included which has values for all events in the event group.
-The values included depend on the
-.I read_format
-value used at
-.BR perf_event_open ()
-time.
-.TP
-.I nr
-.TQ
-.I ips[nr]
-If
-.B PERF_SAMPLE_CALLCHAIN
-is enabled, then a 64-bit number is included
-which indicates how many 64-bit instruction pointers will
-follow.
-This is the current callchain.
-.TP
-.I size
-.TQ
-.I data[size]
-If
-.B PERF_SAMPLE_RAW
-is enabled, then a 32-bit value indicating size
-is included followed by an array of 8-bit values of length size.
-The values are padded with 0 to have 64-bit alignment.
-.IP
-This RAW record data is opaque with respect to the ABI.
-The ABI doesn't make any promises with respect to the stability
-of its content, it may vary depending
-on event, hardware, and kernel version.
-.TP
-.I bnr
-.TQ
-.I lbr[bnr]
-If
-.B PERF_SAMPLE_BRANCH_STACK
-is enabled, then a 64-bit value indicating
-the number of records is included, followed by
-.I bnr
-.I perf_branch_entry
-structures which each include the fields:
-.RS
-.TP
-.I from
-This indicates the source instruction (may not be a branch).
-.TP
-.I to
-The branch target.
-.TP
-.I mispred
-The branch target was mispredicted.
-.TP
-.I predicted
-The branch target was predicted.
-.TP
-.IR in_tx " (since Linux 3.11)"
-.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
-The branch was in a transactional memory transaction.
-.TP
-.IR abort " (since Linux 3.11)"
-.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
-The branch was in an aborted transactional memory transaction.
-.TP
-.IR cycles " (since Linux 4.3)"
-.\" commit 71ef3c6b9d4665ee7afbbe4c208a98917dcfc32f
-This reports the number of cycles elapsed since the
-previous branch stack update.
-.P
-The entries are from most to least recent, so the first entry
-has the most recent branch.
-.P
-Support for
-.IR mispred ,
-.IR predicted ,
-and
-.I cycles
-is optional; if not supported, those
-values will be 0.
-.P
-The type of branches recorded is specified by the
-.I branch_sample_type
-field.
-.RE
-.TP
-.I abi
-.TQ
-.I regs[weight(mask)]
-If
-.B PERF_SAMPLE_REGS_USER
-is enabled, then the user CPU registers are recorded.
-.IP
-The
-.I abi
-field is one of
-.BR PERF_SAMPLE_REGS_ABI_NONE ,
-.BR PERF_SAMPLE_REGS_ABI_32 ,
-or
-.BR PERF_SAMPLE_REGS_ABI_64 .
-.IP
-The
-.I regs
-field is an array of the CPU registers that were specified by
-the
-.I sample_regs_user
-attr field.
-The number of values is the number of bits set in the
-.I sample_regs_user
-bit mask.
-.TP
-.I size
-.TQ
-.I data[size]
-.TQ
-.I dyn_size
-If
-.B PERF_SAMPLE_STACK_USER
-is enabled, then the user stack is recorded.
-This can be used to generate stack backtraces.
-.I size
-is the size requested by the user in
-.I sample_stack_user
-or else the maximum record size.
-.I data
-is the stack data (a raw dump of the memory pointed to by the
-stack pointer at the time of sampling).
-.I dyn_size
-is the amount of data actually dumped (can be less than
-.IR size ).
-Note that
-.I dyn_size
-is omitted if
-.I size
-is 0.
-.TP
-.I weight
-If
-.B PERF_SAMPLE_WEIGHT
-or
-.B PERF_SAMPLE_WEIGHT_STRUCT
-is enabled, then a 64-bit value provided by the hardware
-is recorded that indicates how costly the event was.
-This allows expensive events to stand out more clearly
-in profiles.
-.TP
-.I data_src
-If
-.B PERF_SAMPLE_DATA_SRC
-is enabled, then a 64-bit value is recorded that is made up of
-the following fields:
-.RS
-.TP 4
-.I mem_op
-Type of opcode, a bitwise combination of:
-.IP
-.PD 0
-.RS
-.TP 24
-.B PERF_MEM_OP_NA
-Not available
-.TP
-.B PERF_MEM_OP_LOAD
-Load instruction
-.TP
-.B PERF_MEM_OP_STORE
-Store instruction
-.TP
-.B PERF_MEM_OP_PFETCH
-Prefetch
-.TP
-.B PERF_MEM_OP_EXEC
-Executable code
-.RE
-.PD
-.TP
-.I mem_lvl
-Memory hierarchy level hit or miss, a bitwise combination of
-the following, shifted left by
-.BR PERF_MEM_LVL_SHIFT :
-.IP
-.PD 0
-.RS
-.TP 24
-.B PERF_MEM_LVL_NA
-Not available
-.TP
-.B PERF_MEM_LVL_HIT
-Hit
-.TP
-.B PERF_MEM_LVL_MISS
-Miss
-.TP
-.B PERF_MEM_LVL_L1
-Level 1 cache
-.TP
-.B PERF_MEM_LVL_LFB
-Line fill buffer
-.TP
-.B PERF_MEM_LVL_L2
-Level 2 cache
-.TP
-.B PERF_MEM_LVL_L3
-Level 3 cache
-.TP
-.B PERF_MEM_LVL_LOC_RAM
-Local DRAM
-.TP
-.B PERF_MEM_LVL_REM_RAM1
-Remote DRAM 1 hop
-.TP
-.B PERF_MEM_LVL_REM_RAM2
-Remote DRAM 2 hops
-.TP
-.B PERF_MEM_LVL_REM_CCE1
-Remote cache 1 hop
-.TP
-.B PERF_MEM_LVL_REM_CCE2
-Remote cache 2 hops
-.TP
-.B PERF_MEM_LVL_IO
-I/O memory
-.TP
-.B PERF_MEM_LVL_UNC
-Uncached memory
-.RE
-.PD
-.TP
-.I mem_snoop
-Snoop mode, a bitwise combination of the following, shifted left by
-.BR PERF_MEM_SNOOP_SHIFT :
-.IP
-.PD 0
-.RS
-.TP 24
-.B PERF_MEM_SNOOP_NA
-Not available
-.TP
-.B PERF_MEM_SNOOP_NONE
-No snoop
-.TP
-.B PERF_MEM_SNOOP_HIT
-Snoop hit
-.TP
-.B PERF_MEM_SNOOP_MISS
-Snoop miss
-.TP
-.B PERF_MEM_SNOOP_HITM
-Snoop hit modified
-.RE
-.PD
-.TP
-.I mem_lock
-Lock instruction, a bitwise combination of the following, shifted left by
-.BR PERF_MEM_LOCK_SHIFT :
-.IP
-.PD 0
-.RS
-.TP 24
-.B PERF_MEM_LOCK_NA
-Not available
-.TP
-.B PERF_MEM_LOCK_LOCKED
-Locked transaction
-.RE
-.PD
-.TP
-.I mem_dtlb
-TLB access hit or miss, a bitwise combination of the following, shifted
-left by
-.BR PERF_MEM_TLB_SHIFT :
-.IP
-.PD 0
-.RS
-.TP 24
-.B PERF_MEM_TLB_NA
-Not available
-.TP
-.B PERF_MEM_TLB_HIT
-Hit
-.TP
-.B PERF_MEM_TLB_MISS
-Miss
-.TP
-.B PERF_MEM_TLB_L1
-Level 1 TLB
-.TP
-.B PERF_MEM_TLB_L2
-Level 2 TLB
-.TP
-.B PERF_MEM_TLB_WK
-Hardware walker
-.TP
-.B PERF_MEM_TLB_OS
-OS fault handler
-.RE
-.PD
-.RE
-.TP
-.I transaction
-If the
-.B PERF_SAMPLE_TRANSACTION
-flag is set, then a 64-bit field is recorded describing
-the sources of any transactional memory aborts.
-.IP
-The field is a bitwise combination of the following values:
-.RS
-.TP
-.B PERF_TXN_ELISION
-Abort from an elision type transaction (Intel-CPU-specific).
-.TP
-.B PERF_TXN_TRANSACTION
-Abort from a generic transaction.
-.TP
-.B PERF_TXN_SYNC
-Synchronous abort (related to the reported instruction).
-.TP
-.B PERF_TXN_ASYNC
-Asynchronous abort (not related to the reported instruction).
-.TP
-.B PERF_TXN_RETRY
-Retryable abort (retrying the transaction may have succeeded).
-.TP
-.B PERF_TXN_CONFLICT
-Abort due to memory conflicts with other threads.
-.TP
-.B PERF_TXN_CAPACITY_WRITE
-Abort due to write capacity overflow.
-.TP
-.B PERF_TXN_CAPACITY_READ
-Abort due to read capacity overflow.
-.RE
-.IP
-In addition, a user-specified abort code can be obtained from
-the high 32 bits of the field by shifting right by
-.B PERF_TXN_ABORT_SHIFT
-and masking with the value
-.BR PERF_TXN_ABORT_MASK .
-.TP
-.I abi
-.TQ
-.I regs[weight(mask)]
-If
-.B PERF_SAMPLE_REGS_INTR
-is enabled, then the CPU registers at the time of the interrupt are recorded.
-.IP
-The
-.I abi
-field is one of
-.BR PERF_SAMPLE_REGS_ABI_NONE ,
-.BR PERF_SAMPLE_REGS_ABI_32 ,
-or
-.BR PERF_SAMPLE_REGS_ABI_64 .
-.IP
-The
-.I regs
-field is an array of the CPU registers that were specified by
-the
-.I sample_regs_intr
-attr field.
-The number of values is the number of bits set in the
-.I sample_regs_intr
-bit mask.
-.TP
-.I phys_addr
-If the
-.B PERF_SAMPLE_PHYS_ADDR
-flag is set, then the 64-bit physical address is recorded.
-.TP
-.I cgroup
-If the
-.B PERF_SAMPLE_CGROUP
-flag is set,
-then the 64-bit cgroup ID (for the perf_event subsystem) is recorded.
-To get the pathname of the cgroup, the ID should be matched to one in a
-.B PERF_RECORD_CGROUP
-record.
-.TP
-.I data_page_size
-If the
-.B PERF_SAMPLE_DATA_PAGE_SIZE
-flag is set,
-then the 64-bit page size value of the
-.B data
-address is recorded.
-.TP
-.I code_page_size
-If the
-.B PERF_SAMPLE_CODE_PAGE_SIZE
-flag is set,
-then the 64-bit page size value of the
-.B ip
-address is recorded.
-.TP
-.I size
-.TQ
-.IR data [ size ]
-If
-.B PERF_SAMPLE_AUX
-is enabled,
-a snapshot of the aux buffer is recorded.
-.RE
-.TP
-.B PERF_RECORD_MMAP2
-This record includes extended information on
-.BR mmap (2)
-calls returning executable mappings.
-The format is similar to that of the
-.B PERF_RECORD_MMAP
-record, but includes extra values that allow uniquely identifying
-shared mappings.
-Depending on the
-.B PERF_RECORD_MISC_MMAP_BUILD_ID
-bit in the header,
-the extra values have different layout and meanings.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 pid;
- u32 tid;
- u64 addr;
- u64 len;
- u64 pgoff;
- union {
- struct {
- u32 maj;
- u32 min;
- u64 ino;
- u64 ino_generation;
- };
- struct { /* if PERF_RECORD_MISC_MMAP_BUILD_ID */
- u8 build_id_size;
- u8 __reserved_1;
- u16 __reserved_2;
- u8 build_id[20];
- };
- };
- u32 prot;
- u32 flags;
- char filename[];
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I pid
-is the process ID.
-.TP
-.I tid
-is the thread ID.
-.TP
-.I addr
-is the address of the allocated memory.
-.TP
-.I len
-is the length of the allocated memory.
-.TP
-.I pgoff
-is the page offset of the allocated memory.
-.TP
-.I maj
-is the major ID of the underlying device.
-.TP
-.I min
-is the minor ID of the underlying device.
-.TP
-.I ino
-is the inode number.
-.TP
-.I ino_generation
-is the inode generation.
-.TP
-.I build_id_size
-is the actual size of
-.I build_id
-field (up to 20).
-.TP
-.I build_id
-is raw data that identifies a binary.
-.TP
-.I prot
-is the protection information.
-.TP
-.I flags
-is the flags information.
-.TP
-.I filename
-is a string describing the backing of the allocated memory.
-.RE
-.TP
-.BR PERF_RECORD_AUX " (since Linux 4.1)"
-.\" commit 68db7e98c3a6ebe7284b6cf14906ed7c55f3f7f0
-This record reports that new data is available in the separate
-AUX buffer region.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u64 aux_offset;
- u64 aux_size;
- u64 flags;
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I aux_offset
-offset in the AUX mmap region where the new data begins.
-.TP
-.I aux_size
-size of the data made available.
-.TP
-.I flags
-describes the AUX update.
-.RS
-.TP
-.B PERF_AUX_FLAG_TRUNCATED
-if set, then the data returned was truncated to fit the available
-buffer size.
-.TP
-.B PERF_AUX_FLAG_OVERWRITE
-.\" commit 2023a0d2829e521fe6ad6b9907f3f90bfbf57142
-if set, then the data returned has overwritten previous data.
-.RE
-.RE
-.TP
-.BR PERF_RECORD_ITRACE_START " (since Linux 4.1)"
-.\" ec0d7729bbaed4b9d2d3fada693278e13a3d1368
-This record indicates which process has initiated an instruction
-trace event, allowing tools to properly correlate the instruction
-addresses in the AUX buffer with the proper executable.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 pid;
- u32 tid;
-};
-.EE
-.in
-.RS
-.TP
-.I pid
-process ID of the thread starting an instruction trace.
-.TP
-.I tid
-thread ID of the thread starting an instruction trace.
-.RE
-.TP
-.BR PERF_RECORD_LOST_SAMPLES " (since Linux 4.2)"
-.\" f38b0dbb491a6987e198aa6b428db8692a6480f8
-When using hardware sampling (such as Intel PEBS) this record
-indicates some number of samples that may have been lost.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u64 lost;
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I lost
-the number of potentially lost samples.
-.RE
-.TP
-.BR PERF_RECORD_SWITCH " (since Linux 4.3)"
-.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4
-This record indicates a context switch has happened.
-The
-.B PERF_RECORD_MISC_SWITCH_OUT
-bit in the
-.I misc
-field indicates whether it was a context switch into
-or away from the current process.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- struct sample_id sample_id;
-};
-.EE
-.in
-.TP
-.BR PERF_RECORD_SWITCH_CPU_WIDE " (since Linux 4.3)"
-.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4
-As with
-.B PERF_RECORD_SWITCH
-this record indicates a context switch has happened,
-but it only occurs when sampling in CPU-wide mode
-and provides additional information on the process
-being switched to/from.
-The
-.B PERF_RECORD_MISC_SWITCH_OUT
-bit in the
-.I misc
-field indicates whether it was a context switch into
-or away from the current process.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 next_prev_pid;
- u32 next_prev_tid;
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I next_prev_pid
-The process ID of the previous (if switching in)
-or next (if switching out) process on the CPU.
-.TP
-.I next_prev_tid
-The thread ID of the previous (if switching in)
-or next (if switching out) thread on the CPU.
-.RE
-.TP
-.BR PERF_RECORD_NAMESPACES " (since Linux 4.11)"
-.\" commit e422267322cd319e2695a535e47c5b1feeac45eb
-This record includes various namespace information of a process.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u32 pid;
- u32 tid;
- u64 nr_namespaces;
- struct { u64 dev, inode } [nr_namespaces];
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I pid
-is the process ID.
-.TP
-.I tid
-is the thread ID.
-.TP
-.I nr_namespaces
-is the number of namespaces in this record.
-.RE
-.IP
-Each namespace has
-.I dev
-and
-.I inode
-fields and is recorded in the
-fixed position like below:
-.RS
-.TP
-.BR NET_NS_INDEX = 0
-Network namespace
-.TP
-.BR UTS_NS_INDEX = 1
-UTS namespace
-.TP
-.BR IPC_NS_INDEX = 2
-IPC namespace
-.TP
-.BR PID_NS_INDEX = 3
-PID namespace
-.TP
-.BR USER_NS_INDEX = 4
-User namespace
-.TP
-.BR MNT_NS_INDEX = 5
-Mount namespace
-.TP
-.BR CGROUP_NS_INDEX = 6
-Cgroup namespace
-.RE
-.TP
-.BR PERF_RECORD_KSYMBOL " (since Linux 5.0)"
-.\" commit 76193a94522f1d4edf2447a536f3f796ce56343b
-This record indicates kernel symbol register/unregister events.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u64 addr;
- u32 len;
- u16 ksym_type;
- u16 flags;
- char name[];
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I addr
-is the address of the kernel symbol.
-.TP
-.I len
-is the length of the kernel symbol.
-.TP
-.I ksym_type
-is the type of the kernel symbol.
-Currently the following types are available:
-.RS
-.TP
-.B PERF_RECORD_KSYMBOL_TYPE_BPF
-The kernel symbol is a BPF function.
-.RE
-.TP
-.I flags
-If the
-.B PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER
-is set, then this event is for unregistering the kernel symbol.
-.RE
-.TP
-.BR PERF_RECORD_BPF_EVENT " (since Linux 5.0)"
-.\" commit 6ee52e2a3fe4ea35520720736e6791df1fb67106
-This record indicates that a BPF program is loaded or unloaded.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u16 type;
- u16 flags;
- u32 id;
- u8 tag[BPF_TAG_SIZE];
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I type
-is one of the following values:
-.RS
-.TP
-.B PERF_BPF_EVENT_PROG_LOAD
-A BPF program is loaded
-.TP
-.B PERF_BPF_EVENT_PROG_UNLOAD
-A BPF program is unloaded
-.RE
-.TP
-.I id
-is the ID of the BPF program.
-.TP
-.I tag
-is the tag of the BPF program.
-Currently,
-.B BPF_TAG_SIZE
-is defined as 8.
-.RE
-.TP
-.BR PERF_RECORD_CGROUP " (since Linux 5.7)"
-.\" commit 96aaab686505c449e24d76e76507290dcc30e008
-This record indicates a new cgroup is created and activated.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u64 id;
- char path[];
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I id
-is the cgroup identifier.
-This can also be retrieved by
-.BR name_to_handle_at (2)
-on the cgroup path (as a file handle).
-.TP
-.I path
-is the path of the cgroup from the root.
-.RE
-.TP
-.BR PERF_RECORD_TEXT_POKE " (since Linux 5.8)"
-.\" commit e17d43b93e544f5016c0251d2074c15568d5d963
-This record indicates a change in the kernel text.
-This includes addition and removal of text,
-in which case the corresponding length is zero.
-.IP
-.in +4n
-.EX
-struct {
- struct perf_event_header header;
- u64 addr;
- u16 old_len;
- u16 new_len;
- u8 bytes[];
- struct sample_id sample_id;
-};
-.EE
-.in
-.RS
-.TP
-.I addr
-is the address of the change.
-.TP
-.I old_len
-is the old length.
-.TP
-.I new_len
-is the new length.
-.TP
-.I bytes
-contains old bytes immediately followed by new bytes.
-.RE
-.RE
-.SS Overflow handling
-Events can be set to notify when a threshold is crossed,
-indicating an overflow.
-Overflow conditions can be captured by monitoring the
-event file descriptor with
-.BR poll (2),
-.BR select (2),
-or
-.BR epoll (7).
-Alternatively, the overflow events can be captured via a signal handler,
-by enabling I/O signaling on the file descriptor; see the discussion of the
-.B F_SETOWN
-and
-.B F_SETSIG
-operations in
-.BR fcntl (2).
-.P
-Overflows are generated only by sampling events
-.RI ( sample_period
-must have a nonzero value).
-.P
-There are two ways to generate overflow notifications.
-.P
-The first is to set a
-.I wakeup_events
-or
-.I wakeup_watermark
-value that will trigger if a certain number of samples
-or bytes have been written to the mmap ring buffer.
-In this case,
-.B POLL_IN
-is indicated.
-.P
-The other way is by use of the
-.B PERF_EVENT_IOC_REFRESH
-ioctl.
-This ioctl adds to a counter that decrements each time the event overflows.
-When nonzero,
-.B POLL_IN
-is indicated, but
-once the counter reaches 0
-.B POLL_HUP
-is indicated and
-the underlying event is disabled.
-.P
-Refreshing an event group leader refreshes all siblings and
-refreshing with a parameter of 0 currently enables infinite
-refreshes;
-these behaviors are unsupported and should not be relied on.
-.\" See https://lkml.org/lkml/2011/5/24/337
-.P
-Starting with Linux 3.18,
-.\" commit 179033b3e064d2cd3f5f9945e76b0a0f0fbf4883
-.B POLL_HUP
-is indicated if the event being monitored is attached to a different
-process and that process exits.
-.SS rdpmc instruction
-Starting with Linux 3.4 on x86, you can use the
-.\" commit c7206205d00ab375839bd6c7ddb247d600693c09
-.I rdpmc
-instruction to get low-latency reads without having to enter the kernel.
-Note that using
-.I rdpmc
-is not necessarily faster than other methods for reading event values.
-.P
-Support for this can be detected with the
-.I cap_usr_rdpmc
-field in the mmap page; documentation on how
-to calculate event values can be found in that section.
-.P
-Originally, when rdpmc support was enabled, any process (not just ones
-with an active perf event) could use the rdpmc instruction to access
-the counters.
-Starting with Linux 4.0,
-.\" 7911d3f7af14a614617e38245fedf98a724e46a9
-rdpmc support is only allowed if an event is currently enabled
-in a process's context.
-To restore the old behavior, write the value 2 to
-.IR /sys/devices/cpu/rdpmc .
-.SS perf_event ioctl calls
-Various ioctls act on
-.BR perf_event_open ()
-file descriptors:
-.TP
-.B PERF_EVENT_IOC_ENABLE
-This enables the individual event or event group specified by the
-file descriptor argument.
-.IP
-If the
-.B PERF_IOC_FLAG_GROUP
-bit is set in the ioctl argument, then all events in a group are
-enabled, even if the event specified is not the group leader
-(but see BUGS).
-.TP
-.B PERF_EVENT_IOC_DISABLE
-This disables the individual counter or event group specified by the
-file descriptor argument.
-.IP
-Enabling or disabling the leader of a group enables or disables the
-entire group; that is, while the group leader is disabled, none of the
-counters in the group will count.
-Enabling or disabling a member of a group other than the leader
-affects only that counter; disabling a non-leader
-stops that counter from counting but doesn't affect any other counter.
-.IP
-If the
-.B PERF_IOC_FLAG_GROUP
-bit is set in the ioctl argument, then all events in a group are
-disabled, even if the event specified is not the group leader
-(but see BUGS).
-.TP
-.B PERF_EVENT_IOC_REFRESH
-Non-inherited overflow counters can use this
-to enable a counter for a number of overflows specified by the argument,
-after which it is disabled.
-Subsequent calls of this ioctl add the argument value to the current
-count.
-An overflow notification with
-.B POLL_IN
-set will happen on each overflow until the
-count reaches 0; when that happens a notification with
-.B POLL_HUP
-set is sent and the event is disabled.
-Using an argument of 0 is considered undefined behavior.
-.TP
-.B PERF_EVENT_IOC_RESET
-Reset the event count specified by the
-file descriptor argument to zero.
-This resets only the counts; there is no way to reset the
-multiplexing
-.I time_enabled
-or
-.I time_running
-values.
-.IP
-If the
-.B PERF_IOC_FLAG_GROUP
-bit is set in the ioctl argument, then all events in a group are
-reset, even if the event specified is not the group leader
-(but see BUGS).
-.TP
-.B PERF_EVENT_IOC_PERIOD
-This updates the overflow period for the event.
-.IP
-Since Linux 3.7 (on ARM)
-.\" commit 3581fe0ef37ce12ac7a4f74831168352ae848edc
-and Linux 3.14 (all other architectures),
-.\" commit bad7192b842c83e580747ca57104dd51fe08c223
-the new period takes effect immediately.
-On older kernels, the new period did not take effect until
-after the next overflow.
-.IP
-The argument is a pointer to a 64-bit value containing the
-desired new period.
-.IP
-Prior to Linux 2.6.36,
-.\" commit ad0cf3478de8677f720ee06393b3147819568d6a
-this ioctl always failed due to a bug
-in the kernel.
-.TP
-.B PERF_EVENT_IOC_SET_OUTPUT
-This tells the kernel to report event notifications to the specified
-file descriptor rather than the default one.
-The file descriptors must all be on the same CPU.
-.IP
-The argument specifies the desired file descriptor, or \-1 if
-output should be ignored.
-.TP
-.BR PERF_EVENT_IOC_SET_FILTER " (since Linux 2.6.33)"
-.\" commit 6fb2915df7f0747d9044da9dbff5b46dc2e20830
-This adds an ftrace filter to this event.
-.IP
-The argument is a pointer to the desired ftrace filter.
-.TP
-.BR PERF_EVENT_IOC_ID " (since Linux 3.12)"
-.\" commit cf4957f17f2a89984915ea808876d9c82225b862
-This returns the event ID value for the given event file descriptor.
-.IP
-The argument is a pointer to a 64-bit unsigned integer
-to hold the result.
-.TP
-.BR PERF_EVENT_IOC_SET_BPF " (since Linux 4.1)"
-.\" commit 2541517c32be2531e0da59dfd7efc1ce844644f5
-This allows attaching a Berkeley Packet Filter (BPF)
-program to an existing kprobe tracepoint event.
-You need
-.B CAP_PERFMON
-(since Linux 5.8) or
-.B CAP_SYS_ADMIN
-privileges to use this ioctl.
-.IP
-The argument is a BPF program file descriptor that was created by
-a previous
-.BR bpf (2)
-system call.
-.TP
-.BR PERF_EVENT_IOC_PAUSE_OUTPUT " (since Linux 4.7)"
-.\" commit 86e7972f690c1017fd086cdfe53d8524e68c661c
-This allows pausing and resuming the event's ring-buffer.
-A paused ring-buffer does not prevent generation of samples,
-but simply discards them.
-The discarded samples are considered lost, and cause a
-.B PERF_RECORD_LOST
-sample to be generated when possible.
-An overflow signal may still be triggered by the discarded sample
-even though the ring-buffer remains empty.
-.IP
-The argument is an unsigned 32-bit integer.
-A nonzero value pauses the ring-buffer, while a
-zero value resumes the ring-buffer.
-.TP
-.BR PERF_EVENT_IOC_MODIFY_ATTRIBUTES " (since Linux 4.17)"
-.\" commit 32ff77e8cc9e66cc4fb38098f64fd54cc8f54573
-This allows modifying an existing event without the overhead
-of closing and reopening a new event.
-Currently this is supported only for breakpoint events.
-.IP
-The argument is a pointer to a
-.I perf_event_attr
-structure containing the updated event settings.
-.TP
-.BR PERF_EVENT_IOC_QUERY_BPF " (since Linux 4.16)"
-.\" commit f371b304f12e31fe30207c41ca7754564e0ea4dc
-This allows querying which Berkeley Packet Filter (BPF)
-programs are attached to an existing kprobe tracepoint.
-You can only attach one BPF program per event, but you can
-have multiple events attached to a tracepoint.
-Querying this value on one tracepoint event returns the IDs
-of all BPF programs in all events attached to the tracepoint.
-You need
-.B CAP_PERFMON
-(since Linux 5.8) or
-.B CAP_SYS_ADMIN
-privileges to use this ioctl.
-.IP
-The argument is a pointer to a structure:
-.in +4n
-.EX
-struct perf_event_query_bpf {
- __u32 ids_len;
- __u32 prog_cnt;
- __u32 ids[0];
-};
-.EE
-.in
-.IP
-The
-.I ids_len
-field indicates the number of ids that can fit in the provided
-.I ids
-array.
-The
-.I prog_cnt
-value is filled in by the kernel with the number of attached
-BPF programs.
-The
-.I ids
-array is filled with the ID of each attached BPF program.
-If there are more programs than will fit in the array, then the
-kernel will return
-.B ENOSPC
-and
-.I ids_len
-will indicate the number of program IDs that were successfully copied.
-.\"
-.SS Using prctl(2)
-A process can enable or disable all currently open event groups
-using the
-.BR prctl (2)
-.B PR_TASK_PERF_EVENTS_ENABLE
-and
-.B PR_TASK_PERF_EVENTS_DISABLE
-operations.
-This applies only to events created locally by the calling process.
-This does not apply to events created by other processes attached
-to the calling process or inherited events from a parent process.
-Only group leaders are enabled and disabled,
-not any other members of the groups.
-.SS perf_event related configuration files
-Files in
-.I /proc/sys/kernel/
-.RS 4
-.TP
-.I /proc/sys/kernel/perf_event_paranoid
-The
-.I perf_event_paranoid
-file can be set to restrict access to the performance counters.
-.IP
-.PD 0
-.RS
-.TP
-.B 2
-allow only user-space measurements (default since Linux 4.6).
-.\" default changed in commit 0161028b7c8aebef64194d3d73e43bc3b53b5c66
-.TP
-.B 1
-allow both kernel and user measurements (default before Linux 4.6).
-.TP
-.B 0
-allow access to CPU-specific data but not raw tracepoint samples.
-.TP
-.B \-1
-no restrictions.
-.RE
-.PD
-.IP
-The existence of the
-.I perf_event_paranoid
-file is the official method for determining if a kernel supports
-.BR perf_event_open ().
-.TP
-.I /proc/sys/kernel/perf_event_max_sample_rate
-This sets the maximum sample rate.
-Setting this too high can allow
-users to sample at a rate that impacts overall machine performance
-and potentially lock up the machine.
-The default value is
-100000 (samples per second).
-.TP
-.I /proc/sys/kernel/perf_event_max_stack
-.\" Introduced in c5dfd78eb79851e278b7973031b9ca363da87a7e
-This file sets the maximum depth of stack frame entries reported
-when generating a call trace.
-.TP
-.I /proc/sys/kernel/perf_event_mlock_kb
-Maximum amount of memory (in kilobytes) that an unprivileged user can
-.BR mlock (2).
-The default is 516 (kB).
-.RE
-.P
-Files in
-.I /sys/bus/event_source/devices/
-.P
-.RS 4
-Since Linux 2.6.34, the kernel supports having multiple PMUs
-available for monitoring.
-Information on how to program these PMUs can be found under
-.IR /sys/bus/event_source/devices/ .
-Each subdirectory corresponds to a different PMU.
-.TP
-.IR /sys/bus/event_source/devices/*/type " (since Linux 2.6.38)"
-.\" commit abe43400579d5de0078c2d3a760e6598e183f871
-This contains an integer that can be used in the
-.I type
-field of
-.I perf_event_attr
-to indicate that you wish to use this PMU.
-.TP
-.IR /sys/bus/event_source/devices/cpu/rdpmc " (since Linux 3.4)"
-.\" commit 0c9d42ed4cee2aa1dfc3a260b741baae8615744f
-If this file is 1, then direct user-space access to the
-performance counter registers is allowed via the rdpmc instruction.
-This can be disabled by echoing 0 to the file.
-.IP
-As of Linux 4.0
-.\" a66734297f78707ce39d756b656bfae861d53f62
-.\" 7911d3f7af14a614617e38245fedf98a724e46a9
-the behavior has changed, so that 1 now means only allow access
-to processes with active perf events, with 2 indicating the old
-allow-anyone-access behavior.
-.TP
-.IR /sys/bus/event_source/devices/*/format/ " (since Linux 3.4)"
-.\" commit 641cc938815dfd09f8fa1ec72deb814f0938ac33
-This subdirectory contains information on the architecture-specific
-subfields available for programming the various
-.I config
-fields in the
-.I perf_event_attr
-struct.
-.IP
-The content of each file is the name of the config field, followed
-by a colon, followed by a series of integer bit ranges separated by
-commas.
-For example, the file
-.I event
-may contain the value
-.I config1:1,6\-10,44
-which indicates that event is an attribute that occupies bits 1, 6\[en]10, and 44
-of
-.IR perf_event_attr::config1 .
-.TP
-.IR /sys/bus/event_source/devices/*/events/ " (since Linux 3.4)"
-.\" commit 641cc938815dfd09f8fa1ec72deb814f0938ac33
-This subdirectory contains files with predefined events.
-The contents are strings describing the event settings
-expressed in terms of the fields found in the previously mentioned
-.I ./format/
-directory.
-These are not necessarily complete lists of all events supported by
-a PMU, but usually a subset of events deemed useful or interesting.
-.IP
-The content of each file is a list of attribute names
-separated by commas.
-Each entry has an optional value (either hex or decimal).
-If no value is specified, then it is assumed to be a single-bit
-field with a value of 1.
-An example entry may look like this:
-.IR event=0x2,inv,ldlat=3 .
-.TP
-.I /sys/bus/event_source/devices/*/uevent
-This file is the standard kernel device interface
-for injecting hotplug events.
-.TP
-.IR /sys/bus/event_source/devices/*/cpumask " (since Linux 3.7)"
-.\" commit 314d9f63f385096580e9e2a06eaa0745d92fe4ac
-The
-.I cpumask
-file contains a comma-separated list of integers that
-indicate a representative CPU number for each socket (package)
-on the motherboard.
-This is needed when setting up uncore or northbridge events, as
-those PMUs present socket-wide events.
-.RE
-.SH RETURN VALUE
-On success,
-.BR perf_event_open ()
-returns the new file descriptor.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-The errors returned by
-.BR perf_event_open ()
-can be inconsistent, and may
-vary across processor architectures and performance monitoring units.
-.TP
-.B E2BIG
-Returned if the
-.I perf_event_attr
-.I size
-value is too small
-(smaller than
-.BR PERF_ATTR_SIZE_VER0 ),
-too big (larger than the page size),
-or larger than the kernel supports and the extra bytes are not zero.
-When
-.B E2BIG
-is returned, the
-.I perf_event_attr
-.I size
-field is overwritten by the kernel to be the size of the structure
-it was expecting.
-.TP
-.B EACCES
-Returned when the requested event requires
-.B CAP_PERFMON
-(since Linux 5.8) or
-.B CAP_SYS_ADMIN
-permissions (or a more permissive perf_event paranoid setting).
-Some common cases where an unprivileged process
-may encounter this error:
-attaching to a process owned by a different user;
-monitoring all processes on a given CPU (i.e., specifying the
-.I pid
-argument as \-1);
-and not setting
-.I exclude_kernel
-when the paranoid setting requires it.
-.TP
-.B EBADF
-Returned if the
-.I group_fd
-file descriptor is not valid, or, if
-.B PERF_FLAG_PID_CGROUP
-is set,
-the cgroup file descriptor in
-.I pid
-is not valid.
-.TP
-.BR EBUSY " (since Linux 4.1)"
-.\" bed5b25ad9c8a2f5d735ef0bc746ec870c01c1b0
-Returned if another event already has exclusive
-access to the PMU.
-.TP
-.B EFAULT
-Returned if the
-.I attr
-pointer points at an invalid memory address.
-.TP
-.B EINTR
-Returned when trying to mix perf and ftrace handling
-for a uprobe.
-.TP
-.B EINVAL
-Returned if the specified event is invalid.
-There are many possible reasons for this.
-A non-exhaustive list:
-.I sample_freq
-is higher than the maximum setting;
-the
-.I cpu
-to monitor does not exist;
-.I read_format
-is out of range;
-.I sample_type
-is out of range;
-the
-.I flags
-value is out of range;
-.I exclusive
-or
-.I pinned
-set and the event is not a group leader;
-the event
-.I config
-values are out of range or set reserved bits;
-the generic event selected is not supported; or
-there is not enough room to add the selected event.
-.TP
-.B EMFILE
-Each opened event uses one file descriptor.
-If a large number of events are opened,
-the per-process limit on the number of open file descriptors will be reached,
-and no more events can be created.
-.TP
-.B ENODEV
-Returned when the event involves a feature not supported
-by the current CPU.
-.TP
-.B ENOENT
-Returned if the
-.I type
-setting is not valid.
-This error is also returned for
-some unsupported generic events.
-.TP
-.B ENOSPC
-Prior to Linux 3.3, if there was not enough room for the event,
-.\" commit aa2bc1ade59003a379ffc485d6da2d92ea3370a6
-.B ENOSPC
-was returned.
-In Linux 3.3, this was changed to
-.BR EINVAL .
-.B ENOSPC
-is still returned if you try to add more breakpoint events
-than supported by the hardware.
-.TP
-.B ENOSYS
-Returned if
-.B PERF_SAMPLE_STACK_USER
-is set in
-.I sample_type
-and it is not supported by hardware.
-.TP
-.B EOPNOTSUPP
-Returned if an event requiring a specific hardware feature is
-requested but there is no hardware support.
-This includes requesting low-skid events if not supported,
-branch tracing if it is not available, sampling if no PMU
-interrupt is available, and branch stacks for software events.
-.TP
-.BR EOVERFLOW " (since Linux 4.8)"
-.\" 97c79a38cd454602645f0470ffb444b3b75ce574
-Returned if
-.B PERF_SAMPLE_CALLCHAIN
-is requested and
-.I sample_max_stack
-is larger than the maximum specified in
-.IR /proc/sys/kernel/perf_event_max_stack .
-.TP
-.B EPERM
-Returned on many (but not all) architectures when an unsupported
-.IR exclude_hv ", " exclude_idle ", " exclude_user ", or " exclude_kernel
-setting is specified.
-.IP
-It can also happen, as with
-.BR EACCES ,
-when the requested event requires
-.B CAP_PERFMON
-(since Linux 5.8) or
-.B CAP_SYS_ADMIN
-permissions (or a more permissive perf_event paranoid setting).
-This includes setting a breakpoint on a kernel address,
-and (since Linux 3.13) setting a kernel function-trace tracepoint.
-.\" commit a4e95fc2cbb31d70a65beffeaf8773f881328c34
-.TP
-.B ESRCH
-Returned if attempting to attach to a process that does not exist.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.BR perf_event_open ()
-was introduced in Linux 2.6.31 but was called
-.\" commit 0793a61d4df8daeac6492dbf8d2f3e5713caae5e
-.BR perf_counter_open ().
-It was renamed in Linux 2.6.32.
-.\" commit cdd6c482c9ff9c55475ee7392ec8f672eddb7be6
-.SH NOTES
-The official way of knowing if
-.BR perf_event_open ()
-support is enabled is checking
-for the existence of the file
-.IR /proc/sys/kernel/perf_event_paranoid .
-.P
-.B CAP_PERFMON
-capability (since Linux 5.8) provides a secure approach to
-performance monitoring and observability operations in a system
-according to the principle of least privilege (POSIX IEEE 1003.1e).
-Accessing system performance monitoring and observability operations
-using
-.B CAP_PERFMON
-rather than the much more powerful
-.B CAP_SYS_ADMIN
-excludes chances to misuse credentials and makes operations more secure.
-.B CAP_SYS_ADMIN
-usage for secure system performance monitoring and observability
-is discouraged in favor of the
-.B CAP_PERFMON
-capability.
-.SH BUGS
-The
-.B F_SETOWN_EX
-option to
-.BR fcntl (2)
-is needed to properly get overflow signals in threads.
-This was introduced in Linux 2.6.32.
-.\" commit ba0a6c9f6fceed11c6a99e8326f0477fe383e6b5
-.P
-Prior to Linux 2.6.33 (at least for x86),
-.\" commit b690081d4d3f6a23541493f1682835c3cd5c54a1
-the kernel did not check
-if events could be scheduled together until read time.
-The same happens on all known kernels if the NMI watchdog is enabled.
-This means that, to see if a given set of events works, you have to call
-.BR perf_event_open (),
-start the events, and then read them before you know for sure that you
-can get valid measurements.
-.P
-Prior to Linux 2.6.34,
-.\" FIXME . cannot find a kernel commit for this one
-event constraints were not enforced by the kernel.
-In that case, some events would silently return "0" if the kernel
-scheduled them in an improper counter slot.
-.P
-Prior to Linux 2.6.34, there was a bug when multiplexing where the
-wrong results could be returned.
-.\" commit 45e16a6834b6af098702e5ea6c9a40de42ff77d8
-.P
-From Linux 2.6.35 to Linux 2.6.39, the kernel can quickly crash if
-"inherit" is enabled and many threads are started.
-.\" commit 38b435b16c36b0d863efcf3f07b34a6fac9873fd
-.P
-Prior to Linux 2.6.35,
-.\" commit 050735b08ca8a016bbace4445fa025b88fee770b
-.B PERF_FORMAT_GROUP
-did not work with attached processes.
-.P
-There is a bug in the kernel code between
-Linux 2.6.36 and Linux 3.0 that ignores the
-"watermark" field and acts as if a wakeup_event
-was chosen if the union has a
-nonzero value in it.
-.\" commit 4ec8363dfc1451f8c8f86825731fe712798ada02
-.P
-From Linux 2.6.31 to Linux 3.4, the
-.B PERF_IOC_FLAG_GROUP
-ioctl argument was broken and would repeatedly operate
-on the event specified rather than iterating across
-all sibling events in a group.
-.\" commit 724b6daa13e100067c30cfc4d1ad06629609dc4e
-.P
-From Linux 3.4 to Linux 3.11, the mmap
-.\" commit fa7315871046b9a4c48627905691dbde57e51033
-.I cap_usr_rdpmc
-and
-.I cap_usr_time
-bits mapped to the same location.
-Code should migrate to the new
-.I cap_user_rdpmc
-and
-.I cap_user_time
-fields instead.
-.P
-Always double-check your results!
-Various generalized events have had wrong values.
-For example, retired branches measured
-the wrong thing on AMD machines until Linux 2.6.35.
-.\" commit f287d332ce835f77a4f5077d2c0ef1e3f9ea42d2
-.SH EXAMPLES
-The following is a short example that measures the total
-instruction count of a call to
-.BR printf (3).
-.P
-.\" SRC BEGIN (perf_event_open.c)
-.EX
-#include <linux/perf_event.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-static long
-perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
- int cpu, int group_fd, unsigned long flags)
-{
- int ret;
-\&
- ret = syscall(SYS_perf_event_open, hw_event, pid, cpu,
- group_fd, flags);
- return ret;
-}
-\&
-int
-main(void)
-{
- int fd;
- long long count;
- struct perf_event_attr pe;
-\&
- memset(&pe, 0, sizeof(pe));
- pe.type = PERF_TYPE_HARDWARE;
- pe.size = sizeof(pe);
- pe.config = PERF_COUNT_HW_INSTRUCTIONS;
- pe.disabled = 1;
- pe.exclude_kernel = 1;
- pe.exclude_hv = 1;
-\&
- fd = perf_event_open(&pe, 0, \-1, \-1, 0);
- if (fd == \-1) {
- fprintf(stderr, "Error opening leader %llx\en", pe.config);
- exit(EXIT_FAILURE);
- }
-\&
- ioctl(fd, PERF_EVENT_IOC_RESET, 0);
- ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
-\&
- printf("Measuring instruction count for this printf\en");
-\&
- ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
- read(fd, &count, sizeof(count));
-\&
- printf("Used %lld instructions\en", count);
-\&
- close(fd);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR perf (1),
-.BR fcntl (2),
-.BR mmap (2),
-.BR open (2),
-.BR prctl (2),
-.BR read (2)
-.P
-.I Documentation/admin\-guide/perf\-security.rst
-in the kernel source tree
diff --git a/man2/perfmonctl.2 b/man2/perfmonctl.2
deleted file mode 100644
index d4bf56b9a..000000000
--- a/man2/perfmonctl.2
+++ /dev/null
@@ -1,193 +0,0 @@
-.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
-.\" and Copyright (C) 2013 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Written by Ivana Varekova <varekova@redhat.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH perfmonctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-perfmonctl \- interface to IA-64 performance monitoring unit
-.SH SYNOPSIS
-.nf
-.B #include <syscall.h>
-.B #include <perfmon.h>
-.P
-.BI "long perfmonctl(int " fd ", int " cmd ", void " arg [. narg "], int " narg ");"
-.fi
-.P
-.IR Note :
-There is no glibc wrapper for this system call; see HISTORY.
-.SH DESCRIPTION
-The IA-64-specific
-.BR perfmonctl ()
-system call provides an interface to the
-PMU (performance monitoring unit).
-The PMU consists of PMD (performance monitoring data) registers and
-PMC (performance monitoring control) registers,
-which gather hardware statistics.
-.P
-.BR perfmonctl ()
-applies the operation
-.I cmd
-to the input arguments specified by
-.IR arg .
-The number of arguments is defined by \fInarg\fR.
-The
-.I fd
-argument specifies the perfmon context to operate on.
-.P
-Supported values for
-.I cmd
-are:
-.TP
-.B PFM_CREATE_CONTEXT
-.nf
-.BI "perfmonctl(int " fd ", PFM_CREATE_CONTEXT, pfarg_context_t *" ctxt ", 1);"
-.fi
-Set up a context.
-.IP
-The
-.I fd
-parameter is ignored.
-A new perfmon context is created as specified in
-.I ctxt
-and its file descriptor is returned in \fIctxt\->ctx_fd\fR.
-.IP
-The file descriptor can be used in subsequent calls to
-.BR perfmonctl ()
-and can be used to read event notifications (type
-.IR pfm_msg_t )
-using
-.BR read (2).
-The file descriptor is pollable using
-.BR select (2),
-.BR poll (2),
-and
-.BR epoll (7).
-.IP
-The context can be destroyed by calling
-.BR close (2)
-on the file descriptor.
-.TP
-.B PFM_WRITE_PMCS
-.\" pfm_write_pmcs()
-.nf
-.BI "perfmonctl(int " fd ", PFM_WRITE_PMCS, pfarg_reg_t *" pmcs ", n);"
-.fi
-Set PMC registers.
-.TP
-.B PFM_WRITE_PMDS
-.nf
-.BI "perfmonctl(int " fd ", PFM_WRITE_PMDS, pfarg_reg_t *" pmds ", n);"
-.fi
-.\" pfm_write_pmds()
-Set PMD registers.
-.TP
-.B PFM_READ_PMDS
-.\" pfm_read_pmds()
-.nf
-.BI "perfmonctl(int " fd ", PFM_READ_PMDS, pfarg_reg_t *" pmds ", n);"
-.fi
-Read PMD registers.
-.TP
-.B PFM_START
-.\" pfm_start()
-.nf
-.\" .BI "perfmonctl(int " fd ", PFM_START, arg, 1);
-.BI "perfmonctl(int " fd ", PFM_START, NULL, 0);"
-.fi
-Start monitoring.
-.TP
-.B PFM_STOP
-.\" pfm_stop()
-.nf
-.BI "perfmonctl(int " fd ", PFM_STOP, NULL, 0);"
-.fi
-Stop monitoring.
-.TP
-.B PFM_LOAD_CONTEXT
-.\" pfm_context_load()
-.nf
-.BI "perfmonctl(int " fd ", PFM_LOAD_CONTEXT, pfarg_load_t *" largs ", 1);"
-.fi
-Attach the context to a thread.
-.TP
-.B PFM_UNLOAD_CONTEXT
-.\" pfm_context_unload()
-.nf
-.BI "perfmonctl(int " fd ", PFM_UNLOAD_CONTEXT, NULL, 0);"
-.fi
-Detach the context from a thread.
-.TP
-.B PFM_RESTART
-.\" pfm_restart()
-.nf
-.BI "perfmonctl(int " fd ", PFM_RESTART, NULL, 0);"
-.fi
-Restart monitoring after receiving an overflow notification.
-.TP
-.B PFM_GET_FEATURES
-.\" pfm_get_features()
-.nf
-.BI "perfmonctl(int " fd ", PFM_GET_FEATURES, pfarg_features_t *" arg ", 1);"
-.fi
-.TP
-.B PFM_DEBUG
-.\" pfm_debug()
-.nf
-.BI "perfmonctl(int " fd ", PFM_DEBUG, " val ", 0);"
-.fi
-If
-.I val
-is nonzero, enable debugging mode, otherwise disable.
-.TP
-.B PFM_GET_PMC_RESET_VAL
-.\" pfm_get_pmc_reset()
-.nf
-.BI "perfmonctl(int " fd ", PFM_GET_PMC_RESET_VAL, pfarg_reg_t *" req ", n);"
-.fi
-Reset PMC registers to default values.
-.\"
-.\"
-.\" .TP
-.\" .B PFM_CREATE_EVTSETS
-.\"
-.\" create or modify event sets
-.\" .nf
-.\" .BI "perfmonctl(int " fd ", PFM_CREATE_EVTSETS, pfarg_setdesc_t *desc , n);
-.\" .fi
-.\" .TP
-.\" .B PFM_DELETE_EVTSETS
-.\" delete event sets
-.\" .nf
-.\" .BI "perfmonctl(int " fd ", PFM_DELETE_EVTSET, pfarg_setdesc_t *desc , n);
-.\" .fi
-.\" .TP
-.\" .B PFM_GETINFO_EVTSETS
-.\" get information about event sets
-.\" .nf
-.\" .BI "perfmonctl(int " fd ", PFM_GETINFO_EVTSETS, pfarg_setinfo_t *info, n);
-.\" .fi
-.SH RETURN VALUE
-.BR perfmonctl ()
-returns zero when the operation is successful.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH STANDARDS
-Linux on IA-64.
-.SH HISTORY
-Added in Linux 2.4;
-.\" commit ecf5b72d5f66af843f189dfe9ce31598c3e48ad7
-removed in Linux 5.10.
-.P
-This system call was broken for many years,
-and ultimately removed in Linux 5.10.
-.P
-glibc does not provide a wrapper for this system call;
-on kernels where it exists, call it using
-.BR syscall (2).
-.SH SEE ALSO
-.BR gprof (1)
-.P
-The perfmon2 interface specification
diff --git a/man2/personality.2 b/man2/personality.2
deleted file mode 100644
index 976f27c6d..000000000
--- a/man2/personality.2
+++ /dev/null
@@ -1,296 +0,0 @@
-.\" Copyright (C) 1995, Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\" and Copyright (C) 2016, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Created Sat Aug 21 1995 Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\"
-.\" typo corrected, aeb, 950825
-.\" added layout change from joey, 960722
-.\" changed prototype, documented 0xffffffff, aeb, 030101
-.\" Modified 2004-11-03 patch from Martin Schulze <joey@infodrom.org>
-.\"
-.TH personality 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-personality \- set the process execution domain
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/personality.h>
-.P
-.BI "int personality(unsigned long " persona );
-.fi
-.SH DESCRIPTION
-Linux supports different execution domains, or personalities, for each
-process.
-Among other things, execution domains tell Linux how to map
-signal numbers into signal actions.
-The execution domain system allows
-Linux to provide limited support for binaries compiled under other
-UNIX-like operating systems.
-.P
-If
-.I persona
-is not
-0xffffffff, then
-.BR personality ()
-sets the caller's execution domain to the value specified by
-.IR persona .
-Specifying
-.I persona
-as 0xffffffff provides a way of retrieving
-the current persona without changing it.
-.P
-A list of the available execution domains can be found in
-.IR <sys/personality.h> .
-The execution domain is a 32-bit value in which the top three
-bytes are set aside for flags that cause the kernel to modify the
-behavior of certain system calls so as to emulate historical or
-architectural quirks.
-The least significant byte is a value defining the personality
-the kernel should assume.
-The flag values are as follows:
-.TP
-.BR ADDR_COMPAT_LAYOUT " (since Linux 2.6.9)"
-With this flag set, provide legacy virtual address space layout.
-.TP
-.BR ADDR_NO_RANDOMIZE " (since Linux 2.6.12)"
-With this flag set, disable address-space-layout randomization.
-.TP
-.BR ADDR_LIMIT_32BIT " (since Linux 2.2)"
-Limit the address space to 32 bits.
-.TP
-.BR ADDR_LIMIT_3GB " (since Linux 2.4.0)"
-With this flag set, use 0xc0000000 as the offset at which to search
-a virtual memory chunk on
-.BR mmap (2);
-otherwise use 0xffffe000.
-Applies to 32-bit x86 processes only.
-.TP
-.BR FDPIC_FUNCPTRS " (since Linux 2.6.11)"
-User-space function pointers to signal handlers point
-to descriptors.
-Applies only to ARM (if BINFMT_ELF_FDPIC is enabled) and SuperH.
-.TP
-.BR MMAP_PAGE_ZERO " (since Linux 2.4.0)"
-Map page 0 as read-only
-(to support binaries that depend on this SVr4 behavior).
-.TP
-.BR READ_IMPLIES_EXEC " (since Linux 2.6.8)"
-With this flag set,
-.B PROT_READ
-implies
-.B PROT_EXEC
-for
-.BR mmap (2).
-.TP
-.BR SHORT_INODE " (since Linux 2.4.0)"
-No effect.
-.TP
-.BR STICKY_TIMEOUTS " (since Linux 1.2.0)"
-With this flag set,
-.BR select (2),
-.BR pselect (2),
-and
-.BR ppoll (2)
-do not modify the returned timeout argument when
-interrupted by a signal handler.
-.TP
-.BR UNAME26 " (since Linux 3.1)"
-Have
-.BR uname (2)
-report a 2.6.(40+x) version number rather than a MAJOR.x version number.
-Added as a stopgap measure to support broken applications that
-could not handle the
-kernel version-numbering switch from Linux 2.6.x to Linux 3.x.
-.TP
-.BR WHOLE_SECONDS " (since Linux 1.2.0)"
-No effect.
-.P
-The available execution domains are:
-.TP
-.BR PER_BSD " (since Linux 1.2.0)"
-BSD. (No effects.)
-.TP
-.BR PER_HPUX " (since Linux 2.4)"
-Support for 32-bit HP/UX.
-This support was never complete, and was dropped so that since Linux 4.0,
-this value has no effect.
-.TP
-.BR PER_IRIX32 " (since Linux 2.2)"
-IRIX 5 32-bit.
-Never fully functional; support dropped in Linux 2.6.27.
-Implies
-.BR STICKY_TIMEOUTS .
-.TP
-.BR PER_IRIX64 " (since Linux 2.2)"
-IRIX 6 64-bit.
-Implies
-.BR STICKY_TIMEOUTS ;
-otherwise no effect.
-.TP
-.BR PER_IRIXN32 " (since Linux 2.2)"
-IRIX 6 new 32-bit.
-Implies
-.BR STICKY_TIMEOUTS ;
-otherwise no effect.
-.TP
-.BR PER_ISCR4 " (since Linux 1.2.0)"
-Implies
-.BR STICKY_TIMEOUTS ;
-otherwise no effect.
-.TP
-.BR PER_LINUX " (since Linux 1.2.0)"
-Linux.
-.TP
-.BR PER_LINUX32 " (since Linux 2.2)"
-.BR uname (2)
-returns the name of the 32-bit architecture in the
-.I machine
-field ("i686" instead of "x86_64", &c.).
-.IP
-Under ia64 (Itanium), processes with this personality don't have the
-O_LARGEFILE
-.BR open (2)
-flag forced.
-.IP
-Under 64-bit ARM, setting this personality is forbidden if
-.BR execve (2)ing
-a 32-bit process would also be forbidden
-(cf. the allow_mismatched_32bit_el0 kernel parameter and
-.IR Documentation/arm64/asymmetric-32bit.rst ).
-.TP
-.BR PER_LINUX32_3GB " (since Linux 2.4)"
-Same as
-.BR PER_LINUX32 ,
-but implies
-.BR ADDR_LIMIT_3GB .
-.TP
-.BR PER_LINUX_32BIT " (since Linux 2.0)"
-Same as
-.BR PER_LINUX ,
-but implies
-.BR ADDR_LIMIT_32BIT .
-.TP
-.BR PER_LINUX_FDPIC " (since Linux 2.6.11)"
-Same as
-.BR PER_LINUX ,
-but implies
-.BR FDPIC_FUNCPTRS .
-.TP
-.BR PER_OSF4 " (since Linux 2.4)"
-OSF/1 v4.
-.\" commit 987f20a9dcce3989e48d87cff3952c095c994445
-No effect since Linux 6.1, which removed a.out binary support.
-Before that, on alpha,
-.\" Following is from a comment in arch/alpha/kernel/osf_sys.c
-it would clear the top 32 bits of iov_len in the user's buffer for
-compatibility with old versions of OSF/1 where iov_len
-was defined as
-.IR int .
-.TP
-.BR PER_OSR5 " (since Linux 2.4)"
-SCO OpenServer 5.
-Implies
-.B STICKY_TIMEOUTS
-and
-.BR WHOLE_SECONDS ;
-otherwise no effect.
-.TP
-.BR PER_RISCOS " (since Linux 2.3.7; macro since Linux 2.3.13)"
-Acorn RISC OS/Arthur (MIPS).
-No effect.
-.\" commit 125ec7b4e90cbae4eed5a7ff1ee479cc331dcf3c
-Up to Linux 4.0, would set the emulation altroot to
-.I /usr/gnemul/riscos
-(cf.\&
-.BR PER_SUNOS ,
-below).
-Before then, up to Linux 2.6.3, just Arthur emulation.
-.TP
-.BR PER_SCOSVR3 " (since Linux 1.2.0)"
-SCO UNIX System V Release 3.
-Same as
-.BR PER_OSR5 ,
-but also implies
-.BR SHORT_INODE .
-.TP
-.BR PER_SOLARIS " (since Linux 2.4)"
-Solaris.
-Implies
-.BR STICKY_TIMEOUTS ;
-otherwise no effect.
-.TP
-.BR PER_SUNOS " (since Linux 2.4.0)"
-Sun OS.
-Same as
-.BR PER_BSD ,
-but implies
-.BR STICKY_TIMEOUTS .
-Prior to Linux 2.6.26,
-diverted library and dynamic linker searches to
-.IR /usr/gnemul .
-Buggy, largely unmaintained, and almost entirely unused.
-.TP
-.BR PER_SVR3 " (since Linux 1.2.0)"
-AT&T UNIX System V Release 3.
-Implies
-.B STICKY_TIMEOUTS
-and
-.BR SHORT_INODE ;
-otherwise no effect.
-.TP
-.BR PER_SVR4 " (since Linux 1.2.0)"
-AT&T UNIX System V Release 4.
-Implies
-.B STICKY_TIMEOUTS
-and
-.BR MMAP_PAGE_ZERO ;
-otherwise no effect.
-.TP
-.BR PER_UW7 " (since Linux 2.4)"
-UnixWare 7.
-Implies
-.B STICKY_TIMEOUTS
-and
-.BR MMAP_PAGE_ZERO ;
-otherwise no effect.
-.TP
-.BR PER_WYSEV386 " (since Linux 1.2.0)"
-WYSE UNIX System V/386.
-Implies
-.B STICKY_TIMEOUTS
-and
-.BR SHORT_INODE ;
-otherwise no effect.
-.TP
-.BR PER_XENIX " (since Linux 1.2.0)"
-XENIX.
-Implies
-.B STICKY_TIMEOUTS
-and
-.BR SHORT_INODE ;
-otherwise no effect.
-.SH RETURN VALUE
-On success, the previous
-.I persona
-is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-The kernel was unable to change the personality.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 1.1.20,
-.\" (and thus first in a stable kernel release with Linux 1.2.0)
-glibc 2.3.
-.\" personality wrapper first appeared in glibc 1.90,
-.\" <sys/personality.h> was added later in glibc 2.2.91.
-.SH SEE ALSO
-.BR setarch (8)
diff --git a/man2/phys.2 b/man2/phys.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/phys.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/pidfd_getfd.2 b/man2/pidfd_getfd.2
deleted file mode 100644
index ba92823cb..000000000
--- a/man2/pidfd_getfd.2
+++ /dev/null
@@ -1,144 +0,0 @@
-.\" Copyright (c) 2020 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH pidfd_getfd 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pidfd_getfd \- obtain a duplicate of another process's file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_pidfd_getfd, int " pidfd ", int " targetfd ,
-.BI " unsigned int " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR pidfd_getfd (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR pidfd_getfd ()
-system call allocates a new file descriptor in the calling process.
-This new file descriptor is a duplicate of an existing file descriptor,
-.IR targetfd ,
-in the process referred to by the PID file descriptor
-.IR pidfd .
-.P
-The duplicate file descriptor refers to the same open file description (see
-.BR open (2))
-as the original file descriptor in the process referred to by
-.IR pidfd .
-The two file descriptors thus share file status flags and file offset.
-Furthermore, operations on the underlying file object
-(for example, assigning an address to a socket object using
-.BR bind (2))
-can equally be performed via the duplicate file descriptor.
-.P
-The close-on-exec flag
-.RB ( FD_CLOEXEC ;
-see
-.BR fcntl (2))
-is set on the file descriptor returned by
-.BR pidfd_getfd ().
-.P
-The
-.I flags
-argument is reserved for future use.
-Currently, it must be specified as 0.
-.P
-Permission to duplicate another process's file descriptor
-is governed by a ptrace access mode
-.B PTRACE_MODE_ATTACH_REALCREDS
-check (see
-.BR ptrace (2)).
-.SH RETURN VALUE
-On success,
-.BR pidfd_getfd ()
-returns a file descriptor (a nonnegative integer).
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I pidfd
-is not a valid PID file descriptor.
-.TP
-.B EBADF
-.I targetfd
-is not an open file descriptor in the process referred to by
-.IR pidfd .
-.TP
-.B EINVAL
-.I flags
-is not 0.
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached
-(see the description of
-.B RLIMIT_NOFILE
-in
-.BR getrlimit (2)).
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B EPERM
-The calling process did not have
-.B PTRACE_MODE_ATTACH_REALCREDS
-permissions (see
-.BR ptrace (2))
-over the process referred to by
-.IR pidfd .
-.TP
-.B ESRCH
-The process referred to by
-.I pidfd
-does not exist
-(i.e., it has terminated and been waited on).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.6.
-.\" commit 8649c322f75c96e7ced2fec201e123b2b073bf09
-.SH NOTES
-For a description of PID file descriptors, see
-.BR pidfd_open (2).
-.P
-The effect of
-.BR pidfd_getfd ()
-is similar to the use of
-.B SCM_RIGHTS
-messages described in
-.BR unix (7),
-but differs in the following respects:
-.IP \[bu] 3
-In order to pass a file descriptor using an
-.B SCM_RIGHTS
-message,
-the two processes must first establish a UNIX domain socket connection.
-.IP \[bu]
-The use of
-.B SCM_RIGHTS
-requires cooperation on the part of the process whose
-file descriptor is being copied.
-By contrast, no such cooperation is necessary when using
-.BR pidfd_getfd ().
-.IP \[bu]
-The ability to use
-.BR pidfd_getfd ()
-is restricted by a
-.B PTRACE_MODE_ATTACH_REALCREDS
-ptrace access mode check.
-.SH SEE ALSO
-.BR clone3 (2),
-.BR dup (2),
-.BR kcmp (2),
-.BR pidfd_open (2)
diff --git a/man2/pidfd_open.2 b/man2/pidfd_open.2
deleted file mode 100644
index 02c741f2b..000000000
--- a/man2/pidfd_open.2
+++ /dev/null
@@ -1,269 +0,0 @@
-.\" Copyright (c) 2019 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH pidfd_open 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pidfd_open \- obtain a file descriptor that refers to a process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_pidfd_open, pid_t " pid ", unsigned int " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR pidfd_open (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR pidfd_open ()
-system call creates a file descriptor that refers to
-the process whose PID is specified in
-.IR pid .
-The file descriptor is returned as the function result;
-the close-on-exec flag is set on the file descriptor.
-.P
-The
-.I flags
-argument either has the value 0, or contains the following flag:
-.TP
-.BR PIDFD_NONBLOCK " (since Linux 5.10)"
-.\" commit 4da9af0014b51c8b015ed8c622440ef28912efe6
-Return a nonblocking file descriptor.
-If the process referred to by the file descriptor has not yet terminated,
-then an attempt to wait on the file descriptor using
-.BR waitid (2)
-will immediately return the error
-.B EAGAIN
-rather than blocking.
-.SH RETURN VALUE
-On success,
-.BR pidfd_open ()
-returns a file descriptor (a nonnegative integer).
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I flags
-is not valid.
-.TP
-.B EINVAL
-.I pid
-is not valid.
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached
-(see the description of
-.B RLIMIT_NOFILE
-in
-.BR getrlimit (2)).
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENODEV
-The anonymous inode filesystem is not available in this kernel.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ESRCH
-The process specified by
-.I pid
-does not exist.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.3.
-.SH NOTES
-The following code sequence can be used to obtain a file descriptor
-for the child of
-.BR fork (2):
-.P
-.in +4n
-.EX
-pid = fork();
-if (pid > 0) { /* If parent */
- pidfd = pidfd_open(pid, 0);
- ...
-}
-.EE
-.in
-.P
-Even if the child has already terminated by the time of the
-.BR pidfd_open ()
-call, its PID will not have been recycled and the returned
-file descriptor will refer to the resulting zombie process.
-Note, however, that this is guaranteed only if the following
-conditions hold true:
-.IP \[bu] 3
-the disposition of
-.B SIGCHLD
-has not been explicitly set to
-.B SIG_IGN
-(see
-.BR sigaction (2));
-.IP \[bu]
-the
-.B SA_NOCLDWAIT
-flag was not specified while establishing a handler for
-.B SIGCHLD
-or while setting the disposition of that signal to
-.B SIG_DFL
-(see
-.BR sigaction (2));
-and
-.IP \[bu]
-the zombie process was not reaped elsewhere in the program
-(e.g., either by an asynchronously executed signal handler or by
-.BR wait (2)
-or similar in another thread).
-.P
-If any of these conditions does not hold,
-then the child process (along with a PID file descriptor that refers to it)
-should instead be created using
-.BR clone (2)
-with the
-.B CLONE_PIDFD
-flag.
-.\"
-.SS Use cases for PID file descriptors
-A PID file descriptor returned by
-.BR pidfd_open ()
-(or by
-.BR clone (2)
-with the
-.B CLONE_PIDFD
-flag) can be used for the following purposes:
-.IP \[bu] 3
-The
-.BR pidfd_send_signal (2)
-system call can be used to send a signal to the process referred to by
-a PID file descriptor.
-.IP \[bu]
-A PID file descriptor can be monitored using
-.BR poll (2),
-.BR select (2),
-and
-.BR epoll (7).
-When the process that it refers to terminates,
-these interfaces indicate the file descriptor as readable.
-Note, however, that in the current implementation,
-nothing can be read from the file descriptor
-.RB ( read (2)
-on the file descriptor fails with the error
-.BR EINVAL ).
-.IP \[bu]
-If the PID file descriptor refers to a child of the calling process,
-then it can be waited on using
-.BR waitid (2).
-.IP \[bu]
-The
-.BR pidfd_getfd (2)
-system call can be used to obtain a duplicate of a file descriptor
-of another process referred to by a PID file descriptor.
-.IP \[bu]
-A PID file descriptor can be used as the argument of
-.BR setns (2)
-in order to move into one or more of the same namespaces as the process
-referred to by the file descriptor.
-.IP \[bu]
-A PID file descriptor can be used as the argument of
-.BR process_madvise (2)
-in order to provide advice on the memory usage patterns of the process
-referred to by the file descriptor.
-.P
-The
-.BR pidfd_open ()
-system call is the preferred way of obtaining a PID file descriptor
-for an already existing process.
-The alternative is to obtain a file descriptor by opening a
-.IR /proc/ pid
-directory.
-However, the latter technique is possible only if the
-.BR proc (5)
-filesystem is mounted;
-furthermore, the file descriptor obtained in this way is
-.I not
-pollable and can't be waited on with
-.BR waitid (2).
-.SH EXAMPLES
-The program below opens a PID file descriptor for the
-process whose PID is specified as its command-line argument.
-It then uses
-.BR poll (2)
-to monitor the file descriptor for process exit, as indicated by an
-.B POLLIN
-event.
-.\"
-.SS Program source
-\&
-.\" SRC BEGIN (pidfd_open.c)
-.EX
-#define _GNU_SOURCE
-#include <poll.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-static int
-pidfd_open(pid_t pid, unsigned int flags)
-{
- return syscall(SYS_pidfd_open, pid, flags);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int pidfd, ready;
- struct pollfd pollfd;
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <pid>\en", argv[0]);
- exit(EXIT_SUCCESS);
- }
-\&
- pidfd = pidfd_open(atoi(argv[1]), 0);
- if (pidfd == \-1) {
- perror("pidfd_open");
- exit(EXIT_FAILURE);
- }
-\&
- pollfd.fd = pidfd;
- pollfd.events = POLLIN;
-\&
- ready = poll(&pollfd, 1, \-1);
- if (ready == \-1) {
- perror("poll");
- exit(EXIT_FAILURE);
- }
-\&
- printf("Events (%#x): POLLIN is %sset\en", pollfd.revents,
- (pollfd.revents & POLLIN) ? "" : "not ");
-\&
- close(pidfd);
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR clone (2),
-.BR kill (2),
-.BR pidfd_getfd (2),
-.BR pidfd_send_signal (2),
-.BR poll (2),
-.BR process_madvise (2),
-.BR select (2),
-.BR setns (2),
-.BR waitid (2),
-.BR epoll (7)
diff --git a/man2/pidfd_send_signal.2 b/man2/pidfd_send_signal.2
deleted file mode 100644
index 205808425..000000000
--- a/man2/pidfd_send_signal.2
+++ /dev/null
@@ -1,240 +0,0 @@
-.\" Copyright (c) 2019 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH pidfd_send_signal 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pidfd_send_signal \- send a signal to a process specified by a file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/signal.h>" " /* Definition of " SIG* " constants */"
-.BR "#include <signal.h>" " /* Definition of " SI_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_pidfd_send_signal, int " pidfd ", int " sig ,
-.BI " siginfo_t *_Nullable " info ", unsigned int " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR pidfd_send_signal (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR pidfd_send_signal ()
-system call sends the signal
-.I sig
-to the target process referred to by
-.IR pidfd ,
-a PID file descriptor that refers to a process.
-.\" See the very detailed commit message for kernel commit
-.\" 3eb39f47934f9d5a3027fe00d906a45fe3a15fad
-.P
-If the
-.I info
-argument points to a
-.I siginfo_t
-buffer, that buffer should be populated as described in
-.BR rt_sigqueueinfo (2).
-.P
-If the
-.I info
-argument is a null pointer,
-this is equivalent to specifying a pointer to a
-.I siginfo_t
-buffer whose fields match the values that are
-implicitly supplied when a signal is sent using
-.BR kill (2):
-.P
-.PD 0
-.IP \[bu] 3
-.I si_signo
-is set to the signal number;
-.IP \[bu]
-.I si_errno
-is set to 0;
-.IP \[bu]
-.I si_code
-is set to
-.BR SI_USER ;
-.IP \[bu]
-.I si_pid
-is set to the caller's PID; and
-.IP \[bu]
-.I si_uid
-is set to the caller's real user ID.
-.PD
-.P
-The calling process must either be in the same PID namespace as the
-process referred to by
-.IR pidfd ,
-or be in an ancestor of that namespace.
-.P
-The
-.I flags
-argument is reserved for future use;
-currently, this argument must be specified as 0.
-.SH RETURN VALUE
-On success,
-.BR pidfd_send_signal ()
-returns 0.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I pidfd
-is not a valid PID file descriptor.
-.TP
-.B EINVAL
-.I sig
-is not a valid signal.
-.TP
-.B EINVAL
-The calling process is not in a PID namespace from which it can
-send a signal to the target process.
-.TP
-.B EINVAL
-.I flags
-is not 0.
-.TP
-.B EPERM
-The calling process does not have permission to send the signal
-to the target process.
-.TP
-.B EPERM
-.I pidfd
-doesn't refer to the calling process, and
-.I info.si_code
-is invalid (see
-.BR rt_sigqueueinfo (2)).
-.TP
-.B ESRCH
-The target process does not exist
-(i.e., it has terminated and been waited on).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.1.
-.SH NOTES
-.SS PID file descriptors
-The
-.I pidfd
-argument is a PID file descriptor,
-a file descriptor that refers to a process.
-Such a file descriptor can be obtained in any of the following ways:
-.IP \[bu] 3
-by opening a
-.IR /proc/ pid
-directory;
-.IP \[bu]
-using
-.BR pidfd_open (2);
-or
-.IP \[bu]
-via the PID file descriptor that is returned by a call to
-.BR clone (2)
-or
-.BR clone3 (2)
-that specifies the
-.B CLONE_PIDFD
-flag.
-.P
-The
-.BR pidfd_send_signal ()
-system call allows the avoidance of race conditions that occur
-when using traditional interfaces (such as
-.BR kill (2))
-to signal a process.
-The problem is that the traditional interfaces specify the target process
-via a process ID (PID),
-with the result that the sender may accidentally send a signal to
-the wrong process if the originally intended target process
-has terminated and its PID has been recycled for another process.
-By contrast,
-a PID file descriptor is a stable reference to a specific process;
-if that process terminates,
-.BR pidfd_send_signal ()
-fails with the error
-.BR ESRCH .
-.SH EXAMPLES
-.\" SRC BEGIN (pidfd_send_signal.c)
-.EX
-#define _GNU_SOURCE
-#include <fcntl.h>
-#include <limits.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-static int
-pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
- unsigned int flags)
-{
- return syscall(SYS_pidfd_send_signal, pidfd, sig, info, flags);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int pidfd, sig;
- char path[PATH_MAX];
- siginfo_t info;
-\&
- if (argc != 3) {
- fprintf(stderr, "Usage: %s <pid> <signal>\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- sig = atoi(argv[2]);
-\&
- /* Obtain a PID file descriptor by opening the /proc/PID directory
- of the target process. */
-\&
- snprintf(path, sizeof(path), "/proc/%s", argv[1]);
-\&
- pidfd = open(path, O_RDONLY);
- if (pidfd == \-1) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-\&
- /* Populate a \[aq]siginfo_t\[aq] structure for use with
- pidfd_send_signal(). */
-\&
- memset(&info, 0, sizeof(info));
- info.si_code = SI_QUEUE;
- info.si_signo = sig;
- info.si_errno = 0;
- info.si_uid = getuid();
- info.si_pid = getpid();
- info.si_value.sival_int = 1234;
-\&
- /* Send the signal. */
-\&
- if (pidfd_send_signal(pidfd, sig, &info, 0) == \-1) {
- perror("pidfd_send_signal");
- exit(EXIT_FAILURE);
- }
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR clone (2),
-.BR kill (2),
-.BR pidfd_open (2),
-.BR rt_sigqueueinfo (2),
-.BR sigaction (2),
-.BR pid_namespaces (7),
-.BR signal (7)
diff --git a/man2/pipe.2 b/man2/pipe.2
deleted file mode 100644
index a440ad401..000000000
--- a/man2/pipe.2
+++ /dev/null
@@ -1,304 +0,0 @@
-.\" Copyright (C) 2005, 2008, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" (A few fragments remain from an earlier (1992) version by
-.\" Drew Eckhardt <drew@cs.colorado.edu>.)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-23 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2005, mtk: added an example program
-.\" Modified 2008-01-09, mtk: rewrote DESCRIPTION; minor additions
-.\" to EXAMPLE text.
-.\" 2008-10-10, mtk: add description of pipe2()
-.\"
-.TH pipe 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pipe, pipe2 \- create pipe
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int pipe(int " pipefd [2]);
-.P
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.BR "#include <fcntl.h>" " /* Definition of " O_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int pipe2(int " pipefd "[2], int " flags );
-.P
-/* On Alpha, IA-64, MIPS, SuperH, and SPARC/SPARC64, pipe() has the
- following prototype; see VERSIONS */
-.P
-.B #include <unistd.h>
-.P
-.B struct fd_pair {
-.B " long fd[2];"
-.B "};"
-.B struct fd_pair pipe(void);
-.fi
-.SH DESCRIPTION
-.BR pipe ()
-creates a pipe, a unidirectional data channel that
-can be used for interprocess communication.
-The array
-.I pipefd
-is used to return two file descriptors referring to the ends of the pipe.
-.I pipefd[0]
-refers to the read end of the pipe.
-.I pipefd[1]
-refers to the write end of the pipe.
-Data written to the write end of the pipe is buffered by the kernel
-until it is read from the read end of the pipe.
-For further details, see
-.BR pipe (7).
-.P
-If
-.I flags
-is 0, then
-.BR pipe2 ()
-is the same as
-.BR pipe ().
-The following values can be bitwise ORed in
-.I flags
-to obtain different behavior:
-.TP
-.B O_CLOEXEC
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the two new file descriptors.
-See the description of the same flag in
-.BR open (2)
-for reasons why this may be useful.
-.TP
-.BR O_DIRECT " (since Linux 3.4)"
-.\" commit 9883035ae7edef3ec62ad215611cb8e17d6a1a5d
-Create a pipe that performs I/O in "packet" mode.
-Each
-.BR write (2)
-to the pipe is dealt with as a separate packet, and
-.BR read (2)s
-from the pipe will read one packet at a time.
-Note the following points:
-.RS
-.IP \[bu] 3
-Writes of greater than
-.B PIPE_BUF
-bytes (see
-.BR pipe (7))
-will be split into multiple packets.
-The constant
-.B PIPE_BUF
-is defined in
-.IR <limits.h> .
-.IP \[bu]
-If a
-.BR read (2)
-specifies a buffer size that is smaller than the next packet,
-then the requested number of bytes are read,
-and the excess bytes in the packet are discarded.
-Specifying a buffer size of
-.B PIPE_BUF
-will be sufficient to read the largest possible packets
-(see the previous point).
-.IP \[bu]
-Zero-length packets are not supported.
-(A
-.BR read (2)
-that specifies a buffer size of zero is a no-op, and returns 0.)
-.RE
-.IP
-Older kernels that do not support this flag will indicate this via an
-.B EINVAL
-error.
-.IP
-Since Linux 4.5,
-.\" commit 0dbf5f20652108106cb822ad7662c786baaa03ff
-.\" FIXME . But, it is not possible to specify O_DIRECT when opening a FIFO
-it is possible to change the
-.B O_DIRECT
-setting of a pipe file descriptor using
-.BR fcntl (2).
-.TP
-.B O_NONBLOCK
-Set the
-.B O_NONBLOCK
-file status flag on the open file descriptions
-referred to by the new file descriptors.
-Using this flag saves extra calls to
-.BR fcntl (2)
-to achieve the same result.
-.TP
-.B O_NOTIFICATION_PIPE
-Since Linux 5.8,
-.\" commit c73be61cede5882f9605a852414db559c0ebedfd
-a general notification mechanism is built on top of the pipe, where
-the kernel splices notification messages into pipes opened by user space.
-The owner of the pipe has to tell the kernel which sources of events to watch
-and filters can also be applied to select
-which subevents should be placed into the pipe.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned,
-.I errno
-is set to indicate the error, and
-.I pipefd
-is left unchanged.
-.P
-On Linux (and other systems),
-.BR pipe ()
-does not modify
-.I pipefd
-on failure.
-A requirement standardizing this behavior was added in POSIX.1-2008 TC2.
-.\" http://austingroupbugs.net/view.php?id=467
-The Linux-specific
-.BR pipe2 ()
-system call
-likewise does not modify
-.I pipefd
-on failure.
-.SH ERRORS
-.TP
-.B EFAULT
-.I pipefd
-is not valid.
-.TP
-.B EINVAL
-.RB ( pipe2 ())
-Invalid value in
-.IR flags .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENFILE
-The user hard limit on memory that can be allocated for pipes
-has been reached and the caller is not privileged; see
-.BR pipe (7).
-.TP
-.B ENOPKG
-.RB ( pipe2 ())
-.B O_NOTIFICATION_PIPE
-was passed in
-.I flags
-and support for notifications
-.RB ( CONFIG_WATCH_QUEUE )
-is not compiled into the kernel.
-.SH VERSIONS
-.\" See http://math-atlas.sourceforge.net/devel/assembly/64.psabi.1.33.ps.Z
-.\" for example, section 3.2.1 "Registers and the Stack Frame".
-The System V ABI on some architectures allows the use of more than one register
-for returning multiple values; several architectures
-(namely, Alpha, IA-64, MIPS, SuperH, and SPARC/SPARC64)
-(ab)use this feature in order to implement the
-.BR pipe ()
-system call in a functional manner:
-the call doesn't take any arguments and returns
-a pair of file descriptors as the return value on success.
-The glibc
-.BR pipe ()
-wrapper function transparently deals with this.
-See
-.BR syscall (2)
-for information regarding the registers used for storing the second file descriptor.
-.SH STANDARDS
-.TP
-.BR pipe ()
-POSIX.1-2008.
-.TP
-.BR pipe2 ()
-Linux.
-.SH HISTORY
-.TP
-.BR pipe ()
-POSIX.1-2001.
-.TP
-.BR pipe2 ()
-Linux 2.6.27,
-glibc 2.9.
-.SH EXAMPLES
-.\" fork.2 refers to this example program.
-The following program creates a pipe, and then
-.BR fork (2)s
-to create a child process;
-the child inherits a duplicate set of file
-descriptors that refer to the same pipe.
-After the
-.BR fork (2),
-each process closes the file descriptors that it doesn't need for the pipe
-(see
-.BR pipe (7)).
-The parent then writes the string contained in the program's
-command-line argument to the pipe,
-and the child reads this string a byte at a time from the pipe
-and echoes it on standard output.
-.SS Program source
-.\" SRC BEGIN (pipe.c)
-.EX
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/wait.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int pipefd[2];
- char buf;
- pid_t cpid;
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <string>\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- if (pipe(pipefd) == \-1) {
- perror("pipe");
- exit(EXIT_FAILURE);
- }
-\&
- cpid = fork();
- if (cpid == \-1) {
- perror("fork");
- exit(EXIT_FAILURE);
- }
-\&
- if (cpid == 0) { /* Child reads from pipe */
- close(pipefd[1]); /* Close unused write end */
-\&
- while (read(pipefd[0], &buf, 1) > 0)
- write(STDOUT_FILENO, &buf, 1);
-\&
- write(STDOUT_FILENO, "\en", 1);
- close(pipefd[0]);
- _exit(EXIT_SUCCESS);
-\&
- } else { /* Parent writes argv[1] to pipe */
- close(pipefd[0]); /* Close unused read end */
- write(pipefd[1], argv[1], strlen(argv[1]));
- close(pipefd[1]); /* Reader will see EOF */
- wait(NULL); /* Wait for child */
- exit(EXIT_SUCCESS);
- }
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR fork (2),
-.BR read (2),
-.BR socketpair (2),
-.BR splice (2),
-.BR tee (2),
-.BR vmsplice (2),
-.BR write (2),
-.BR popen (3),
-.BR pipe (7)
diff --git a/man2/pipe2.2 b/man2/pipe2.2
deleted file mode 100644
index 980e2406b..000000000
--- a/man2/pipe2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/pipe.2
diff --git a/man2/pivot_root.2 b/man2/pivot_root.2
deleted file mode 100644
index 9cb5d9b25..000000000
--- a/man2/pivot_root.2
+++ /dev/null
@@ -1,409 +0,0 @@
-.\" Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" A very few fragments remain from an earlier page written by
-.\" Werner Almesberger in 2000
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH pivot_root 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pivot_root \- change the root mount
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_pivot_root, const char *" new_root \
-", const char *" put_old );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR pivot_root (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR pivot_root ()
-changes the root mount in the mount namespace of the calling process.
-More precisely, it moves the root mount to the
-directory \fIput_old\fP and makes \fInew_root\fP the new root mount.
-The calling process must have the
-.B CAP_SYS_ADMIN
-capability in the user namespace that owns the caller's mount namespace.
-.P
-.BR pivot_root ()
-changes the root directory and the current working directory
-of each process or thread in the same mount namespace to
-.I new_root
-if they point to the old root directory.
-(See also NOTES.)
-On the other hand,
-.BR pivot_root ()
-does not change the caller's current working directory
-(unless it is on the old root directory),
-and thus it should be followed by a
-\fBchdir("/")\fP call.
-.P
-The following restrictions apply:
-.IP \[bu] 3
-.I new_root
-and
-.I put_old
-must be directories.
-.IP \[bu]
-.I new_root
-and
-.I put_old
-must not be on the same mount as the current root.
-.IP \[bu]
-\fIput_old\fP must be at or underneath \fInew_root\fP;
-that is, adding some nonnegative
-number of "\fI/..\fP" suffixes to the pathname pointed to by
-.I put_old
-must yield the same directory as \fInew_root\fP.
-.IP \[bu]
-.I new_root
-must be a path to a mount point, but can't be
-.IR \[dq]/\[dq] .
-A path that is not already a mount point can be converted into one by
-bind mounting the path onto itself.
-.IP \[bu]
-The propagation type of the parent mount of
-.I new_root
-and the parent mount of the current root directory must not be
-.BR MS_SHARED ;
-similarly, if
-.I put_old
-is an existing mount point, its propagation type must not be
-.BR MS_SHARED .
-These restrictions ensure that
-.BR pivot_root ()
-never propagates any changes to another mount namespace.
-.IP \[bu]
-The current root directory must be a mount point.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-\fIerrno\fP is set to indicate the error.
-.SH ERRORS
-.BR pivot_root ()
-may fail with any of the same errors as
-.BR stat (2).
-Additionally, it may fail with the following errors:
-.TP
-.B EBUSY
-.\" Reconfirmed that the following error occurs on Linux 5.0 by
-.\" specifying 'new_root' as "/rootfs" and 'put_old' as
-.\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
-.\" itself. Of course, this is an odd situation, since a later check
-.\" in the kernel code will in any case yield EINVAL if 'new_root' is
-.\" not a mount point. However, when the system call was first added,
-.\" 'new_root' was not required to be a mount point. So, this
-.\" error is nowadays probably just the result of crufty accumulation.
-.\" This error can also occur if we bind mount "/" on top of itself
-.\" and try to specify "/" as the 'new' (again, an odd situation). So,
-.\" the EBUSY check in the kernel does still seem necessary to prevent
-.\" that case. Furthermore, the "or put_old" piece is probably
-.\" redundant text (although the check is in the kernel), since,
-.\" in another check, 'put_old' is required to be under 'new_root'.
-.I new_root
-or
-.I put_old
-is on the current root mount.
-(This error covers the pathological case where
-.I new_root
-is
-.IR \[dq]/\[dq] .)
-.TP
-.B EINVAL
-.I new_root
-is not a mount point.
-.TP
-.B EINVAL
-\fIput_old\fP is not at or underneath \fInew_root\fP.
-.TP
-.B EINVAL
-The current root directory is not a mount point
-(because of an earlier
-.BR chroot (2)).
-.TP
-.B EINVAL
-The current root is on the rootfs (initial ramfs) mount; see NOTES.
-.TP
-.B EINVAL
-Either the mount point at
-.IR new_root ,
-or the parent mount of that mount point,
-has propagation type
-.BR MS_SHARED .
-.TP
-.B EINVAL
-.I put_old
-is a mount point and has the propagation type
-.BR MS_SHARED .
-.TP
-.B ENOTDIR
-\fInew_root\fP or \fIput_old\fP is not a directory.
-.TP
-.B EPERM
-The calling process does not have the
-.B CAP_SYS_ADMIN
-capability.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.3.41.
-.SH NOTES
-A command-line interface for this system call is provided by
-.BR pivot_root (8).
-.P
-.BR pivot_root ()
-allows the caller to switch to a new root filesystem while at the same time
-placing the old root mount at a location under
-.I new_root
-from where it can subsequently be unmounted.
-(The fact that it moves all processes that have a root directory
-or current working directory on the old root directory to the
-new root frees the old root directory of users,
-allowing the old root mount to be unmounted more easily.)
-.P
-One use of
-.BR pivot_root ()
-is during system startup, when the
-system mounts a temporary root filesystem (e.g., an
-.BR initrd (4)),
-then mounts the real root filesystem, and eventually turns the latter into
-the root directory of all relevant processes and threads.
-A modern use is to set up a root filesystem during
-the creation of a container.
-.P
-The fact that
-.BR pivot_root ()
-modifies process root and current working directories in the
-manner noted in DESCRIPTION
-is necessary in order to prevent kernel threads from keeping the old
-root mount busy with their root and current working directories,
-even if they never access
-the filesystem in any way.
-.P
-The rootfs (initial ramfs) cannot be
-.BR pivot_root ()ed.
-The recommended method of changing the root filesystem in this case is
-to delete everything in rootfs, overmount rootfs with the new root, attach
-.IR stdin / stdout / stderr
-to the new
-.IR /dev/console ,
-and exec the new
-.BR init (1).
-Helper programs for this process exist; see
-.BR switch_root (8).
-.\"
-.SS pivot_root(\[dq].\[dq], \[dq].\[dq])
-.I new_root
-and
-.I put_old
-may be the same directory.
-In particular, the following sequence allows a pivot-root operation
-without needing to create and remove a temporary directory:
-.P
-.in +4n
-.EX
-chdir(new_root);
-pivot_root(".", ".");
-umount2(".", MNT_DETACH);
-.EE
-.in
-.P
-This sequence succeeds because the
-.BR pivot_root ()
-call stacks the old root mount point
-on top of the new root mount point at
-.IR / .
-At that point, the calling process's root directory and current
-working directory refer to the new root mount point
-.RI ( new_root ).
-During the subsequent
-.BR umount2 ()
-call, resolution of
-.I \[dq].\[dq]
-starts with
-.I new_root
-and then moves up the list of mounts stacked at
-.IR / ,
-with the result that the old root mount point is unmounted.
-.\"
-.SS Historical notes
-For many years, this manual page carried the following text:
-.RS
-.P
-.BR pivot_root ()
-may or may not change the current root and the current
-working directory of any processes or threads which use the old
-root directory.
-The caller of
-.BR pivot_root ()
-must ensure that processes with root or current working directory
-at the old root operate correctly in either case.
-An easy way to ensure this is to change their
-root and current working directory to \fInew_root\fP before invoking
-.BR pivot_root ().
-.RE
-.P
-This text, written before the system call implementation was
-even finalized in the kernel, was probably intended to warn users
-at that time that the implementation might change before final release.
-However, the behavior stated in DESCRIPTION
-has remained consistent since this system call
-was first implemented and will not change now.
-.SH EXAMPLES
-.\" FIXME
-.\" Would it be better, because simpler, to use unshare(2)
-.\" rather than clone(2) in the example below?
-The program below demonstrates the use of
-.BR pivot_root ()
-inside a mount namespace that is created using
-.BR clone (2).
-After pivoting to the root directory named in the program's
-first command-line argument, the child created by
-.BR clone (2)
-then executes the program named in the remaining command-line arguments.
-.P
-We demonstrate the program by creating a directory that will serve as
-the new root filesystem and placing a copy of the (statically linked)
-.BR busybox (1)
-executable in that directory.
-.P
-.in +4n
-.EX
-$ \fBmkdir /tmp/rootfs\fP
-$ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
-319459 /tmp/rootfs
-$ \fBcp $(which busybox) /tmp/rootfs\fP
-$ \fBPS1=\[aq]bbsh$ \[aq] sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
-bbsh$ \fBPATH=/\fP
-bbsh$ \fBbusybox ln busybox ln\fP
-bbsh$ \fBln busybox echo\fP
-bbsh$ \fBln busybox ls\fP
-bbsh$ \fBls\fP
-busybox echo ln ls
-bbsh$ \fBls \-id /\fP # Compare with inode number above
-319459 /
-bbsh$ \fBecho \[aq]hello world\[aq]\fP
-hello world
-.EE
-.in
-.SS Program source
-\&
-.P
-.\" SRC BEGIN (pivot_root.c)
-.EX
-/* pivot_root_demo.c */
-\&
-#define _GNU_SOURCE
-#include <err.h>
-#include <limits.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/mount.h>
-#include <sys/stat.h>
-#include <sys/syscall.h>
-#include <sys/wait.h>
-#include <unistd.h>
-\&
-static int
-pivot_root(const char *new_root, const char *put_old)
-{
- return syscall(SYS_pivot_root, new_root, put_old);
-}
-\&
-#define STACK_SIZE (1024 * 1024)
-\&
-static int /* Startup function for cloned child */
-child(void *arg)
-{
- char path[PATH_MAX];
- char **args = arg;
- char *new_root = args[0];
- const char *put_old = "/oldrootfs";
-\&
- /* Ensure that \[aq]new_root\[aq] and its parent mount don\[aq]t have
- shared propagation (which would cause pivot_root() to
- return an error), and prevent propagation of mount
- events to the initial mount namespace. */
-\&
- if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
- err(EXIT_FAILURE, "mount\-MS_PRIVATE");
-\&
- /* Ensure that \[aq]new_root\[aq] is a mount point. */
-\&
- if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
- err(EXIT_FAILURE, "mount\-MS_BIND");
-\&
- /* Create directory to which old root will be pivoted. */
-\&
- snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
- if (mkdir(path, 0777) == \-1)
- err(EXIT_FAILURE, "mkdir");
-\&
- /* And pivot the root filesystem. */
-\&
- if (pivot_root(new_root, path) == \-1)
- err(EXIT_FAILURE, "pivot_root");
-\&
- /* Switch the current working directory to "/". */
-\&
- if (chdir("/") == \-1)
- err(EXIT_FAILURE, "chdir");
-\&
- /* Unmount old root and remove mount point. */
-\&
- if (umount2(put_old, MNT_DETACH) == \-1)
- perror("umount2");
- if (rmdir(put_old) == \-1)
- perror("rmdir");
-\&
- /* Execute the command specified in argv[1]... */
-\&
- execv(args[1], &args[1]);
- err(EXIT_FAILURE, "execv");
-}
-\&
-int
-main(int argc, char *argv[])
-{
- char *stack;
-\&
- /* Create a child process in a new mount namespace. */
-\&
- stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
- if (stack == MAP_FAILED)
- err(EXIT_FAILURE, "mmap");
-\&
- if (clone(child, stack + STACK_SIZE,
- CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
- err(EXIT_FAILURE, "clone");
-\&
- /* Parent falls through to here; wait for child. */
-\&
- if (wait(NULL) == \-1)
- err(EXIT_FAILURE, "wait");
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR chdir (2),
-.BR chroot (2),
-.BR mount (2),
-.BR stat (2),
-.BR initrd (4),
-.BR mount_namespaces (7),
-.BR pivot_root (8),
-.BR switch_root (8)
diff --git a/man2/pkey_alloc.2 b/man2/pkey_alloc.2
deleted file mode 100644
index 0f706e7da..000000000
--- a/man2/pkey_alloc.2
+++ /dev/null
@@ -1,115 +0,0 @@
-.\" Copyright (C) 2016 Intel Corporation
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH pkey_alloc 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pkey_alloc, pkey_free \- allocate or free a protection key
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sys/mman.h>
-.P
-.BI "int pkey_alloc(unsigned int " flags ", unsigned int " access_rights ");"
-.BI "int pkey_free(int " pkey ");"
-.fi
-.SH DESCRIPTION
-.BR pkey_alloc ()
-allocates a protection key (pkey) and allows it to be passed to
-.BR pkey_mprotect (2).
-.P
-The
-.BR pkey_alloc ()
-.I flags
-is reserved for future use and currently must always be specified as 0.
-.P
-The
-.BR pkey_alloc ()
-.I access_rights
-argument may contain zero or more disable operations:
-.TP
-.B PKEY_DISABLE_ACCESS
-Disable all data access to memory covered by the returned protection key.
-.TP
-.B PKEY_DISABLE_WRITE
-Disable write access to memory covered by the returned protection key.
-.P
-.BR pkey_free ()
-frees a protection key and makes it available for later
-allocations.
-After a protection key has been freed, it may no longer be used
-in any protection-key-related operations.
-.P
-An application should not call
-.BR pkey_free ()
-on any protection key which has been assigned to an address
-range by
-.BR pkey_mprotect (2)
-and which is still in use.
-The behavior in this case is undefined and may result in an error.
-.SH RETURN VALUE
-On success,
-.BR pkey_alloc ()
-returns a positive protection key value.
-On success,
-.BR pkey_free ()
-returns zero.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.IR pkey ,
-.IR flags ,
-or
-.I access_rights
-is invalid.
-.TP
-.B ENOSPC
-.RB ( pkey_alloc ())
-All protection keys available for the current process have
-been allocated.
-The number of keys available is architecture-specific and
-implementation-specific and may be reduced by kernel-internal use
-of certain keys.
-There are currently 15 keys available to user programs on x86.
-.IP
-This error will also be returned if the processor or operating system
-does not support protection keys.
-Applications should always be prepared to handle this error, since
-factors outside of the application's control can reduce the number
-of available pkeys.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 4.9,
-glibc 2.27.
-.SH NOTES
-.BR pkey_alloc ()
-is always safe to call regardless of whether or not the operating system
-supports protection keys.
-It can be used in lieu of any other mechanism for detecting pkey support
-and will simply fail with the error
-.B ENOSPC
-if the operating system has no pkey support.
-.P
-The kernel guarantees that the contents of the hardware rights
-register (PKRU) will be preserved only for allocated protection
-keys.
-Any time a key is unallocated (either before the first call
-returning that key from
-.BR pkey_alloc ()
-or after it is freed via
-.BR pkey_free ()),
-the kernel may make arbitrary changes to the parts of the
-rights register affecting access to that key.
-.SH EXAMPLES
-See
-.BR pkeys (7).
-.SH SEE ALSO
-.BR pkey_mprotect (2),
-.BR pkeys (7)
diff --git a/man2/pkey_free.2 b/man2/pkey_free.2
deleted file mode 100644
index 5b524cbbf..000000000
--- a/man2/pkey_free.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/pkey_alloc.2
diff --git a/man2/pkey_mprotect.2 b/man2/pkey_mprotect.2
deleted file mode 100644
index b4f9309a3..000000000
--- a/man2/pkey_mprotect.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/mprotect.2
diff --git a/man2/poll.2 b/man2/poll.2
deleted file mode 100644
index f1a5a9795..000000000
--- a/man2/poll.2
+++ /dev/null
@@ -1,649 +0,0 @@
-.\" Copyright (C) 2006, 2019 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Additions from Richard Gooch <rgooch@atnf.CSIRO.AU> and aeb, 971207
-.\" 2006-03-13, mtk, Added ppoll() + various other rewordings
-.\" 2006-07-01, mtk, Added POLLRDHUP + various other wording and
-.\" formatting changes.
-.\"
-.TH poll 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-poll, ppoll \- wait for some event on a file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <poll.h>
-.P
-.BI "int poll(struct pollfd *" fds ", nfds_t " nfds ", int " timeout );
-.P
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <poll.h>
-.P
-.BI "int ppoll(struct pollfd *" fds ", nfds_t " nfds ,
-.BI " const struct timespec *_Nullable " tmo_p ,
-.BI " const sigset_t *_Nullable " sigmask );
-.fi
-.SH DESCRIPTION
-.BR poll ()
-performs a similar task to
-.BR select (2):
-it waits for one of a set of file descriptors to become ready
-to perform I/O.
-The Linux-specific
-.BR epoll (7)
-API performs a similar task, but offers features beyond those found in
-.BR poll ().
-.P
-The set of file descriptors to be monitored is specified in the
-.I fds
-argument, which is an array of structures of the following form:
-.P
-.in +4n
-.EX
-struct pollfd {
- int fd; /* file descriptor */
- short events; /* requested events */
- short revents; /* returned events */
-};
-.EE
-.in
-.P
-The caller should specify the number of items in the
-.I fds
-array in
-.IR nfds .
-.P
-The field
-.I fd
-contains a file descriptor for an open file.
-If this field is negative, then the corresponding
-.I events
-field is ignored and the
-.I revents
-field returns zero.
-(This provides an easy way of ignoring a
-file descriptor for a single
-.BR poll ()
-call: simply set the
-.I fd
-field to its bitwise complement.)
-.P
-The field
-.I events
-is an input parameter, a bit mask specifying the events the application
-is interested in for the file descriptor
-.IR fd .
-This field may be specified as zero,
-in which case the only events that can be returned in
-.I revents
-are
-.BR POLLHUP ,
-.BR POLLERR ,
-and
-.B POLLNVAL
-(see below).
-.P
-The field
-.I revents
-is an output parameter, filled by the kernel with the events that
-actually occurred.
-The bits returned in
-.I revents
-can include any of those specified in
-.IR events ,
-or one of the values
-.BR POLLERR ,
-.BR POLLHUP ,
-or
-.BR POLLNVAL .
-(These three bits are meaningless in the
-.I events
-field, and will be set in the
-.I revents
-field whenever the corresponding condition is true.)
-.P
-If none of the events requested (and no error) has occurred for any
-of the file descriptors, then
-.BR poll ()
-blocks until one of the events occurs.
-.P
-The
-.I timeout
-argument specifies the number of milliseconds that
-.BR poll ()
-should block waiting for a file descriptor to become ready.
-The call will block until either:
-.IP \[bu] 3
-a file descriptor becomes ready;
-.IP \[bu]
-the call is interrupted by a signal handler; or
-.IP \[bu]
-the timeout expires.
-.P
-Being "ready" means that the requested operation will not block; thus,
-.BR poll ()ing
-regular files,
-block devices,
-and other files with no reasonable polling semantic
-.I always
-returns instantly as ready to read and write.
-.P
-Note that the
-.I timeout
-interval will be rounded up to the system clock granularity,
-and kernel scheduling delays mean that the blocking interval
-may overrun by a small amount.
-Specifying a negative value in
-.I timeout
-means an infinite timeout.
-Specifying a
-.I timeout
-of zero causes
-.BR poll ()
-to return immediately, even if no file descriptors are ready.
-.P
-The bits that may be set/returned in
-.I events
-and
-.I revents
-are defined in \fI<poll.h>\fP:
-.TP
-.B POLLIN
-There is data to read.
-.TP
-.B POLLPRI
-There is some exceptional condition on the file descriptor.
-Possibilities include:
-.RS
-.IP \[bu] 3
-There is out-of-band data on a TCP socket (see
-.BR tcp (7)).
-.IP \[bu]
-A pseudoterminal master in packet mode has seen a state change on the slave
-(see
-.BR ioctl_tty (2)).
-.IP \[bu]
-A
-.I cgroup.events
-file has been modified (see
-.BR cgroups (7)).
-.RE
-.TP
-.B POLLOUT
-Writing is now possible, though a write larger than the available space
-in a socket or pipe will still block (unless
-.B O_NONBLOCK
-is set).
-.TP
-.BR POLLRDHUP " (since Linux 2.6.17)"
-Stream socket peer closed connection,
-or shut down writing half of connection.
-The
-.B _GNU_SOURCE
-feature test macro must be defined
-(before including
-.I any
-header files)
-in order to obtain this definition.
-.TP
-.B POLLERR
-Error condition (only returned in
-.IR revents ;
-ignored in
-.IR events ).
-This bit is also set for a file descriptor referring
-to the write end of a pipe when the read end has been closed.
-.TP
-.B POLLHUP
-Hang up (only returned in
-.IR revents ;
-ignored in
-.IR events ).
-Note that when reading from a channel such as a pipe or a stream socket,
-this event merely indicates that the peer closed its end of the channel.
-Subsequent reads from the channel will return 0 (end of file)
-only after all outstanding data in the channel has been consumed.
-.TP
-.B POLLNVAL
-Invalid request:
-.I fd
-not open (only returned in
-.IR revents ;
-ignored in
-.IR events ).
-.P
-When compiling with
-.B _XOPEN_SOURCE
-defined, one also has the following,
-which convey no further information beyond the bits listed above:
-.TP
-.B POLLRDNORM
-Equivalent to
-.BR POLLIN .
-.TP
-.B POLLRDBAND
-Priority band data can be read (generally unused on Linux).
-.\" POLLRDBAND is used in the DECnet protocol.
-.TP
-.B POLLWRNORM
-Equivalent to
-.BR POLLOUT .
-.TP
-.B POLLWRBAND
-Priority data may be written.
-.P
-Linux also knows about, but does not use,
-.BR POLLMSG .
-.SS ppoll()
-The relationship between
-.BR poll ()
-and
-.BR ppoll ()
-is analogous to the relationship between
-.BR select (2)
-and
-.BR pselect (2):
-like
-.BR pselect (2),
-.BR ppoll ()
-allows an application to safely wait until either a file descriptor
-becomes ready or until a signal is caught.
-.P
-Other than the difference in the precision of the
-.I timeout
-argument, the following
-.BR ppoll ()
-call:
-.P
-.in +4n
-.EX
-ready = ppoll(&fds, nfds, tmo_p, &sigmask);
-.EE
-.in
-.P
-is nearly equivalent to
-.I atomically
-executing the following calls:
-.P
-.in +4n
-.EX
-sigset_t origmask;
-int timeout;
-\&
-timeout = (tmo_p == NULL) ? \-1 :
- (tmo_p\->tv_sec * 1000 + tmo_p\->tv_nsec / 1000000);
-pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
-ready = poll(&fds, nfds, timeout);
-pthread_sigmask(SIG_SETMASK, &origmask, NULL);
-.EE
-.in
-.P
-The above code segment is described as
-.I nearly
-equivalent because whereas a negative
-.I timeout
-value for
-.BR poll ()
-is interpreted as an infinite timeout, a negative value expressed in
-.I *tmo_p
-results in an error from
-.BR ppoll ().
-.P
-See the description of
-.BR pselect (2)
-for an explanation of why
-.BR ppoll ()
-is necessary.
-.P
-If the
-.I sigmask
-argument is specified as NULL, then
-no signal mask manipulation is performed
-(and thus
-.BR ppoll ()
-differs from
-.BR poll ()
-only in the precision of the
-.I timeout
-argument).
-.P
-The
-.I tmo_p
-argument specifies an upper limit on the amount of time that
-.BR ppoll ()
-will block.
-This argument is a pointer to a
-.BR timespec (3)
-structure.
-.P
-If
-.I tmo_p
-is specified as NULL, then
-.BR ppoll ()
-can block indefinitely.
-.SH RETURN VALUE
-On success,
-.BR poll ()
-returns a nonnegative value which is the number of elements in the
-.I fds
-array whose
-.I revents
-fields have been set to a nonzero value (indicating an event or an error).
-A return value of zero indicates that the system call timed out
-before any file descriptors became ready.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I fds
-points outside the process's accessible address space.
-The array given as argument was not contained in the calling program's
-address space.
-.TP
-.B EINTR
-A signal occurred before any requested event; see
-.BR signal (7).
-.TP
-.B EINVAL
-The
-.I nfds
-value exceeds the
-.B RLIMIT_NOFILE
-value.
-.TP
-.B EINVAL
-.RB ( ppoll ())
-The timeout value expressed in
-.I *tmo_p
-is invalid (negative).
-.TP
-.B ENOMEM
-Unable to allocate memory for kernel data structures.
-.SH VERSIONS
-On some other UNIX systems,
-.\" Darwin, according to a report by Jeremy Sequoia, relayed by Josh Triplett
-.BR poll ()
-can fail with the error
-.B EAGAIN
-if the system fails to allocate kernel-internal resources, rather than
-.B ENOMEM
-as Linux does.
-POSIX permits this behavior.
-Portable programs may wish to check for
-.B EAGAIN
-and loop, just as with
-.BR EINTR .
-.P
-Some implementations define the nonstandard constant
-.B INFTIM
-with the value \-1 for use as a
-.I timeout
-for
-.BR poll ().
-This constant is not provided in glibc.
-.SS C library/kernel differences
-The Linux
-.BR ppoll ()
-system call modifies its
-.I tmo_p
-argument.
-However, the glibc wrapper function hides this behavior
-by using a local variable for the timeout argument that
-is passed to the system call.
-Thus, the glibc
-.BR ppoll ()
-function does not modify its
-.I tmo_p
-argument.
-.P
-The raw
-.BR ppoll ()
-system call has a fifth argument,
-.IR "size_t sigsetsize" ,
-which specifies the size in bytes of the
-.I sigmask
-argument.
-The glibc
-.BR ppoll ()
-wrapper function specifies this argument as a fixed value
-(equal to
-.IR sizeof(kernel_sigset_t) ).
-See
-.BR sigprocmask (2)
-for a discussion on the differences between the kernel and the libc
-notion of the sigset.
-.SH STANDARDS
-.TP
-.BR poll ()
-POSIX.1-2008.
-.TP
-.BR ppoll ()
-Linux.
-.\" FIXME .
-.\" ppoll() is proposed for inclusion in POSIX:
-.\" https://www.austingroupbugs.net/view.php?id=1263
-.\" NetBSD 3.0 has a pollts() which is like Linux ppoll().
-.SH HISTORY
-.TP
-.BR poll ()
-POSIX.1-2001.
-Linux 2.1.23.
-.IP
-On older kernels that lack this system call,
-the glibc
-.BR poll ()
-wrapper function provides emulation using
-.BR select (2).
-.TP
-.BR ppoll ()
-Linux 2.6.16,
-glibc 2.4.
-.SH NOTES
-The operation of
-.BR poll ()
-and
-.BR ppoll ()
-is not affected by the
-.B O_NONBLOCK
-flag.
-.P
-For a discussion of what may happen if a file descriptor being monitored by
-.BR poll ()
-is closed in another thread, see
-.BR select (2).
-.SH BUGS
-See the discussion of spurious readiness notifications under the
-BUGS section of
-.BR select (2).
-.SH EXAMPLES
-The program below opens each of the files named in its command-line
-arguments and monitors the resulting file descriptors for readiness to read
-.RB ( POLLIN ).
-The program loops, repeatedly using
-.BR poll ()
-to monitor the file descriptors,
-printing the number of ready file descriptors on return.
-For each ready file descriptor, the program:
-.IP \[bu] 3
-displays the returned
-.I revents
-field in a human-readable form;
-.IP \[bu]
-if the file descriptor is readable, reads some data from it,
-and displays that data on standard output; and
-.IP \[bu]
-if the file descriptor was not readable,
-but some other event occurred (presumably
-.BR POLLHUP ),
-closes the file descriptor.
-.P
-Suppose we run the program in one terminal, asking it to open a FIFO:
-.P
-.in +4n
-.EX
-$ \fBmkfifo myfifo\fP
-$ \fB./poll_input myfifo\fP
-.EE
-.in
-.P
-In a second terminal window, we then open the FIFO for writing,
-write some data to it, and close the FIFO:
-.P
-.in +4n
-.EX
-$ \fBecho aaaaabbbbbccccc > myfifo\fP
-.EE
-.in
-.P
-In the terminal where we are running the program, we would then see:
-.P
-.in +4n
-.EX
-Opened "myfifo" on fd 3
-About to poll()
-Ready: 1
- fd=3; events: POLLIN POLLHUP
- read 10 bytes: aaaaabbbbb
-About to poll()
-Ready: 1
- fd=3; events: POLLIN POLLHUP
- read 6 bytes: ccccc
-\&
-About to poll()
-Ready: 1
- fd=3; events: POLLHUP
- closing fd 3
-All file descriptors closed; bye
-.EE
-.in
-.P
-In the above output, we see that
-.BR poll ()
-returned three times:
-.IP \[bu] 3
-On the first return, the bits returned in the
-.I revents
-field were
-.BR POLLIN ,
-indicating that the file descriptor is readable, and
-.BR POLLHUP ,
-indicating that the other end of the FIFO has been closed.
-The program then consumed some of the available input.
-.IP \[bu]
-The second return from
-.BR poll ()
-also indicated
-.B POLLIN
-and
-.BR POLLHUP ;
-the program then consumed the last of the available input.
-.IP \[bu]
-On the final return,
-.BR poll ()
-indicated only
-.B POLLHUP
-on the FIFO,
-at which point the file descriptor was closed and the program terminated.
-.\"
-.SS Program source
-\&
-.\" SRC BEGIN (poll_input.c)
-.EX
-/* poll_input.c
-\&
- Licensed under GNU General Public License v2 or later.
-*/
-#include <fcntl.h>
-#include <poll.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
- } while (0)
-\&
-int
-main(int argc, char *argv[])
-{
- int ready;
- char buf[10];
- nfds_t num_open_fds, nfds;
- ssize_t s;
- struct pollfd *pfds;
-\&
- if (argc < 2) {
- fprintf(stderr, "Usage: %s file...\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- num_open_fds = nfds = argc \- 1;
- pfds = calloc(nfds, sizeof(struct pollfd));
- if (pfds == NULL)
- errExit("malloc");
-\&
- /* Open each file on command line, and add it to \[aq]pfds\[aq] array. */
-\&
- for (nfds_t j = 0; j < nfds; j++) {
- pfds[j].fd = open(argv[j + 1], O_RDONLY);
- if (pfds[j].fd == \-1)
- errExit("open");
-\&
- printf("Opened \e"%s\e" on fd %d\en", argv[j + 1], pfds[j].fd);
-\&
- pfds[j].events = POLLIN;
- }
-\&
- /* Keep calling poll() as long as at least one file descriptor is
- open. */
-\&
- while (num_open_fds > 0) {
- printf("About to poll()\en");
- ready = poll(pfds, nfds, \-1);
- if (ready == \-1)
- errExit("poll");
-\&
- printf("Ready: %d\en", ready);
-\&
- /* Deal with array returned by poll(). */
-\&
- for (nfds_t j = 0; j < nfds; j++) {
- if (pfds[j].revents != 0) {
- printf(" fd=%d; events: %s%s%s\en", pfds[j].fd,
- (pfds[j].revents & POLLIN) ? "POLLIN " : "",
- (pfds[j].revents & POLLHUP) ? "POLLHUP " : "",
- (pfds[j].revents & POLLERR) ? "POLLERR " : "");
-\&
- if (pfds[j].revents & POLLIN) {
- s = read(pfds[j].fd, buf, sizeof(buf));
- if (s == \-1)
- errExit("read");
- printf(" read %zd bytes: %.*s\en",
- s, (int) s, buf);
- } else { /* POLLERR | POLLHUP */
- printf(" closing fd %d\en", pfds[j].fd);
- if (close(pfds[j].fd) == \-1)
- errExit("close");
- num_open_fds\-\-;
- }
- }
- }
- }
-\&
- printf("All file descriptors closed; bye\en");
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR restart_syscall (2),
-.BR select (2),
-.BR select_tut (2),
-.BR timespec (3),
-.BR epoll (7),
-.BR time (7)
diff --git a/man2/posix_fadvise.2 b/man2/posix_fadvise.2
deleted file mode 100644
index 7984f7989..000000000
--- a/man2/posix_fadvise.2
+++ /dev/null
@@ -1,227 +0,0 @@
-.\" Copyright 2003 Abhijit Menon-Sen <ams@wiw.org>
-.\" and Copyright (C) 2010, 2015, 2017 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2005-04-08 mtk, noted kernel version and added BUGS
-.\" 2010-10-09, mtk, document arm_fadvise64_64()
-.\"
-.TH posix_fadvise 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-posix_fadvise \- predeclare an access pattern for file data
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <fcntl.h>
-.P
-.BI "int posix_fadvise(int " fd ", off_t " offset ", off_t " len \
-", int " advice ");"
-.fi
-.P
-.ad l
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR posix_fadvise ():
-.nf
- _POSIX_C_SOURCE >= 200112L
-.fi
-.SH DESCRIPTION
-Programs can use
-.BR posix_fadvise ()
-to announce an intention to access
-file data in a specific pattern in the future, thus allowing the kernel
-to perform appropriate optimizations.
-.P
-The \fIadvice\fP applies to a (not necessarily existent) region starting
-at \fIoffset\fP and extending for \fIlen\fP bytes (or until the end of
-the file if \fIlen\fP is 0) within the file referred to by \fIfd\fP.
-The \fIadvice\fP is not binding;
-it merely constitutes an expectation on behalf of
-the application.
-.P
-Permissible values for \fIadvice\fP include:
-.TP
-.B POSIX_FADV_NORMAL
-Indicates that the application has no advice to give about its access
-pattern for the specified data.
-If no advice is given for an open file,
-this is the default assumption.
-.TP
-.B POSIX_FADV_SEQUENTIAL
-The application expects to access the specified data sequentially (with
-lower offsets read before higher ones).
-.TP
-.B POSIX_FADV_RANDOM
-The specified data will be accessed in random order.
-.TP
-.B POSIX_FADV_NOREUSE
-The specified data will be accessed only once.
-.IP
-Before Linux 2.6.18, \fBPOSIX_FADV_NOREUSE\fP had the
-same semantics as \fBPOSIX_FADV_WILLNEED\fP.
-This was probably a bug; since Linux 2.6.18, this flag is a no-op.
-.TP
-.B POSIX_FADV_WILLNEED
-The specified data will be accessed in the near future.
-.IP
-\fBPOSIX_FADV_WILLNEED\fP initiates a
-nonblocking read of the specified region into the page cache.
-The amount of data read may be decreased by the kernel depending
-on virtual memory load.
-(A few megabytes will usually be fully satisfied,
-and more is rarely useful.)
-.TP
-.B POSIX_FADV_DONTNEED
-The specified data will not be accessed in the near future.
-.IP
-\fBPOSIX_FADV_DONTNEED\fP attempts to free cached pages associated with
-the specified region.
-This is useful, for example, while streaming large
-files.
-A program may periodically request the kernel to free cached data
-that has already been used, so that more useful cached pages are not
-discarded instead.
-.IP
-Requests to discard partial pages are ignored.
-Preserving needed data is preferable to discarding unneeded data.
-If the application requires that data be considered for discarding, then
-.I offset
-and
-.I len
-must be page-aligned.
-.IP
-The implementation
-.I may
-attempt to write back dirty pages in the specified region,
-but this is not guaranteed.
-Any unwritten dirty pages will not be freed.
-If the application wishes to ensure that dirty pages will be released,
-it should call
-.BR fsync (2)
-or
-.BR fdatasync (2)
-first.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, an error number is returned.
-.SH ERRORS
-.TP
-.B EBADF
-The \fIfd\fP argument was not a valid file descriptor.
-.TP
-.B EINVAL
-An invalid value was specified for \fIadvice\fP.
-.TP
-.B ESPIPE
-The specified file descriptor refers to a pipe or FIFO.
-.RB ( ESPIPE
-is the error specified by POSIX,
-but before Linux 2.6.16,
-.\" commit 87ba81dba431232548ce29d5d224115d0c2355ac
-Linux returned
-.B EINVAL
-in this case.)
-.SH VERSIONS
-Under Linux, \fBPOSIX_FADV_NORMAL\fP sets the readahead window to the
-default size for the backing device; \fBPOSIX_FADV_SEQUENTIAL\fP doubles
-this size, and \fBPOSIX_FADV_RANDOM\fP disables file readahead entirely.
-These changes affect the entire file, not just the specified region
-(but other open file handles to the same file are unaffected).
-.SS C library/kernel differences
-The name of the wrapper function in the C library is
-.BR posix_fadvise ().
-The underlying system call is called
-.BR fadvise64 ()
-(or, on some architectures,
-.BR fadvise64_64 ());
-the difference between the two is that the former system call
-assumes that the type of the \fIlen\fP argument is \fIsize_t\fP,
-while the latter expects \fIloff_t\fP there.
-.SS Architecture-specific variants
-Some architectures require
-64-bit arguments to be aligned in a suitable pair of registers (see
-.BR syscall (2)
-for further detail).
-On such architectures, the call signature of
-.BR posix_fadvise ()
-shown in the SYNOPSIS would force
-a register to be wasted as padding between the
-.I fd
-and
-.I offset
-arguments.
-Therefore, these architectures define a version of the
-system call that orders the arguments suitably,
-but is otherwise exactly the same as
-.BR posix_fadvise ().
-.P
-For example, since Linux 2.6.14, ARM has the following system call:
-.P
-.in +4n
-.EX
-.BI "long arm_fadvise64_64(int " fd ", int " advice ,
-.BI " loff_t " offset ", loff_t " len );
-.EE
-.in
-.P
-These architecture-specific details are generally
-hidden from applications by the glibc
-.BR posix_fadvise ()
-wrapper function,
-which invokes the appropriate architecture-specific system call.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.P
-Kernel support first appeared in Linux 2.5.60;
-the underlying system call is called
-.BR fadvise64 ().
-.\" of fadvise64_64()
-Library support has been provided since glibc 2.2,
-via the wrapper function
-.BR posix_fadvise ().
-.P
-Since Linux 3.18,
-.\" commit d3ac21cacc24790eb45d735769f35753f5b56ceb
-support for the underlying system call is optional,
-depending on the setting of the
-.B CONFIG_ADVISE_SYSCALLS
-configuration option.
-.P
-The type of the
-.I len
-argument was changed from
-.I size_t
-to
-.I off_t
-in POSIX.1-2001 TC1.
-.SH NOTES
-The contents of the kernel buffer cache can be cleared via the
-.I /proc/sys/vm/drop_caches
-interface described in
-.BR proc (5).
-.P
-One can obtain a snapshot of which pages of a file are resident
-in the buffer cache by opening a file, mapping it with
-.BR mmap (2),
-and then applying
-.BR mincore (2)
-to the mapping.
-.SH BUGS
-Before Linux 2.6.6, if
-.I len
-was specified as 0, then this was interpreted literally as "zero bytes",
-rather than as meaning "all bytes through to the end of the file".
-.SH SEE ALSO
-.BR fincore (1),
-.BR mincore (2),
-.BR readahead (2),
-.BR sync_file_range (2),
-.BR posix_fallocate (3),
-.BR posix_madvise (3)
diff --git a/man2/ppoll.2 b/man2/ppoll.2
deleted file mode 100644
index 227cd0e47..000000000
--- a/man2/ppoll.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/poll.2
diff --git a/man2/prctl.2 b/man2/prctl.2
deleted file mode 100644
index fad855832..000000000
--- a/man2/prctl.2
+++ /dev/null
@@ -1,2577 +0,0 @@
-.\" Copyright (C) 1998 Andries Brouwer (aeb@cwi.nl)
-.\" and Copyright (C) 2002, 2006, 2008, 2012, 2013, 2015 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright Guillem Jover <guillem@hadrons.org>
-.\" and Copyright (C) 2010 Andi Kleen <andi@firstfloor.org>
-.\" and Copyright (C) 2012 Cyrill Gorcunov <gorcunov@openvz.org>
-.\" and Copyright (C) 2014 Dave Hansen / Intel
-.\" and Copyright (c) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
-.\" and Copyright (c) 2018 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-.\" and Copyright (c) 2020 Dave Martin <Dave.Martin@arm.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Thu Nov 11 04:19:42 MET 1999, aeb: added PR_GET_PDEATHSIG
-.\" Modified 27 Jun 02, Michael Kerrisk
-.\" Added PR_SET_DUMPABLE, PR_GET_DUMPABLE,
-.\" PR_SET_KEEPCAPS, PR_GET_KEEPCAPS
-.\" Modified 2006-08-30 Guillem Jover <guillem@hadrons.org>
-.\" Updated Linux versions where the options were introduced.
-.\" Added PR_SET_TIMING, PR_GET_TIMING, PR_SET_NAME, PR_GET_NAME,
-.\" PR_SET_UNALIGN, PR_GET_UNALIGN, PR_SET_FPEMU, PR_GET_FPEMU,
-.\" PR_SET_FPEXC, PR_GET_FPEXC
-.\" 2008-04-29 Serge Hallyn, Document PR_CAPBSET_READ and PR_CAPBSET_DROP
-.\" 2008-06-13 Erik Bosman, <ejbosman@cs.vu.nl>
-.\" Document PR_GET_TSC and PR_SET_TSC.
-.\" 2008-06-15 mtk, Document PR_SET_SECCOMP, PR_GET_SECCOMP
-.\" 2009-10-03 Andi Kleen, document PR_MCE_KILL
-.\" 2012-04 Cyrill Gorcunov, Document PR_SET_MM
-.\" 2012-04-25 Michael Kerrisk, Document PR_TASK_PERF_EVENTS_DISABLE and
-.\" PR_TASK_PERF_EVENTS_ENABLE
-.\" 2012-09-20 Kees Cook, update PR_SET_SECCOMP for mode 2
-.\" 2012-09-20 Kees Cook, document PR_SET_NO_NEW_PRIVS, PR_GET_NO_NEW_PRIVS
-.\" 2012-10-25 Michael Kerrisk, Document PR_SET_TIMERSLACK and
-.\" PR_GET_TIMERSLACK
-.\" 2013-01-10 Kees Cook, document PR_SET_PTRACER
-.\" 2012-02-04 Michael Kerrisk, document PR_{SET,GET}_CHILD_SUBREAPER
-.\" 2014-11-10 Dave Hansen, document PR_MPX_{EN,DIS}ABLE_MANAGEMENT
-.\"
-.\"
-.TH prctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-prctl \- operations on a process or thread
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/prctl.h>
-.P
-.BI "int prctl(int " op ", ..."
-.BI " \fR/*\fP unsigned long " arg2 ", unsigned long " arg3 ,
-.BI " unsigned long " arg4 ", unsigned long " arg5 " \fR*/\fP );"
-.fi
-.SH DESCRIPTION
-.BR prctl ()
-manipulates various aspects of the behavior
-of the calling thread or process.
-.P
-Note that careless use of some
-.BR prctl ()
-operations can confuse the user-space run-time environment,
-so these operations should be used with care.
-.P
-.BR prctl ()
-is called with a first argument describing what to do
-(with values defined in \fI<linux/prctl.h>\fP), and further
-arguments with a significance depending on the first one.
-The first argument can be:
-.\"
-.\" prctl PR_CAP_AMBIENT
-.TP
-.BR PR_CAP_AMBIENT " (since Linux 4.3)"
-.\" commit 58319057b7847667f0c9585b9de0e8932b0fdb08
-Reads or changes the ambient capability set of the calling thread,
-according to the value of
-.IR arg2 ,
-which must be one of the following:
-.RS
-.\"
-.TP
-.B PR_CAP_AMBIENT_RAISE
-The capability specified in
-.I arg3
-is added to the ambient set.
-The specified capability must already be present in
-both the permitted and the inheritable sets of the process.
-This operation is not permitted if the
-.B SECBIT_NO_CAP_AMBIENT_RAISE
-securebit is set.
-.TP
-.B PR_CAP_AMBIENT_LOWER
-The capability specified in
-.I arg3
-is removed from the ambient set.
-.TP
-.B PR_CAP_AMBIENT_IS_SET
-The
-.BR prctl ()
-call returns 1 if the capability in
-.I arg3
-is in the ambient set and 0 if it is not.
-.TP
-.B PR_CAP_AMBIENT_CLEAR_ALL
-All capabilities will be removed from the ambient set.
-This operation requires setting
-.I arg3
-to zero.
-.RE
-.IP
-In all of the above operations,
-.I arg4
-and
-.I arg5
-must be specified as 0.
-.IP
-Higher-level interfaces layered on top of the above operations are
-provided in the
-.BR libcap (3)
-library in the form of
-.BR cap_get_ambient (3),
-.BR cap_set_ambient (3),
-and
-.BR cap_reset_ambient (3).
-.\" prctl PR_CAPBSET_READ
-.TP
-.BR PR_CAPBSET_READ " (since Linux 2.6.25)"
-Return (as the function result) 1 if the capability specified in
-.I arg2
-is in the calling thread's capability bounding set,
-or 0 if it is not.
-(The capability constants are defined in
-.IR <linux/capability.h> .)
-The capability bounding set dictates
-whether the process can receive the capability through a
-file's permitted capability set on a subsequent call to
-.BR execve (2).
-.IP
-If the capability specified in
-.I arg2
-is not valid, then the call fails with the error
-.BR EINVAL .
-.IP
-A higher-level interface layered on top of this operation is provided in the
-.BR libcap (3)
-library in the form of
-.BR cap_get_bound (3).
-.\" prctl PR_CAPBSET_DROP
-.TP
-.BR PR_CAPBSET_DROP " (since Linux 2.6.25)"
-If the calling thread has the
-.B CAP_SETPCAP
-capability within its user namespace, then drop the capability specified by
-.I arg2
-from the calling thread's capability bounding set.
-Any children of the calling thread will inherit the newly
-reduced bounding set.
-.IP
-The call fails with the error:
-.B EPERM
-if the calling thread does not have the
-.BR CAP_SETPCAP ;
-.B EINVAL
-if
-.I arg2
-does not represent a valid capability; or
-.B EINVAL
-if file capabilities are not enabled in the kernel,
-in which case bounding sets are not supported.
-.IP
-A higher-level interface layered on top of this operation is provided in the
-.BR libcap (3)
-library in the form of
-.BR cap_drop_bound (3).
-.\" prctl PR_SET_CHILD_SUBREAPER
-.TP
-.BR PR_SET_CHILD_SUBREAPER " (since Linux 3.4)"
-.\" commit ebec18a6d3aa1e7d84aab16225e87fd25170ec2b
-If
-.I arg2
-is nonzero,
-set the "child subreaper" attribute of the calling process;
-if
-.I arg2
-is zero, unset the attribute.
-.IP
-A subreaper fulfills the role of
-.BR init (1)
-for its descendant processes.
-When a process becomes orphaned
-(i.e., its immediate parent terminates),
-then that process will be reparented to
-the nearest still living ancestor subreaper.
-Subsequently, calls to
-.BR getppid (2)
-in the orphaned process will now return the PID of the subreaper process,
-and when the orphan terminates, it is the subreaper process that
-will receive a
-.B SIGCHLD
-signal and will be able to
-.BR wait (2)
-on the process to discover its termination status.
-.IP
-The setting of the "child subreaper" attribute
-is not inherited by children created by
-.BR fork (2)
-and
-.BR clone (2).
-The setting is preserved across
-.BR execve (2).
-.IP
-Establishing a subreaper process is useful in session management frameworks
-where a hierarchical group of processes is managed by a subreaper process
-that needs to be informed when one of the processes\[em]for example,
-a double-forked daemon\[em]terminates
-(perhaps so that it can restart that process).
-Some
-.BR init (1)
-frameworks (e.g.,
-.BR systemd (1))
-employ a subreaper process for similar reasons.
-.\" prctl PR_GET_CHILD_SUBREAPER
-.TP
-.BR PR_GET_CHILD_SUBREAPER " (since Linux 3.4)"
-Return the "child subreaper" setting of the caller,
-in the location pointed to by
-.IR "(int\~*) arg2" .
-.\" prctl PR_SET_DUMPABLE
-.TP
-.BR PR_SET_DUMPABLE " (since Linux 2.3.20)"
-Set the state of the "dumpable" attribute,
-which determines whether core dumps are produced for the calling process
-upon delivery of a signal whose default behavior is to produce a core dump.
-.IP
-Up to and including Linux 2.6.12,
-.I arg2
-must be either 0
-.RB ( SUID_DUMP_DISABLE ,
-process is not dumpable) or 1
-.RB ( SUID_DUMP_USER ,
-process is dumpable).
-Between Linux 2.6.13 and Linux 2.6.17,
-.\" commit abf75a5033d4da7b8a7e92321d74021d1fcfb502
-the value 2 was also permitted,
-which caused any binary which normally would not be dumped
-to be dumped readable by root only;
-for security reasons, this feature has been removed.
-.\" See http://marc.theaimsgroup.com/?l=linux-kernel&m=115270289030630&w=2
-.\" Subject: Fix prctl privilege escalation (CVE-2006-2451)
-.\" From: Marcel Holtmann <marcel () holtmann ! org>
-.\" Date: 2006-07-12 11:12:00
-(See also the description of
-.I /proc/sys/fs/\:suid_dumpable
-in
-.BR proc (5).)
-.IP
-Normally, the "dumpable" attribute is set to 1.
-However, it is reset to the current value contained in the file
-.I /proc/sys/fs/\:suid_dumpable
-(which by default has the value 0),
-in the following circumstances:
-.\" See kernel/cred.c::commit_creds() (Linux 3.18 sources)
-.RS
-.IP \[bu] 3
-The process's effective user or group ID is changed.
-.IP \[bu]
-The process's filesystem user or group ID is changed (see
-.BR credentials (7)).
-.IP \[bu]
-The process executes
-.RB ( execve (2))
-a set-user-ID or set-group-ID program, resulting in a change
-of either the effective user ID or the effective group ID.
-.IP \[bu]
-The process executes
-.RB ( execve (2))
-a program that has file capabilities (see
-.BR capabilities (7)),
-.\" See kernel/cred.c::commit_creds()
-but only if the permitted capabilities
-gained exceed those already permitted for the process.
-.\" Also certain namespace operations;
-.RE
-.IP
-Processes that are not dumpable cannot be attached via
-.BR ptrace (2)
-.BR PTRACE_ATTACH ;
-see
-.BR ptrace (2)
-for further details.
-.IP
-If a process is not dumpable,
-the ownership of files in the process's
-.IR /proc/ pid
-directory is affected as described in
-.BR proc (5).
-.\" prctl PR_GET_DUMPABLE
-.TP
-.BR PR_GET_DUMPABLE " (since Linux 2.3.20)"
-Return (as the function result) the current state of the calling
-process's dumpable attribute.
-.\" Since Linux 2.6.13, the dumpable flag can have the value 2,
-.\" but in Linux 2.6.13 PR_GET_DUMPABLE simply returns 1 if the dumpable
-.\" flag has a nonzero value. This was fixed in Linux 2.6.14.
-.\" prctl PR_SET_ENDIAN
-.TP
-.BR PR_SET_ENDIAN " (since Linux 2.6.18, PowerPC only)"
-Set the endian-ness of the calling process to the value given
-in \fIarg2\fP, which should be one of the following:
-.\" Respectively 0, 1, 2
-.BR PR_ENDIAN_BIG ,
-.BR PR_ENDIAN_LITTLE ,
-or
-.B PR_ENDIAN_PPC_LITTLE
-(PowerPC pseudo little endian).
-.\" prctl PR_GET_ENDIAN
-.TP
-.BR PR_GET_ENDIAN " (since Linux 2.6.18, PowerPC only)"
-Return the endian-ness of the calling process,
-in the location pointed to by
-.IR "(int\~*) arg2" .
-.\" prctl PR_SET_FP_MODE
-.TP
-.BR PR_SET_FP_MODE " (since Linux 4.0, only on MIPS)"
-.\" commit 9791554b45a2acc28247f66a5fd5bbc212a6b8c8
-On the MIPS architecture,
-user-space code can be built using an ABI which permits linking
-with code that has more restrictive floating-point (FP) requirements.
-For example, user-space code may be built to target the O32 FPXX ABI
-and linked with code built for either one of the more restrictive
-FP32 or FP64 ABIs.
-When more restrictive code is linked in,
-the overall requirement for the process is to use the more
-restrictive floating-point mode.
-.IP
-Because the kernel has no means of knowing in advance
-which mode the process should be executed in,
-and because these restrictions can
-change over the lifetime of the process, the
-.B PR_SET_FP_MODE
-operation is provided to allow control of the floating-point mode
-from user space.
-.IP
-.\" https://dmz-portal.mips.com/wiki/MIPS_O32_ABI_-_FR0_and_FR1_Interlinking
-The
-.I (unsigned int) arg2
-argument is a bit mask describing the floating-point mode used:
-.RS
-.TP
-.B PR_FP_MODE_FR
-When this bit is
-.I unset
-(so called
-.BR FR=0 " or " FR0
-mode), the 32 floating-point registers are 32 bits wide,
-and 64-bit registers are represented as a pair of registers
-(even- and odd-numbered,
-with the even-numbered register containing the lower 32 bits,
-and the odd-numbered register containing the higher 32 bits).
-.IP
-When this bit is
-.I set
-(on supported hardware),
-the 32 floating-point registers are 64 bits wide (so called
-.BR FR=1 " or " FR1
-mode).
-Note that modern MIPS implementations (MIPS R6 and newer) support
-.B FR=1
-mode only.
-.IP
-Applications that use the O32 FP32 ABI can operate only when this bit is
-.I unset
-.RB ( FR=0 ;
-or they can be used with FRE enabled, see below).
-Applications that use the O32 FP64 ABI
-(and the O32 FP64A ABI, which exists to
-provide the ability to operate with existing FP32 code; see below)
-can operate only when this bit is
-.I set
-.RB ( FR=1 ).
-Applications that use the O32 FPXX ABI can operate with either
-.B FR=0
-or
-.BR FR=1 .
-.TP
-.B PR_FP_MODE_FRE
-Enable emulation of 32-bit floating-point mode.
-When this mode is enabled,
-it emulates 32-bit floating-point operations
-by raising a reserved-instruction exception
-on every instruction that uses 32-bit formats and
-the kernel then handles the instruction in software.
-(The problem lies in the discrepancy of handling odd-numbered registers
-which are the high 32 bits of 64-bit registers with even numbers in
-.B FR=0
-mode and the lower 32-bit parts of odd-numbered 64-bit registers in
-.B FR=1
-mode.)
-Enabling this bit is necessary when code with the O32 FP32 ABI should operate
-with code with the compatible O32 FPXX or O32 FP64A ABIs (which require
-.B FR=1
-FPU mode) or when it is executed on newer hardware (MIPS R6 onwards)
-which lacks
-.B FR=0
-mode support when a binary with the FP32 ABI is used.
-.IP
-Note that this mode makes sense only when the FPU is in 64-bit mode
-.RB ( FR=1 ).
-.IP
-Note that the use of emulation inherently has a significant performance hit
-and should be avoided if possible.
-.RE
-.IP
-In the N32/N64 ABI, 64-bit floating-point mode is always used,
-so FPU emulation is not required and the FPU always operates in
-.B FR=1
-mode.
-.IP
-This operation is mainly intended for use by the dynamic linker
-.RB ( ld.so (8)).
-.IP
-The arguments
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.\" prctl PR_GET_FP_MODE
-.TP
-.BR PR_GET_FP_MODE " (since Linux 4.0, only on MIPS)"
-Return (as the function result)
-the current floating-point mode (see the description of
-.B PR_SET_FP_MODE
-for details).
-.IP
-On success,
-the call returns a bit mask which represents the current floating-point mode.
-.IP
-The arguments
-.IR arg2 ,
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.\" prctl PR_SET_FPEMU
-.TP
-.BR PR_SET_FPEMU " (since Linux 2.4.18, 2.5.9, only on ia64)"
-Set floating-point emulation control bits to \fIarg2\fP.
-Pass
-.B PR_FPEMU_NOPRINT
-to silently emulate floating-point operation accesses, or
-.B PR_FPEMU_SIGFPE
-to not emulate floating-point operations and send
-.B SIGFPE
-instead.
-.\" prctl PR_GET_FPEMU
-.TP
-.BR PR_GET_FPEMU " (since Linux 2.4.18, 2.5.9, only on ia64)"
-Return floating-point emulation control bits,
-in the location pointed to by
-.IR "(int\~*) arg2" .
-.\" prctl PR_SET_FPEXC
-.TP
-.BR PR_SET_FPEXC " (since Linux 2.4.21, 2.5.32, only on PowerPC)"
-Set floating-point exception mode to \fIarg2\fP.
-Pass \fBPR_FP_EXC_SW_ENABLE\fP to use FPEXC for FP exception enables,
-\fBPR_FP_EXC_DIV\fP for floating-point divide by zero,
-\fBPR_FP_EXC_OVF\fP for floating-point overflow,
-\fBPR_FP_EXC_UND\fP for floating-point underflow,
-\fBPR_FP_EXC_RES\fP for floating-point inexact result,
-\fBPR_FP_EXC_INV\fP for floating-point invalid operation,
-\fBPR_FP_EXC_DISABLED\fP for FP exceptions disabled,
-\fBPR_FP_EXC_NONRECOV\fP for async nonrecoverable exception mode,
-\fBPR_FP_EXC_ASYNC\fP for async recoverable exception mode,
-\fBPR_FP_EXC_PRECISE\fP for precise exception mode.
-.\" prctl PR_GET_FPEXC
-.TP
-.BR PR_GET_FPEXC " (since Linux 2.4.21, 2.5.32, only on PowerPC)"
-Return floating-point exception mode,
-in the location pointed to by
-.IR "(int\~*) arg2" .
-.\" prctl PR_SET_IO_FLUSHER
-.TP
-.BR PR_SET_IO_FLUSHER " (since Linux 5.6)"
-If a user process is involved in the block layer or filesystem I/O path,
-and can allocate memory while processing I/O requests, it must set
-\fIarg2\fP to 1.
-This will put the process in the IO_FLUSHER state,
-which allows it special treatment to make progress when allocating memory.
-If \fIarg2\fP is 0, the process will clear the IO_FLUSHER state, and
-the default behavior will be used.
-.IP
-The calling process must have the
-.B CAP_SYS_RESOURCE
-capability.
-.IP
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-must be zero.
-.IP
-The IO_FLUSHER state is inherited by a child process created via
-.BR fork (2)
-and is preserved across
-.BR execve (2).
-.IP
-Examples of IO_FLUSHER applications are FUSE daemons, SCSI device
-emulation daemons, and daemons that perform error handling like multipath
-path recovery applications.
-.\" prctl PR_GET_IO_FLUSHER
-.TP
-.BR PR_GET_IO_FLUSHER " (since Linux 5.6)"
-Return (as the function result) the IO_FLUSHER state of the caller.
-A value of 1 indicates that the caller is in the IO_FLUSHER state;
-0 indicates that the caller is not in the IO_FLUSHER state.
-.IP
-The calling process must have the
-.B CAP_SYS_RESOURCE
-capability.
-.IP
-.IR arg2 ,
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-must be zero.
-.\" prctl PR_SET_KEEPCAPS
-.TP
-.BR PR_SET_KEEPCAPS " (since Linux 2.2.18)"
-Set the state of the calling thread's "keep capabilities" flag.
-The effect of this flag is described in
-.BR capabilities (7).
-.I arg2
-must be either 0 (clear the flag)
-or 1 (set the flag).
-The "keep capabilities" value will be reset to 0 on subsequent calls to
-.BR execve (2).
-.\" prctl PR_GET_KEEPCAPS
-.TP
-.BR PR_GET_KEEPCAPS " (since Linux 2.2.18)"
-Return (as the function result) the current state of the calling thread's
-"keep capabilities" flag.
-See
-.BR capabilities (7)
-for a description of this flag.
-.\" prctl PR_MCE_KILL
-.TP
-.BR PR_MCE_KILL " (since Linux 2.6.32)"
-Set the machine check memory corruption kill policy for the calling thread.
-If
-.I arg2
-is
-.BR PR_MCE_KILL_CLEAR ,
-clear the thread memory corruption kill policy and use the system-wide default.
-(The system-wide default is defined by
-.IR /proc/sys/vm/memory_failure_early_kill ;
-see
-.BR proc (5).)
-If
-.I arg2
-is
-.BR PR_MCE_KILL_SET ,
-use a thread-specific memory corruption kill policy.
-In this case,
-.I arg3
-defines whether the policy is
-.I early kill
-.RB ( PR_MCE_KILL_EARLY ),
-.I late kill
-.RB ( PR_MCE_KILL_LATE ),
-or the system-wide default
-.RB ( PR_MCE_KILL_DEFAULT ).
-Early kill means that the thread receives a
-.B SIGBUS
-signal as soon as hardware memory corruption is detected inside
-its address space.
-In late kill mode, the process is killed only when it accesses a corrupted page.
-See
-.BR sigaction (2)
-for more information on the
-.B SIGBUS
-signal.
-The policy is inherited by children.
-The remaining unused
-.BR prctl ()
-arguments must be zero for future compatibility.
-.\" prctl PR_MCE_KILL_GET
-.TP
-.BR PR_MCE_KILL_GET " (since Linux 2.6.32)"
-Return (as the function result)
-the current per-process machine check kill policy.
-All unused
-.BR prctl ()
-arguments must be zero.
-.\" prctl PR_SET_MM
-.TP
-.BR PR_SET_MM " (since Linux 3.3)"
-.\" commit 028ee4be34a09a6d48bdf30ab991ae933a7bc036
-Modify certain kernel memory map descriptor fields
-of the calling process.
-Usually these fields are set by the kernel and dynamic loader (see
-.BR ld.so (8)
-for more information) and a regular application should not use this feature.
-However, there are cases, such as self-modifying programs,
-where a program might find it useful to change its own memory map.
-.IP
-The calling process must have the
-.B CAP_SYS_RESOURCE
-capability.
-The value in
-.I arg2
-is one of the options below, while
-.I arg3
-provides a new value for the option.
-The
-.I arg4
-and
-.I arg5
-arguments must be zero if unused.
-.IP
-Before Linux 3.10,
-.\" commit 52b3694157e3aa6df871e283115652ec6f2d31e0
-this feature is available only if the kernel is built with the
-.B CONFIG_CHECKPOINT_RESTORE
-option enabled.
-.RS
-.TP
-.B PR_SET_MM_START_CODE
-Set the address above which the program text can run.
-The corresponding memory area must be readable and executable,
-but not writable or shareable (see
-.BR mprotect (2)
-and
-.BR mmap (2)
-for more information).
-.TP
-.B PR_SET_MM_END_CODE
-Set the address below which the program text can run.
-The corresponding memory area must be readable and executable,
-but not writable or shareable.
-.TP
-.B PR_SET_MM_START_DATA
-Set the address above which initialized and
-uninitialized (bss) data are placed.
-The corresponding memory area must be readable and writable,
-but not executable or shareable.
-.TP
-.B PR_SET_MM_END_DATA
-Set the address below which initialized and
-uninitialized (bss) data are placed.
-The corresponding memory area must be readable and writable,
-but not executable or shareable.
-.TP
-.B PR_SET_MM_START_STACK
-Set the start address of the stack.
-The corresponding memory area must be readable and writable.
-.TP
-.B PR_SET_MM_START_BRK
-Set the address above which the program heap can be expanded with a
-.BR brk (2)
-call.
-The address must be greater than the ending address of
-the current program data segment.
-In addition, the combined size of the resulting heap and
-the size of the data segment can't exceed the
-.B RLIMIT_DATA
-resource limit (see
-.BR setrlimit (2)).
-.TP
-.B PR_SET_MM_BRK
-Set the current
-.BR brk (2)
-value.
-The requirements for the address are the same as for the
-.B PR_SET_MM_START_BRK
-option.
-.P
-The following options are available since Linux 3.5.
-.\" commit fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7
-.TP
-.B PR_SET_MM_ARG_START
-Set the address above which the program command line is placed.
-.TP
-.B PR_SET_MM_ARG_END
-Set the address below which the program command line is placed.
-.TP
-.B PR_SET_MM_ENV_START
-Set the address above which the program environment is placed.
-.TP
-.B PR_SET_MM_ENV_END
-Set the address below which the program environment is placed.
-.IP
-The address passed with
-.BR PR_SET_MM_ARG_START ,
-.BR PR_SET_MM_ARG_END ,
-.BR PR_SET_MM_ENV_START ,
-and
-.B PR_SET_MM_ENV_END
-should belong to a process stack area.
-Thus, the corresponding memory area must be readable, writable, and
-(depending on the kernel configuration) have the
-.B MAP_GROWSDOWN
-attribute set (see
-.BR mmap (2)).
-.TP
-.B PR_SET_MM_AUXV
-Set a new auxiliary vector.
-The
-.I arg3
-argument should provide the address of the vector.
-The
-.I arg4
-argument is the size of the vector.
-.TP
-.B PR_SET_MM_EXE_FILE
-.\" commit b32dfe377102ce668775f8b6b1461f7ad428f8b6
-Supersede the
-.IR /proc/ pid /exe
-symbolic link with a new one pointing to a new executable file
-identified by the file descriptor provided in the
-.I arg3
-argument.
-The file descriptor should be obtained with a regular
-.BR open (2)
-call.
-.IP
-To change the symbolic link, one needs to unmap all existing
-executable memory areas, including those created by the kernel itself
-(for example the kernel usually creates at least one executable
-memory area for the ELF
-.I .text
-section).
-.IP
-In Linux 4.9 and earlier, the
-.\" commit 3fb4afd9a504c2386b8435028d43283216bf588e
-.B PR_SET_MM_EXE_FILE
-operation can be performed only once in a process's lifetime;
-attempting to perform the operation a second time results in the error
-.BR EPERM .
-This restriction was enforced for security reasons that were subsequently
-deemed specious,
-and the restriction was removed in Linux 4.10 because some
-user-space applications needed to perform this operation more than once.
-.P
-The following options are available since Linux 3.18.
-.\" commit f606b77f1a9e362451aca8f81d8f36a3a112139e
-.TP
-.B PR_SET_MM_MAP
-Provides one-shot access to all the addresses by passing in a
-.I struct prctl_mm_map
-(as defined in \fI<linux/prctl.h>\fP).
-The
-.I arg4
-argument should provide the size of the struct.
-.IP
-This feature is available only if the kernel is built with the
-.B CONFIG_CHECKPOINT_RESTORE
-option enabled.
-.TP
-.B PR_SET_MM_MAP_SIZE
-Returns the size of the
-.I struct prctl_mm_map
-the kernel expects.
-This allows user space to find a compatible struct.
-The
-.I arg4
-argument should be a pointer to an unsigned int.
-.IP
-This feature is available only if the kernel is built with the
-.B CONFIG_CHECKPOINT_RESTORE
-option enabled.
-.RE
-.\" prctl PR_SET_VMA
-.TP
-.BR PR_SET_VMA " (since Linux 5.17)"
-.\" Commit 9a10064f5625d5572c3626c1516e0bebc6c9fe9b
-Sets an attribute specified in
-.I arg2
-for virtual memory areas starting from the address specified in
-.I arg3
-and spanning the size specified in
-.IR arg4 .
-.I arg5
-specifies the value of the attribute to be set.
-.IP
-Note that assigning an attribute to a virtual memory area
-might prevent it from being merged with adjacent virtual memory areas
-due to the difference in that attribute's value.
-.IP
-Currently,
-.I arg2
-must be one of:
-.RS
-.TP
-.B PR_SET_VMA_ANON_NAME
-Set a name for anonymous virtual memory areas.
-.I arg5
-should be a pointer to a null-terminated string containing the name.
-The name length including null byte cannot exceed 80 bytes.
-If
-.I arg5
-is NULL, the name of the appropriate anonymous virtual memory areas
-will be reset.
-The name can contain only printable ASCII characters (including space),
-except \[aq][\[aq], \[aq]]\[aq], \[aq]\e\[aq], \[aq]$\[aq], and \[aq]\[ga]\[aq].
-.RE
-.\" prctl PR_MPX_ENABLE_MANAGEMENT
-.TP
-.B PR_MPX_ENABLE_MANAGEMENT
-.TQ
-.BR PR_MPX_DISABLE_MANAGEMENT " (since Linux 3.19, removed in Linux 5.4; only on x86)"
-.\" commit fe3d197f84319d3bce379a9c0dc17b1f48ad358c
-.\" See also http://lwn.net/Articles/582712/
-.\" See also https://gcc.gnu.org/wiki/Intel%20MPX%20support%20in%20the%20GCC%20compiler
-Enable or disable kernel management of Memory Protection eXtensions (MPX)
-bounds tables.
-The
-.IR arg2 ,
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-.\" commit e9d1b4f3c60997fe197bf0243cb4a41a44387a88
-arguments must be zero.
-.IP
-MPX is a hardware-assisted mechanism for performing bounds checking on
-pointers.
-It consists of a set of registers storing bounds information
-and a set of special instruction prefixes that tell the CPU on which
-instructions it should do bounds enforcement.
-There is a limited number of these registers and
-when there are more pointers than registers,
-their contents must be "spilled" into a set of tables.
-These tables are called "bounds tables" and the MPX
-.BR prctl ()
-operations control
-whether the kernel manages their allocation and freeing.
-.IP
-When management is enabled, the kernel will take over allocation
-and freeing of the bounds tables.
-It does this by trapping the #BR exceptions that result
-at first use of missing bounds tables and
-instead of delivering the exception to user space,
-it allocates the table and populates the bounds directory
-with the location of the new table.
-For freeing, the kernel checks to see if bounds tables are
-present for memory which is not allocated, and frees them if so.
-.IP
-Before enabling MPX management using
-.BR PR_MPX_ENABLE_MANAGEMENT ,
-the application must first have allocated a user-space buffer for
-the bounds directory and placed the location of that directory in the
-.I bndcfgu
-register.
-.IP
-These calls fail if the CPU or kernel does not support MPX.
-Kernel support for MPX is enabled via the
-.B CONFIG_X86_INTEL_MPX
-configuration option.
-You can check whether the CPU supports MPX by looking for the
-.I mpx
-CPUID bit, like with the following command:
-.IP
-.in +4n
-.EX
-cat /proc/cpuinfo | grep \[aq] mpx \[aq]
-.EE
-.in
-.IP
-A thread may not switch in or out of long (64-bit) mode while MPX is
-enabled.
-.IP
-All threads in a process are affected by these calls.
-.IP
-The child of a
-.BR fork (2)
-inherits the state of MPX management.
-During
-.BR execve (2),
-MPX management is reset to a state as if
-.B PR_MPX_DISABLE_MANAGEMENT
-had been called.
-.IP
-For further information on Intel MPX, see the kernel source file
-.IR Documentation/x86/intel_mpx.txt .
-.IP
-.\" commit f240652b6032b48ad7fa35c5e701cc4c8d697c0b
-.\" See also https://lkml.kernel.org/r/20190705175321.DB42F0AD@viggo.jf.intel.com
-Due to a lack of toolchain support,
-.BR PR_MPX_ENABLE_MANAGEMENT " and " PR_MPX_DISABLE_MANAGEMENT
-are not supported in Linux 5.4 and later.
-.\" prctl PR_SET_NAME
-.TP
-.BR PR_SET_NAME " (since Linux 2.6.9)"
-Set the name of the calling thread,
-using the value in the location pointed to by
-.IR "(char\~*) arg2" .
-The name can be up to 16 bytes long,
-.\" TASK_COMM_LEN in include/linux/sched.h
-including the terminating null byte.
-(If the length of the string, including the terminating null byte,
-exceeds 16 bytes, the string is silently truncated.)
-This is the same attribute that can be set via
-.BR pthread_setname_np (3)
-and retrieved using
-.BR pthread_getname_np (3).
-The attribute is likewise accessible via
-.IR /proc/self/task/ tid /comm
-(see
-.BR proc (5)),
-where
-.I tid
-is the thread ID of the calling thread, as returned by
-.BR gettid (2).
-.\" prctl PR_GET_NAME
-.TP
-.BR PR_GET_NAME " (since Linux 2.6.11)"
-Return the name of the calling thread,
-in the buffer pointed to by
-.IR "(char\~*) arg2" .
-The buffer should allow space for up to 16 bytes;
-the returned string will be null-terminated.
-.\" prctl PR_SET_NO_NEW_PRIVS
-.TP
-.BR PR_SET_NO_NEW_PRIVS " (since Linux 3.5)"
-Set the calling thread's
-.I no_new_privs
-attribute to the value in
-.IR arg2 .
-With
-.I no_new_privs
-set to 1,
-.BR execve (2)
-promises not to grant privileges to do anything
-that could not have been done without the
-.BR execve (2)
-call (for example,
-rendering the set-user-ID and set-group-ID mode bits,
-and file capabilities non-functional).
-Once set, the
-.I no_new_privs
-attribute cannot be unset.
-The setting of this attribute is inherited by children created by
-.BR fork (2)
-and
-.BR clone (2),
-and preserved across
-.BR execve (2).
-.IP
-Since Linux 4.10,
-the value of a thread's
-.I no_new_privs
-attribute can be viewed via the
-.I NoNewPrivs
-field in the
-.IR /proc/ pid /status
-file.
-.IP
-For more information, see the kernel source file
-.I Documentation/userspace\-api/no_new_privs.rst
-.\" commit 40fde647ccb0ae8c11d256d271e24d385eed595b
-(or
-.I Documentation/prctl/no_new_privs.txt
-before Linux 4.13).
-See also
-.BR seccomp (2).
-.\" prctl PR_GET_NO_NEW_PRIVS
-.TP
-.BR PR_GET_NO_NEW_PRIVS " (since Linux 3.5)"
-Return (as the function result) the value of the
-.I no_new_privs
-attribute for the calling thread.
-A value of 0 indicates the regular
-.BR execve (2)
-behavior.
-A value of 1 indicates
-.BR execve (2)
-will operate in the privilege-restricting mode described above.
-.\" prctl PR_PAC_RESET_KEYS
-.\" commit ba830885656414101b2f8ca88786524d4bb5e8c1
-.TP
-.BR PR_PAC_RESET_KEYS " (since Linux 5.0, only on arm64)"
-Securely reset the thread's pointer authentication keys
-to fresh random values generated by the kernel.
-.IP
-The set of keys to be reset is specified by
-.IR arg2 ,
-which must be a logical OR of zero or more of the following:
-.RS
-.TP
-.B PR_PAC_APIAKEY
-instruction authentication key A
-.TP
-.B PR_PAC_APIBKEY
-instruction authentication key B
-.TP
-.B PR_PAC_APDAKEY
-data authentication key A
-.TP
-.B PR_PAC_APDBKEY
-data authentication key B
-.TP
-.B PR_PAC_APGAKEY
-generic authentication \[lq]A\[rq] key.
-.IP
-(Yes folks, there really is no generic B key.)
-.RE
-.IP
-As a special case, if
-.I arg2
-is zero, then all the keys are reset.
-Since new keys could be added in future,
-this is the recommended way to completely wipe the existing keys
-when establishing a clean execution context.
-Note that there is no need to use
-.B PR_PAC_RESET_KEYS
-in preparation for calling
-.BR execve (2),
-since
-.BR execve (2)
-resets all the pointer authentication keys.
-.IP
-The remaining arguments
-.IR arg3 ", " arg4 ", and " arg5
-must all be zero.
-.IP
-If the arguments are invalid,
-and in particular if
-.I arg2
-contains set bits that are unrecognized
-or that correspond to a key not available on this platform,
-then the call fails with error
-.BR EINVAL .
-.IP
-.B Warning:
-Because the compiler or run-time environment
-may be using some or all of the keys,
-a successful
-.B PR_PAC_RESET_KEYS
-may crash the calling process.
-The conditions for using it safely are complex and system-dependent.
-Don't use it unless you know what you are doing.
-.IP
-For more information, see the kernel source file
-.I Documentation/arm64/pointer\-authentication.rst
-.\"commit b693d0b372afb39432e1c49ad7b3454855bc6bed
-(or
-.I Documentation/arm64/pointer\-authentication.txt
-before Linux 5.3).
-.\" prctl PR_SET_PDEATHSIG
-.TP
-.BR PR_SET_PDEATHSIG " (since Linux 2.1.57)"
-Set the parent-death signal
-of the calling process to \fIarg2\fP (either a signal value
-in the range
-.RB [ 1 ,
-.IR NSIG\~\-\~1 ],
-or
-.B 0
-to clear).
-This is the signal that the calling process will get when its
-parent dies.
-.IP
-.IR Warning :
-.\" https://bugzilla.kernel.org/show_bug.cgi?id=43300
-the "parent" in this case is considered to be the
-.I thread
-that created this process.
-In other words, the signal will be sent when that thread terminates
-(via, for example,
-.BR pthread_exit (3)),
-rather than after all of the threads in the parent process terminate.
-.IP
-The parent-death signal is sent upon subsequent termination of the parent
-thread and also upon termination of each subreaper process
-(see the description of
-.B PR_SET_CHILD_SUBREAPER
-above) to which the caller is subsequently reparented.
-If the parent thread and all ancestor subreapers have already terminated
-by the time of the
-.B PR_SET_PDEATHSIG
-operation, then no parent-death signal is sent to the caller.
-.IP
-The parent-death signal is process-directed (see
-.BR signal (7))
-and, if the child installs a handler using the
-.BR sigaction (2)
-.B SA_SIGINFO
-flag, the
-.I si_pid
-field of the
-.I siginfo_t
-argument of the handler contains the PID of the terminating parent process.
-.IP
-The parent-death signal setting is cleared for the child of a
-.BR fork (2).
-It is also
-(since Linux 2.4.36 / 2.6.23)
-.\" commit d2d56c5f51028cb9f3d800882eb6f4cbd3f9099f
-cleared when executing a set-user-ID or set-group-ID binary,
-or a binary that has associated capabilities (see
-.BR capabilities (7));
-otherwise, this value is preserved across
-.BR execve (2).
-The parent-death signal setting is also cleared upon changes to
-any of the following thread credentials:
-.\" FIXME capability changes can also trigger this; see
-.\" kernel/cred.c::commit_creds in the Linux 5.6 source.
-effective user ID, effective group ID, filesystem user ID,
-or filesystem group ID.
-.\" prctl PR_GET_PDEATHSIG
-.TP
-.BR PR_GET_PDEATHSIG " (since Linux 2.3.15)"
-Return the current value of the parent process death signal,
-in the location pointed to by
-.IR "(int\~*) arg2" .
-.\" prctl PR_SET_PTRACER
-.TP
-.BR PR_SET_PTRACER " (since Linux 3.4)"
-.\" commit 2d514487faf188938a4ee4fb3464eeecfbdcf8eb
-.\" commit bf06189e4d14641c0148bea16e9dd24943862215
-This is meaningful only when the Yama LSM is enabled and in mode 1
-("restricted ptrace", visible via
-.IR /proc/sys/kernel/yama/ptrace_scope ).
-When a "ptracer process ID" is passed in \fIarg2\fP,
-the caller is declaring that the ptracer process can
-.BR ptrace (2)
-the calling process as if it were a direct process ancestor.
-Each
-.B PR_SET_PTRACER
-operation replaces the previous "ptracer process ID".
-Employing
-.B PR_SET_PTRACER
-with
-.I arg2
-set to 0 clears the caller's "ptracer process ID".
-If
-.I arg2
-is
-.BR PR_SET_PTRACER_ANY ,
-the ptrace restrictions introduced by Yama are effectively disabled for the
-calling process.
-.IP
-For further information, see the kernel source file
-.I Documentation/admin\-guide/LSM/Yama.rst
-.\" commit 90bb766440f2147486a2acc3e793d7b8348b0c22
-(or
-.I Documentation/security/Yama.txt
-before Linux 4.13).
-.\" prctl PR_SET_SECCOMP
-.TP
-.BR PR_SET_SECCOMP " (since Linux 2.6.23)"
-.\" See http://thread.gmane.org/gmane.linux.kernel/542632
-.\" [PATCH 0 of 2] seccomp updates
-.\" andrea@cpushare.com
-Set the secure computing (seccomp) mode for the calling thread, to limit
-the available system calls.
-The more recent
-.BR seccomp (2)
-system call provides a superset of the functionality of
-.BR PR_SET_SECCOMP ,
-and is the preferred interface for new applications.
-.IP
-The seccomp mode is selected via
-.IR arg2 .
-(The seccomp constants are defined in
-.IR <linux/seccomp.h> .)
-The following values can be specified:
-.RS
-.TP
-.BR SECCOMP_MODE_STRICT " (since Linux 2.6.23)"
-See the description of
-.B SECCOMP_SET_MODE_STRICT
-in
-.BR seccomp (2).
-.IP
-This operation is available only
-if the kernel is configured with
-.B CONFIG_SECCOMP
-enabled.
-.TP
-.BR SECCOMP_MODE_FILTER " (since Linux 3.5)"
-The allowed system calls are defined by a pointer
-to a Berkeley Packet Filter passed in
-.IR arg3 .
-This argument is a pointer to
-.IR "struct sock_fprog" ;
-it can be designed to filter
-arbitrary system calls and system call arguments.
-See the description of
-.B SECCOMP_SET_MODE_FILTER
-in
-.BR seccomp (2).
-.IP
-This operation is available only
-if the kernel is configured with
-.B CONFIG_SECCOMP_FILTER
-enabled.
-.RE
-.IP
-For further details on seccomp filtering, see
-.BR seccomp (2).
-.\" prctl PR_GET_SECCOMP
-.TP
-.BR PR_GET_SECCOMP " (since Linux 2.6.23)"
-Return (as the function result)
-the secure computing mode of the calling thread.
-If the caller is not in secure computing mode, this operation returns 0;
-if the caller is in strict secure computing mode, then the
-.BR prctl ()
-call will cause a
-.B SIGKILL
-signal to be sent to the process.
-If the caller is in filter mode, and this system call is allowed by the
-seccomp filters, it returns 2; otherwise, the process is killed with a
-.B SIGKILL
-signal.
-.IP
-This operation is available only
-if the kernel is configured with
-.B CONFIG_SECCOMP
-enabled.
-.IP
-Since Linux 3.8, the
-.I Seccomp
-field of the
-.IR /proc/ pid /status
-file provides a method of obtaining the same information,
-without the risk that the process is killed; see
-.BR proc (5).
-.\" prctl PR_SET_SECUREBITS
-.TP
-.BR PR_SET_SECUREBITS " (since Linux 2.6.26)"
-Set the "securebits" flags of the calling thread to the value supplied in
-.IR arg2 .
-See
-.BR capabilities (7).
-.\" prctl PR_GET_SECUREBITS
-.TP
-.BR PR_GET_SECUREBITS " (since Linux 2.6.26)"
-Return (as the function result)
-the "securebits" flags of the calling thread.
-See
-.BR capabilities (7).
-.\" prctl PR_GET_SPECULATION_CTRL
-.TP
-.BR PR_GET_SPECULATION_CTRL " (since Linux 4.17)"
-Return (as the function result)
-the state of the speculation misfeature specified in
-.IR arg2 .
-Currently, the only permitted value for this argument is
-.B PR_SPEC_STORE_BYPASS
-(otherwise the call fails with the error
-.BR ENODEV ).
-.IP
-The return value uses bits 0-3 with the following meaning:
-.RS
-.TP
-.B PR_SPEC_PRCTL
-Mitigation can be controlled per thread by
-.BR PR_SET_SPECULATION_CTRL .
-.TP
-.B PR_SPEC_ENABLE
-The speculation feature is enabled, mitigation is disabled.
-.TP
-.B PR_SPEC_DISABLE
-The speculation feature is disabled, mitigation is enabled.
-.TP
-.B PR_SPEC_FORCE_DISABLE
-Same as
-.B PR_SPEC_DISABLE
-but cannot be undone.
-.TP
-.BR PR_SPEC_DISABLE_NOEXEC " (since Linux 5.1)"
-Same as
-.BR PR_SPEC_DISABLE ,
-but the state will be cleared on
-.BR execve (2).
-.RE
-.IP
-If all bits are 0,
-then the CPU is not affected by the speculation misfeature.
-.IP
-If
-.B PR_SPEC_PRCTL
-is set, then per-thread control of the mitigation is available.
-If not set,
-.BR prctl ()
-for the speculation misfeature will fail.
-.IP
-The
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-arguments must be specified as 0; otherwise the call fails with the error
-.BR EINVAL .
-.\" prctl PR_SET_SPECULATION_CTRL
-.TP
-.BR PR_SET_SPECULATION_CTRL " (since Linux 4.17)"
-.\" commit b617cfc858161140d69cc0b5cc211996b557a1c7
-.\" commit 356e4bfff2c5489e016fdb925adbf12a1e3950ee
-Sets the state of the speculation misfeature specified in
-.IR arg2 .
-The speculation-misfeature settings are per-thread attributes.
-.IP
-Currently,
-.I arg2
-must be one of:
-.RS
-.TP
-.B PR_SPEC_STORE_BYPASS
-Set the state of the speculative store bypass misfeature.
-.\" commit 9137bb27e60e554dab694eafa4cca241fa3a694f
-.TP
-.BR PR_SPEC_INDIRECT_BRANCH " (since Linux 4.20)"
-Set the state of the indirect branch speculation misfeature.
-.RE
-.IP
-If
-.I arg2
-does not have one of the above values,
-then the call fails with the error
-.BR ENODEV .
-.IP
-The
-.I arg3
-argument is used to hand in the control value,
-which is one of the following:
-.RS
-.TP
-.B PR_SPEC_ENABLE
-The speculation feature is enabled, mitigation is disabled.
-.TP
-.B PR_SPEC_DISABLE
-The speculation feature is disabled, mitigation is enabled.
-.TP
-.B PR_SPEC_FORCE_DISABLE
-Same as
-.BR PR_SPEC_DISABLE ,
-but cannot be undone.
-A subsequent
-.BR prctl (\c
-.IR arg2 ,
-.BR PR_SPEC_ENABLE )
-with the same value for
-.I arg2
-will fail with the error
-.BR EPERM .
-.\" commit 71368af9027f18fe5d1c6f372cfdff7e4bde8b48
-.TP
-.BR PR_SPEC_DISABLE_NOEXEC " (since Linux 5.1)"
-Same as
-.BR PR_SPEC_DISABLE ,
-but the state will be cleared on
-.BR execve (2).
-Currently only supported for
-.I arg2
-equal to
-.BR PR_SPEC_STORE_BYPASS .
-.RE
-.IP
-Any unsupported value in
-.I arg3
-will result in the call failing with the error
-.BR ERANGE .
-.IP
-The
-.I arg4
-and
-.I arg5
-arguments must be specified as 0; otherwise the call fails with the error
-.BR EINVAL .
-.IP
-The speculation feature can also be controlled by the
-.B spec_store_bypass_disable
-boot parameter.
-This parameter may enforce a read-only policy which will result in the
-.BR prctl ()
-call failing with the error
-.BR ENXIO .
-For further details, see the kernel source file
-.IR Documentation/admin\-guide/kernel\-parameters.txt .
-.\" prctl PR_SVE_SET_VL
-.\" commit 2d2123bc7c7f843aa9db87720de159a049839862
-.\" linux-5.6/Documentation/arm64/sve.rst
-.TP
-.BR PR_SVE_SET_VL " (since Linux 4.15, only on arm64)"
-Configure the thread's SVE vector length,
-as specified by
-.IR "(int) arg2" .
-Arguments
-.IR arg3 ,
-.IR arg4 ,
-and
-.I arg5
-are ignored.
-.IP
-The bits of
-.I arg2
-corresponding to
-.B PR_SVE_VL_LEN_MASK
-must be set to the desired vector length in bytes.
-This is interpreted as an upper bound:
-the kernel will select the greatest available vector length
-that does not exceed the value specified.
-In particular, specifying
-.B SVE_VL_MAX
-(defined in
-.IR <asm/sigcontext.h> )
-for the
-.B PR_SVE_VL_LEN_MASK
-bits requests the maximum supported vector length.
-.IP
-In addition, the other bits of
-.I arg2
-must be set to one of the following combinations of flags:
-.RS
-.TP
-.B 0
-Perform the change immediately.
-At the next
-.BR execve (2)
-in the thread,
-the vector length will be reset to the value configured in
-.IR /proc/sys/abi/sve_default_vector_length .
-.TP
-.B PR_SVE_VL_INHERIT
-Perform the change immediately.
-Subsequent
-.BR execve (2)
-calls will preserve the new vector length.
-.TP
-.B PR_SVE_SET_VL_ONEXEC
-Defer the change, so that it is performed at the next
-.BR execve (2)
-in the thread.
-Further
-.BR execve (2)
-calls will reset the vector length to the value configured in
-.IR /proc/sys/abi/sve_default_vector_length .
-.TP
-.B "PR_SVE_SET_VL_ONEXEC | PR_SVE_VL_INHERIT"
-Defer the change, so that it is performed at the next
-.BR execve (2)
-in the thread.
-Further
-.BR execve (2)
-calls will preserve the new vector length.
-.RE
-.IP
-In all cases,
-any previously pending deferred change is canceled.
-.IP
-The call fails with error
-.B EINVAL
-if SVE is not supported on the platform, if
-.I arg2
-is unrecognized or invalid, or the value in the bits of
-.I arg2
-corresponding to
-.B PR_SVE_VL_LEN_MASK
-is outside the range
-.BR SVE_VL_MIN .. SVE_VL_MAX
-or is not a multiple of 16.
-.IP
-On success,
-a nonnegative value is returned that describes the
-.I selected
-configuration.
-If
-.B PR_SVE_SET_VL_ONEXEC
-was included in
-.IR arg2 ,
-then the configuration described by the return value
-will take effect at the next
-.BR execve (2).
-Otherwise, the configuration is already in effect when the
-.B PR_SVE_SET_VL
-call returns.
-In either case, the value is encoded in the same way as the return value of
-.BR PR_SVE_GET_VL .
-Note that there is no explicit flag in the return value
-corresponding to
-.BR PR_SVE_SET_VL_ONEXEC .
-.IP
-The configuration (including any pending deferred change)
-is inherited across
-.BR fork (2)
-and
-.BR clone (2).
-.IP
-For more information, see the kernel source file
-.I Documentation/arm64/sve.rst
-.\"commit b693d0b372afb39432e1c49ad7b3454855bc6bed
-(or
-.I Documentation/arm64/sve.txt
-before Linux 5.3).
-.IP
-.B Warning:
-Because the compiler or run-time environment
-may be using SVE, using this call without the
-.B PR_SVE_SET_VL_ONEXEC
-flag may crash the calling process.
-The conditions for using it safely are complex and system-dependent.
-Don't use it unless you really know what you are doing.
-.\" prctl PR_SVE_GET_VL
-.TP
-.BR PR_SVE_GET_VL " (since Linux 4.15, only on arm64)"
-Get the thread's current SVE vector length configuration.
-.IP
-Arguments
-.IR arg2 ", " arg3 ", " arg4 ", and " arg5
-are ignored.
-.IP
-Provided that the kernel and platform support SVE,
-this operation always succeeds,
-returning a nonnegative value that describes the
-.I current
-configuration.
-The bits corresponding to
-.B PR_SVE_VL_LEN_MASK
-contain the currently configured vector length in bytes.
-The bit corresponding to
-.B PR_SVE_VL_INHERIT
-indicates whether the vector length will be inherited
-across
-.BR execve (2).
-.IP
-Note that there is no way to determine whether there is
-a pending vector length change that has not yet taken effect.
-.IP
-For more information, see the kernel source file
-.I Documentation/arm64/sve.rst
-.\"commit b693d0b372afb39432e1c49ad7b3454855bc6bed
-(or
-.I Documentation/arm64/sve.txt
-before Linux 5.3).
-.TP
-.\" prctl PR_SET_SYSCALL_USER_DISPATCH
-.\" commit 1446e1df9eb183fdf81c3f0715402f1d7595d4
-.BR PR_SET_SYSCALL_USER_DISPATCH " (since Linux 5.11, x86 only)"
-Configure the Syscall User Dispatch mechanism
-for the calling thread.
-This mechanism allows an application
-to selectively intercept system calls
-so that they can be handled within the application itself.
-Interception takes the form of a thread-directed
-.B SIGSYS
-signal that is delivered to the thread
-when it makes a system call.
-If intercepted,
-the system call is not executed by the kernel.
-.IP
-To enable this mechanism,
-.I arg2
-should be set to
-.BR PR_SYS_DISPATCH_ON .
-Once enabled, further system calls will be selectively intercepted,
-depending on a control variable provided by user space.
-In this case,
-.I arg3
-and
-.I arg4
-respectively identify the
-.I offset
-and
-.I length
-of a single contiguous memory region in the process address space
-from where system calls are always allowed to be executed,
-regardless of the control variable.
-(Typically, this area would include the area of memory
-containing the C library.)
-.IP
-.I arg5
-points to a char-sized variable
-that is a fast switch to allow/block system call execution
-without the overhead of doing another system call
-to re-configure Syscall User Dispatch.
-This control variable can either be set to
-.B SYSCALL_DISPATCH_FILTER_BLOCK
-to block system calls from executing
-or to
-.B SYSCALL_DISPATCH_FILTER_ALLOW
-to temporarily allow them to be executed.
-This value is checked by the kernel
-on every system call entry,
-and any unexpected value will raise
-an uncatchable
-.B SIGSYS
-at that time,
-killing the application.
-.IP
-When a system call is intercepted,
-the kernel sends a thread-directed
-.B SIGSYS
-signal to the triggering thread.
-Various fields will be set in the
-.I siginfo_t
-structure (see
-.BR sigaction (2))
-associated with the signal:
-.RS
-.IP \[bu] 3
-.I si_signo
-will contain
-.BR SIGSYS .
-.IP \[bu]
-.I si_call_addr
-will show the address of the system call instruction.
-.IP \[bu]
-.I si_syscall
-and
-.I si_arch
-will indicate which system call was attempted.
-.IP \[bu]
-.I si_code
-will contain
-.BR SYS_USER_DISPATCH .
-.IP \[bu]
-.I si_errno
-will be set to 0.
-.RE
-.IP
-The program counter will be as though the system call happened
-(i.e., the program counter will not point to the system call instruction).
-.IP
-When the signal handler returns to the kernel,
-the system call completes immediately
-and returns to the calling thread,
-without actually being executed.
-If necessary
-(i.e., when emulating the system call in user space),
-the signal handler should set the system call return value
-to a sane value,
-by modifying the register context stored in the
-.I ucontext
-argument of the signal handler.
-See
-.BR sigaction (2),
-.BR sigreturn (2),
-and
-.BR getcontext (3)
-for more information.
-.IP
-If
-.I arg2
-is set to
-.BR PR_SYS_DISPATCH_OFF ,
-Syscall User Dispatch is disabled for that thread.
-The remaining arguments must be set to 0.
-.IP
-The setting is not preserved across
-.BR fork (2),
-.BR clone (2),
-or
-.BR execve (2).
-.IP
-For more information,
-see the kernel source file
-.IR Documentation/admin\-guide/syscall\-user\-dispatch.rst .
-.\" prctl PR_SET_TAGGED_ADDR_CTRL
-.\" commit 63f0c60379650d82250f22e4cf4137ef3dc4f43d
-.TP
-.BR PR_SET_TAGGED_ADDR_CTRL " (since Linux 5.4, only on arm64)"
-Controls support for passing tagged user-space addresses to the kernel
-(i.e., addresses where bits 56\[en]63 are not all zero).
-.IP
-The level of support is selected by
-.IR "arg2" ,
-which can be one of the following:
-.RS
-.TP
-.B 0
-Addresses that are passed
-for the purpose of being dereferenced by the kernel
-must be untagged.
-.TP
-.B PR_TAGGED_ADDR_ENABLE
-Addresses that are passed
-for the purpose of being dereferenced by the kernel
-may be tagged, with the exceptions summarized below.
-.RE
-.IP
-The remaining arguments
-.IR arg3 ", " arg4 ", and " arg5
-must all be zero.
-.\" Enforcement added in
-.\" commit 3e91ec89f527b9870fe42dcbdb74fd389d123a95
-.IP
-On success, the mode specified in
-.I arg2
-is set for the calling thread and the return value is 0.
-If the arguments are invalid,
-the mode specified in
-.I arg2
-is unrecognized,
-or if this feature is unsupported by the kernel
-or disabled via
-.IR /proc/sys/abi/tagged_addr_disabled ,
-the call fails with the error
-.BR EINVAL .
-.IP
-In particular, if
-.BR prctl ( PR_SET_TAGGED_ADDR_CTRL ,
-0, 0, 0, 0)
-fails with
-.BR EINVAL ,
-then all addresses passed to the kernel must be untagged.
-.IP
-Irrespective of which mode is set,
-addresses passed to certain interfaces
-must always be untagged:
-.RS
-.IP \[bu] 3
-.BR brk (2),
-.BR mmap (2),
-.BR shmat (2),
-.BR shmdt (2),
-and the
-.I new_address
-argument of
-.BR mremap (2).
-.IP
-(Prior to Linux 5.6 these accepted tagged addresses,
-but the behavior may not be what you expect.
-Don't rely on it.)
-.IP \[bu]
-\[oq]polymorphic\[cq] interfaces
-that accept pointers to arbitrary types cast to a
-.I void *
-or other generic type, specifically
-.BR prctl (),
-.BR ioctl (2),
-and in general
-.BR setsockopt (2)
-(only certain specific
-.BR setsockopt (2)
-options allow tagged addresses).
-.RE
-.IP
-This list of exclusions may shrink
-when moving from one kernel version to a later kernel version.
-While the kernel may make some guarantees
-for backwards compatibility reasons,
-for the purposes of new software
-the effect of passing tagged addresses to these interfaces
-is unspecified.
-.IP
-The mode set by this call is inherited across
-.BR fork (2)
-and
-.BR clone (2).
-The mode is reset by
-.BR execve (2)
-to 0
-(i.e., tagged addresses not permitted in the user/kernel ABI).
-.IP
-For more information, see the kernel source file
-.IR Documentation/arm64/tagged\-address\-abi.rst .
-.IP
-.B Warning:
-This call is primarily intended for use by the run-time environment.
-A successful
-.B PR_SET_TAGGED_ADDR_CTRL
-call elsewhere may crash the calling process.
-The conditions for using it safely are complex and system-dependent.
-Don't use it unless you know what you are doing.
-.\" prctl PR_GET_TAGGED_ADDR_CTRL
-.\" commit 63f0c60379650d82250f22e4cf4137ef3dc4f43d
-.TP
-.BR PR_GET_TAGGED_ADDR_CTRL " (since Linux 5.4, only on arm64)"
-Returns the current tagged address mode
-for the calling thread.
-.IP
-Arguments
-.IR arg2 ", " arg3 ", " arg4 ", and " arg5
-must all be zero.
-.IP
-If the arguments are invalid
-or this feature is disabled or unsupported by the kernel,
-the call fails with
-.BR EINVAL .
-In particular, if
-.BR prctl ( PR_GET_TAGGED_ADDR_CTRL ,
-0, 0, 0, 0)
-fails with
-.BR EINVAL ,
-then this feature is definitely either unsupported,
-or disabled via
-.IR /proc/sys/abi/tagged_addr_disabled .
-In this case,
-all addresses passed to the kernel must be untagged.
-.IP
-Otherwise, the call returns a nonnegative value
-describing the current tagged address mode,
-encoded in the same way as the
-.I arg2
-argument of
-.BR PR_SET_TAGGED_ADDR_CTRL .
-.IP
-For more information, see the kernel source file
-.IR Documentation/arm64/tagged\-address\-abi.rst .
-.\"
-.\" prctl PR_TASK_PERF_EVENTS_DISABLE
-.TP
-.BR PR_TASK_PERF_EVENTS_DISABLE " (since Linux 2.6.31)"
-Disable all performance counters attached to the calling process,
-regardless of whether the counters were created by
-this process or another process.
-Performance counters created by the calling process for other
-processes are unaffected.
-For more information on performance counters, see the Linux kernel source file
-.IR tools/perf/design.txt .
-.IP
-Originally called
-.BR PR_TASK_PERF_COUNTERS_DISABLE ;
-.\" commit 1d1c7ddbfab358445a542715551301b7fc363e28
-renamed (retaining the same numerical value)
-in Linux 2.6.32.
-.\"
-.\" prctl PR_TASK_PERF_EVENTS_ENABLE
-.TP
-.BR PR_TASK_PERF_EVENTS_ENABLE " (since Linux 2.6.31)"
-The converse of
-.BR PR_TASK_PERF_EVENTS_DISABLE ;
-enable performance counters attached to the calling process.
-.IP
-Originally called
-.BR PR_TASK_PERF_COUNTERS_ENABLE ;
-.\" commit 1d1c7ddbfab358445a542715551301b7fc363e28
-renamed
-.\" commit cdd6c482c9ff9c55475ee7392ec8f672eddb7be6
-in Linux 2.6.32.
-.\"
-.\" prctl PR_SET_THP_DISABLE
-.TP
-.BR PR_SET_THP_DISABLE " (since Linux 3.15)"
-.\" commit a0715cc22601e8830ace98366c0c2bd8da52af52
-Set the state of the "THP disable" flag for the calling thread.
-If
-.I arg2
-has a nonzero value, the flag is set, otherwise it is cleared.
-Setting this flag provides a method
-for disabling transparent huge pages
-for jobs where the code cannot be modified, and using a malloc hook with
-.BR madvise (2)
-is not an option (i.e., statically allocated data).
-The setting of the "THP disable" flag is inherited by a child created via
-.BR fork (2)
-and is preserved across
-.BR execve (2).
-.\" prctl PR_GET_THP_DISABLE
-.TP
-.BR PR_GET_THP_DISABLE " (since Linux 3.15)"
-Return (as the function result) the current setting of the "THP disable"
-flag for the calling thread:
-either 1, if the flag is set, or 0, if it is not.
-.\" prctl PR_GET_TID_ADDRESS
-.TP
-.BR PR_GET_TID_ADDRESS " (since Linux 3.5)"
-.\" commit 300f786b2683f8bb1ec0afb6e1851183a479c86d
-Return the
-.I clear_child_tid
-address set by
-.BR set_tid_address (2)
-and the
-.BR clone (2)
-.B CLONE_CHILD_CLEARTID
-flag, in the location pointed to by
-.IR "(int\~**)\~arg2" .
-This feature is available only if the kernel is built with the
-.B CONFIG_CHECKPOINT_RESTORE
-option enabled.
-Note that since the
-.BR prctl ()
-system call does not have a compat implementation for
-the AMD64 x32 and MIPS n32 ABIs,
-and the kernel writes out a pointer using the kernel's pointer size,
-this operation expects a user-space buffer of 8 (not 4) bytes on these ABIs.
-.\" prctl PR_SET_TIMERSLACK
-.TP
-.BR PR_SET_TIMERSLACK " (since Linux 2.6.28)"
-.\" See https://lwn.net/Articles/369549/
-.\" commit 6976675d94042fbd446231d1bd8b7de71a980ada
-Each thread has two associated timer slack values:
-a "default" value, and a "current" value.
-This operation sets the "current" timer slack value for the calling thread.
-.I arg2
-is an unsigned long value, so the maximum "current" value is ULONG_MAX and
-the minimum "current" value is 1.
-If the nanosecond value supplied in
-.I arg2
-is greater than zero, then the "current" value is set to this value.
-If
-.I arg2
-is equal to zero,
-the "current" timer slack is reset to the
-thread's "default" timer slack value.
-.IP
-The "current" timer slack is used by the kernel to group timer expirations
-for the calling thread that are close to one another;
-as a consequence, timer expirations for the thread may be
-up to the specified number of nanoseconds late (but will never expire early).
-Grouping timer expirations can help reduce system power consumption
-by minimizing CPU wake-ups.
-.IP
-The timer expirations affected by timer slack are those set by
-.BR select (2),
-.BR pselect (2),
-.BR poll (2),
-.BR ppoll (2),
-.BR epoll_wait (2),
-.BR epoll_pwait (2),
-.BR clock_nanosleep (2),
-.BR nanosleep (2),
-and
-.BR futex (2)
-(and thus the library functions implemented via futexes, including
-.\" List obtained by grepping for futex usage in glibc source
-.BR pthread_cond_timedwait (3),
-.BR pthread_mutex_timedlock (3),
-.BR pthread_rwlock_timedrdlock (3),
-.BR pthread_rwlock_timedwrlock (3),
-and
-.BR sem_timedwait (3)).
-.IP
-Timer slack is not applied to threads that are scheduled under
-a real-time scheduling policy (see
-.BR sched_setscheduler (2)).
-.IP
-When a new thread is created,
-the two timer slack values are made the same as the "current" value
-of the creating thread.
-Thereafter, a thread can adjust its "current" timer slack value via
-.BR PR_SET_TIMERSLACK .
-The "default" value can't be changed.
-The timer slack values of
-.I init
-(PID 1), the ancestor of all processes,
-are 50,000 nanoseconds (50 microseconds).
-The timer slack value is inherited by a child created via
-.BR fork (2),
-and is preserved across
-.BR execve (2).
-.IP
-Since Linux 4.6, the "current" timer slack value of any process
-can be examined and changed via the file
-.IR /proc/ pid /timerslack_ns .
-See
-.BR proc (5).
-.\" prctl PR_GET_TIMERSLACK
-.TP
-.BR PR_GET_TIMERSLACK " (since Linux 2.6.28)"
-Return (as the function result)
-the "current" timer slack value of the calling thread.
-.\" prctl PR_SET_TIMING
-.TP
-.BR PR_SET_TIMING " (since Linux 2.6.0)"
-.\" Precisely: Linux 2.6.0-test4
-Set whether to use (normal, traditional) statistical process timing or
-accurate timestamp-based process timing, by passing
-.B PR_TIMING_STATISTICAL
-.\" 0
-or
-.B PR_TIMING_TIMESTAMP
-.\" 1
-to \fIarg2\fP.
-.B PR_TIMING_TIMESTAMP
-is not currently implemented
-(attempting to set this mode will yield the error
-.BR EINVAL ).
-.\" PR_TIMING_TIMESTAMP doesn't do anything in Linux 2.6.26-rc8,
-.\" and looking at the patch history, it appears
-.\" that it never did anything.
-.\" prctl PR_GET_TIMING
-.TP
-.BR PR_GET_TIMING " (since Linux 2.6.0)"
-.\" Precisely: Linux 2.6.0-test4
-Return (as the function result) which process timing method is currently
-in use.
-.\" prctl PR_SET_TSC
-.TP
-.BR PR_SET_TSC " (since Linux 2.6.26, x86 only)"
-Set the state of the flag determining whether the timestamp counter
-can be read by the process.
-Pass
-.B PR_TSC_ENABLE
-to
-.I arg2
-to allow it to be read, or
-.B PR_TSC_SIGSEGV
-to generate a
-.B SIGSEGV
-when the process tries to read the timestamp counter.
-.\" prctl PR_GET_TSC
-.TP
-.BR PR_GET_TSC " (since Linux 2.6.26, x86 only)"
-Return the state of the flag determining whether the timestamp counter
-can be read,
-in the location pointed to by
-.IR "(int\~*) arg2" .
-.\" prctl PR_SET_UNALIGN
-.TP
-.B PR_SET_UNALIGN
-(Only on: ia64, since Linux 2.3.48; parisc, since Linux 2.6.15;
-PowerPC, since Linux 2.6.18; Alpha, since Linux 2.6.22;
-.\" sh: 94ea5e449ae834af058ef005d16a8ad44fcf13d6
-.\" tile: 2f9ac29eec71a696cb0dcc5fb82c0f8d4dac28c9
-sh, since Linux 2.6.34; tile, since Linux 3.12)
-Set unaligned access control bits to \fIarg2\fP.
-Pass
-\fBPR_UNALIGN_NOPRINT\fP to silently fix up unaligned user accesses,
-or \fBPR_UNALIGN_SIGBUS\fP to generate
-.B SIGBUS
-on unaligned user access.
-Alpha also supports an additional flag with the value
-of 4 and no corresponding named constant,
-which instructs the kernel not to fix up
-unaligned accesses (it is analogous to providing the
-.B UAC_NOFIX
-flag in the
-.B SSI_NVPAIRS
-operation of the
-.BR setsysinfo ()
-system call on Tru64).
-.\" prctl PR_GET_UNALIGN
-.TP
-.B PR_GET_UNALIGN
-(See
-.B PR_SET_UNALIGN
-for information on versions and architectures.)
-Return unaligned access control bits, in the location pointed to by
-.IR "(unsigned int\~*) arg2" .
-.\" prctl PR_GET_AUXV
-.TP
-.BR PR_GET_AUXV " (since Linux 6.4)"
-Get the auxiliary vector (auxv) into the buffer pointed to by
-.IR "(void\~*) arg2" ,
-whose length is given by \fIarg3\fP.
-If the buffer is not long enough for the full auxiliary vector,
-the copy will be truncated.
-Return (as the function result)
-the full length of the auxiliary vector.
-\fIarg4\fP and \fIarg5\fP must be 0.
-.TP
-.BR PR_SET_MDWE " (since Linux 6.3)"
-.\" commit b507808ebce23561d4ff8c2aa1fb949fe402bc61
-Set the calling process' Memory-Deny-Write-Execute protection mask.
-Once protection bits are set,
-they can not be changed.
-.I arg2
-must be a bit mask of:
-.RS
-.TP
-.B PR_MDWE_REFUSE_EXEC_GAIN
-New memory mapping protections can't be writable and executable.
-Non-executable mappings can't become executable.
-.TP
-.BR PR_MDWE_NO_INHERIT " (since Linux 6.6)"
-.\" commit 2a87e5520554034e8c423479740f95bea4a086a0
-Do not propagate MDWE protection to child processes on
-.BR fork (2).
-Setting this bit requires setting
-.B PR_MDWE_REFUSE_EXEC_GAIN
-too.
-.RE
-.TP
-.BR PR_GET_MDWE " (since Linux 6.3)"
-.\" commit b507808ebce23561d4ff8c2aa1fb949fe402bc61
-Return (as the function result) the Memory-Deny-Write-Execute protection mask
-of the calling process.
-(See
-.B PR_SET_MDWE
-for information on the protection mask bits.)
-.SH RETURN VALUE
-On success,
-.BR PR_CAP_AMBIENT + PR_CAP_AMBIENT_IS_SET ,
-.BR PR_CAPBSET_READ ,
-.BR PR_GET_DUMPABLE ,
-.BR PR_GET_FP_MODE ,
-.BR PR_GET_IO_FLUSHER ,
-.BR PR_GET_KEEPCAPS ,
-.BR PR_MCE_KILL_GET ,
-.BR PR_GET_NO_NEW_PRIVS ,
-.BR PR_GET_SECUREBITS ,
-.BR PR_GET_SPECULATION_CTRL ,
-.BR PR_SVE_GET_VL ,
-.BR PR_SVE_SET_VL ,
-.BR PR_GET_TAGGED_ADDR_CTRL ,
-.BR PR_GET_THP_DISABLE ,
-.BR PR_GET_TIMING ,
-.BR PR_GET_TIMERSLACK ,
-.BR PR_GET_AUXV ,
-and (if it returns)
-.B PR_GET_SECCOMP
-return the nonnegative values described above.
-All other
-.I op
-values return 0 on success.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-.I op
-is
-.B PR_SET_SECCOMP
-and
-.I arg2
-is
-.BR SECCOMP_MODE_FILTER ,
-but the process does not have the
-.B CAP_SYS_ADMIN
-capability or has not set the
-.I no_new_privs
-attribute (see the discussion of
-.B PR_SET_NO_NEW_PRIVS
-above).
-.TP
-.B EACCES
-.I op
-is
-.BR PR_SET_MM ,
-.I arg3
-is
-.BR PR_SET_MM_EXE_FILE ,
-and the file is not executable.
-.TP
-.B EBADF
-.I op
-is
-.BR PR_SET_MM ,
-.I arg3
-is
-.BR PR_SET_MM_EXE_FILE ,
-and the file descriptor passed in
-.I arg4
-is not valid.
-.TP
-.B EBUSY
-.I op
-is
-.BR PR_SET_MM ,
-.I arg3
-is
-.BR PR_SET_MM_EXE_FILE ,
-and this is the second attempt to change the
-.IR /proc/ pid /exe
-symbolic link, which is prohibited.
-.TP
-.B EFAULT
-.I arg2
-is an invalid address.
-.TP
-.B EFAULT
-.I op
-is
-.BR PR_SET_SECCOMP ,
-.I arg2
-is
-.BR SECCOMP_MODE_FILTER ,
-the system was built with
-.BR CONFIG_SECCOMP_FILTER ,
-and
-.I arg3
-is an invalid address.
-.TP
-.B EFAULT
-.I op
-is
-.B PR_SET_SYSCALL_USER_DISPATCH
-and
-.I arg5
-has an invalid address.
-.TP
-.B EINVAL
-The value of
-.I op
-is not recognized,
-or not supported on this system.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_MCE_KILL
-or
-.B PR_MCE_KILL_GET
-or
-.BR PR_SET_MM ,
-and unused
-.BR prctl ()
-arguments were not specified as zero.
-.TP
-.B EINVAL
-.I arg2
-is not a valid value for this
-.IR op .
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_SECCOMP
-or
-.BR PR_GET_SECCOMP ,
-and the kernel was not configured with
-.BR CONFIG_SECCOMP .
-.TP
-.B EINVAL
-.I op
-is
-.BR PR_SET_SECCOMP ,
-.I arg2
-is
-.BR SECCOMP_MODE_FILTER ,
-and the kernel was not configured with
-.BR CONFIG_SECCOMP_FILTER .
-.TP
-.B EINVAL
-.I op
-is
-.BR PR_SET_MM ,
-and one of the following is true:
-.RS
-.IP \[bu] 3
-.I arg4
-or
-.I arg5
-is nonzero;
-.IP \[bu]
-.I arg3
-is greater than
-.B TASK_SIZE
-(the limit on the size of the user address space for this architecture);
-.IP \[bu]
-.I arg2
-is
-.BR PR_SET_MM_START_CODE ,
-.BR PR_SET_MM_END_CODE ,
-.BR PR_SET_MM_START_DATA ,
-.BR PR_SET_MM_END_DATA ,
-or
-.BR PR_SET_MM_START_STACK ,
-and the permissions of the corresponding memory area are not as required;
-.IP \[bu]
-.I arg2
-is
-.B PR_SET_MM_START_BRK
-or
-.BR PR_SET_MM_BRK ,
-and
-.I arg3
-is less than or equal to the end of the data segment
-or specifies a value that would cause the
-.B RLIMIT_DATA
-resource limit to be exceeded.
-.RE
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_PTRACER
-and
-.I arg2
-is not 0,
-.BR PR_SET_PTRACER_ANY ,
-or the PID of an existing process.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_PDEATHSIG
-and
-.I arg2
-is not a valid signal number.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_DUMPABLE
-and
-.I arg2
-is neither
-.B SUID_DUMP_DISABLE
-nor
-.BR SUID_DUMP_USER .
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_TIMING
-and
-.I arg2
-is not
-.BR PR_TIMING_STATISTICAL .
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_NO_NEW_PRIVS
-and
-.I arg2
-is not equal to 1
-or
-.IR arg3 ,
-.IR arg4 ,
-or
-.I arg5
-is nonzero.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_GET_NO_NEW_PRIVS
-and
-.IR arg2 ,
-.IR arg3 ,
-.IR arg4 ,
-or
-.I arg5
-is nonzero.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_THP_DISABLE
-and
-.IR arg3 ,
-.IR arg4 ,
-or
-.I arg5
-is nonzero.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_GET_THP_DISABLE
-and
-.IR arg2 ,
-.IR arg3 ,
-.IR arg4 ,
-or
-.I arg5
-is nonzero.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_CAP_AMBIENT
-and an unused argument
-.RI ( arg4 ,
-.IR arg5 ,
-or,
-in the case of
-.BR PR_CAP_AMBIENT_CLEAR_ALL ,
-.IR arg3 )
-is nonzero; or
-.I arg2
-has an invalid value;
-or
-.I arg2
-is
-.BR PR_CAP_AMBIENT_LOWER ,
-.BR PR_CAP_AMBIENT_RAISE ,
-or
-.B PR_CAP_AMBIENT_IS_SET
-and
-.I arg3
-does not specify a valid capability.
-.TP
-.B EINVAL
-.I op
-was
-.B PR_GET_SPECULATION_CTRL
-or
-.B PR_SET_SPECULATION_CTRL
-and unused arguments to
-.BR prctl ()
-are not 0.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_PAC_RESET_KEYS
-and the arguments are invalid or unsupported.
-See the description of
-.B PR_PAC_RESET_KEYS
-above for details.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SVE_SET_VL
-and the arguments are invalid or unsupported,
-or SVE is not available on this platform.
-See the description of
-.B PR_SVE_SET_VL
-above for details.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SVE_GET_VL
-and SVE is not available on this platform.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_SYSCALL_USER_DISPATCH
-and one of the following is true:
-.RS
-.IP \[bu] 3
-.I arg2
-is
-.B PR_SYS_DISPATCH_OFF
-and the remaining arguments are not 0;
-.IP \[bu]
-.I arg2
-is
-.B PR_SYS_DISPATCH_ON
-and the memory range specified is outside the
-address space of the process;
-.IP \[bu]
-.I arg2
-is invalid.
-.RE
-.TP
-.B EINVAL
-.I op
-is
-.B PR_SET_TAGGED_ADDR_CTRL
-and the arguments are invalid or unsupported.
-See the description of
-.B PR_SET_TAGGED_ADDR_CTRL
-above for details.
-.TP
-.B EINVAL
-.I op
-is
-.B PR_GET_TAGGED_ADDR_CTRL
-and the arguments are invalid or unsupported.
-See the description of
-.B PR_GET_TAGGED_ADDR_CTRL
-above for details.
-.TP
-.B ENODEV
-.I op
-was
-.B PR_SET_SPECULATION_CTRL
-and the kernel or CPU does not support the requested speculation misfeature.
-.TP
-.B ENXIO
-.I op
-was
-.B PR_MPX_ENABLE_MANAGEMENT
-or
-.B PR_MPX_DISABLE_MANAGEMENT
-and the kernel or the CPU does not support MPX management.
-Check that the kernel and processor have MPX support.
-.TP
-.B ENXIO
-.I op
-was
-.B PR_SET_SPECULATION_CTRL
-and control of the selected speculation misfeature is not possible.
-See
-.B PR_GET_SPECULATION_CTRL
-for the bit fields to determine which option is available.
-.TP
-.B EOPNOTSUPP
-.I op
-is
-.B PR_SET_FP_MODE
-and
-.I arg2
-has an invalid or unsupported value.
-.TP
-.B EPERM
-.I op
-is
-.BR PR_SET_SECUREBITS ,
-and the caller does not have the
-.B CAP_SETPCAP
-capability,
-or tried to unset a "locked" flag,
-or tried to set a flag whose corresponding locked flag was set
-(see
-.BR capabilities (7)).
-.TP
-.B EPERM
-.I op
-is
-.B PR_SET_SPECULATION_CTRL
-and the speculation misfeature was previously disabled with
-.BR PR_SPEC_FORCE_DISABLE ,
-but the caller tried to enable it again.
-.TP
-.B EPERM
-.I op
-is
-.BR PR_SET_KEEPCAPS ,
-and the caller's
-.B SECBIT_KEEP_CAPS_LOCKED
-flag is set
-(see
-.BR capabilities (7)).
-.TP
-.B EPERM
-.I op
-is
-.BR PR_CAPBSET_DROP ,
-and the caller does not have the
-.B CAP_SETPCAP
-capability.
-.TP
-.B EPERM
-.I op
-is
-.BR PR_SET_MM ,
-and the caller does not have the
-.B CAP_SYS_RESOURCE
-capability.
-.TP
-.B EPERM
-.I op
-is
-.B PR_CAP_AMBIENT
-and
-.I arg2
-is
-.BR PR_CAP_AMBIENT_RAISE ,
-but either the capability specified in
-.I arg3
-is not present in the process's permitted and inheritable capability sets,
-or the
-.B SECBIT_NO_CAP_AMBIENT_RAISE
-securebit has been set.
-.TP
-.B ERANGE
-.I op
-was
-.B PR_SET_SPECULATION_CTRL
-and
-.I arg3
-is not
-.BR PR_SPEC_ENABLE ,
-.BR PR_SPEC_DISABLE ,
-.BR PR_SPEC_FORCE_DISABLE ,
-nor
-.BR PR_SPEC_DISABLE_NOEXEC .
-.SH VERSIONS
-IRIX has a
-.BR prctl ()
-system call (also introduced in Linux 2.1.44
-as irix_prctl on the MIPS architecture),
-with prototype
-.P
-.in +4n
-.EX
-.BI "ptrdiff_t prctl(int " op ", int " arg2 ", int " arg3 );
-.EE
-.in
-.P
-and operations to get the maximum number of processes per user,
-get the maximum number of processors the calling process can use,
-find out whether a specified process is currently blocked,
-get or set the maximum stack size, and so on.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.1.57,
-glibc 2.0.6.
-.SH SEE ALSO
-.BR signal (2),
-.BR core (5)
diff --git a/man2/pread.2 b/man2/pread.2
deleted file mode 100644
index e80680b48..000000000
--- a/man2/pread.2
+++ /dev/null
@@ -1,146 +0,0 @@
-.\" Copyright (C) 1999 Joseph Samuel Myers.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH pread 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-pread, pwrite \- read from or write to a file descriptor at a given offset
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "ssize_t pread(int " fd ", void " buf [. count "], size_t " count ,
-.BI " off_t " offset );
-.BI "ssize_t pwrite(int " fd ", const void " buf [. count "], size_t " count ,
-.BI " off_t " offset );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR pread (),
-.BR pwrite ():
-.nf
- _XOPEN_SOURCE >= 500
- || /* Since glibc 2.12: */ _POSIX_C_SOURCE >= 200809L
-.fi
-.SH DESCRIPTION
-.BR pread ()
-reads up to
-.I count
-bytes from file descriptor
-.I fd
-at offset
-.I offset
-(from the start of the file) into the buffer starting at
-.IR buf .
-The file offset is not changed.
-.P
-.BR pwrite ()
-writes up to
-.I count
-bytes from the buffer starting at
-.I buf
-to the file descriptor
-.I fd
-at offset
-.IR offset .
-The file offset is not changed.
-.P
-The file referenced by
-.I fd
-must be capable of seeking.
-.SH RETURN VALUE
-On success,
-.BR pread ()
-returns the number of bytes read
-(a return of zero indicates end of file)
-and
-.BR pwrite ()
-returns the number of bytes written.
-.P
-Note that it is not an error for a successful call to transfer fewer bytes
-than requested (see
-.BR read (2)
-and
-.BR write (2)).
-.P
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.BR pread ()
-can fail and set
-.I errno
-to any error specified for
-.BR read (2)
-or
-.BR lseek (2).
-.BR pwrite ()
-can fail and set
-.I errno
-to any error specified for
-.BR write (2)
-or
-.BR lseek (2).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.P
-Added in Linux 2.1.60;
-the entries in the i386 system call table were added in Linux 2.1.69.
-C library support (including emulation using
-.BR lseek (2)
-on older kernels without the system calls) was added in glibc 2.1.
-.SS C library/kernel differences
-On Linux, the underlying system calls were renamed
-in Linux 2.6:
-.BR pread ()
-became
-.BR pread64 (),
-and
-.BR pwrite ()
-became
-.BR pwrite64 ().
-The system call numbers remained the same.
-The glibc
-.BR pread ()
-and
-.BR pwrite ()
-wrapper functions transparently deal with the change.
-.P
-On some 32-bit architectures,
-the calling signature for these system calls differs,
-for the reasons described in
-.BR syscall (2).
-.SH NOTES
-The
-.BR pread ()
-and
-.BR pwrite ()
-system calls are especially useful in multithreaded applications.
-They allow multiple threads to perform I/O on the same file descriptor
-without being affected by changes to the file offset by other threads.
-.SH BUGS
-POSIX requires that opening a file with the
-.B O_APPEND
-flag should have no effect on the location at which
-.BR pwrite ()
-writes data.
-However, on Linux, if a file is opened with
-.\" FIXME . https://bugzilla.kernel.org/show_bug.cgi?id=43178
-.BR O_APPEND ,
-.BR pwrite ()
-appends data to the end of the file, regardless of the value of
-.IR offset .
-.SH SEE ALSO
-.BR lseek (2),
-.BR read (2),
-.BR readv (2),
-.BR write (2)
diff --git a/man2/pread64.2 b/man2/pread64.2
deleted file mode 100644
index 87eacb238..000000000
--- a/man2/pread64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/pread.2
diff --git a/man2/preadv.2 b/man2/preadv.2
deleted file mode 100644
index 54e3384ef..000000000
--- a/man2/preadv.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/readv.2
diff --git a/man2/preadv2.2 b/man2/preadv2.2
deleted file mode 100644
index 54e3384ef..000000000
--- a/man2/preadv2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/readv.2
diff --git a/man2/prlimit.2 b/man2/prlimit.2
deleted file mode 100644
index df6d7362a..000000000
--- a/man2/prlimit.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getrlimit.2
diff --git a/man2/prlimit64.2 b/man2/prlimit64.2
deleted file mode 100644
index df6d7362a..000000000
--- a/man2/prlimit64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getrlimit.2
diff --git a/man2/process_madvise.2 b/man2/process_madvise.2
deleted file mode 100644
index 2c9f0b3f9..000000000
--- a/man2/process_madvise.2
+++ /dev/null
@@ -1,200 +0,0 @@
-.\" Copyright (C) 2021 Suren Baghdasaryan <surenb@google.com>
-.\" and Copyright (C) 2021 Minchan Kim <minchan@kernel.org>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Commit ecb8ac8b1f146915aa6b96449b66dd48984caacc
-.\"
-.TH process_madvise 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-process_madvise \- give advice about use of memory to a process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/mman.h>
-.P
-.BI "ssize_t process_madvise(int " pidfd ", const struct iovec " iovec [. n ],
-.BI " size_t " n ", int " advice \
-", unsigned int " flags );
-.fi
-.SH DESCRIPTION
-The
-.BR process_madvise ()
-system call is used to give advice or directions to the kernel about the
-address ranges of another process or of the calling process.
-It provides the advice for the address ranges described by
-.I iovec
-and
-.IR n .
-The goal of such advice is to improve system or application performance.
-.P
-The
-.I pidfd
-argument is a PID file descriptor (see
-.BR pidfd_open (2))
-that specifies the process to which the advice is to be applied.
-.P
-The pointer
-.I iovec
-points to an array of
-.I iovec
-structures, described in
-.BR iovec (3type).
-.P
-.I n
-specifies the number of elements in the array of
-.I iovec
-structures.
-This value must be less than or equal to
-.B IOV_MAX
-(defined in
-.I <limits.h>
-or accessible via the call
-.IR sysconf(_SC_IOV_MAX) ).
-.P
-The
-.I advice
-argument is one of the following values:
-.TP
-.B MADV_COLD
-See
-.BR madvise (2).
-.TP
-.B MADV_COLLAPSE
-See
-.BR madvise (2).
-.TP
-.B MADV_PAGEOUT
-See
-.BR madvise (2).
-.TP
-.B MADV_WILLNEED
-See
-.BR madvise (2).
-.P
-The
-.I flags
-argument is reserved for future use; currently, this argument must be
-specified as 0.
-.P
-The
-.I n
-and
-.I iovec
-arguments are checked before applying any advice.
-If
-.I n
-is too big, or
-.I iovec
-is invalid,
-then an error will be returned immediately and no advice will be applied.
-.P
-The advice might be applied to only a part of
-.I iovec
-if one of its elements points to an invalid memory region in the
-remote process.
-No further elements will be processed beyond that point.
-(See the discussion regarding partial advice in RETURN VALUE.)
-.P
-.\" commit 96cfe2c0fd23ea7c2368d14f769d287e7ae1082e
-Starting in Linux 5.12,
-permission to apply advice to another process is governed by
-a ptrace access mode
-.B PTRACE_MODE_READ_FSCREDS
-check (see
-.BR ptrace (2));
-in addition,
-because of the performance implications of applying the advice,
-the caller must have the
-.B CAP_SYS_NICE
-capability
-(see
-.BR capabilities (7)).
-.SH RETURN VALUE
-On success,
-.BR process_madvise ()
-returns the number of bytes advised.
-This return value may be less than the total number of requested bytes,
-if an error occurred after some
-.I iovec
-elements were already processed.
-The caller should check the return value to determine whether a partial
-advice occurred.
-.P
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I pidfd
-is not a valid PID file descriptor.
-.TP
-.B EFAULT
-The memory described by
-.I iovec
-is outside the accessible address space of the process referred to by
-.IR pidfd .
-.TP
-.B EINVAL
-.I flags
-is not 0.
-.TP
-.B EINVAL
-The sum of the
-.I iov_len
-values of
-.I iovec
-overflows a
-.I ssize_t
-value.
-.TP
-.B EINVAL
-.I n
-is too large.
-.TP
-.B ENOMEM
-Could not allocate memory for internal copies of the
-.I iovec
-structures.
-.TP
-.B EPERM
-The caller does not have permission to access the address space of the process
-referred to by
-.IR pidfd .
-.TP
-.B ESRCH
-The target process does not exist (i.e., it has terminated and been waited on).
-.P
-See
-.BR madvise (2)
-for
-.IR advice -specific
-errors.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 5.10.
-.\" Linux commit ecb8ac8b1f146915aa6b96449b66dd48984caacc
-glibc 2.36.
-.\" glibc commit d19ee3473d68ca0e794f3a8b7677a0983ae1342e
-.P
-Support for this system call is optional,
-depending on the setting of the
-.B CONFIG_ADVISE_SYSCALLS
-configuration option.
-.P
-When this system call first appeared in Linux 5.10,
-permission to apply advice to another process was entirely governed by
-a ptrace access mode
-.B PTRACE_MODE_ATTACH_FSCREDS
-check (see
-.BR ptrace (2)).
-This requirement was relaxed in Linux 5.12 so that the caller didn't require
-full control over the target process.
-.SH SEE ALSO
-.BR madvise (2),
-.BR pidfd_open (2),
-.BR process_vm_readv (2),
-.BR process_vm_writev (2)
diff --git a/man2/process_vm_readv.2 b/man2/process_vm_readv.2
deleted file mode 100644
index 3a25cd8f5..000000000
--- a/man2/process_vm_readv.2
+++ /dev/null
@@ -1,314 +0,0 @@
-.\" Copyright (C) 2011 Christopher Yeoh <cyeoh@au1.ibm.com>
-.\" and Copyright (C) 2012 Mike Frysinger <vapier@gentoo.org>
-.\" and Copyright (C) 2012 Michael Kerrisk <mtk.man-pages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Commit fcf634098c00dd9cd247447368495f0b79be12d1
-.\"
-.TH process_vm_readv 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-process_vm_readv, process_vm_writev \-
-transfer data between process address spaces
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/uio.h>
-.P
-.BI "ssize_t process_vm_readv(pid_t " pid ,
-.BI " const struct iovec *" local_iov ,
-.BI " unsigned long " liovcnt ,
-.BI " const struct iovec *" remote_iov ,
-.BI " unsigned long " riovcnt ,
-.BI " unsigned long " flags ");"
-.BI "ssize_t process_vm_writev(pid_t " pid ,
-.BI " const struct iovec *" local_iov ,
-.BI " unsigned long " liovcnt ,
-.BI " const struct iovec *" remote_iov ,
-.BI " unsigned long " riovcnt ,
-.BI " unsigned long " flags ");"
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR process_vm_readv (),
-.BR process_vm_writev ():
-.nf
- _GNU_SOURCE
-.fi
-.SH DESCRIPTION
-These system calls transfer data between the address space
-of the calling process ("the local process") and the process identified by
-.I pid
-("the remote process").
-The data moves directly between the address spaces of the two processes,
-without passing through kernel space.
-.P
-The
-.BR process_vm_readv ()
-system call transfers data from the remote process to the local process.
-The data to be transferred is identified by
-.I remote_iov
-and
-.IR riovcnt :
-.I remote_iov
-is a pointer to an array describing address ranges in the process
-.IR pid ,
-and
-.I riovcnt
-specifies the number of elements in
-.IR remote_iov .
-The data is transferred to the locations specified by
-.I local_iov
-and
-.IR liovcnt :
-.I local_iov
-is a pointer to an array describing address ranges in the calling process,
-and
-.I liovcnt
-specifies the number of elements in
-.IR local_iov .
-.P
-The
-.BR process_vm_writev ()
-system call is the converse of
-.BR process_vm_readv ()\[em]it
-transfers data from the local process to the remote process.
-Other than the direction of the transfer, the arguments
-.IR liovcnt ,
-.IR local_iov ,
-.IR riovcnt ,
-and
-.I remote_iov
-have the same meaning as for
-.BR process_vm_readv ().
-.P
-The
-.I local_iov
-and
-.I remote_iov
-arguments point to an array of
-.I iovec
-structures, described in
-.BR iovec (3type).
-.P
-Buffers are processed in array order.
-This means that
-.BR process_vm_readv ()
-completely fills
-.I local_iov[0]
-before proceeding to
-.IR local_iov[1] ,
-and so on.
-Likewise,
-.I remote_iov[0]
-is completely read before proceeding to
-.IR remote_iov[1] ,
-and so on.
-.P
-Similarly,
-.BR process_vm_writev ()
-writes out the entire contents of
-.I local_iov[0]
-before proceeding to
-.IR local_iov[1] ,
-and it completely fills
-.I remote_iov[0]
-before proceeding to
-.IR remote_iov[1] .
-.P
-The lengths of
-.I remote_iov[i].iov_len
-and
-.I local_iov[i].iov_len
-do not have to be the same.
-Thus, it is possible to split a single local buffer
-into multiple remote buffers, or vice versa.
-.P
-The
-.I flags
-argument is currently unused and must be set to 0.
-.P
-The values specified in the
-.I liovcnt
-and
-.I riovcnt
-arguments must be less than or equal to
-.B IOV_MAX
-(defined in
-.I <limits.h>
-or accessible via the call
-.IR sysconf(_SC_IOV_MAX) ).
-.\" In time, glibc might provide a wrapper that works around this limit,
-.\" as is done for readv()/writev()
-.P
-The count arguments and
-.I local_iov
-are checked before doing any transfers.
-If the counts are too big, or
-.I local_iov
-is invalid,
-or the addresses refer to regions that are inaccessible to the local process,
-none of the vectors will be processed
-and an error will be returned immediately.
-.P
-Note, however, that these system calls do not check the memory regions
-in the remote process until just before doing the read/write.
-Consequently, a partial read/write (see RETURN VALUE)
-may result if one of the
-.I remote_iov
-elements points to an invalid memory region in the remote process.
-No further reads/writes will be attempted beyond that point.
-Keep this in mind when attempting to read data of unknown length
-(such as C strings that are null-terminated) from a remote process,
-by avoiding spanning memory pages (typically 4\ KiB) in a single remote
-.I iovec
-element.
-(Instead, split the remote read into two
-.I remote_iov
-elements and have them merge back into a single write
-.I local_iov
-entry.
-The first read entry goes up to the page boundary,
-while the second starts on the next page boundary.)
-.P
-Permission to read from or write to another process
-is governed by a ptrace access mode
-.B PTRACE_MODE_ATTACH_REALCREDS
-check; see
-.BR ptrace (2).
-.SH RETURN VALUE
-On success,
-.BR process_vm_readv ()
-returns the number of bytes read and
-.BR process_vm_writev ()
-returns the number of bytes written.
-This return value may be less than the total number of requested bytes,
-if a partial read/write occurred.
-(Partial transfers apply at the granularity of
-.I iovec
-elements.
-These system calls won't perform a partial transfer that splits a single
-.I iovec
-element.)
-The caller should check the return value to determine whether
-a partial read/write occurred.
-.P
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-The memory described by
-.I local_iov
-is outside the caller's accessible address space.
-.TP
-.B EFAULT
-The memory described by
-.I remote_iov
-is outside the accessible address space of the process
-.IR pid .
-.TP
-.B EINVAL
-The sum of the
-.I iov_len
-values of either
-.I local_iov
-or
-.I remote_iov
-overflows a
-.I ssize_t
-value.
-.TP
-.B EINVAL
-.I flags
-is not 0.
-.TP
-.B EINVAL
-.I liovcnt
-or
-.I riovcnt
-is too large.
-.TP
-.B ENOMEM
-Could not allocate memory for internal copies of the
-.I iovec
-structures.
-.TP
-.B EPERM
-The caller does not have permission to access the address space of the process
-.IR pid .
-.TP
-.B ESRCH
-No process with ID
-.I pid
-exists.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.2,
-glibc 2.15.
-.SH NOTES
-The data transfers performed by
-.BR process_vm_readv ()
-and
-.BR process_vm_writev ()
-are not guaranteed to be atomic in any way.
-.P
-These system calls were designed to permit fast message passing
-by allowing messages to be exchanged with a single copy operation
-(rather than the double copy that would be required
-when using, for example, shared memory or pipes).
-.\" Original user is MPI, http://www.mcs.anl.gov/research/projects/mpi/
-.\" See also some benchmarks at http://lwn.net/Articles/405284/
-.\" and http://marc.info/?l=linux-mm&m=130105930902915&w=2
-.SH EXAMPLES
-The following code sample demonstrates the use of
-.BR process_vm_readv ().
-It reads 20 bytes at the address 0x10000 from the process with PID 10
-and writes the first 10 bytes into
-.I buf1
-and the second 10 bytes into
-.IR buf2 .
-.P
-.\" SRC BEGIN (process_vm_readv.c)
-.EX
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/uio.h>
-\&
-int
-main(void)
-{
- char buf1[10];
- char buf2[10];
- pid_t pid = 10; /* PID of remote process */
- ssize_t nread;
- struct iovec local[2];
- struct iovec remote[1];
-\&
- local[0].iov_base = buf1;
- local[0].iov_len = 10;
- local[1].iov_base = buf2;
- local[1].iov_len = 10;
- remote[0].iov_base = (void *) 0x10000;
- remote[0].iov_len = 20;
-\&
- nread = process_vm_readv(pid, local, 2, remote, 1, 0);
- if (nread != 20)
- exit(EXIT_FAILURE);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR readv (2),
-.BR writev (2)
diff --git a/man2/process_vm_writev.2 b/man2/process_vm_writev.2
deleted file mode 100644
index 7b198a9a4..000000000
--- a/man2/process_vm_writev.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/process_vm_readv.2
diff --git a/man2/prof.2 b/man2/prof.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/prof.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/pselect.2 b/man2/pselect.2
deleted file mode 100644
index e17784318..000000000
--- a/man2/pselect.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/select.2
diff --git a/man2/pselect6.2 b/man2/pselect6.2
deleted file mode 100644
index e17784318..000000000
--- a/man2/pselect6.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/select.2
diff --git a/man2/ptrace.2 b/man2/ptrace.2
deleted file mode 100644
index c6487656c..000000000
--- a/man2/ptrace.2
+++ /dev/null
@@ -1,2986 +0,0 @@
-.\" Copyright (c) 1993 Michael Haardt <michael@moria.de>
-.\" Fri Apr 2 11:32:09 MET DST 1993
-.\"
-.\" and changes Copyright (C) 1999 Mike Coleman (mkc@acm.org)
-.\" -- major revision to fully document ptrace semantics per recent Linux
-.\" kernel (2.2.10) and glibc (2.1.2)
-.\" Sun Nov 7 03:18:35 CST 1999
-.\"
-.\" and Copyright (c) 2011, Denys Vlasenko <vda.linux@googlemail.com>
-.\" and Copyright (c) 2015, 2016, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified Fri Jul 23 23:47:18 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Fri Jan 31 16:46:30 1997 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Thu Oct 7 17:28:49 1999 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\"
-.\" 2006-03-24, Chuck Ebbert <76306.1226@compuserve.com>
-.\" Added PTRACE_SETOPTIONS, PTRACE_GETEVENTMSG, PTRACE_GETSIGINFO,
-.\" PTRACE_SETSIGINFO, PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP
-.\" (Thanks to Blaisorblade, Daniel Jacobowitz and others who helped.)
-.\" 2011-09, major update by Denys Vlasenko <vda.linux@googlemail.com>
-.\" 2015-01, Kees Cook <keescook@chromium.org>
-.\" Added PTRACE_O_TRACESECCOMP, PTRACE_EVENT_SECCOMP
-.\"
-.\" FIXME The following are undocumented:
-.\"
-.\" PTRACE_GETWMMXREGS
-.\" PTRACE_SETWMMXREGS
-.\" ARM
-.\" Linux 2.6.12
-.\"
-.\" PTRACE_SET_SYSCALL
-.\" ARM and ARM64
-.\" Linux 2.6.16
-.\" commit 3f471126ee53feb5e9b210ea2f525ed3bb9b7a7f
-.\" Author: Nicolas Pitre <nico@cam.org>
-.\" Date: Sat Jan 14 19:30:04 2006 +0000
-.\"
-.\" PTRACE_GETCRUNCHREGS
-.\" PTRACE_SETCRUNCHREGS
-.\" ARM
-.\" Linux 2.6.18
-.\" commit 3bec6ded282b331552587267d67a06ed7fd95ddd
-.\" Author: Lennert Buytenhek <buytenh@wantstofly.org>
-.\" Date: Tue Jun 27 22:56:18 2006 +0100
-.\"
-.\" PTRACE_GETVFPREGS
-.\" PTRACE_SETVFPREGS
-.\" ARM and ARM64
-.\" Linux 2.6.30
-.\" commit 3d1228ead618b88e8606015cbabc49019981805d
-.\" Author: Catalin Marinas <catalin.marinas@arm.com>
-.\" Date: Wed Feb 11 13:12:56 2009 +0100
-.\"
-.\" PTRACE_GETHBPREGS
-.\" PTRACE_SETHBPREGS
-.\" ARM and ARM64
-.\" Linux 2.6.37
-.\" commit 864232fa1a2f8dfe003438ef0851a56722740f3e
-.\" Author: Will Deacon <will.deacon@arm.com>
-.\" Date: Fri Sep 3 10:42:55 2010 +0100
-.\"
-.\" PTRACE_SINGLEBLOCK
-.\" Since at least Linux 2.4.0 on various architectures
-.\" Since Linux 2.6.25 on x86 (and others?)
-.\" commit 5b88abbf770a0e1975c668743100f42934f385e8
-.\" Author: Roland McGrath <roland@redhat.com>
-.\" Date: Wed Jan 30 13:30:53 2008 +0100
-.\" ptrace: generic PTRACE_SINGLEBLOCK
-.\"
-.\" PTRACE_GETFPXREGS
-.\" PTRACE_SETFPXREGS
-.\" Since at least Linux 2.4.0 on various architectures
-.\"
-.\" PTRACE_GETFDPIC
-.\" PTRACE_GETFDPIC_EXEC
-.\" PTRACE_GETFDPIC_INTERP
-.\" blackfin, c6x, frv, sh
-.\" First appearance in Linux 2.6.11 on frv
-.\"
-.\" and others that can be found in the arch/*/include/uapi/asm/ptrace files
-.\"
-.TH ptrace 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ptrace \- process trace
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/ptrace.h>
-.P
-.BI "long ptrace(enum __ptrace_request " op ", pid_t " pid ,
-.BI " void *" addr ", void *" data );
-.fi
-.SH DESCRIPTION
-The
-.BR ptrace ()
-system call provides a means by which one process (the "tracer")
-may observe and control the execution of another process (the "tracee"),
-and examine and change the tracee's memory and registers.
-It is primarily used to implement breakpoint debugging and system
-call tracing.
-.P
-A tracee first needs to be attached to the tracer.
-Attachment and subsequent commands are per thread:
-in a multithreaded process,
-every thread can be individually attached to a
-(potentially different) tracer,
-or left not attached and thus not debugged.
-Therefore, "tracee" always means "(one) thread",
-never "a (possibly multithreaded) process".
-Ptrace commands are always sent to
-a specific tracee using a call of the form
-.P
-.in +4n
-.EX
-ptrace(PTRACE_foo, pid, ...)
-.EE
-.in
-.P
-where
-.I pid
-is the thread ID of the corresponding Linux thread.
-.P
-(Note that in this page, a "multithreaded process"
-means a thread group consisting of threads created using the
-.BR clone (2)
-.B CLONE_THREAD
-flag.)
-.P
-A process can initiate a trace by calling
-.BR fork (2)
-and having the resulting child do a
-.BR PTRACE_TRACEME ,
-followed (typically) by an
-.BR execve (2).
-Alternatively, one process may commence tracing another process using
-.B PTRACE_ATTACH
-or
-.BR PTRACE_SEIZE .
-.P
-While being traced, the tracee will stop each time a signal is delivered,
-even if the signal is being ignored.
-(An exception is
-.BR SIGKILL ,
-which has its usual effect.)
-The tracer will be notified at its next call to
-.BR waitpid (2)
-(or one of the related "wait" system calls); that call will return a
-.I status
-value containing information that indicates
-the cause of the stop in the tracee.
-While the tracee is stopped,
-the tracer can use various ptrace operations to inspect and modify the tracee.
-The tracer then causes the tracee to continue,
-optionally ignoring the delivered signal
-(or even delivering a different signal instead).
-.P
-If the
-.B PTRACE_O_TRACEEXEC
-option is not in effect, all successful calls to
-.BR execve (2)
-by the traced process will cause it to be sent a
-.B SIGTRAP
-signal,
-giving the parent a chance to gain control before the new program
-begins execution.
-.P
-When the tracer is finished tracing, it can cause the tracee to continue
-executing in a normal, untraced mode via
-.BR PTRACE_DETACH .
-.P
-The value of
-.I op
-determines the operation to be performed:
-.TP
-.B PTRACE_TRACEME
-Indicate that this process is to be traced by its parent.
-A process probably shouldn't make this operation if its parent
-isn't expecting to trace it.
-.RI ( pid ,
-.IR addr ,
-and
-.I data
-are ignored.)
-.IP
-The
-.B PTRACE_TRACEME
-operation is used only by the tracee;
-the remaining operations are used only by the tracer.
-In the following operations,
-.I pid
-specifies the thread ID of the tracee to be acted on.
-For operations other than
-.BR PTRACE_ATTACH ,
-.BR PTRACE_SEIZE ,
-.BR PTRACE_INTERRUPT ,
-and
-.BR PTRACE_KILL ,
-the tracee must be stopped.
-.TP
-.B PTRACE_PEEKTEXT
-.TQ
-.B PTRACE_PEEKDATA
-Read a word at the address
-.I addr
-in the tracee's memory, returning the word as the result of the
-.BR ptrace ()
-call.
-Linux does not have separate text and data address spaces,
-so these two operations are currently equivalent.
-.RI ( data
-is ignored; but see NOTES.)
-.TP
-.B PTRACE_PEEKUSER
-.\" PTRACE_PEEKUSR in kernel source, but glibc uses PTRACE_PEEKUSER,
-.\" and that is the name that seems common on other systems.
-Read a word at offset
-.I addr
-in the tracee's USER area,
-which holds the registers and other information about the process
-(see
-.IR <sys/user.h> ).
-The word is returned as the result of the
-.BR ptrace ()
-call.
-Typically, the offset must be word-aligned, though this might vary by
-architecture.
-See NOTES.
-.RI ( data
-is ignored; but see NOTES.)
-.TP
-.B PTRACE_POKETEXT
-.TQ
-.B PTRACE_POKEDATA
-Copy the word
-.I data
-to the address
-.I addr
-in the tracee's memory.
-As for
-.B PTRACE_PEEKTEXT
-and
-.BR PTRACE_PEEKDATA ,
-these two operations are currently equivalent.
-.TP
-.B PTRACE_POKEUSER
-.\" PTRACE_POKEUSR in kernel source, but glibc uses PTRACE_POKEUSER,
-.\" and that is the name that seems common on other systems.
-Copy the word
-.I data
-to offset
-.I addr
-in the tracee's USER area.
-As for
-.BR PTRACE_PEEKUSER ,
-the offset must typically be word-aligned.
-In order to maintain the integrity of the kernel,
-some modifications to the USER area are disallowed.
-.\" FIXME In the preceding sentence, which modifications are disallowed,
-.\" and when they are disallowed, how does user space discover that fact?
-.TP
-.B PTRACE_GETREGS
-.TQ
-.B PTRACE_GETFPREGS
-Copy the tracee's general-purpose or floating-point registers,
-respectively, to the address
-.I data
-in the tracer.
-See
-.I <sys/user.h>
-for information on the format of this data.
-.RI ( addr
-is ignored.)
-Note that SPARC systems have the meaning of
-.I data
-and
-.I addr
-reversed; that is,
-.I data
-is ignored and the registers are copied to the address
-.IR addr .
-.B PTRACE_GETREGS
-and
-.B PTRACE_GETFPREGS
-are not present on all architectures.
-.TP
-.BR PTRACE_GETREGSET " (since Linux 2.6.34)"
-Read the tracee's registers.
-.I addr
-specifies, in an architecture-dependent way, the type of registers to be read.
-.B NT_PRSTATUS
-(with numerical value 1)
-usually results in reading of general-purpose registers.
-If the CPU has, for example,
-floating-point and/or vector registers, they can be retrieved by setting
-.I addr
-to the corresponding
-.B NT_foo
-constant.
-.I data
-points to a
-.BR "struct iovec" ,
-which describes the destination buffer's location and length.
-On return, the kernel modifies
-.B iov.iov_len
-to indicate the actual number of bytes returned.
-.TP
-.B PTRACE_SETREGS
-.TQ
-.B PTRACE_SETFPREGS
-Modify the tracee's general-purpose or floating-point registers,
-respectively, from the address
-.I data
-in the tracer.
-As for
-.BR PTRACE_POKEUSER ,
-some general-purpose register modifications may be disallowed.
-.\" FIXME . In the preceding sentence, which modifications are disallowed,
-.\" and when they are disallowed, how does user space discover that fact?
-.RI ( addr
-is ignored.)
-Note that SPARC systems have the meaning of
-.I data
-and
-.I addr
-reversed; that is,
-.I data
-is ignored and the registers are copied from the address
-.IR addr .
-.B PTRACE_SETREGS
-and
-.B PTRACE_SETFPREGS
-are not present on all architectures.
-.TP
-.BR PTRACE_SETREGSET " (since Linux 2.6.34)"
-Modify the tracee's registers.
-The meaning of
-.I addr
-and
-.I data
-is analogous to
-.BR PTRACE_GETREGSET .
-.TP
-.BR PTRACE_GETSIGINFO " (since Linux 2.3.99-pre6)"
-Retrieve information about the signal that caused the stop.
-Copy a
-.I siginfo_t
-structure (see
-.BR sigaction (2))
-from the tracee to the address
-.I data
-in the tracer.
-.RI ( addr
-is ignored.)
-.TP
-.BR PTRACE_SETSIGINFO " (since Linux 2.3.99-pre6)"
-Set signal information:
-copy a
-.I siginfo_t
-structure from the address
-.I data
-in the tracer to the tracee.
-This will affect only signals that would normally be delivered to
-the tracee and were caught by the tracer.
-It may be difficult to tell
-these normal signals from synthetic signals generated by
-.BR ptrace ()
-itself.
-.RI ( addr
-is ignored.)
-.TP
-.BR PTRACE_PEEKSIGINFO " (since Linux 3.10)"
-.\" commit 84c751bd4aebbaae995fe32279d3dba48327bad4
-Retrieve
-.I siginfo_t
-structures without removing signals from a queue.
-.I addr
-points to a
-.I ptrace_peeksiginfo_args
-structure that specifies the ordinal position from which
-copying of signals should start,
-and the number of signals to copy.
-.I siginfo_t
-structures are copied into the buffer pointed to by
-.IR data .
-The return value contains the number of copied signals (zero indicates
-that there is no signal corresponding to the specified ordinal position).
-Within the returned
-.I siginfo_t
-structures,
-the
-.I si_code
-field includes information
-.RB ( __SI_CHLD ,
-.BR __SI_FAULT ,
-etc.) that is not otherwise exposed to user space.
-.IP
-.in +4n
-.EX
-struct ptrace_peeksiginfo_args {
- u64 off; /* Ordinal position in queue at which
- to start copying signals */
- u32 flags; /* PTRACE_PEEKSIGINFO_SHARED or 0 */
- s32 nr; /* Number of signals to copy */
-};
-.EE
-.in
-.IP
-Currently, there is only one flag,
-.BR PTRACE_PEEKSIGINFO_SHARED ,
-for dumping signals from the process-wide signal queue.
-If this flag is not set,
-signals are read from the per-thread queue of the specified thread.
-.in
-.TP
-.BR PTRACE_GETSIGMASK " (since Linux 3.11)"
-.\" commit 29000caecbe87b6b66f144f72111f0d02fbbf0c1
-Place a copy of the mask of blocked signals (see
-.BR sigprocmask (2))
-in the buffer pointed to by
-.IR data ,
-which should be a pointer to a buffer of type
-.IR sigset_t .
-The
-.I addr
-argument contains the size of the buffer pointed to by
-.I data
-(i.e.,
-.IR sizeof(sigset_t) ).
-.TP
-.BR PTRACE_SETSIGMASK " (since Linux 3.11)"
-Change the mask of blocked signals (see
-.BR sigprocmask (2))
-to the value specified in the buffer pointed to by
-.IR data ,
-which should be a pointer to a buffer of type
-.IR sigset_t .
-The
-.I addr
-argument contains the size of the buffer pointed to by
-.I data
-(i.e.,
-.IR sizeof(sigset_t) ).
-.TP
-.BR PTRACE_SETOPTIONS " (since Linux 2.4.6; see BUGS for caveats)"
-Set ptrace options from
-.IR data .
-.RI ( addr
-is ignored.)
-.I data
-is interpreted as a bit mask of options,
-which are specified by the following flags:
-.RS
-.TP
-.BR PTRACE_O_EXITKILL " (since Linux 3.8)"
-.\" commit 992fb6e170639b0849bace8e49bf31bd37c4123
-Send a
-.B SIGKILL
-signal to the tracee if the tracer exits.
-This option is useful for ptrace jailers that
-want to ensure that tracees can never escape the tracer's control.
-.TP
-.BR PTRACE_O_TRACECLONE " (since Linux 2.5.46)"
-Stop the tracee at the next
-.BR clone (2)
-and automatically start tracing the newly cloned process,
-which will start with a
-.BR SIGSTOP ,
-or
-.B PTRACE_EVENT_STOP
-if
-.B PTRACE_SEIZE
-was used.
-A
-.BR waitpid (2)
-by the tracer will return a
-.I status
-value such that
-.IP
-.nf
- status>>8 == (SIGTRAP | (PTRACE_EVENT_CLONE<<8))
-.fi
-.IP
-The PID of the new process can be retrieved with
-.BR PTRACE_GETEVENTMSG .
-.IP
-This option may not catch
-.BR clone (2)
-calls in all cases.
-If the tracee calls
-.BR clone (2)
-with the
-.B CLONE_VFORK
-flag,
-.B PTRACE_EVENT_VFORK
-will be delivered instead
-if
-.B PTRACE_O_TRACEVFORK
-is set; otherwise if the tracee calls
-.BR clone (2)
-with the exit signal set to
-.BR SIGCHLD ,
-.B PTRACE_EVENT_FORK
-will be delivered if
-.B PTRACE_O_TRACEFORK
-is set.
-.TP
-.BR PTRACE_O_TRACEEXEC " (since Linux 2.5.46)"
-Stop the tracee at the next
-.BR execve (2).
-A
-.BR waitpid (2)
-by the tracer will return a
-.I status
-value such that
-.IP
-.nf
- status>>8 == (SIGTRAP | (PTRACE_EVENT_EXEC<<8))
-.fi
-.IP
-If the execing thread is not a thread group leader,
-the thread ID is reset to the thread group leader's ID before this stop.
-Since Linux 3.0, the former thread ID can be retrieved with
-.BR PTRACE_GETEVENTMSG .
-.TP
-.BR PTRACE_O_TRACEEXIT " (since Linux 2.5.60)"
-Stop the tracee at exit.
-A
-.BR waitpid (2)
-by the tracer will return a
-.I status
-value such that
-.IP
-.nf
- status>>8 == (SIGTRAP | (PTRACE_EVENT_EXIT<<8))
-.fi
-.IP
-The tracee's exit status can be retrieved with
-.BR PTRACE_GETEVENTMSG .
-.IP
-The tracee is stopped early during process exit,
-when registers are still available,
-allowing the tracer to see where the exit occurred,
-whereas the normal exit notification is done after the process
-is finished exiting.
-Even though context is available,
-the tracer cannot prevent the exit from happening at this point.
-.TP
-.BR PTRACE_O_TRACEFORK " (since Linux 2.5.46)"
-Stop the tracee at the next
-.BR fork (2)
-and automatically start tracing the newly forked process,
-which will start with a
-.BR SIGSTOP ,
-or
-.B PTRACE_EVENT_STOP
-if
-.B PTRACE_SEIZE
-was used.
-A
-.BR waitpid (2)
-by the tracer will return a
-.I status
-value such that
-.IP
-.nf
- status>>8 == (SIGTRAP | (PTRACE_EVENT_FORK<<8))
-.fi
-.IP
-The PID of the new process can be retrieved with
-.BR PTRACE_GETEVENTMSG .
-.TP
-.BR PTRACE_O_TRACESYSGOOD " (since Linux 2.4.6)"
-When delivering system call traps, set bit 7 in the signal number
-(i.e., deliver
-.IR "SIGTRAP|0x80" ).
-This makes it easy for the tracer to distinguish
-normal traps from those caused by a system call.
-.TP
-.BR PTRACE_O_TRACEVFORK " (since Linux 2.5.46)"
-Stop the tracee at the next
-.BR vfork (2)
-and automatically start tracing the newly vforked process,
-which will start with a
-.BR SIGSTOP ,
-or
-.B PTRACE_EVENT_STOP
-if
-.B PTRACE_SEIZE
-was used.
-A
-.BR waitpid (2)
-by the tracer will return a
-.I status
-value such that
-.IP
-.nf
- status>>8 == (SIGTRAP | (PTRACE_EVENT_VFORK<<8))
-.fi
-.IP
-The PID of the new process can be retrieved with
-.BR PTRACE_GETEVENTMSG .
-.TP
-.BR PTRACE_O_TRACEVFORKDONE " (since Linux 2.5.60)"
-Stop the tracee at the completion of the next
-.BR vfork (2).
-A
-.BR waitpid (2)
-by the tracer will return a
-.I status
-value such that
-.IP
-.nf
- status>>8 == (SIGTRAP | (PTRACE_EVENT_VFORK_DONE<<8))
-.fi
-.IP
-The PID of the new process can (since Linux 2.6.18) be retrieved with
-.BR PTRACE_GETEVENTMSG .
-.TP
-.BR PTRACE_O_TRACESECCOMP " (since Linux 3.5)"
-Stop the tracee when a
-.BR seccomp (2)
-.B SECCOMP_RET_TRACE
-rule is triggered.
-A
-.BR waitpid (2)
-by the tracer will return a
-.I status
-value such that
-.IP
-.nf
- status>>8 == (SIGTRAP | (PTRACE_EVENT_SECCOMP<<8))
-.fi
-.IP
-While this triggers a
-.B PTRACE_EVENT
-stop, it is similar to a syscall-enter-stop.
-For details, see the note on
-.B PTRACE_EVENT_SECCOMP
-below.
-The seccomp event message data (from the
-.B SECCOMP_RET_DATA
-portion of the seccomp filter rule) can be retrieved with
-.BR PTRACE_GETEVENTMSG .
-.TP
-.BR PTRACE_O_SUSPEND_SECCOMP " (since Linux 4.3)"
-.\" commit 13c4a90119d28cfcb6b5bdd820c233b86c2b0237
-Suspend the tracee's seccomp protections.
-This applies regardless of mode, and
-can be used when the tracee has not yet installed seccomp filters.
-That is, a valid use case is to suspend a tracee's seccomp protections
-before they are installed by the tracee,
-let the tracee install the filters,
-and then clear this flag when the filters should be resumed.
-Setting this option requires that the tracer have the
-.B CAP_SYS_ADMIN
-capability,
-not have any seccomp protections installed, and not have
-.B PTRACE_O_SUSPEND_SECCOMP
-set on itself.
-.RE
-.TP
-.BR PTRACE_GETEVENTMSG " (since Linux 2.5.46)"
-Retrieve a message (as an
-.IR "unsigned long" )
-about the ptrace event
-that just happened, placing it at the address
-.I data
-in the tracer.
-For
-.BR PTRACE_EVENT_EXIT ,
-this is the tracee's exit status.
-For
-.BR PTRACE_EVENT_FORK ,
-.BR PTRACE_EVENT_VFORK ,
-.BR PTRACE_EVENT_VFORK_DONE ,
-and
-.BR PTRACE_EVENT_CLONE ,
-this is the PID of the new process.
-For
-.BR PTRACE_EVENT_SECCOMP ,
-this is the
-.BR seccomp (2)
-filter's
-.B SECCOMP_RET_DATA
-associated with the triggered rule.
-.RI ( addr
-is ignored.)
-.TP
-.B PTRACE_CONT
-Restart the stopped tracee process.
-If
-.I data
-is nonzero,
-it is interpreted as the number of a signal to be delivered to the tracee;
-otherwise, no signal is delivered.
-Thus, for example, the tracer can control
-whether a signal sent to the tracee is delivered or not.
-.RI ( addr
-is ignored.)
-.TP
-.B PTRACE_SYSCALL
-.TQ
-.B PTRACE_SINGLESTEP
-Restart the stopped tracee as for
-.BR PTRACE_CONT ,
-but arrange for the tracee to be stopped at
-the next entry to or exit from a system call,
-or after execution of a single instruction, respectively.
-(The tracee will also, as usual, be stopped upon receipt of a signal.)
-From the tracer's perspective, the tracee will appear to have been
-stopped by receipt of a
-.BR SIGTRAP .
-So, for
-.BR PTRACE_SYSCALL ,
-for example, the idea is to inspect
-the arguments to the system call at the first stop,
-then do another
-.B PTRACE_SYSCALL
-and inspect the return value of the system call at the second stop.
-The
-.I data
-argument is treated as for
-.BR PTRACE_CONT .
-.RI ( addr
-is ignored.)
-.TP
-.BR PTRACE_SET_SYSCALL " (since Linux 2.6.16)"
-.\" commit 3f471126ee53feb5e9b210ea2f525ed3bb9b7a7f
-When in syscall-enter-stop,
-change the number of the system call that is about to
-be executed to the number specified in the
-.I data
-argument.
-The
-.I addr
-argument is ignored.
-This operation is currently
-.\" As of 4.19-rc2
-supported only on arm (and arm64, though only for backwards compatibility),
-.\" commit 27aa55c5e5123fa8b8ad0156559d34d7edff58ca
-but most other architectures have other means of accomplishing this
-(usually by changing the register that the userland code passed the
-system call number in).
-.\" see change_syscall in tools/testing/selftests/seccomp/seccomp_bpf.c
-.\" and also strace's linux/*/set_scno.c files.
-.TP
-.B PTRACE_SYSEMU
-.TQ
-.BR PTRACE_SYSEMU_SINGLESTEP " (since Linux 2.6.14)"
-For
-.BR PTRACE_SYSEMU ,
-continue and stop on entry to the next system call,
-which will not be executed.
-See the documentation on syscall-stops below.
-For
-.BR PTRACE_SYSEMU_SINGLESTEP ,
-do the same but also singlestep if not a system call.
-This call is used by programs like
-User Mode Linux that want to emulate all the tracee's system calls.
-The
-.I data
-argument is treated as for
-.BR PTRACE_CONT .
-The
-.I addr
-argument is ignored.
-These operations are currently
-.\" As at 3.7
-supported only on x86.
-.TP
-.BR PTRACE_LISTEN " (since Linux 3.4)"
-Restart the stopped tracee, but prevent it from executing.
-The resulting state of the tracee is similar to a process which
-has been stopped by a
-.B SIGSTOP
-(or other stopping signal).
-See the "group-stop" subsection for additional information.
-.B PTRACE_LISTEN
-works only on tracees attached by
-.BR PTRACE_SEIZE .
-.TP
-.B PTRACE_KILL
-Send the tracee a
-.B SIGKILL
-to terminate it.
-.RI ( addr
-and
-.I data
-are ignored.)
-.IP
-.I This operation is deprecated; do not use it!
-Instead, send a
-.B SIGKILL
-directly using
-.BR kill (2)
-or
-.BR tgkill (2).
-The problem with
-.B PTRACE_KILL
-is that it requires the tracee to be in signal-delivery-stop,
-otherwise it may not work
-(i.e., may complete successfully but won't kill the tracee).
-By contrast, sending a
-.B SIGKILL
-directly has no such limitation.
-.\" [Note from Denys Vlasenko:
-.\" deprecation suggested by Oleg Nesterov. He prefers to deprecate it
-.\" instead of describing (and needing to support) PTRACE_KILL's quirks.]
-.TP
-.BR PTRACE_INTERRUPT " (since Linux 3.4)"
-Stop a tracee.
-If the tracee is running or sleeping in kernel space and
-.B PTRACE_SYSCALL
-is in effect,
-the system call is interrupted and syscall-exit-stop is reported.
-(The interrupted system call is restarted when the tracee is restarted.)
-If the tracee was already stopped by a signal and
-.B PTRACE_LISTEN
-was sent to it,
-the tracee stops with
-.B PTRACE_EVENT_STOP
-and
-.I WSTOPSIG(status)
-returns the stop signal.
-If any other ptrace-stop is generated at the same time (for example,
-if a signal is sent to the tracee), this ptrace-stop happens.
-If none of the above applies (for example, if the tracee is running in user
-space), it stops with
-.B PTRACE_EVENT_STOP
-with
-.I WSTOPSIG(status)
-==
-.BR SIGTRAP .
-.B PTRACE_INTERRUPT
-works only on tracees attached by
-.BR PTRACE_SEIZE .
-.TP
-.B PTRACE_ATTACH
-Attach to the process specified in
-.IR pid ,
-making it a tracee of the calling process.
-.\" No longer true (removed by Denys Vlasenko, 2011, who remarks:
-.\" "I think it isn't true in non-ancient 2.4 and in Linux 2.6/3.x.
-.\" Basically, it's not true for any Linux in practical use.
-.\" ; the behavior of the tracee is as if it had done a
-.\" .BR PTRACE_TRACEME .
-.\" The calling process actually becomes the parent of the tracee
-.\" process for most purposes (e.g., it will receive
-.\" notification of tracee events and appears in
-.\" .BR ps (1)
-.\" output as the tracee's parent), but a
-.\" .BR getppid (2)
-.\" by the tracee will still return the PID of the original parent.
-The tracee is sent a
-.BR SIGSTOP ,
-but will not necessarily have stopped
-by the completion of this call; use
-.BR waitpid (2)
-to wait for the tracee to stop.
-See the "Attaching and detaching" subsection for additional information.
-.RI ( addr
-and
-.I data
-are ignored.)
-.IP
-Permission to perform a
-.B PTRACE_ATTACH
-is governed by a ptrace access mode
-.B PTRACE_MODE_ATTACH_REALCREDS
-check; see below.
-.TP
-.BR PTRACE_SEIZE " (since Linux 3.4)"
-.\"
-.\" Noted by Dmitry Levin:
-.\"
-.\" PTRACE_SEIZE was introduced by commit v3.1-rc1~308^2~28, but
-.\" it had to be used along with a temporary flag PTRACE_SEIZE_DEVEL,
-.\" which was removed later by commit v3.4-rc1~109^2~20.
-.\"
-.\" That is, [before] v3.4 we had a test mode of PTRACE_SEIZE API,
-.\" which was not compatible with the current PTRACE_SEIZE API introduced
-.\" in Linux 3.4.
-.\"
-Attach to the process specified in
-.IR pid ,
-making it a tracee of the calling process.
-Unlike
-.BR PTRACE_ATTACH ,
-.B PTRACE_SEIZE
-does not stop the process.
-Group-stops are reported as
-.B PTRACE_EVENT_STOP
-and
-.I WSTOPSIG(status)
-returns the stop signal.
-Automatically attached children stop with
-.B PTRACE_EVENT_STOP
-and
-.I WSTOPSIG(status)
-returns
-.B SIGTRAP
-instead of having
-.B SIGSTOP
-signal delivered to them.
-.BR execve (2)
-does not deliver an extra
-.BR SIGTRAP .
-Only a
-.BR PTRACE_SEIZE d
-process can accept
-.B PTRACE_INTERRUPT
-and
-.B PTRACE_LISTEN
-commands.
-The "seized" behavior just described is inherited by
-children that are automatically attached using
-.BR PTRACE_O_TRACEFORK ,
-.BR PTRACE_O_TRACEVFORK ,
-and
-.BR PTRACE_O_TRACECLONE .
-.I addr
-must be zero.
-.I data
-contains a bit mask of ptrace options to activate immediately.
-.IP
-Permission to perform a
-.B PTRACE_SEIZE
-is governed by a ptrace access mode
-.B PTRACE_MODE_ATTACH_REALCREDS
-check; see below.
-.\"
-.TP
-.BR PTRACE_SECCOMP_GET_FILTER " (since Linux 4.4)"
-.\" commit f8e529ed941ba2bbcbf310b575d968159ce7e895
-This operation allows the tracer to dump the tracee's
-classic BPF filters.
-.IP
-.I addr
-is an integer specifying the index of the filter to be dumped.
-The most recently installed filter has the index 0.
-If
-.I addr
-is greater than the number of installed filters,
-the operation fails with the error
-.BR ENOENT .
-.IP
-.I data
-is either a pointer to a
-.I struct sock_filter
-array that is large enough to store the BPF program,
-or NULL if the program is not to be stored.
-.IP
-Upon success,
-the return value is the number of instructions in the BPF program.
-If
-.I data
-was NULL, then this return value can be used to correctly size the
-.I struct sock_filter
-array passed in a subsequent call.
-.IP
-This operation fails with the error
-.B EACCES
-if the caller does not have the
-.B CAP_SYS_ADMIN
-capability or if the caller is in strict or filter seccomp mode.
-If the filter referred to by
-.I addr
-is not a classic BPF filter, the operation fails with the error
-.BR EMEDIUMTYPE .
-.IP
-This operation is available if the kernel was configured with both the
-.B CONFIG_SECCOMP_FILTER
-and the
-.B CONFIG_CHECKPOINT_RESTORE
-options.
-.TP
-.B PTRACE_DETACH
-Restart the stopped tracee as for
-.BR PTRACE_CONT ,
-but first detach from it.
-Under Linux, a tracee can be detached in this way regardless
-of which method was used to initiate tracing.
-.RI ( addr
-is ignored.)
-.\"
-.TP
-.BR PTRACE_GET_THREAD_AREA " (since Linux 2.6.0)"
-This operation performs a similar task to
-.BR get_thread_area (2).
-It reads the TLS entry in the GDT whose index is given in
-.IR addr ,
-placing a copy of the entry into the
-.I struct user_desc
-pointed to by
-.IR data .
-(By contrast with
-.BR get_thread_area (2),
-the
-.I entry_number
-of the
-.I struct user_desc
-is ignored.)
-.TP
-.BR PTRACE_SET_THREAD_AREA " (since Linux 2.6.0)"
-This operation performs a similar task to
-.BR set_thread_area (2).
-It sets the TLS entry in the GDT whose index is given in
-.IR addr ,
-assigning it the data supplied in the
-.I struct user_desc
-pointed to by
-.IR data .
-(By contrast with
-.BR set_thread_area (2),
-the
-.I entry_number
-of the
-.I struct user_desc
-is ignored; in other words,
-this ptrace operation can't be used to allocate a free TLS entry.)
-.TP
-.BR PTRACE_GET_SYSCALL_INFO " (since Linux 5.3)"
-.\" commit 201766a20e30f982ccfe36bebfad9602c3ff574a
-Retrieve information about the system call that caused the stop.
-The information is placed into the buffer pointed to by the
-.I data
-argument, which should be a pointer to a buffer of type
-.IR "struct ptrace_syscall_info" .
-The
-.I addr
-argument contains the size of the buffer pointed to
-by the
-.I data
-argument (i.e.,
-.IR "sizeof(struct ptrace_syscall_info)" ).
-The return value contains the number of bytes available
-to be written by the kernel.
-If the size of the data to be written by the kernel exceeds the size
-specified by the
-.I addr
-argument, the output data is truncated.
-.IP
-The
-.I ptrace_syscall_info
-structure contains the following fields:
-.IP
-.in +4n
-.EX
-struct ptrace_syscall_info {
- __u8 op; /* Type of system call stop */
- __u32 arch; /* AUDIT_ARCH_* value; see seccomp(2) */
- __u64 instruction_pointer; /* CPU instruction pointer */
- __u64 stack_pointer; /* CPU stack pointer */
- union {
- struct { /* op == PTRACE_SYSCALL_INFO_ENTRY */
- __u64 nr; /* System call number */
- __u64 args[6]; /* System call arguments */
- } entry;
- struct { /* op == PTRACE_SYSCALL_INFO_EXIT */
- __s64 rval; /* System call return value */
- __u8 is_error; /* System call error flag;
- Boolean: does rval contain
- an error value (\-ERRCODE) or
- a nonerror return value? */
- } exit;
- struct { /* op == PTRACE_SYSCALL_INFO_SECCOMP */
- __u64 nr; /* System call number */
- __u64 args[6]; /* System call arguments */
- __u32 ret_data; /* SECCOMP_RET_DATA portion
- of SECCOMP_RET_TRACE
- return value */
- } seccomp;
- };
-};
-.EE
-.in
-.IP
-The
-.IR op ,
-.IR arch ,
-.IR instruction_pointer ,
-and
-.I stack_pointer
-fields are defined for all kinds of ptrace system call stops.
-The rest of the structure is a union; one should read only those fields
-that are meaningful for the kind of system call stop specified by the
-.I op
-field.
-.IP
-The
-.I op
-field has one of the following values (defined in
-.IR <linux/ptrace.h> )
-indicating what type of stop occurred and
-which part of the union is filled:
-.RS
-.TP
-.B PTRACE_SYSCALL_INFO_ENTRY
-The
-.I entry
-component of the union contains information relating to a
-system call entry stop.
-.TP
-.B PTRACE_SYSCALL_INFO_EXIT
-The
-.I exit
-component of the union contains information relating to a
-system call exit stop.
-.TP
-.B PTRACE_SYSCALL_INFO_SECCOMP
-The
-.I seccomp
-component of the union contains information relating to a
-.B PTRACE_EVENT_SECCOMP
-stop.
-.TP
-.B PTRACE_SYSCALL_INFO_NONE
-No component of the union contains relevant information.
-.RE
-.IP
-In case of system call entry or exit stops,
-the data returned by
-.B PTRACE_GET_SYSCALL_INFO
-is limited to type
-.B PTRACE_SYSCALL_INFO_NONE
-unless the
-.B PTRACE_O_TRACESYSGOOD
-option is set before the corresponding system call stop has occurred.
-.\"
-.SS Death under ptrace
-When a (possibly multithreaded) process receives a killing signal
-(one whose disposition is set to
-.B SIG_DFL
-and whose default action is to kill the process),
-all threads exit.
-Tracees report their death to their tracer(s).
-Notification of this event is delivered via
-.BR waitpid (2).
-.P
-Note that the killing signal will first cause signal-delivery-stop
-(on one tracee only),
-and only after it is injected by the tracer
-(or after it was dispatched to a thread which isn't traced),
-will death from the signal happen on
-.I all
-tracees within a multithreaded process.
-(The term "signal-delivery-stop" is explained below.)
-.P
-.B SIGKILL
-does not generate signal-delivery-stop and
-therefore the tracer can't suppress it.
-.B SIGKILL
-kills even within system calls
-(syscall-exit-stop is not generated prior to death by
-.BR SIGKILL ).
-The net effect is that
-.B SIGKILL
-always kills the process (all its threads),
-even if some threads of the process are ptraced.
-.P
-When the tracee calls
-.BR _exit (2),
-it reports its death to its tracer.
-Other threads are not affected.
-.P
-When any thread executes
-.BR exit_group (2),
-every tracee in its thread group reports its death to its tracer.
-.P
-If the
-.B PTRACE_O_TRACEEXIT
-option is on,
-.B PTRACE_EVENT_EXIT
-will happen before actual death.
-This applies to exits via
-.BR _exit (2),
-.BR exit_group (2),
-and signal deaths (except
-.BR SIGKILL ,
-depending on the kernel version; see BUGS below),
-and when threads are torn down on
-.BR execve (2)
-in a multithreaded process.
-.P
-The tracer cannot assume that the ptrace-stopped tracee exists.
-There are many scenarios when the tracee may die while stopped (such as
-.BR SIGKILL ).
-Therefore, the tracer must be prepared to handle an
-.B ESRCH
-error on any ptrace operation.
-Unfortunately, the same error is returned if the tracee
-exists but is not ptrace-stopped
-(for commands which require a stopped tracee),
-or if it is not traced by the process which issued the ptrace call.
-The tracer needs to keep track of the stopped/running state of the tracee,
-and interpret
-.B ESRCH
-as "tracee died unexpectedly" only if it knows that the tracee has
-been observed to enter ptrace-stop.
-Note that there is no guarantee that
-.I waitpid(WNOHANG)
-will reliably report the tracee's death status if a
-ptrace operation returned
-.BR ESRCH .
-.I waitpid(WNOHANG)
-may return 0 instead.
-In other words, the tracee may be "not yet fully dead",
-but already refusing ptrace operations.
-.P
-The tracer can't assume that the tracee
-.I always
-ends its life by reporting
-.I WIFEXITED(status)
-or
-.IR WIFSIGNALED(status) ;
-there are cases where this does not occur.
-For example, if a thread other than the thread group leader does an
-.BR execve (2),
-it disappears;
-its PID will never be seen again,
-and any subsequent ptrace stops will be reported under
-the thread group leader's PID.
-.SS Stopped states
-A tracee can be in two states: running or stopped.
-For the purposes of ptrace, a tracee which is blocked in a system call
-(such as
-.BR read (2),
-.BR pause (2),
-etc.)
-is nevertheless considered to be running, even if the tracee is blocked
-for a long time.
-The state of the tracee after
-.B PTRACE_LISTEN
-is somewhat of a gray area: it is not in any ptrace-stop (ptrace commands
-won't work on it, and it will deliver
-.BR waitpid (2)
-notifications),
-but it also may be considered "stopped" because
-it is not executing instructions (is not scheduled), and if it was
-in group-stop before
-.BR PTRACE_LISTEN ,
-it will not respond to signals until
-.B SIGCONT
-is received.
-.P
-There are many kinds of states when the tracee is stopped, and in ptrace
-discussions they are often conflated.
-Therefore, it is important to use precise terms.
-.P
-In this manual page, any stopped state in which the tracee is ready
-to accept ptrace commands from the tracer is called
-.IR ptrace-stop .
-Ptrace-stops can
-be further subdivided into
-.IR signal-delivery-stop ,
-.IR group-stop ,
-.IR syscall-stop ,
-.IR "PTRACE_EVENT stops" ,
-and so on.
-These stopped states are described in detail below.
-.P
-When the running tracee enters ptrace-stop, it notifies its tracer using
-.BR waitpid (2)
-(or one of the other "wait" system calls).
-Most of this manual page assumes that the tracer waits with:
-.P
-.in +4n
-.EX
-pid = waitpid(pid_or_minus_1, &status, __WALL);
-.EE
-.in
-.P
-Ptrace-stopped tracees are reported as returns with
-.I pid
-greater than 0 and
-.I WIFSTOPPED(status)
-true.
-.\" Denys Vlasenko:
-.\" Do we require __WALL usage, or will just using 0 be ok? (With 0,
-.\" I am not 100% sure there aren't ugly corner cases.) Are the
-.\" rules different if user wants to use waitid? Will waitid require
-.\" WEXITED?
-.\"
-.P
-The
-.B __WALL
-flag does not include the
-.B WSTOPPED
-and
-.B WEXITED
-flags, but implies their functionality.
-.P
-Setting the
-.B WCONTINUED
-flag when calling
-.BR waitpid (2)
-is not recommended: the "continued" state is per-process and
-consuming it can confuse the real parent of the tracee.
-.P
-Use of the
-.B WNOHANG
-flag may cause
-.BR waitpid (2)
-to return 0 ("no wait results available yet")
-even if the tracer knows there should be a notification.
-Example:
-.P
-.in +4n
-.EX
-errno = 0;
-ptrace(PTRACE_CONT, pid, 0L, 0L);
-if (errno == ESRCH) {
- /* tracee is dead */
- r = waitpid(tracee, &status, __WALL | WNOHANG);
- /* r can still be 0 here! */
-}
-.EE
-.in
-.\" FIXME .
-.\" waitid usage? WNOWAIT?
-.\" describe how wait notifications queue (or not queue)
-.P
-The following kinds of ptrace-stops exist: signal-delivery-stops,
-group-stops,
-.B PTRACE_EVENT
-stops, syscall-stops.
-They all are reported by
-.BR waitpid (2)
-with
-.I WIFSTOPPED(status)
-true.
-They may be differentiated by examining the value
-.IR status>>8 ,
-and if there is ambiguity in that value, by querying
-.BR PTRACE_GETSIGINFO .
-(Note: the
-.I WSTOPSIG(status)
-macro can't be used to perform this examination,
-because it returns the value
-.IR "(status>>8)\ &\ 0xff" .)
-.SS Signal-delivery-stop
-When a (possibly multithreaded) process receives any signal except
-.BR SIGKILL ,
-the kernel selects an arbitrary thread which handles the signal.
-(If the signal is generated with
-.BR tgkill (2),
-the target thread can be explicitly selected by the caller.)
-If the selected thread is traced, it enters signal-delivery-stop.
-At this point, the signal is not yet delivered to the process,
-and can be suppressed by the tracer.
-If the tracer doesn't suppress the signal,
-it passes the signal to the tracee in the next ptrace restart operation.
-This second step of signal delivery is called
-.I "signal injection"
-in this manual page.
-Note that if the signal is blocked,
-signal-delivery-stop doesn't happen until the signal is unblocked,
-with the usual exception that
-.B SIGSTOP
-can't be blocked.
-.P
-Signal-delivery-stop is observed by the tracer as
-.BR waitpid (2)
-returning with
-.I WIFSTOPPED(status)
-true, with the signal returned by
-.IR WSTOPSIG(status) .
-If the signal is
-.BR SIGTRAP ,
-this may be a different kind of ptrace-stop;
-see the "Syscall-stops" and "execve" sections below for details.
-If
-.I WSTOPSIG(status)
-returns a stopping signal, this may be a group-stop; see below.
-.SS Signal injection and suppression
-After signal-delivery-stop is observed by the tracer,
-the tracer should restart the tracee with the call
-.P
-.in +4n
-.EX
-ptrace(PTRACE_restart, pid, 0, sig)
-.EE
-.in
-.P
-where
-.B PTRACE_restart
-is one of the restarting ptrace operations.
-If
-.I sig
-is 0, then a signal is not delivered.
-Otherwise, the signal
-.I sig
-is delivered.
-This operation is called
-.I "signal injection"
-in this manual page, to distinguish it from signal-delivery-stop.
-.P
-The
-.I sig
-value may be different from the
-.I WSTOPSIG(status)
-value: the tracer can cause a different signal to be injected.
-.P
-Note that a suppressed signal still causes system calls to return
-prematurely.
-In this case, system calls will be restarted: the tracer will
-observe the tracee reexecuting the interrupted system call (or
-.BR restart_syscall (2)
-system call for a few system calls which use a different mechanism
-for restarting) if the tracer uses
-.BR PTRACE_SYSCALL .
-Even system calls (such as
-.BR poll (2))
-which are not restartable after a signal are restarted after
-the signal is suppressed;
-however, kernel bugs exist which cause some system calls to fail with
-.B EINTR
-even though no observable signal is injected to the tracee.
-.P
-Restarting ptrace commands issued in ptrace-stops other than
-signal-delivery-stop are not guaranteed to inject a signal, even if
-.I sig
-is nonzero.
-No error is reported; a nonzero
-.I sig
-may simply be ignored.
-Ptrace users should not try to "create a new signal" this way: use
-.BR tgkill (2)
-instead.
-.P
-The fact that signal injection operations may be ignored
-when restarting the tracee after
-ptrace stops that are not signal-delivery-stops
-is a cause of confusion among ptrace users.
-One typical scenario is that the tracer observes group-stop,
-mistakes it for signal-delivery-stop, restarts the tracee with
-.P
-.in +4n
-.EX
-ptrace(PTRACE_restart, pid, 0, stopsig)
-.EE
-.in
-.P
-with the intention of injecting
-.IR stopsig ,
-but
-.I stopsig
-gets ignored and the tracee continues to run.
-.P
-The
-.B SIGCONT
-signal has a side effect of waking up (all threads of)
-a group-stopped process.
-This side effect happens before signal-delivery-stop.
-The tracer can't suppress this side effect (it can
-only suppress signal injection, which only causes the
-.B SIGCONT
-handler to not be executed in the tracee, if such a handler is installed).
-In fact, waking up from group-stop may be followed by
-signal-delivery-stop for signal(s)
-.I other than
-.BR SIGCONT ,
-if they were pending when
-.B SIGCONT
-was delivered.
-In other words,
-.B SIGCONT
-may not be the first signal observed by the tracee after it was sent.
-.P
-Stopping signals cause (all threads of) a process to enter group-stop.
-This side effect happens after signal injection, and therefore can be
-suppressed by the tracer.
-.P
-In Linux 2.4 and earlier, the
-.B SIGSTOP
-signal can't be injected.
-.\" In the Linux 2.4 sources, in arch/i386/kernel/signal.c::do_signal(),
-.\" there is:
-.\"
-.\" /* The debugger continued. Ignore SIGSTOP. */
-.\" if (signr == SIGSTOP)
-.\" continue;
-.P
-.B PTRACE_GETSIGINFO
-can be used to retrieve a
-.I siginfo_t
-structure which corresponds to the delivered signal.
-.B PTRACE_SETSIGINFO
-may be used to modify it.
-If
-.B PTRACE_SETSIGINFO
-has been used to alter
-.IR siginfo_t ,
-the
-.I si_signo
-field and the
-.I sig
-parameter in the restarting command must match,
-otherwise the result is undefined.
-.SS Group-stop
-When a (possibly multithreaded) process receives a stopping signal,
-all threads stop.
-If some threads are traced, they enter a group-stop.
-Note that the stopping signal will first cause signal-delivery-stop
-(on one tracee only), and only after it is injected by the tracer
-(or after it was dispatched to a thread which isn't traced),
-will group-stop be initiated on
-.I all
-tracees within the multithreaded process.
-As usual, every tracee reports its group-stop separately
-to the corresponding tracer.
-.P
-Group-stop is observed by the tracer as
-.BR waitpid (2)
-returning with
-.I WIFSTOPPED(status)
-true, with the stopping signal available via
-.IR WSTOPSIG(status) .
-The same result is returned by some other classes of ptrace-stops,
-therefore the recommended practice is to perform the call
-.P
-.in +4n
-.EX
-ptrace(PTRACE_GETSIGINFO, pid, 0, &siginfo)
-.EE
-.in
-.P
-The call can be avoided if the signal is not
-.BR SIGSTOP ,
-.BR SIGTSTP ,
-.BR SIGTTIN ,
-or
-.BR SIGTTOU ;
-only these four signals are stopping signals.
-If the tracer sees something else, it can't be a group-stop.
-Otherwise, the tracer needs to call
-.BR PTRACE_GETSIGINFO .
-If
-.B PTRACE_GETSIGINFO
-fails with
-.BR EINVAL ,
-then it is definitely a group-stop.
-(Other failure codes are possible, such as
-.B ESRCH
-("no such process") if a
-.B SIGKILL
-killed the tracee.)
-.P
-If the tracee was attached using
-.BR PTRACE_SEIZE ,
-group-stop is indicated by
-.BR PTRACE_EVENT_STOP :
-.IR "status>>16 == PTRACE_EVENT_STOP" .
-This allows detection of group-stops
-without requiring an extra
-.B PTRACE_GETSIGINFO
-call.
-.P
-As of Linux 2.6.38,
-after the tracer sees the tracee ptrace-stop and until it
-restarts or kills it, the tracee will not run,
-and will not send notifications (except
-.B SIGKILL
-death) to the tracer, even if the tracer enters into another
-.BR waitpid (2)
-call.
-.P
-The kernel behavior described in the previous paragraph
-causes a problem with transparent handling of stopping signals.
-If the tracer restarts the tracee after group-stop,
-the stopping signal
-is effectively ignored\[em]the tracee doesn't remain stopped, it runs.
-If the tracer doesn't restart the tracee before entering into the next
-.BR waitpid (2),
-future
-.B SIGCONT
-signals will not be reported to the tracer;
-this would cause the
-.B SIGCONT
-signals to have no effect on the tracee.
-.P
-Since Linux 3.4, there is a method to overcome this problem: instead of
-.BR PTRACE_CONT ,
-a
-.B PTRACE_LISTEN
-command can be used to restart a tracee in a way where it does not execute,
-but waits for a new event which it can report via
-.BR waitpid (2)
-(such as when
-it is restarted by a
-.BR SIGCONT ).
-.SS PTRACE_EVENT stops
-If the tracer sets
-.B PTRACE_O_TRACE_*
-options, the tracee will enter ptrace-stops called
-.B PTRACE_EVENT
-stops.
-.P
-.B PTRACE_EVENT
-stops are observed by the tracer as
-.BR waitpid (2)
-returning with
-.IR WIFSTOPPED(status) ,
-and
-.I WSTOPSIG(status)
-returns
-.B SIGTRAP
-(or for
-.BR PTRACE_EVENT_STOP ,
-returns the stopping signal if tracee is in a group-stop).
-An additional bit is set in the higher byte of the status word:
-the value
-.I status>>8
-will be
-.P
-.in +4n
-.EX
-((PTRACE_EVENT_foo<<8) | SIGTRAP).
-.EE
-.in
-.P
-The following events exist:
-.TP
-.B PTRACE_EVENT_VFORK
-Stop before return from
-.BR vfork (2)
-or
-.BR clone (2)
-with the
-.B CLONE_VFORK
-flag.
-When the tracee is continued after this stop, it will wait for child to
-exit/exec before continuing its execution
-(in other words, the usual behavior on
-.BR vfork (2)).
-.TP
-.B PTRACE_EVENT_FORK
-Stop before return from
-.BR fork (2)
-or
-.BR clone (2)
-with the exit signal set to
-.BR SIGCHLD .
-.TP
-.B PTRACE_EVENT_CLONE
-Stop before return from
-.BR clone (2).
-.TP
-.B PTRACE_EVENT_VFORK_DONE
-Stop before return from
-.BR vfork (2)
-or
-.BR clone (2)
-with the
-.B CLONE_VFORK
-flag,
-but after the child unblocked this tracee by exiting or execing.
-.P
-For all four stops described above,
-the stop occurs in the parent (i.e., the tracee),
-not in the newly created thread.
-.B PTRACE_GETEVENTMSG
-can be used to retrieve the new thread's ID.
-.TP
-.B PTRACE_EVENT_EXEC
-Stop before return from
-.BR execve (2).
-Since Linux 3.0,
-.B PTRACE_GETEVENTMSG
-returns the former thread ID.
-.TP
-.B PTRACE_EVENT_EXIT
-Stop before exit (including death from
-.BR exit_group (2)),
-signal death, or exit caused by
-.BR execve (2)
-in a multithreaded process.
-.B PTRACE_GETEVENTMSG
-returns the exit status.
-Registers can be examined
-(unlike when "real" exit happens).
-The tracee is still alive; it needs to be
-.BR PTRACE_CONT ed
-or
-.BR PTRACE_DETACH ed
-to finish exiting.
-.TP
-.B PTRACE_EVENT_STOP
-Stop induced by
-.B PTRACE_INTERRUPT
-command, or group-stop, or initial ptrace-stop when a new child is attached
-(only if attached using
-.BR PTRACE_SEIZE ).
-.TP
-.B PTRACE_EVENT_SECCOMP
-Stop triggered by a
-.BR seccomp (2)
-rule on tracee syscall entry when
-.B PTRACE_O_TRACESECCOMP
-has been set by the tracer.
-The seccomp event message data (from the
-.B SECCOMP_RET_DATA
-portion of the seccomp filter rule) can be retrieved with
-.BR PTRACE_GETEVENTMSG .
-The semantics of this stop are described in
-detail in a separate section below.
-.P
-.B PTRACE_GETSIGINFO
-on
-.B PTRACE_EVENT
-stops returns
-.B SIGTRAP
-in
-.IR si_signo ,
-with
-.I si_code
-set to
-.IR "(event<<8)\ |\ SIGTRAP" .
-.SS Syscall-stops
-If the tracee was restarted by
-.B PTRACE_SYSCALL
-or
-.BR PTRACE_SYSEMU ,
-the tracee enters
-syscall-enter-stop just prior to entering any system call (which
-will not be executed if the restart was using
-.BR PTRACE_SYSEMU ,
-regardless of any change made to registers at this point or how the
-tracee is restarted after this stop).
-No matter which method caused the syscall-entry-stop,
-if the tracer restarts the tracee with
-.BR PTRACE_SYSCALL ,
-the tracee enters syscall-exit-stop when the system call is finished,
-or if it is interrupted by a signal.
-(That is, signal-delivery-stop never happens between syscall-enter-stop
-and syscall-exit-stop; it happens
-.I after
-syscall-exit-stop.)
-If the tracee is continued using any other method (including
-.BR PTRACE_SYSEMU ),
-no syscall-exit-stop occurs.
-Note that all mentions of
-.B PTRACE_SYSEMU
-apply equally to
-.BR PTRACE_SYSEMU_SINGLESTEP .
-.P
-However, even if the tracee was continued using
-.BR PTRACE_SYSCALL ,
-it is not guaranteed that the next stop will be a syscall-exit-stop.
-Other possibilities are that the tracee may stop in a
-.B PTRACE_EVENT
-stop (including seccomp stops), exit (if it entered
-.BR _exit (2)
-or
-.BR exit_group (2)),
-be killed by
-.BR SIGKILL ,
-or die silently (if it is a thread group leader, the
-.BR execve (2)
-happened in another thread,
-and that thread is not traced by the same tracer;
-this situation is discussed later).
-.P
-Syscall-enter-stop and syscall-exit-stop are observed by the tracer as
-.BR waitpid (2)
-returning with
-.I WIFSTOPPED(status)
-true, and
-.I WSTOPSIG(status)
-giving
-.BR SIGTRAP .
-If the
-.B PTRACE_O_TRACESYSGOOD
-option was set by the tracer, then
-.I WSTOPSIG(status)
-will give the value
-.IR "(SIGTRAP\ |\ 0x80)" .
-.P
-Syscall-stops can be distinguished from signal-delivery-stop with
-.B SIGTRAP
-by querying
-.B PTRACE_GETSIGINFO
-for the following cases:
-.TP
-.IR si_code " <= 0"
-.B SIGTRAP
-was delivered as a result of a user-space action,
-for example, a system call
-.RB ( tgkill (2),
-.BR kill (2),
-.BR sigqueue (3),
-etc.),
-expiration of a POSIX timer,
-change of state on a POSIX message queue,
-or completion of an asynchronous I/O operation.
-.TP
-.IR si_code " == SI_KERNEL (0x80)"
-.B SIGTRAP
-was sent by the kernel.
-.TP
-.IR si_code " == SIGTRAP or " si_code " == (SIGTRAP|0x80)"
-This is a syscall-stop.
-.P
-However, syscall-stops happen very often (twice per system call),
-and performing
-.B PTRACE_GETSIGINFO
-for every syscall-stop may be somewhat expensive.
-.P
-Some architectures allow the cases to be distinguished
-by examining registers.
-For example, on x86,
-.I rax
-==
-.RB \- ENOSYS
-in syscall-enter-stop.
-Since
-.B SIGTRAP
-(like any other signal) always happens
-.I after
-syscall-exit-stop,
-and at this point
-.I rax
-almost never contains
-.RB \- ENOSYS ,
-the
-.B SIGTRAP
-looks like "syscall-stop which is not syscall-enter-stop";
-in other words, it looks like a
-"stray syscall-exit-stop" and can be detected this way.
-But such detection is fragile and is best avoided.
-.P
-Using the
-.B PTRACE_O_TRACESYSGOOD
-option is the recommended method to distinguish syscall-stops
-from other kinds of ptrace-stops,
-since it is reliable and does not incur a performance penalty.
-.P
-Syscall-enter-stop and syscall-exit-stop are
-indistinguishable from each other by the tracer.
-The tracer needs to keep track of the sequence of
-ptrace-stops in order to not misinterpret syscall-enter-stop as
-syscall-exit-stop or vice versa.
-In general, a syscall-enter-stop is
-always followed by syscall-exit-stop,
-.B PTRACE_EVENT
-stop, or the tracee's death;
-no other kinds of ptrace-stop can occur in between.
-However, note that seccomp stops (see below) can cause syscall-exit-stops,
-without preceding syscall-entry-stops.
-If seccomp is in use, care needs
-to be taken not to misinterpret such stops as syscall-entry-stops.
-.P
-If after syscall-enter-stop,
-the tracer uses a restarting command other than
-.BR PTRACE_SYSCALL ,
-syscall-exit-stop is not generated.
-.P
-.B PTRACE_GETSIGINFO
-on syscall-stops returns
-.B SIGTRAP
-in
-.IR si_signo ,
-with
-.I si_code
-set to
-.B SIGTRAP
-or
-.IR (SIGTRAP|0x80) .
-.\"
-.SS PTRACE_EVENT_SECCOMP stops (Linux 3.5 to Linux 4.7)
-The behavior of
-.B PTRACE_EVENT_SECCOMP
-stops and their interaction with other kinds
-of ptrace stops has changed between kernel versions.
-This documents the behavior
-from their introduction until Linux 4.7 (inclusive).
-The behavior in later kernel versions is documented in the next section.
-.P
-A
-.B PTRACE_EVENT_SECCOMP
-stop occurs whenever a
-.B SECCOMP_RET_TRACE
-rule is triggered.
-This is independent of which method was used to restart the system call.
-Notably, seccomp still runs even if the tracee was restarted using
-.B PTRACE_SYSEMU
-and this system call is unconditionally skipped.
-.P
-Restarts from this stop will behave as if the stop had occurred right
-before the system call in question.
-In particular, both
-.B PTRACE_SYSCALL
-and
-.B PTRACE_SYSEMU
-will normally cause a subsequent syscall-entry-stop.
-However, if after the
-.B PTRACE_EVENT_SECCOMP
-the system call number is negative,
-both the syscall-entry-stop and the system call itself will be skipped.
-This means that if the system call number is negative after a
-.B PTRACE_EVENT_SECCOMP
-and the tracee is restarted using
-.BR PTRACE_SYSCALL ,
-the next observed stop will be a syscall-exit-stop,
-rather than the syscall-entry-stop that might have been expected.
-.\"
-.SS PTRACE_EVENT_SECCOMP stops (since Linux 4.8)
-Starting with Linux 4.8,
-.\" commit 93e35efb8de45393cf61ed07f7b407629bf698ea
-the
-.B PTRACE_EVENT_SECCOMP
-stop was reordered to occur between syscall-entry-stop and
-syscall-exit-stop.
-Note that seccomp no longer runs (and no
-.B PTRACE_EVENT_SECCOMP
-will be reported) if the system call is skipped due to
-.BR PTRACE_SYSEMU .
-.P
-Functionally, a
-.B PTRACE_EVENT_SECCOMP
-stop functions comparably
-to a syscall-entry-stop (i.e., continuations using
-.B PTRACE_SYSCALL
-will cause syscall-exit-stops,
-the system call number may be changed and any other modified registers
-are visible to the to-be-executed system call as well).
-Note that there may have been,
-but need not have been, a preceding syscall-entry-stop.
-.P
-After a
-.B PTRACE_EVENT_SECCOMP
-stop, seccomp will be rerun, with a
-.B SECCOMP_RET_TRACE
-rule now functioning the same as a
-.BR SECCOMP_RET_ALLOW .
-Specifically, this means that if registers are not modified during the
-.B PTRACE_EVENT_SECCOMP
-stop, the system call will then be allowed.
-.\"
-.SS PTRACE_SINGLESTEP stops
-[Details of these kinds of stops are yet to be documented.]
-.\"
-.\" FIXME .
-.\" document stops occurring with PTRACE_SINGLESTEP
-.\"
-.SS Informational and restarting ptrace commands
-Most ptrace commands (all except
-.BR PTRACE_ATTACH ,
-.BR PTRACE_SEIZE ,
-.BR PTRACE_TRACEME ,
-.BR PTRACE_INTERRUPT ,
-and
-.BR PTRACE_KILL )
-require the tracee to be in a ptrace-stop, otherwise they fail with
-.BR ESRCH .
-.P
-When the tracee is in ptrace-stop,
-the tracer can read and write data to
-the tracee using informational commands.
-These commands leave the tracee in ptrace-stopped state:
-.P
-.in +4n
-.EX
-ptrace(PTRACE_PEEKTEXT/PEEKDATA/PEEKUSER, pid, addr, 0);
-ptrace(PTRACE_POKETEXT/POKEDATA/POKEUSER, pid, addr, long_val);
-ptrace(PTRACE_GETREGS/GETFPREGS, pid, 0, &struct);
-ptrace(PTRACE_SETREGS/SETFPREGS, pid, 0, &struct);
-ptrace(PTRACE_GETREGSET, pid, NT_foo, &iov);
-ptrace(PTRACE_SETREGSET, pid, NT_foo, &iov);
-ptrace(PTRACE_GETSIGINFO, pid, 0, &siginfo);
-ptrace(PTRACE_SETSIGINFO, pid, 0, &siginfo);
-ptrace(PTRACE_GETEVENTMSG, pid, 0, &long_var);
-ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_flags);
-.EE
-.in
-.P
-Note that some errors are not reported.
-For example, setting signal information
-.RI ( siginfo )
-may have no effect in some ptrace-stops, yet the call may succeed
-(return 0 and not set
-.IR errno );
-querying
-.B PTRACE_GETEVENTMSG
-may succeed and return some random value if current ptrace-stop
-is not documented as returning a meaningful event message.
-.P
-The call
-.P
-.in +4n
-.EX
-ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_flags);
-.EE
-.in
-.P
-affects one tracee.
-The tracee's current flags are replaced.
-Flags are inherited by new tracees created and "auto-attached" via active
-.BR PTRACE_O_TRACEFORK ,
-.BR PTRACE_O_TRACEVFORK ,
-or
-.B PTRACE_O_TRACECLONE
-options.
-.P
-Another group of commands makes the ptrace-stopped tracee run.
-They have the form:
-.P
-.in +4n
-.EX
-ptrace(cmd, pid, 0, sig);
-.EE
-.in
-.P
-where
-.I cmd
-is
-.BR PTRACE_CONT ,
-.BR PTRACE_LISTEN ,
-.BR PTRACE_DETACH ,
-.BR PTRACE_SYSCALL ,
-.BR PTRACE_SINGLESTEP ,
-.BR PTRACE_SYSEMU ,
-or
-.BR PTRACE_SYSEMU_SINGLESTEP .
-If the tracee is in signal-delivery-stop,
-.I sig
-is the signal to be injected (if it is nonzero).
-Otherwise,
-.I sig
-may be ignored.
-(When restarting a tracee from a ptrace-stop other than signal-delivery-stop,
-recommended practice is to always pass 0 in
-.IR sig .)
-.SS Attaching and detaching
-A thread can be attached to the tracer using the call
-.P
-.in +4n
-.EX
-ptrace(PTRACE_ATTACH, pid, 0, 0);
-.EE
-.in
-.P
-or
-.P
-.in +4n
-.EX
-ptrace(PTRACE_SEIZE, pid, 0, PTRACE_O_flags);
-.EE
-.in
-.P
-.B PTRACE_ATTACH
-sends
-.B SIGSTOP
-to this thread.
-If the tracer wants this
-.B SIGSTOP
-to have no effect, it needs to suppress it.
-Note that if other signals are concurrently sent to
-this thread during attach,
-the tracer may see the tracee enter signal-delivery-stop
-with other signal(s) first!
-The usual practice is to reinject these signals until
-.B SIGSTOP
-is seen, then suppress
-.B SIGSTOP
-injection.
-The design bug here is that a ptrace attach and a concurrently delivered
-.B SIGSTOP
-may race and the concurrent
-.B SIGSTOP
-may be lost.
-.\"
-.\" FIXME Describe how to attach to a thread which is already group-stopped.
-.P
-Since attaching sends
-.B SIGSTOP
-and the tracer usually suppresses it, this may cause a stray
-.B EINTR
-return from the currently executing system call in the tracee,
-as described in the "Signal injection and suppression" section.
-.P
-Since Linux 3.4,
-.B PTRACE_SEIZE
-can be used instead of
-.BR PTRACE_ATTACH .
-.B PTRACE_SEIZE
-does not stop the attached process.
-If you need to stop
-it after attach (or at any other time) without sending it any signals,
-use the
-.B PTRACE_INTERRUPT
-command.
-.P
-The operation
-.P
-.in +4n
-.EX
-ptrace(PTRACE_TRACEME, 0, 0, 0);
-.EE
-.in
-.P
-turns the calling thread into a tracee.
-The thread continues to run (doesn't enter ptrace-stop).
-A common practice is to follow the
-.B PTRACE_TRACEME
-with
-.P
-.in +4n
-.EX
-raise(SIGSTOP);
-.EE
-.in
-.P
-and allow the parent (which is our tracer now) to observe our
-signal-delivery-stop.
-.P
-If the
-.BR PTRACE_O_TRACEVFORK ,
-.BR PTRACE_O_TRACEFORK ,
-or
-.B PTRACE_O_TRACECLONE
-options are in effect, then children created by, respectively,
-.BR vfork (2)
-or
-.BR clone (2)
-with the
-.B CLONE_VFORK
-flag,
-.BR fork (2)
-or
-.BR clone (2)
-with the exit signal set to
-.BR SIGCHLD ,
-and other kinds of
-.BR clone (2),
-are automatically attached to the same tracer which traced their parent.
-.B SIGSTOP
-is delivered to the children, causing them to enter
-signal-delivery-stop after they exit the system call which created them.
-.P
-Detaching of the tracee is performed by:
-.P
-.in +4n
-.EX
-ptrace(PTRACE_DETACH, pid, 0, sig);
-.EE
-.in
-.P
-.B PTRACE_DETACH
-is a restarting operation;
-therefore it requires the tracee to be in ptrace-stop.
-If the tracee is in signal-delivery-stop, a signal can be injected.
-Otherwise, the
-.I sig
-parameter may be silently ignored.
-.P
-If the tracee is running when the tracer wants to detach it,
-the usual solution is to send
-.B SIGSTOP
-(using
-.BR tgkill (2),
-to make sure it goes to the correct thread),
-wait for the tracee to stop in signal-delivery-stop for
-.B SIGSTOP
-and then detach it (suppressing
-.B SIGSTOP
-injection).
-A design bug is that this can race with concurrent
-.BR SIGSTOP s.
-Another complication is that the tracee may enter other ptrace-stops
-and needs to be restarted and waited for again, until
-.B SIGSTOP
-is seen.
-Yet another complication is to be sure that
-the tracee is not already ptrace-stopped,
-because no signal delivery happens while it is\[em]not even
-.BR SIGSTOP .
-.\" FIXME Describe how to detach from a group-stopped tracee so that it
-.\" doesn't run, but continues to wait for SIGCONT.
-.P
-If the tracer dies, all tracees are automatically detached and restarted,
-unless they were in group-stop.
-Handling of restart from group-stop is currently buggy,
-but the "as planned" behavior is to leave tracee stopped and waiting for
-.BR SIGCONT .
-If the tracee is restarted from signal-delivery-stop,
-the pending signal is injected.
-.SS execve(2) under ptrace
-.\" clone(2) CLONE_THREAD says:
-.\" If any of the threads in a thread group performs an execve(2),
-.\" then all threads other than the thread group leader are terminated,
-.\" and the new program is executed in the thread group leader.
-.\"
-When one thread in a multithreaded process calls
-.BR execve (2),
-the kernel destroys all other threads in the process,
-.\" In Linux 3.1 sources, see fs/exec.c::de_thread()
-and resets the thread ID of the execing thread to the
-thread group ID (process ID).
-(Or, to put things another way, when a multithreaded process does an
-.BR execve (2),
-at completion of the call, it appears as though the
-.BR execve (2)
-occurred in the thread group leader, regardless of which thread did the
-.BR execve (2).)
-This resetting of the thread ID looks very confusing to tracers:
-.IP \[bu] 3
-All other threads stop in
-.B PTRACE_EVENT_EXIT
-stop, if the
-.B PTRACE_O_TRACEEXIT
-option was turned on.
-Then all other threads except the thread group leader report
-death as if they exited via
-.BR _exit (2)
-with exit code 0.
-.IP \[bu]
-The execing tracee changes its thread ID while it is in the
-.BR execve (2).
-(Remember, under ptrace, the "pid" returned from
-.BR waitpid (2),
-or fed into ptrace calls, is the tracee's thread ID.)
-That is, the tracee's thread ID is reset to be the same as its process ID,
-which is the same as the thread group leader's thread ID.
-.IP \[bu]
-Then a
-.B PTRACE_EVENT_EXEC
-stop happens, if the
-.B PTRACE_O_TRACEEXEC
-option was turned on.
-.IP \[bu]
-If the thread group leader has reported its
-.B PTRACE_EVENT_EXIT
-stop by this time,
-it appears to the tracer that
-the dead thread leader "reappears from nowhere".
-(Note: the thread group leader does not report death via
-.I WIFEXITED(status)
-as long as there is at least one other live thread.
-This eliminates the possibility that the tracer will see
-it dying and then reappearing.)
-If the thread group leader was still alive,
-for the tracer this may look as if thread group leader
-returns from a different system call than it entered,
-or even "returned from a system call even though
-it was not in any system call".
-If the thread group leader was not traced
-(or was traced by a different tracer), then during
-.BR execve (2)
-it will appear as if it has become a tracee of
-the tracer of the execing tracee.
-.P
-All of the above effects are the artifacts of
-the thread ID change in the tracee.
-.P
-The
-.B PTRACE_O_TRACEEXEC
-option is the recommended tool for dealing with this situation.
-First, it enables
-.B PTRACE_EVENT_EXEC
-stop,
-which occurs before
-.BR execve (2)
-returns.
-In this stop, the tracer can use
-.B PTRACE_GETEVENTMSG
-to retrieve the tracee's former thread ID.
-(This feature was introduced in Linux 3.0.)
-Second, the
-.B PTRACE_O_TRACEEXEC
-option disables legacy
-.B SIGTRAP
-generation on
-.BR execve (2).
-.P
-When the tracer receives
-.B PTRACE_EVENT_EXEC
-stop notification,
-it is guaranteed that except this tracee and the thread group leader,
-no other threads from the process are alive.
-.P
-On receiving the
-.B PTRACE_EVENT_EXEC
-stop notification,
-the tracer should clean up all its internal
-data structures describing the threads of this process,
-and retain only one data structure\[em]one which
-describes the single still running tracee, with
-.P
-.in +4n
-.EX
-thread ID == thread group ID == process ID.
-.EE
-.in
-.P
-Example: two threads call
-.BR execve (2)
-at the same time:
-.P
-.nf
-*** we get syscall-enter-stop in thread 1: **
-PID1 execve("/bin/foo", "foo" <unfinished ...>
-*** we issue PTRACE_SYSCALL for thread 1 **
-*** we get syscall-enter-stop in thread 2: **
-PID2 execve("/bin/bar", "bar" <unfinished ...>
-*** we issue PTRACE_SYSCALL for thread 2 **
-*** we get PTRACE_EVENT_EXEC for PID0, we issue PTRACE_SYSCALL **
-*** we get syscall-exit-stop for PID0: **
-PID0 <... execve resumed> ) = 0
-.fi
-.P
-If the
-.B PTRACE_O_TRACEEXEC
-option is
-.I not
-in effect for the execing tracee,
-and if the tracee was
-.BR PTRACE_ATTACH ed
-rather than
-.BR PTRACE_SEIZE d,
-the kernel delivers an extra
-.B SIGTRAP
-to the tracee after
-.BR execve (2)
-returns.
-This is an ordinary signal (similar to one which can be
-generated by
-.IR "kill \-TRAP" ),
-not a special kind of ptrace-stop.
-Employing
-.B PTRACE_GETSIGINFO
-for this signal returns
-.I si_code
-set to 0
-.RI ( SI_USER ).
-This signal may be blocked by signal mask,
-and thus may be delivered (much) later.
-.P
-Usually, the tracer (for example,
-.BR strace (1))
-would not want to show this extra post-execve
-.B SIGTRAP
-signal to the user, and would suppress its delivery to the tracee (if
-.B SIGTRAP
-is set to
-.BR SIG_DFL ,
-it is a killing signal).
-However, determining
-.I which
-.B SIGTRAP
-to suppress is not easy.
-Setting the
-.B PTRACE_O_TRACEEXEC
-option or using
-.B PTRACE_SEIZE
-and thus suppressing this extra
-.B SIGTRAP
-is the recommended approach.
-.SS Real parent
-The ptrace API (ab)uses the standard UNIX parent/child signaling over
-.BR waitpid (2).
-This used to cause the real parent of the process to stop receiving
-several kinds of
-.BR waitpid (2)
-notifications when the child process is traced by some other process.
-.P
-Many of these bugs have been fixed, but as of Linux 2.6.38 several still
-exist; see BUGS below.
-.P
-As of Linux 2.6.38, the following is believed to work correctly:
-.IP \[bu] 3
-exit/death by signal is reported first to the tracer, then,
-when the tracer consumes the
-.BR waitpid (2)
-result, to the real parent (to the real parent only when the
-whole multithreaded process exits).
-If the tracer and the real parent are the same process,
-the report is sent only once.
-.SH RETURN VALUE
-On success, the
-.B PTRACE_PEEK*
-operations return the requested data (but see NOTES),
-the
-.B PTRACE_SECCOMP_GET_FILTER
-operation returns the number of instructions in the BPF program,
-the
-.B PTRACE_GET_SYSCALL_INFO
-operation returns the number of bytes available to be written by the kernel,
-and other operations return zero.
-.P
-On error, all operations return \-1, and
-.I errno
-is set to indicate the error.
-Since the value returned by a successful
-.B PTRACE_PEEK*
-operation may be \-1, the caller must clear
-.I errno
-before the call, and then check it afterward
-to determine whether or not an error occurred.
-.SH ERRORS
-.TP
-.B EBUSY
-(i386 only) There was an error with allocating or freeing a debug register.
-.TP
-.B EFAULT
-There was an attempt to read from or write to an invalid area in
-the tracer's or the tracee's memory,
-probably because the area wasn't mapped or accessible.
-Unfortunately, under Linux, different variations of this fault
-will return
-.B EIO
-or
-.B EFAULT
-more or less arbitrarily.
-.TP
-.B EINVAL
-An attempt was made to set an invalid option.
-.TP
-.B EIO
-.I op
-is invalid, or an attempt was made to read from or
-write to an invalid area in the tracer's or the tracee's memory,
-or there was a word-alignment violation,
-or an invalid signal was specified during a restart operation.
-.TP
-.B EPERM
-The specified process cannot be traced.
-This could be because the
-tracer has insufficient privileges (the required capability is
-.BR CAP_SYS_PTRACE );
-unprivileged processes cannot trace processes that they
-cannot send signals to or those running
-set-user-ID/set-group-ID programs, for obvious reasons.
-Alternatively, the process may already be being traced,
-or (before Linux 2.6.26) be
-.BR init (1)
-(PID 1).
-.TP
-.B ESRCH
-The specified process does not exist, or is not currently being traced
-by the caller, or is not stopped
-(for operations that require a stopped tracee).
-.SH STANDARDS
-None.
-.SH HISTORY
-SVr4, 4.3BSD.
-.P
-Before Linux 2.6.26,
-.\" See commit 00cd5c37afd5f431ac186dd131705048c0a11fdb
-.BR init (1),
-the process with PID 1, could not be traced.
-.SH NOTES
-Although arguments to
-.BR ptrace ()
-are interpreted according to the prototype given,
-glibc currently declares
-.BR ptrace ()
-as a variadic function with only the
-.I op
-argument fixed.
-It is recommended to always supply four arguments,
-even if the requested operation does not use them,
-setting unused/ignored arguments to
-.I 0L
-or
-.IR "(void\ *)\ 0" .
-.P
-A tracee's parent continues to be the tracer even if that tracer calls
-.BR execve (2).
-.P
-The layout of the contents of memory and the USER area are
-quite operating-system- and architecture-specific.
-The offset supplied, and the data returned,
-might not entirely match with the definition of
-.IR "struct user" .
-.\" See http://lkml.org/lkml/2008/5/8/375
-.P
-The size of a "word" is determined by the operating-system variant
-(e.g., for 32-bit Linux it is 32 bits).
-.P
-This page documents the way the
-.BR ptrace ()
-call works currently in Linux.
-Its behavior differs significantly on other flavors of UNIX.
-In any case, use of
-.BR ptrace ()
-is highly specific to the operating system and architecture.
-.\"
-.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.SS Ptrace access mode checking
-Various parts of the kernel-user-space API (not just
-.BR ptrace ()
-operations) require so-called "ptrace access mode" checks,
-whose outcome determines whether an operation is permitted
-(or, in a few cases, causes a "read" operation to return sanitized data).
-These checks are performed in cases where one process can
-inspect sensitive information about,
-or in some cases modify the state of, another process.
-The checks are based on factors such as the credentials and capabilities
-of the two processes,
-whether or not the "target" process is dumpable,
-and the results of checks performed by any enabled Linux Security Module
-(LSM)\[em]for example, SELinux, Yama, or Smack\[em]and by the commoncap LSM
-(which is always invoked).
-.P
-Prior to Linux 2.6.27, all access checks were of a single type.
-Since Linux 2.6.27,
-.\" commit 006ebb40d3d65338bd74abb03b945f8d60e362bd
-two access mode levels are distinguished:
-.TP
-.B PTRACE_MODE_READ
-For "read" operations or other operations that are less dangerous,
-such as:
-.BR get_robust_list (2);
-.BR kcmp (2);
-reading
-.IR /proc/ pid /auxv ,
-.IR /proc/ pid /environ ,
-or
-.IR /proc/ pid /stat ;
-or
-.BR readlink (2)
-of a
-.IR /proc/ pid /ns/*
-file.
-.TP
-.B PTRACE_MODE_ATTACH
-For "write" operations, or other operations that are more dangerous,
-such as: ptrace attaching
-.RB ( PTRACE_ATTACH )
-to another process
-or calling
-.BR process_vm_writev (2).
-.RB ( PTRACE_MODE_ATTACH
-was effectively the default before Linux 2.6.27.)
-.\"
-.\" Regarding the above description of the distinction between
-.\" PTRACE_MODE_READ and PTRACE_MODE_ATTACH, Stephen Smalley notes:
-.\"
-.\" That was the intent when the distinction was introduced, but it doesn't
-.\" appear to have been properly maintained, e.g. there is now a common
-.\" helper lock_trace() that is used for
-.\" /proc/pid/{stack,syscall,personality} but checks PTRACE_MODE_ATTACH, and
-.\" PTRACE_MODE_ATTACH is also used in timerslack_ns_write/show(). Likely
-.\" should review and make them consistent. There was also some debate
-.\" about proper handling of /proc/pid/fd. Arguably that one might belong
-.\" back in the _ATTACH camp.
-.\"
-.P
-Since Linux 4.5,
-.\" commit caaee6234d05a58c5b4d05e7bf766131b810a657
-the above access mode checks are combined (ORed) with
-one of the following modifiers:
-.TP
-.B PTRACE_MODE_FSCREDS
-Use the caller's filesystem UID and GID (see
-.BR credentials (7))
-or effective capabilities for LSM checks.
-.TP
-.B PTRACE_MODE_REALCREDS
-Use the caller's real UID and GID or permitted capabilities for LSM checks.
-This was effectively the default before Linux 4.5.
-.P
-Because combining one of the credential modifiers with one of
-the aforementioned access modes is typical,
-some macros are defined in the kernel sources for the combinations:
-.TP
-.B PTRACE_MODE_READ_FSCREDS
-Defined as
-.BR "PTRACE_MODE_READ | PTRACE_MODE_FSCREDS" .
-.TP
-.B PTRACE_MODE_READ_REALCREDS
-Defined as
-.BR "PTRACE_MODE_READ | PTRACE_MODE_REALCREDS" .
-.TP
-.B PTRACE_MODE_ATTACH_FSCREDS
-Defined as
-.BR "PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS" .
-.TP
-.B PTRACE_MODE_ATTACH_REALCREDS
-Defined as
-.BR "PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS" .
-.P
-One further modifier can be ORed with the access mode:
-.TP
-.BR PTRACE_MODE_NOAUDIT " (since Linux 3.3)"
-.\" commit 69f594a38967f4540ce7a29b3fd214e68a8330bd
-.\" Just for /proc/pid/stat
-Don't audit this access mode check.
-This modifier is employed for ptrace access mode checks
-(such as checks when reading
-.IR /proc/ pid /stat )
-that merely cause the output to be filtered or sanitized,
-rather than causing an error to be returned to the caller.
-In these cases, accessing the file is not a security violation and
-there is no reason to generate a security audit record.
-This modifier suppresses the generation of
-such an audit record for the particular access check.
-.P
-Note that all of the
-.B PTRACE_MODE_*
-constants described in this subsection are kernel-internal,
-and not visible to user space.
-The constant names are mentioned here in order to label the various kinds of
-ptrace access mode checks that are performed for various system calls
-and accesses to various pseudofiles (e.g., under
-.IR /proc ).
-These names are used in other manual pages to provide a simple
-shorthand for labeling the different kernel checks.
-.P
-The algorithm employed for ptrace access mode checking determines whether
-the calling process is allowed to perform the corresponding action
-on the target process.
-(In the case of opening
-.IR /proc/ pid
-files, the "calling process" is the one opening the file,
-and the process with the corresponding PID is the "target process".)
-The algorithm is as follows:
-.IP (1) 5
-If the calling thread and the target thread are in the same
-thread group, access is always allowed.
-.IP (2)
-If the access mode specifies
-.BR PTRACE_MODE_FSCREDS ,
-then, for the check in the next step,
-employ the caller's filesystem UID and GID.
-(As noted in
-.BR credentials (7),
-the filesystem UID and GID almost always have the same values
-as the corresponding effective IDs.)
-.IP
-Otherwise, the access mode specifies
-.BR PTRACE_MODE_REALCREDS ,
-so use the caller's real UID and GID for the checks in the next step.
-(Most APIs that check the caller's UID and GID use the effective IDs.
-For historical reasons, the
-.B PTRACE_MODE_REALCREDS
-check uses the real IDs instead.)
-.IP (3)
-Deny access if
-.I neither
-of the following is true:
-.RS
-.IP \[bu] 3
-The real, effective, and saved-set user IDs of the target
-match the caller's user ID,
-.I and
-the real, effective, and saved-set group IDs of the target
-match the caller's group ID.
-.IP \[bu]
-The caller has the
-.B CAP_SYS_PTRACE
-capability in the user namespace of the target.
-.RE
-.IP (4)
-Deny access if the target process "dumpable" attribute has a value other than 1
-.RB ( SUID_DUMP_USER ;
-see the discussion of
-.B PR_SET_DUMPABLE
-in
-.BR prctl (2)),
-and the caller does not have the
-.B CAP_SYS_PTRACE
-capability in the user namespace of the target process.
-.IP (5)
-The kernel LSM
-.IR security_ptrace_access_check ()
-interface is invoked to see if ptrace access is permitted.
-The results depend on the LSM(s).
-The implementation of this interface in the commoncap LSM performs
-the following steps:
-.\" (in cap_ptrace_access_check()):
-.RS
-.IP (5.1) 7
-If the access mode includes
-.BR PTRACE_MODE_FSCREDS ,
-then use the caller's
-.I effective
-capability set
-in the following check;
-otherwise (the access mode specifies
-.BR PTRACE_MODE_REALCREDS ,
-so) use the caller's
-.I permitted
-capability set.
-.IP (5.2)
-Deny access if
-.I neither
-of the following is true:
-.RS
-.IP \[bu] 3
-The caller and the target process are in the same user namespace,
-and the caller's capabilities are a superset of the target process's
-.I permitted
-capabilities.
-.IP \[bu]
-The caller has the
-.B CAP_SYS_PTRACE
-capability in the target process's user namespace.
-.RE
-.IP
-Note that the commoncap LSM does not distinguish between
-.B PTRACE_MODE_READ
-and
-.BR PTRACE_MODE_ATTACH .
-.RE
-.IP (6)
-If access has not been denied by any of the preceding steps,
-then access is allowed.
-.\"
-.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.SS /proc/sys/kernel/yama/ptrace_scope
-On systems with the Yama Linux Security Module (LSM) installed
-(i.e., the kernel was configured with
-.BR CONFIG_SECURITY_YAMA ),
-the
-.I /proc/sys/kernel/yama/ptrace_scope
-file (available since Linux 3.4)
-.\" commit 2d514487faf188938a4ee4fb3464eeecfbdcf8eb
-can be used to restrict the ability to trace a process with
-.BR ptrace ()
-(and thus also the ability to use tools such as
-.BR strace (1)
-and
-.BR gdb (1)).
-The goal of such restrictions is to prevent attack escalation whereby
-a compromised process can ptrace-attach to other sensitive processes
-(e.g., a GPG agent or an SSH session) owned by the user in order
-to gain additional credentials that may exist in memory
-and thus expand the scope of the attack.
-.P
-More precisely, the Yama LSM limits two types of operations:
-.IP \[bu] 3
-Any operation that performs a ptrace access mode
-.B PTRACE_MODE_ATTACH
-check\[em]for example,
-.BR ptrace ()
-.BR PTRACE_ATTACH .
-(See the "Ptrace access mode checking" discussion above.)
-.IP \[bu]
-.BR ptrace ()
-.BR PTRACE_TRACEME .
-.P
-A process that has the
-.B CAP_SYS_PTRACE
-capability can update the
-.I /proc/sys/kernel/yama/ptrace_scope
-file with one of the following values:
-.TP
-0 ("classic ptrace permissions")
-No additional restrictions on operations that perform
-.B PTRACE_MODE_ATTACH
-checks (beyond those imposed by the commoncap and other LSMs).
-.IP
-The use of
-.B PTRACE_TRACEME
-is unchanged.
-.TP
-1 ("restricted ptrace") [default value]
-When performing an operation that requires a
-.B PTRACE_MODE_ATTACH
-check, the calling process must either have the
-.B CAP_SYS_PTRACE
-capability in the user namespace of the target process or
-it must have a predefined relationship with the target process.
-By default,
-the predefined relationship is that the target process
-must be a descendant of the caller.
-.IP
-A target process can employ the
-.BR prctl (2)
-.B PR_SET_PTRACER
-operation to declare an additional PID that is allowed to perform
-.B PTRACE_MODE_ATTACH
-operations on the target.
-See the kernel source file
-.I Documentation/admin\-guide/LSM/Yama.rst
-.\" commit 90bb766440f2147486a2acc3e793d7b8348b0c22
-(or
-.I Documentation/security/Yama.txt
-before Linux 4.13)
-for further details.
-.IP
-The use of
-.B PTRACE_TRACEME
-is unchanged.
-.TP
-2 ("admin-only attach")
-Only processes with the
-.B CAP_SYS_PTRACE
-capability in the user namespace of the target process may perform
-.B PTRACE_MODE_ATTACH
-operations or trace children that employ
-.BR PTRACE_TRACEME .
-.TP
-3 ("no attach")
-No process may perform
-.B PTRACE_MODE_ATTACH
-operations or trace children that employ
-.BR PTRACE_TRACEME .
-.IP
-Once this value has been written to the file, it cannot be changed.
-.P
-With respect to values 1 and 2,
-note that creating a new user namespace effectively removes the
-protection offered by Yama.
-This is because a process in the parent user namespace whose effective
-UID matches the UID of the creator of a child namespace
-has all capabilities (including
-.BR CAP_SYS_PTRACE )
-when performing operations within the child user namespace
-(and further-removed descendants of that namespace).
-Consequently, when a process tries to use user namespaces to sandbox itself,
-it inadvertently weakens the protections offered by the Yama LSM.
-.\"
-.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-.\"
-.SS C library/kernel differences
-At the system call level, the
-.BR PTRACE_PEEKTEXT ,
-.BR PTRACE_PEEKDATA ,
-and
-.B PTRACE_PEEKUSER
-operations have a different API: they store the result
-at the address specified by the
-.I data
-parameter, and the return value is the error flag.
-The glibc wrapper function provides the API given in DESCRIPTION above,
-with the result being returned via the function return value.
-.SH BUGS
-On hosts with Linux 2.6 kernel headers,
-.B PTRACE_SETOPTIONS
-is declared with a different value than the one for Linux 2.4.
-This leads to applications compiled with Linux 2.6 kernel
-headers failing when run on Linux 2.4.
-This can be worked around by redefining
-.B PTRACE_SETOPTIONS
-to
-.BR PTRACE_OLDSETOPTIONS ,
-if that is defined.
-.P
-Group-stop notifications are sent to the tracer, but not to the real parent.
-Last confirmed on 2.6.38.6.
-.P
-If a thread group leader is traced and exits by calling
-.BR _exit (2),
-.\" Note from Denys Vlasenko:
-.\" Here "exits" means any kind of death - _exit, exit_group,
-.\" signal death. Signal death and exit_group cases are trivial,
-.\" though: since signal death and exit_group kill all other threads
-.\" too, "until all other threads exit" thing happens rather soon
-.\" in these cases. Therefore, only _exit presents observably
-.\" puzzling behavior to ptrace users: thread leader _exit's,
-.\" but WIFEXITED isn't reported! We are trying to explain here
-.\" why it is so.
-a
-.B PTRACE_EVENT_EXIT
-stop will happen for it (if requested), but the subsequent
-.B WIFEXITED
-notification will not be delivered until all other threads exit.
-As explained above, if one of the other threads calls
-.BR execve (2),
-the death of the thread group leader will
-.I never
-be reported.
-If the execed thread is not traced by this tracer,
-the tracer will never know that
-.BR execve (2)
-happened.
-One possible workaround is to
-.B PTRACE_DETACH
-the thread group leader instead of restarting it in this case.
-Last confirmed on 2.6.38.6.
-.\" FIXME . need to test/verify this scenario
-.P
-A
-.B SIGKILL
-signal may still cause a
-.B PTRACE_EVENT_EXIT
-stop before actual signal death.
-This may be changed in the future;
-.B SIGKILL
-is meant to always immediately kill tasks even under ptrace.
-Last confirmed on Linux 3.13.
-.P
-Some system calls return with
-.B EINTR
-if a signal was sent to a tracee, but delivery was suppressed by the tracer.
-(This is a very typical operation: it is usually
-done by debuggers on every attach, in order to not introduce
-a bogus
-.BR SIGSTOP ).
-As of Linux 3.2.9, the following system calls are affected
-(this list is likely incomplete):
-.BR epoll_wait (2),
-and
-.BR read (2)
-from an
-.BR inotify (7)
-file descriptor.
-The usual symptom of this bug is that when you attach to
-a quiescent process with the command
-.P
-.in +4n
-.EX
-strace \-p <process\-ID>
-.EE
-.in
-.P
-then, instead of the usual
-and expected one-line output such as
-.P
-.in +4n
-.EX
-restart_syscall(<... resuming interrupted call ...>_
-.EE
-.in
-.P
-or
-.P
-.in +4n
-.EX
-select(6, [5], NULL, [5], NULL_
-.EE
-.in
-.P
-('_' denotes the cursor position), you observe more than one line.
-For example:
-.P
-.in +4n
-.EX
- clock_gettime(CLOCK_MONOTONIC, {15370, 690928118}) = 0
- epoll_wait(4,_
-.EE
-.in
-.P
-What is not visible here is that the process was blocked in
-.BR epoll_wait (2)
-before
-.BR strace (1)
-has attached to it.
-Attaching caused
-.BR epoll_wait (2)
-to return to user space with the error
-.BR EINTR .
-In this particular case, the program reacted to
-.B EINTR
-by checking the current time, and then executing
-.BR epoll_wait (2)
-again.
-(Programs which do not expect such "stray"
-.B EINTR
-errors may behave in an unintended way upon an
-.BR strace (1)
-attach.)
-.P
-Contrary to the normal rules, the glibc wrapper for
-.BR ptrace ()
-can set
-.I errno
-to zero.
-.SH SEE ALSO
-.BR gdb (1),
-.BR ltrace (1),
-.BR strace (1),
-.BR clone (2),
-.BR execve (2),
-.BR fork (2),
-.BR gettid (2),
-.BR prctl (2),
-.BR seccomp (2),
-.BR sigaction (2),
-.BR tgkill (2),
-.BR vfork (2),
-.BR waitpid (2),
-.BR exec (3),
-.BR capabilities (7),
-.BR signal (7)
diff --git a/man2/putmsg.2 b/man2/putmsg.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/putmsg.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/putpmsg.2 b/man2/putpmsg.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/putpmsg.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/pwrite.2 b/man2/pwrite.2
deleted file mode 100644
index 87eacb238..000000000
--- a/man2/pwrite.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/pread.2
diff --git a/man2/pwrite64.2 b/man2/pwrite64.2
deleted file mode 100644
index 9290e0a58..000000000
--- a/man2/pwrite64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/pwrite.2
diff --git a/man2/pwritev.2 b/man2/pwritev.2
deleted file mode 100644
index 54e3384ef..000000000
--- a/man2/pwritev.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/readv.2
diff --git a/man2/pwritev2.2 b/man2/pwritev2.2
deleted file mode 100644
index 54e3384ef..000000000
--- a/man2/pwritev2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/readv.2
diff --git a/man2/query_module.2 b/man2/query_module.2
deleted file mode 100644
index 60746df56..000000000
--- a/man2/query_module.2
+++ /dev/null
@@ -1,194 +0,0 @@
-.\" Copyright (C) 1996 Free Software Foundation, Inc.
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.\" 2006-02-09, some reformatting by Luc Van Oostenryck; some
-.\" reformatting and rewordings by mtk
-.\"
-.TH query_module 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-query_module \- query the kernel for various bits pertaining to modules
-.SH SYNOPSIS
-.nf
-.B #include <linux/module.h>
-.P
-.BI "[[deprecated]] int query_module(const char *" name ", int " which ,
-.BI " void " buf [. bufsize "], \
-size_t " bufsize ,
-.BI " size_t *" ret );
-.fi
-.SH DESCRIPTION
-.IR Note :
-This system call is present only before Linux 2.6.
-.P
-.BR query_module ()
-requests information from the kernel about loadable modules.
-The returned information is placed in the buffer pointed to by
-.IR buf .
-The caller must specify the size of
-.I buf
-in
-.IR bufsize .
-The precise nature and format of the returned information
-depend on the operation specified by
-.IR which .
-Some operations require
-.I name
-to identify a currently loaded module, some allow
-.I name
-to be NULL, indicating the kernel proper.
-.P
-The following values can be specified for
-.IR which :
-.TP
-.B 0
-Returns success, if the kernel supports
-.BR query_module ().
-Used to probe for availability of the system call.
-.TP
-.B QM_MODULES
-Returns the names of all loaded modules.
-The returned buffer consists of a sequence of null-terminated strings;
-.I ret
-is set to the number of
-modules.
-.\" ret is set on ENOSPC
-.TP
-.B QM_DEPS
-Returns the names of all modules used by the indicated module.
-The returned buffer consists of a sequence of null-terminated strings;
-.I ret
-is set to the number of modules.
-.\" ret is set on ENOSPC
-.TP
-.B QM_REFS
-Returns the names of all modules using the indicated module.
-This is the inverse of
-.BR QM_DEPS .
-The returned buffer consists of a sequence of null-terminated strings;
-.I ret
-is set to the number of modules.
-.\" ret is set on ENOSPC
-.TP
-.B QM_SYMBOLS
-Returns the symbols and values exported by the kernel or the indicated
-module.
-The returned buffer is an array of structures of the following form
-.\" ret is set on ENOSPC
-.IP
-.in +4n
-.EX
-struct module_symbol {
- unsigned long value;
- unsigned long name;
-};
-.EE
-.in
-.IP
-followed by null-terminated strings.
-The value of
-.I name
-is the character offset of the string relative to the start of
-.IR buf ;
-.I ret
-is set to the number of symbols.
-.TP
-.B QM_INFO
-Returns miscellaneous information about the indicated module.
-The output buffer format is:
-.IP
-.in +4n
-.EX
-struct module_info {
- unsigned long address;
- unsigned long size;
- unsigned long flags;
-};
-.EE
-.in
-.IP
-where
-.I address
-is the kernel address at which the module resides,
-.I size
-is the size of the module in bytes, and
-.I flags
-is a mask of
-.BR MOD_RUNNING ,
-.BR MOD_AUTOCLEAN ,
-and so on, that indicates the current status of the module
-(see the Linux kernel source file
-.IR include/linux/module.h ).
-.I ret
-is set to the size of the
-.I module_info
-structure.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-At least one of
-.IR name ,
-.IR buf ,
-or
-.I ret
-was outside the program's accessible address space.
-.TP
-.B EINVAL
-Invalid
-.IR which ;
-or
-.I name
-is NULL (indicating "the kernel"),
-but this is not permitted with the specified value of
-.IR which .
-.\" Not permitted with QM_DEPS, QM_REFS, or QM_INFO.
-.TP
-.B ENOENT
-No module by that
-.I name
-exists.
-.TP
-.B ENOSPC
-The buffer size provided was too small.
-.I ret
-is set to the minimum size needed.
-.TP
-.B ENOSYS
-.BR query_module ()
-is not supported in this version of the kernel
-(e.g., Linux 2.6 or later).
-.SH STANDARDS
-Linux.
-.SH VERSIONS
-Removed in Linux 2.6.
-.\" Removed in Linux 2.5.48
-.P
-Some of the information that was formerly available via
-.BR query_module ()
-can be obtained from
-.IR /proc/modules ,
-.IR /proc/kallsyms ,
-and the files under the directory
-.IR /sys/module .
-.P
-The
-.BR query_module ()
-system call is not supported by glibc.
-No declaration is provided in glibc headers, but,
-through a quirk of history, glibc does export an ABI for this system call.
-Therefore, in order to employ this system call,
-it is sufficient to manually declare the interface in your code;
-alternatively, you can invoke the system call using
-.BR syscall (2).
-.SH SEE ALSO
-.BR create_module (2),
-.BR delete_module (2),
-.BR get_kernel_syms (2),
-.BR init_module (2),
-.BR lsmod (8),
-.BR modinfo (8)
diff --git a/man2/quotactl.2 b/man2/quotactl.2
deleted file mode 100644
index 3807507e8..000000000
--- a/man2/quotactl.2
+++ /dev/null
@@ -1,806 +0,0 @@
-.\" Copyright (c) 2010, Jan Kara
-.\" A few pieces copyright (c) 1996 Andries Brouwer (aeb@cwi.nl)
-.\" and copyright 2010 (c) Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH quotactl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-quotactl \- manipulate disk quotas
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/quota.h>
-.BR "#include <xfs/xqm.h>" " /* Definition of " Q_X* " and " XFS_QUOTA_* \
-" constants"
-.RB " (or " <linux/dqblk_xfs.h> "; see NOTES) */"
-.P
-.BI "int quotactl(int " op ", const char *_Nullable " special ", int " id ,
-.BI " caddr_t " addr );
-.fi
-.SH DESCRIPTION
-The quota system can be used to set per-user, per-group, and per-project limits
-on the amount of disk space used on a filesystem.
-For each user and/or group,
-a soft limit and a hard limit can be set for each filesystem.
-The hard limit can't be exceeded.
-The soft limit can be exceeded, but warnings will ensue.
-Moreover, the user can't exceed the soft limit for more than the grace period
-duration (one week by default) at a time;
-after this, the soft limit counts as a hard limit.
-.P
-The
-.BR quotactl ()
-call manipulates disk quotas.
-The
-.I op
-argument indicates an operation to be applied to the user or
-group ID specified in
-.IR id .
-To initialize the
-.I op
-argument, use the
-.I QCMD(subop, type)
-macro.
-The
-.I type
-value is either
-.BR USRQUOTA ,
-for user quotas,
-.BR GRPQUOTA ,
-for group quotas, or (since Linux 4.1)
-.\" 847aac644e92e5624f2c153bab409bf713d5ff9a
-.BR PRJQUOTA ,
-for project quotas.
-The
-.I subop
-value is described below.
-.P
-The
-.I special
-argument is a pointer to a null-terminated string containing the pathname
-of the (mounted) block special device for the filesystem being manipulated.
-.P
-The
-.I addr
-argument is the address of an optional, operation-specific, data structure
-that is copied in or out of the system.
-The interpretation of
-.I addr
-is given with each operation below.
-.P
-The
-.I subop
-value is one of the following operations:
-.TP
-.B Q_QUOTAON
-Turn on quotas for a filesystem.
-The
-.I id
-argument is the identification number of the quota format to be used.
-Currently, there are three supported quota formats:
-.RS
-.TP 13
-.B QFMT_VFS_OLD
-The original quota format.
-.TP
-.B QFMT_VFS_V0
-The standard VFS v0 quota format, which can handle 32-bit UIDs and GIDs
-and quota limits up to 2\[ha]42 bytes and 2\[ha]32 inodes.
-.TP
-.B QFMT_VFS_V1
-A quota format that can handle 32-bit UIDs and GIDs
-and quota limits of 2\[ha]63 \- 1 bytes and 2\[ha]63 \- 1 inodes.
-.RE
-.IP
-The
-.I addr
-argument points to the pathname of a file containing the quotas for
-the filesystem.
-The quota file must exist; it is normally created with the
-.BR quotacheck (8)
-program.
-.IP
-Quota information can also be stored in hidden system inodes
-for ext4, XFS, and other filesystems if the filesystem is configured so.
-In this case, there are no visible quota files and there is no need to
-use
-.BR quotacheck (8).
-Quota information is always kept consistent by the filesystem and the
-.B Q_QUOTAON
-operation serves only to enable enforcement of quota limits.
-The presence of hidden
-system inodes with quota information is indicated by the
-.B DQF_SYS_FILE
-flag in the
-.I dqi_flags
-field returned by the
-.B Q_GETINFO
-operation.
-.IP
-This operation requires privilege
-.RB ( CAP_SYS_ADMIN ).
-.TP
-.B Q_QUOTAOFF
-Turn off quotas for a filesystem.
-The
-.I addr
-and
-.I id
-arguments are ignored.
-This operation requires privilege
-.RB ( CAP_SYS_ADMIN ).
-.TP
-.B Q_GETQUOTA
-Get disk quota limits and current usage for user or group
-.IR id .
-The
-.I addr
-argument is a pointer to a
-.I dqblk
-structure defined in
-.I <sys/quota.h>
-as follows:
-.IP
-.in +4n
-.EX
-/* uint64_t is an unsigned 64\-bit integer;
- uint32_t is an unsigned 32\-bit integer */
-\&
-struct dqblk { /* Definition since Linux 2.4.22 */
- uint64_t dqb_bhardlimit; /* Absolute limit on disk
- quota blocks alloc */
- uint64_t dqb_bsoftlimit; /* Preferred limit on
- disk quota blocks */
- uint64_t dqb_curspace; /* Current occupied space
- (in bytes) */
- uint64_t dqb_ihardlimit; /* Maximum number of
- allocated inodes */
- uint64_t dqb_isoftlimit; /* Preferred inode limit */
- uint64_t dqb_curinodes; /* Current number of
- allocated inodes */
- uint64_t dqb_btime; /* Time limit for excessive
- disk use */
- uint64_t dqb_itime; /* Time limit for excessive
- files */
- uint32_t dqb_valid; /* Bit mask of QIF_*
- constants */
-};
-\&
-/* Flags in dqb_valid that indicate which fields in
- dqblk structure are valid. */
-\&
-#define QIF_BLIMITS 1
-#define QIF_SPACE 2
-#define QIF_ILIMITS 4
-#define QIF_INODES 8
-#define QIF_BTIME 16
-#define QIF_ITIME 32
-#define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS)
-#define QIF_USAGE (QIF_SPACE | QIF_INODES)
-#define QIF_TIMES (QIF_BTIME | QIF_ITIME)
-#define QIF_ALL (QIF_LIMITS | QIF_USAGE | QIF_TIMES)
-.EE
-.in
-.IP
-The
-.I dqb_valid
-field is a bit mask that is set to indicate the entries in the
-.I dqblk
-structure that are valid.
-Currently, the kernel fills in all entries of the
-.I dqblk
-structure and marks them as valid in the
-.I dqb_valid
-field.
-Unprivileged users may retrieve only their own quotas;
-a privileged user
-.RB ( CAP_SYS_ADMIN )
-can retrieve the quotas of any user.
-.TP
-.BR Q_GETNEXTQUOTA " (since Linux 4.6)"
-.\" commit 926132c0257a5a8d149a6a395cc3405e55420566
-This operation is the same as
-.BR Q_GETQUOTA ,
-but it returns quota information for the next ID greater than or equal to
-.I id
-that has a quota set.
-.IP
-The
-.I addr
-argument is a pointer to a
-.I nextdqblk
-structure whose fields are as for the
-.IR dqblk ,
-except for the addition of a
-.I dqb_id
-field that is used to return the ID for which
-quota information is being returned:
-.IP
-.in +4n
-.EX
-struct nextdqblk {
- uint64_t dqb_bhardlimit;
- uint64_t dqb_bsoftlimit;
- uint64_t dqb_curspace;
- uint64_t dqb_ihardlimit;
- uint64_t dqb_isoftlimit;
- uint64_t dqb_curinodes;
- uint64_t dqb_btime;
- uint64_t dqb_itime;
- uint32_t dqb_valid;
- uint32_t dqb_id;
-};
-.EE
-.in
-.TP
-.B Q_SETQUOTA
-Set quota information for user or group
-.IR id ,
-using the information supplied in the
-.I dqblk
-structure pointed to by
-.IR addr .
-The
-.I dqb_valid
-field of the
-.I dqblk
-structure indicates which entries in the structure have been set by the caller.
-This operation supersedes the
-.B Q_SETQLIM
-and
-.B Q_SETUSE
-operations in the previous quota interfaces.
-This operation requires privilege
-.RB ( CAP_SYS_ADMIN ).
-.TP
-.BR Q_GETINFO " (since Linux 2.4.22)"
-Get information (like grace times) about quotafile.
-The
-.I addr
-argument should be a pointer to a
-.I dqinfo
-structure.
-This structure is defined in
-.I <sys/quota.h>
-as follows:
-.IP
-.in +4n
-.EX
-/* uint64_t is an unsigned 64\-bit integer;
- uint32_t is an unsigned 32\-bit integer */
-\&
-struct dqinfo { /* Defined since Linux 2.4.22 */
- uint64_t dqi_bgrace; /* Time before block soft limit
- becomes hard limit */
- uint64_t dqi_igrace; /* Time before inode soft limit
- becomes hard limit */
- uint32_t dqi_flags; /* Flags for quotafile
- (DQF_*) */
- uint32_t dqi_valid;
-};
-\&
-/* Bits for dqi_flags */
-\&
-/* Quota format QFMT_VFS_OLD */
-\&
-#define DQF_ROOT_SQUASH (1 << 0) /* Root squash enabled */
- /* Before Linux v4.0, this had been defined
- privately as V1_DQF_RSQUASH */
-\&
-/* Quota format QFMT_VFS_V0 / QFMT_VFS_V1 */
-\&
-#define DQF_SYS_FILE (1 << 16) /* Quota stored in
- a system file */
-\&
-/* Flags in dqi_valid that indicate which fields in
- dqinfo structure are valid. */
-\&
-#define IIF_BGRACE 1
-#define IIF_IGRACE 2
-#define IIF_FLAGS 4
-#define IIF_ALL (IIF_BGRACE | IIF_IGRACE | IIF_FLAGS)
-.EE
-.in
-.IP
-The
-.I dqi_valid
-field in the
-.I dqinfo
-structure indicates the entries in the structure that are valid.
-Currently, the kernel fills in all entries of the
-.I dqinfo
-structure and marks them all as valid in the
-.I dqi_valid
-field.
-The
-.I id
-argument is ignored.
-.TP
-.BR Q_SETINFO " (since Linux 2.4.22)"
-Set information about quotafile.
-The
-.I addr
-argument should be a pointer to a
-.I dqinfo
-structure.
-The
-.I dqi_valid
-field of the
-.I dqinfo
-structure indicates the entries in the structure
-that have been set by the caller.
-This operation supersedes the
-.B Q_SETGRACE
-and
-.B Q_SETFLAGS
-operations in the previous quota interfaces.
-The
-.I id
-argument is ignored.
-This operation requires privilege
-.RB ( CAP_SYS_ADMIN ).
-.TP
-.BR Q_GETFMT " (since Linux 2.4.22)"
-Get quota format used on the specified filesystem.
-The
-.I addr
-argument should be a pointer to a 4-byte buffer
-where the format number will be stored.
-.TP
-.B Q_SYNC
-Update the on-disk copy of quota usages for a filesystem.
-If
-.I special
-is NULL, then all filesystems with active quotas are sync'ed.
-The
-.I addr
-and
-.I id
-arguments are ignored.
-.TP
-.BR Q_GETSTATS " (supported up to Linux 2.4.21)"
-Get statistics and other generic information about the quota subsystem.
-The
-.I addr
-argument should be a pointer to a
-.I dqstats
-structure in which data should be stored.
-This structure is defined in
-.IR <sys/quota.h> .
-The
-.I special
-and
-.I id
-arguments are ignored.
-.IP
-This operation is obsolete and was removed in Linux 2.4.22.
-Files in
-.I /proc/sys/fs/quota/
-carry the information instead.
-.P
-For XFS filesystems making use of the XFS Quota Manager (XQM),
-the above operations are bypassed and the following operations are used:
-.TP
-.B Q_XQUOTAON
-Turn on quotas for an XFS filesystem.
-XFS provides the ability to turn on/off quota limit enforcement
-with quota accounting.
-Therefore, XFS expects
-.I addr
-to be a pointer to an
-.I "unsigned int"
-that contains a bitwise combination of the following flags (defined in
-.IR <xfs/xqm.h> ):
-.IP
-.in +4n
-.EX
-XFS_QUOTA_UDQ_ACCT /* User quota accounting */
-XFS_QUOTA_UDQ_ENFD /* User quota limits enforcement */
-XFS_QUOTA_GDQ_ACCT /* Group quota accounting */
-XFS_QUOTA_GDQ_ENFD /* Group quota limits enforcement */
-XFS_QUOTA_PDQ_ACCT /* Project quota accounting */
-XFS_QUOTA_PDQ_ENFD /* Project quota limits enforcement */
-.EE
-.in
-.IP
-This operation requires privilege
-.RB ( CAP_SYS_ADMIN ).
-The
-.I id
-argument is ignored.
-.TP
-.B Q_XQUOTAOFF
-Turn off quotas for an XFS filesystem.
-As with
-.BR Q_XQUOTAON ,
-XFS filesystems expect a pointer to an
-.I "unsigned int"
-that specifies whether quota accounting and/or limit enforcement need
-to be turned off (using the same flags as for
-.B Q_XQUOTAON
-operation).
-This operation requires privilege
-.RB ( CAP_SYS_ADMIN ).
-The
-.I id
-argument is ignored.
-.TP
-.B Q_XGETQUOTA
-Get disk quota limits and current usage for user
-.IR id .
-The
-.I addr
-argument is a pointer to an
-.I fs_disk_quota
-structure, which is defined in
-.I <xfs/xqm.h>
-as follows:
-.IP
-.in +4n
-.EX
-/* All the blk units are in BBs (Basic Blocks) of
- 512 bytes. */
-\&
-#define FS_DQUOT_VERSION 1 /* fs_disk_quota.d_version */
-\&
-#define XFS_USER_QUOTA (1<<0) /* User quota type */
-#define XFS_PROJ_QUOTA (1<<1) /* Project quota type */
-#define XFS_GROUP_QUOTA (1<<2) /* Group quota type */
-\&
-struct fs_disk_quota {
- int8_t d_version; /* Version of this structure */
- int8_t d_flags; /* XFS_{USER,PROJ,GROUP}_QUOTA */
- uint16_t d_fieldmask; /* Field specifier */
- uint32_t d_id; /* User, project, or group ID */
- uint64_t d_blk_hardlimit; /* Absolute limit on
- disk blocks */
- uint64_t d_blk_softlimit; /* Preferred limit on
- disk blocks */
- uint64_t d_ino_hardlimit; /* Maximum # allocated
- inodes */
- uint64_t d_ino_softlimit; /* Preferred inode limit */
- uint64_t d_bcount; /* # disk blocks owned by
- the user */
- uint64_t d_icount; /* # inodes owned by the user */
- int32_t d_itimer; /* Zero if within inode limits */
- /* If not, we refuse service */
- int32_t d_btimer; /* Similar to above; for
- disk blocks */
- uint16_t d_iwarns; /* # warnings issued with
- respect to # of inodes */
- uint16_t d_bwarns; /* # warnings issued with
- respect to disk blocks */
- int32_t d_padding2; /* Padding \- for future use */
- uint64_t d_rtb_hardlimit; /* Absolute limit on realtime
- (RT) disk blocks */
- uint64_t d_rtb_softlimit; /* Preferred limit on RT
- disk blocks */
- uint64_t d_rtbcount; /* # realtime blocks owned */
- int32_t d_rtbtimer; /* Similar to above; for RT
- disk blocks */
- uint16_t d_rtbwarns; /* # warnings issued with
- respect to RT disk blocks */
- int16_t d_padding3; /* Padding \- for future use */
- char d_padding4[8]; /* Yet more padding */
-};
-.EE
-.in
-.IP
-Unprivileged users may retrieve only their own quotas;
-a privileged user
-.RB ( CAP_SYS_ADMIN )
-may retrieve the quotas of any user.
-.TP
-.BR Q_XGETNEXTQUOTA " (since Linux 4.6)"
-.\" commit 8b37524962b9c54423374717786198f5c0820a28
-This operation is the same as
-.BR Q_XGETQUOTA ,
-but it returns (in the
-.I fs_disk_quota
-structure pointed to by
-.IR addr )
-quota information for the next ID greater than or equal to
-.I id
-that has a quota set.
-Note that since
-.I fs_disk_quota
-already has a
-.I d_id
-field, no separate structure type is needed (in contrast with the
-.B Q_GETQUOTA
-and
-.B Q_GETNEXTQUOTA
-operations).
-.TP
-.B Q_XSETQLIM
-Set disk quota limits for user
-.IR id .
-The
-.I addr
-argument is a pointer to an
-.I fs_disk_quota
-structure.
-This operation requires privilege
-.RB ( CAP_SYS_ADMIN ).
-.TP
-.B Q_XGETQSTAT
-Returns XFS filesystem-specific quota information in the
-.I fs_quota_stat
-structure pointed to by
-.IR addr .
-This is useful for finding out how much space is used to store quota
-information, and also to get the quota on/off status of a given local XFS
-filesystem.
-The
-.I fs_quota_stat
-structure itself is defined as follows:
-.IP
-.in +4n
-.EX
-#define FS_QSTAT_VERSION 1 /* fs_quota_stat.qs_version */
-\&
-struct fs_qfilestat {
- uint64_t qfs_ino; /* Inode number */
- uint64_t qfs_nblks; /* Number of BBs
- 512\-byte\-blocks */
- uint32_t qfs_nextents; /* Number of extents */
-};
-\&
-struct fs_quota_stat {
- int8_t qs_version; /* Version number for
- future changes */
- uint16_t qs_flags; /* XFS_QUOTA_{U,P,G}DQ_{ACCT,ENFD} */
- int8_t qs_pad; /* Unused */
- struct fs_qfilestat qs_uquota; /* User quota storage
- information */
- struct fs_qfilestat qs_gquota; /* Group quota storage
- information */
- uint32_t qs_incoredqs; /* Number of dquots in core */
- int32_t qs_btimelimit; /* Limit for blocks timer */
- int32_t qs_itimelimit; /* Limit for inodes timer */
- int32_t qs_rtbtimelimit;/* Limit for RT
- blocks timer */
- uint16_t qs_bwarnlimit; /* Limit for # of warnings */
- uint16_t qs_iwarnlimit; /* Limit for # of warnings */
-};
-.EE
-.in
-.IP
-The
-.I id
-argument is ignored.
-.TP
-.B Q_XGETQSTATV
-Returns XFS filesystem-specific quota information in the
-.I fs_quota_statv
-pointed to by
-.IR addr .
-This version of the operation uses a structure with proper versioning support,
-along with appropriate layout (all fields are naturally aligned) and
-padding to avoid special compat handling;
-it also provides the ability to get statistics regarding
-the project quota file.
-The
-.I fs_quota_statv
-structure itself is defined as follows:
-.IP
-.in +4n
-.EX
-#define FS_QSTATV_VERSION1 1 /* fs_quota_statv.qs_version */
-\&
-struct fs_qfilestatv {
- uint64_t qfs_ino; /* Inode number */
- uint64_t qfs_nblks; /* Number of BBs
- 512\-byte\-blocks */
- uint32_t qfs_nextents; /* Number of extents */
- uint32_t qfs_pad; /* Pad for 8\-byte alignment */
-};
-\&
-struct fs_quota_statv {
- int8_t qs_version; /* Version for future
- changes */
- uint8_t qs_pad1; /* Pad for 16\-bit alignment */
- uint16_t qs_flags; /* XFS_QUOTA_.* flags */
- uint32_t qs_incoredqs; /* Number of dquots incore */
- struct fs_qfilestatv qs_uquota; /* User quota
- information */
- struct fs_qfilestatv qs_gquota; /* Group quota
- information */
- struct fs_qfilestatv qs_pquota; /* Project quota
- information */
- int32_t qs_btimelimit; /* Limit for blocks timer */
- int32_t qs_itimelimit; /* Limit for inodes timer */
- int32_t qs_rtbtimelimit; /* Limit for RT blocks
- timer */
- uint16_t qs_bwarnlimit; /* Limit for # of warnings */
- uint16_t qs_iwarnlimit; /* Limit for # of warnings */
- uint64_t qs_pad2[8]; /* For future proofing */
-};
-.EE
-.in
-.IP
-The
-.I qs_version
-field of the structure should be filled with the version of the structure
-supported by the callee (for now, only
-.I FS_QSTATV_VERSION1
-is supported).
-The kernel will fill the structure in accordance with
-version provided.
-The
-.I id
-argument is ignored.
-.TP
-.BR Q_XQUOTARM " (buggy until Linux 3.16)"
-.\" 9da93f9b7cdf8ab28da6b364cdc1fafc8670b4dc
-Free the disk space taken by disk quotas.
-The
-.I addr
-argument should be a pointer to an
-.I "unsigned int"
-value containing flags (the same as in
-.I d_flags
-field of
-.I fs_disk_quota
-structure)
-which identify what types of quota
-should be removed.
-(Note that the quota type passed in the
-.I op
-argument is ignored, but should remain valid in order to pass preliminary
-quotactl syscall handler checks.)
-.IP
-Quotas must have already been turned off.
-The
-.I id
-argument is ignored.
-.TP
-.BR Q_XQUOTASYNC " (since Linux 2.6.15; no-op since Linux 3.4)"
-.\" Added in commit ee34807a65aa0c5911dc27682863afca780a003e
-This operation was an XFS quota equivalent to
-.BR Q_SYNC ,
-but it is a no-op since Linux 3.4,
-.\" 4b217ed9e30f94b6e8e5e262020ef0ceab6113af
-as
-.BR sync (1)
-writes quota information to disk now
-(in addition to the other filesystem metadata that it writes out).
-The
-.IR special ", " id " and " addr
-arguments are ignored.
-.SH RETURN VALUE
-On success,
-.BR quotactl ()
-returns 0; on error \-1
-is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-.I op
-is
-.BR Q_QUOTAON ,
-and the quota file pointed to by
-.I addr
-exists, but is not a regular file or
-is not on the filesystem pointed to by
-.IR special .
-.TP
-.B EBUSY
-.I op
-is
-.BR Q_QUOTAON ,
-but another
-.B Q_QUOTAON
-had already been performed.
-.TP
-.B EFAULT
-.I addr
-or
-.I special
-is invalid.
-.TP
-.B EINVAL
-.I op
-or
-.I type
-is invalid.
-.TP
-.B EINVAL
-.I op
-is
-.BR Q_QUOTAON ,
-but the specified quota file is corrupted.
-.TP
-.BR EINVAL " (since Linux 5.5)"
-.\" 3dd4d40b420846dd35869ccc8f8627feef2cff32
-.I op
-is
-.BR Q_XQUOTARM ,
-but
-.I addr
-does not point to valid quota types.
-.TP
-.B ENOENT
-The file specified by
-.I special
-or
-.I addr
-does not exist.
-.TP
-.B ENOSYS
-The kernel has not been compiled with the
-.B CONFIG_QUOTA
-option.
-.TP
-.B ENOTBLK
-.I special
-is not a block device.
-.TP
-.B EPERM
-The caller lacked the required privilege
-.RB ( CAP_SYS_ADMIN )
-for the specified operation.
-.TP
-.B ERANGE
-.I op
-is
-.BR Q_SETQUOTA ,
-but the specified limits are out of the range allowed by the quota format.
-.TP
-.B ESRCH
-No disk quota is found for the indicated user.
-Quotas have not been turned on for this filesystem.
-.TP
-.B ESRCH
-.I op
-is
-.BR Q_QUOTAON ,
-but the specified quota format was not found.
-.TP
-.B ESRCH
-.I op
-is
-.B Q_GETNEXTQUOTA
-or
-.BR Q_XGETNEXTQUOTA ,
-but there is no ID greater than or equal to
-.I id
-that has an active quota.
-.SH NOTES
-Instead of
-.I <xfs/xqm.h>
-one can use
-.IR <linux/dqblk_xfs.h> ,
-taking into account that there are several naming discrepancies:
-.IP \[bu] 3
-Quota enabling flags (of format
-.BR XFS_QUOTA_[UGP]DQ_{ACCT,ENFD} )
-are defined without a leading "X", as
-.BR FS_QUOTA_[UGP]DQ_{ACCT,ENFD} .
-.IP \[bu]
-The same is true for
-.B XFS_{USER,GROUP,PROJ}_QUOTA
-quota type flags, which are defined as
-.BR FS_{USER,GROUP,PROJ}_QUOTA .
-.IP \[bu]
-The
-.I dqblk_xfs.h
-header file defines its own
-.BR XQM_USRQUOTA ,
-.BR XQM_GRPQUOTA ,
-and
-.B XQM_PRJQUOTA
-constants for the available quota types, but their values are the same as for
-constants without the
-.B XQM_
-prefix.
-.SH SEE ALSO
-.BR quota (1),
-.BR getrlimit (2),
-.BR quotacheck (8),
-.BR quotaon (8)
diff --git a/man2/read.2 b/man2/read.2
deleted file mode 100644
index c74f62a0b..000000000
--- a/man2/read.2
+++ /dev/null
@@ -1,245 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\" and Copyright (C) 2009-2015 Michael Kerrisk, <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Sat Jul 24 00:06:00 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Wed Jan 17 16:02:32 1996 by Michael Haardt
-.\" <michael@cantor.informatik.rwth-aachen.de>
-.\" Modified Thu Apr 11 19:26:35 1996 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified Sun Jul 21 18:59:33 1996 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified Fri Jan 31 16:47:33 1997 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Sat Jul 12 20:45:39 1997 by Michael Haardt
-.\" <michael@cantor.informatik.rwth-aachen.de>
-.\"
-.TH read 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-read \- read from a file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "ssize_t read(int " fd ", void " buf [. count "], size_t " count );
-.fi
-.SH DESCRIPTION
-.BR read ()
-attempts to read up to
-.I count
-bytes from file descriptor
-.I fd
-into the buffer starting at
-.IR buf .
-.P
-On files that support seeking,
-the read operation commences at the file offset,
-and the file offset is incremented by the number of bytes read.
-If the file offset is at or past the end of file,
-no bytes are read, and
-.BR read ()
-returns zero.
-.P
-If
-.I count
-is zero,
-.BR read ()
-.I may
-detect the errors described below.
-In the absence of any errors,
-or if
-.BR read ()
-does not check for errors, a
-.BR read ()
-with a
-.I count
-of 0 returns zero and has no other effects.
-.P
-According to POSIX.1, if
-.I count
-is greater than
-.BR SSIZE_MAX ,
-the result is implementation-defined;
-see NOTES for the upper limit on Linux.
-.SH RETURN VALUE
-On success, the number of bytes read is returned (zero indicates end of
-file), and the file position is advanced by this number.
-It is not an error if this number is smaller than the number of bytes
-requested; this may happen for example because fewer bytes are actually
-available right now (maybe because we were close to end-of-file, or
-because we are reading from a pipe, or from a terminal), or because
-.BR read ()
-was interrupted by a signal.
-See also NOTES.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-In this case, it is left unspecified whether
-the file position (if any) changes.
-.SH ERRORS
-.TP
-.B EAGAIN
-The file descriptor
-.I fd
-refers to a file other than a socket and has been marked nonblocking
-.RB ( O_NONBLOCK ),
-and the read would block.
-See
-.BR open (2)
-for further details on the
-.B O_NONBLOCK
-flag.
-.TP
-.BR EAGAIN " or " EWOULDBLOCK
-.\" Actually EAGAIN on Linux
-The file descriptor
-.I fd
-refers to a socket and has been marked nonblocking
-.RB ( O_NONBLOCK ),
-and the read would block.
-POSIX.1-2001 allows either error to be returned for this case,
-and does not require these constants to have the same value,
-so a portable application should check for both possibilities.
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor or is not open for reading.
-.TP
-.B EFAULT
-.I buf
-is outside your accessible address space.
-.TP
-.B EINTR
-The call was interrupted by a signal before any data was read; see
-.BR signal (7).
-.TP
-.B EINVAL
-.I fd
-is attached to an object which is unsuitable for reading;
-or the file was opened with the
-.B O_DIRECT
-flag, and either the address specified in
-.IR buf ,
-the value specified in
-.IR count ,
-or the file offset is not suitably aligned.
-.TP
-.B EINVAL
-.I fd
-was created via a call to
-.BR timerfd_create (2)
-and the wrong size buffer was given to
-.BR read ();
-see
-.BR timerfd_create (2)
-for further information.
-.TP
-.B EIO
-I/O error.
-This will happen for example when the process is in a
-background process group, tries to read from its controlling terminal,
-and either it is ignoring or blocking
-.B SIGTTIN
-or its process group
-is orphaned.
-It may also occur when there is a low-level I/O error
-while reading from a disk or tape.
-A further possible cause of
-.B EIO
-on networked filesystems is when an advisory lock had been taken
-out on the file descriptor and this lock has been lost.
-See the
-.I "Lost locks"
-section of
-.BR fcntl (2)
-for further details.
-.TP
-.B EISDIR
-.I fd
-refers to a directory.
-.P
-Other errors may occur, depending on the object connected to
-.IR fd .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-SVr4, 4.3BSD, POSIX.1-2001.
-.SH NOTES
-On Linux,
-.BR read ()
-(and similar system calls) will transfer at most
-0x7ffff000 (2,147,479,552) bytes,
-returning the number of bytes actually transferred.
-.\" commit e28cc71572da38a5a12c1cfe4d7032017adccf69
-(This is true on both 32-bit and 64-bit systems.)
-.P
-On NFS filesystems, reading small amounts of data will update the
-timestamp only the first time; subsequent calls may not do so.
-This is caused
-by client side attribute caching, because most if not all NFS clients
-leave
-.I st_atime
-(last file access time)
-updates to the server, and client side reads satisfied from the
-client's cache will not cause
-.I st_atime
-updates on the server as there are no
-server-side reads.
-UNIX semantics can be obtained by disabling client-side attribute caching,
-but in most situations this will substantially
-increase server load and decrease performance.
-.SH BUGS
-According to POSIX.1-2008/SUSv4 Section XSI 2.9.7
-("Thread Interactions with Regular File Operations"):
-.P
-.RS 4
-All of the following functions shall be atomic with respect to
-each other in the effects specified in POSIX.1-2008 when they
-operate on regular files or symbolic links: ...
-.RE
-.P
-Among the APIs subsequently listed are
-.BR read ()
-and
-.BR readv (2).
-And among the effects that should be atomic across threads (and processes)
-are updates of the file offset.
-However, before Linux 3.14,
-this was not the case: if two processes that share
-an open file description (see
-.BR open (2))
-perform a
-.BR read ()
-(or
-.BR readv (2))
-at the same time, then the I/O operations were not atomic
-with respect to updating the file offset,
-with the result that the reads in the two processes
-might (incorrectly) overlap in the blocks of data that they obtained.
-This problem was fixed in Linux 3.14.
-.\" http://thread.gmane.org/gmane.linux.kernel/1649458
-.\" From: Michael Kerrisk (man-pages <mtk.manpages <at> gmail.com>
-.\" Subject: Update of file offset on write() etc. is non-atomic with I/O
-.\" Date: 2014-02-17 15:41:37 GMT
-.\" Newsgroups: gmane.linux.kernel, gmane.linux.file-systems
-.\" commit 9c225f2655e36a470c4f58dbbc99244c5fc7f2d4
-.\" Author: Linus Torvalds <torvalds@linux-foundation.org>
-.\" Date: Mon Mar 3 09:36:58 2014 -0800
-.\"
-.\" vfs: atomic f_pos accesses as per POSIX
-.SH SEE ALSO
-.BR close (2),
-.BR fcntl (2),
-.BR ioctl (2),
-.BR lseek (2),
-.BR open (2),
-.BR pread (2),
-.BR readdir (2),
-.BR readlink (2),
-.BR readv (2),
-.BR select (2),
-.BR write (2),
-.BR fread (3)
diff --git a/man2/readahead.2 b/man2/readahead.2
deleted file mode 100644
index e857fde66..000000000
--- a/man2/readahead.2
+++ /dev/null
@@ -1,99 +0,0 @@
-.\" This manpage is Copyright (C) 2004, Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2004-05-40 Created by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2004-10-05 aeb, minor correction
-.\"
-.TH readahead 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-readahead \- initiate file readahead into page cache
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #define _FILE_OFFSET_BITS 64
-.B #include <fcntl.h>
-.P
-.BI "ssize_t readahead(int " fd ", off_t " offset ", size_t " count );
-.fi
-.SH DESCRIPTION
-.BR readahead ()
-initiates readahead on a file so that subsequent reads from that file will
-be satisfied from the cache, and not block on disk I/O
-(assuming the readahead was initiated early enough and that other activity
-on the system did not in the meantime flush pages from the cache).
-.P
-The
-.I fd
-argument is a file descriptor identifying the file which is
-to be read.
-The
-.I offset
-argument specifies the starting point from which data is to be read
-and
-.I count
-specifies the number of bytes to be read.
-I/O is performed in whole pages, so that
-.I offset
-is effectively rounded down to a page boundary
-and bytes are read up to the next page boundary greater than or
-equal to
-.IR "(offset+count)" .
-.BR readahead ()
-does not read beyond the end of the file.
-The file offset of the open file description referred to by the file descriptor
-.I fd
-is left unchanged.
-.SH RETURN VALUE
-On success,
-.BR readahead ()
-returns 0; on failure, \-1 is returned, with
-.I errno
-set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor or is not open for reading.
-.TP
-.B EINVAL
-.I fd
-does not refer to a file type to which
-.BR readahead ()
-can be applied.
-.SH VERSIONS
-On some 32-bit architectures,
-the calling signature for this system call differs,
-for the reasons described in
-.BR syscall (2).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.4.13,
-glibc 2.3.
-.SH NOTES
-.B _FILE_OFFSET_BITS
-should be defined to be 64 in code that uses a pointer to
-.BR readahead ,
-if the code is intended to be portable
-to traditional 32-bit x86 and ARM platforms where
-.BR off_t 's
-width defaults to 32 bits.
-.SH BUGS
-.BR readahead ()
-attempts to schedule the reads in the background and return immediately.
-However, it may block while it reads the filesystem metadata needed
-to locate the requested blocks.
-This occurs frequently with ext[234] on large files
-using indirect blocks instead of extents,
-giving the appearance that the call blocks until the requested data has
-been read.
-.SH SEE ALSO
-.BR lseek (2),
-.BR madvise (2),
-.BR mmap (2),
-.BR posix_fadvise (2),
-.BR read (2)
diff --git a/man2/readdir.2 b/man2/readdir.2
deleted file mode 100644
index 933bb30d4..000000000
--- a/man2/readdir.2
+++ /dev/null
@@ -1,116 +0,0 @@
-.\" Copyright (C) 1995 Andries Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Written 11 June 1995 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 22 July 1995 by Michael Chastain <mec@duracef.shout.net>:
-.\" In 1.3.X, returns only one entry each time; return value is different.
-.\" Modified 2004-12-01, mtk, fixed headers listed in SYNOPSIS
-.\"
-.TH readdir 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-readdir \- read directory entry
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_readdir, unsigned int " fd ,
-.BI " struct old_linux_dirent *" dirp ", unsigned int " count );
-.fi
-.P
-.IR Note :
-There is no definition of
-.BR "struct old_linux_dirent" ;
-see NOTES.
-.SH DESCRIPTION
-This is not the function you are interested in.
-Look at
-.BR readdir (3)
-for the POSIX conforming C library interface.
-This page documents the bare kernel system call interface,
-which is superseded by
-.BR getdents (2).
-.P
-.BR readdir ()
-reads one
-.I old_linux_dirent
-structure from the directory
-referred to by the file descriptor
-.I fd
-into the buffer pointed to by
-.IR dirp .
-The argument
-.I count
-is ignored; at most one
-.I old_linux_dirent
-structure is read.
-.P
-The
-.I old_linux_dirent
-structure is declared (privately in Linux kernel file
-.BR fs/readdir.c )
-as follows:
-.P
-.in +4n
-.EX
-struct old_linux_dirent {
- unsigned long d_ino; /* inode number */
- unsigned long d_offset; /* offset to this \fIold_linux_dirent\fP */
- unsigned short d_namlen; /* length of this \fId_name\fP */
- char d_name[1]; /* filename (null\-terminated) */
-};
-.EE
-.in
-.P
-.I d_ino
-is an inode number.
-.I d_offset
-is the distance from the start of the directory to this
-.IR old_linux_dirent .
-.I d_namlen
-is the size of
-.IR d_name ,
-not counting the terminating null byte (\[aq]\e0\[aq]).
-.I d_name
-is a null-terminated filename.
-.SH RETURN VALUE
-On success, 1 is returned.
-On end of directory, 0 is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-Invalid file descriptor
-.IR fd .
-.TP
-.B EFAULT
-Argument points outside the calling process's address space.
-.TP
-.B EINVAL
-Result buffer is too small.
-.TP
-.B ENOENT
-No such directory.
-.TP
-.B ENOTDIR
-File descriptor does not refer to a directory.
-.SH VERSIONS
-You will need to define the
-.I old_linux_dirent
-structure yourself.
-However, probably you should use
-.BR readdir (3)
-instead.
-.P
-This system call does not exist on x86-64.
-.SH STANDARDS
-Linux.
-.SH SEE ALSO
-.BR getdents (2),
-.BR readdir (3)
diff --git a/man2/readlink.2 b/man2/readlink.2
deleted file mode 100644
index da671a9be..000000000
--- a/man2/readlink.2
+++ /dev/null
@@ -1,331 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" And Copyright (C) 2011 Guillem Jover <guillem@hadrons.org>
-.\" And Copyright (C) 2006, 2014 Michael Kerrisk
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)readlink.2 6.8 (Berkeley) 3/10/91
-.\"
-.\" Modified Sat Jul 24 00:10:21 1993 by Rik Faith (faith@cs.unc.edu)
-.\" Modified Tue Jul 9 23:55:17 1996 by aeb
-.\" Modified Fri Jan 24 00:26:00 1997 by aeb
-.\" 2011-09-20, Guillem Jover <guillem@hadrons.org>:
-.\" Added text on dynamically allocating buffer + example program
-.\"
-.TH readlink 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-readlink, readlinkat \- read value of a symbolic link
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "ssize_t readlink(const char *restrict " pathname ", char *restrict " buf ,
-.BI " size_t " bufsiz );
-.P
-.BR "#include <fcntl.h> " "/* Definition of " AT_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "ssize_t readlinkat(int " dirfd ", const char *restrict " pathname ,
-.BI " char *restrict " buf ", size_t " bufsiz );
-.P
-.fi
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR readlink ():
-.nf
- _XOPEN_SOURCE >= 500 || _POSIX_C_SOURCE >= 200112L
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.P
-.BR readlinkat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-.BR readlink ()
-places the contents of the symbolic link
-.I pathname
-in the buffer
-.IR buf ,
-which has size
-.IR bufsiz .
-.BR readlink ()
-does not append a terminating null byte to
-.IR buf .
-It will (silently) truncate the contents (to a length of
-.I bufsiz
-characters), in case the buffer is too small to hold all of the contents.
-.SS readlinkat()
-The
-.BR readlinkat ()
-system call operates in exactly the same way as
-.BR readlink (),
-except for the differences described here.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR readlink ()
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR readlink ()).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-Since Linux 2.6.39,
-.\" commit 65cfc6722361570bfe255698d9cd4dccaf47570d
-.I pathname
-can be an empty string,
-in which case the call operates on the symbolic link referred to by
-.I dirfd
-(which should have been obtained using
-.BR open (2)
-with the
-.B O_PATH
-and
-.B O_NOFOLLOW
-flags).
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR readlinkat ().
-.SH RETURN VALUE
-On success, these calls return the number of bytes placed in
-.IR buf .
-(If the returned value equals
-.IR bufsiz ,
-then truncation may have occurred.)
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Search permission is denied for a component of the path prefix.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.RB ( readlinkat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EFAULT
-.I buf
-extends outside the process's allocated address space.
-.TP
-.B EINVAL
-.I bufsiz
-is not positive.
-.\" At the glibc level, bufsiz is unsigned, so this error can only occur
-.\" if bufsiz==0. However, in the kernel syscall, bufsiz is signed,
-.\" and this error can also occur if bufsiz < 0.
-.\" See: http://thread.gmane.org/gmane.linux.man/380
-.\" Subject: [patch 0/3] [RFC] kernel/glibc mismatch of "readlink" syscall?
-.TP
-.B EINVAL
-The named file (i.e., the final filename component of
-.IR pathname )
-is not a symbolic link.
-.TP
-.B EIO
-An I/O error occurred while reading from the filesystem.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in translating the pathname.
-.TP
-.B ENAMETOOLONG
-A pathname, or a component of a pathname, was too long.
-.TP
-.B ENOENT
-The named file does not exist.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component of the path prefix is not a directory.
-.TP
-.B ENOTDIR
-.RB ( readlinkat ())
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR readlink ()
-4.4BSD
-(first appeared in 4.2BSD),
-POSIX.1-2001, POSIX.1-2008.
-.TP
-.BR readlinkat ()
-POSIX.1-2008.
-Linux 2.6.16,
-glibc 2.4.
-.P
-Up to and including glibc 2.4, the return type of
-.BR readlink ()
-was declared as
-.IR int .
-Nowadays, the return type is declared as
-.IR ssize_t ,
-as (newly) required in POSIX.1-2001.
-.SS glibc
-On older kernels where
-.BR readlinkat ()
-is unavailable, the glibc wrapper function falls back to the use of
-.BR readlink ().
-When
-.I pathname
-is a relative pathname,
-glibc constructs a pathname based on the symbolic link in
-.I /proc/self/fd
-that corresponds to the
-.I dirfd
-argument.
-.SH NOTES
-Using a statically sized buffer might not provide enough room for the
-symbolic link contents.
-The required size for the buffer can be obtained from the
-.I stat.st_size
-value returned by a call to
-.BR lstat (2)
-on the link.
-However, the number of bytes written by
-.BR readlink ()
-and
-.BR readlinkat ()
-should be checked to make sure that the size of the
-symbolic link did not increase between the calls.
-Dynamically allocating the buffer for
-.BR readlink ()
-and
-.BR readlinkat ()
-also addresses a common portability problem when using
-.B PATH_MAX
-for the buffer size,
-as this constant is not guaranteed to be defined per POSIX
-if the system does not have such a limit.
-.SH EXAMPLES
-The following program allocates the buffer needed by
-.BR readlink ()
-dynamically from the information provided by
-.BR lstat (2),
-falling back to a buffer of size
-.B PATH_MAX
-in cases where
-.BR lstat (2)
-reports a size of zero.
-.P
-.\" SRC BEGIN (readlink.c)
-.EX
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/stat.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- char *buf;
- ssize_t nbytes, bufsiz;
- struct stat sb;
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <pathname>\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- if (lstat(argv[1], &sb) == \-1) {
- perror("lstat");
- exit(EXIT_FAILURE);
- }
-\&
- /* Add one to the link size, so that we can determine whether
- the buffer returned by readlink() was truncated. */
-\&
- bufsiz = sb.st_size + 1;
-\&
- /* Some magic symlinks under (for example) /proc and /sys
- report \[aq]st_size\[aq] as zero. In that case, take PATH_MAX as
- a "good enough" estimate. */
-\&
- if (sb.st_size == 0)
- bufsiz = PATH_MAX;
-\&
- buf = malloc(bufsiz);
- if (buf == NULL) {
- perror("malloc");
- exit(EXIT_FAILURE);
- }
-\&
- nbytes = readlink(argv[1], buf, bufsiz);
- if (nbytes == \-1) {
- perror("readlink");
- exit(EXIT_FAILURE);
- }
-\&
- /* Print only \[aq]nbytes\[aq] of \[aq]buf\[aq], as it doesn't contain a terminating
- null byte (\[aq]\e0\[aq]). */
- printf("\[aq]%s\[aq] points to \[aq]%.*s\[aq]\en", argv[1], (int) nbytes, buf);
-\&
- /* If the return value was equal to the buffer size, then
- the link target was larger than expected (perhaps because the
- target was changed between the call to lstat() and the call to
- readlink()). Warn the user that the returned target may have
- been truncated. */
-\&
- if (nbytes == bufsiz)
- printf("(Returned buffer may have been truncated)\en");
-\&
- free(buf);
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR readlink (1),
-.BR lstat (2),
-.BR stat (2),
-.BR symlink (2),
-.BR realpath (3),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/readlinkat.2 b/man2/readlinkat.2
deleted file mode 100644
index b29d1b541..000000000
--- a/man2/readlinkat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/readlink.2
diff --git a/man2/readv.2 b/man2/readv.2
deleted file mode 100644
index d39f2b9b9..000000000
--- a/man2/readv.2
+++ /dev/null
@@ -1,427 +0,0 @@
-.\" Copyright (C) 2007, 2010 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (c) 1993 by Thomas Koenig (ig25@rz.uni-karlsruhe.de)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Sat Jul 24 18:34:44 1993 by Rik Faith (faith@cs.unc.edu)
-.\" Merged readv.[23], 2002-10-17, aeb
-.\" 2007-04-30 mtk, A fairly major rewrite to fix errors and
-.\" add more details.
-.\" 2010-11-16, mtk, Added documentation of preadv() and pwritev()
-.\"
-.TH readv 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-readv, writev, preadv, pwritev, preadv2, pwritev2 \-
-read or write data into multiple buffers
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/uio.h>
-.P
-.BI "ssize_t readv(int " fd ", const struct iovec *" iov ", int " iovcnt );
-.BI "ssize_t writev(int " fd ", const struct iovec *" iov ", int " iovcnt );
-.P
-.BI "ssize_t preadv(int " fd ", const struct iovec *" iov ", int " iovcnt ,
-.BI " off_t " offset );
-.BI "ssize_t pwritev(int " fd ", const struct iovec *" iov ", int " iovcnt ,
-.BI " off_t " offset );
-.P
-.BI "ssize_t preadv2(int " fd ", const struct iovec *" iov ", int " iovcnt ,
-.BI " off_t " offset ", int " flags );
-.BI "ssize_t pwritev2(int " fd ", const struct iovec *" iov ", int " iovcnt ,
-.BI " off_t " offset ", int " flags );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR preadv (),
-.BR pwritev ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- glibc 2.19 and earlier:
- _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-The
-.BR readv ()
-system call reads
-.I iovcnt
-buffers from the file associated with the file descriptor
-.I fd
-into the buffers described by
-.I iov
-("scatter input").
-.P
-The
-.BR writev ()
-system call writes
-.I iovcnt
-buffers of data described by
-.I iov
-to the file associated with the file descriptor
-.I fd
-("gather output").
-.P
-The pointer
-.I iov
-points to an array of
-.I iovec
-structures,
-described in
-.BR iovec (3type).
-.P
-The
-.BR readv ()
-system call works just like
-.BR read (2)
-except that multiple buffers are filled.
-.P
-The
-.BR writev ()
-system call works just like
-.BR write (2)
-except that multiple buffers are written out.
-.P
-Buffers are processed in array order.
-This means that
-.BR readv ()
-completely fills
-.I iov[0]
-before proceeding to
-.IR iov[1] ,
-and so on.
-(If there is insufficient data, then not all buffers pointed to by
-.I iov
-may be filled.)
-Similarly,
-.BR writev ()
-writes out the entire contents of
-.I iov[0]
-before proceeding to
-.IR iov[1] ,
-and so on.
-.P
-The data transfers performed by
-.BR readv ()
-and
-.BR writev ()
-are atomic: the data written by
-.\" Regarding atomicity, see https://bugzilla.kernel.org/show_bug.cgi?id=10596
-.BR writev ()
-is written as a single block that is not intermingled with output
-from writes in other processes;
-analogously,
-.BR readv ()
-is guaranteed to read a contiguous block of data from the file,
-regardless of read operations performed in other threads or processes
-that have file descriptors referring to the same open file description
-(see
-.BR open (2)).
-.SS preadv() and pwritev()
-The
-.BR preadv ()
-system call combines the functionality of
-.BR readv ()
-and
-.BR pread (2).
-It performs the same task as
-.BR readv (),
-but adds a fourth argument,
-.IR offset ,
-which specifies the file offset at which the input operation
-is to be performed.
-.P
-The
-.BR pwritev ()
-system call combines the functionality of
-.BR writev ()
-and
-.BR pwrite (2).
-It performs the same task as
-.BR writev (),
-but adds a fourth argument,
-.IR offset ,
-which specifies the file offset at which the output operation
-is to be performed.
-.P
-The file offset is not changed by these system calls.
-The file referred to by
-.I fd
-must be capable of seeking.
-.SS preadv2() and pwritev2()
-These system calls are similar to
-.BR preadv ()
-and
-.BR pwritev ()
-calls, but add a fifth argument,
-.IR flags ,
-which modifies the behavior on a per-call basis.
-.P
-Unlike
-.BR preadv ()
-and
-.BR pwritev (),
-if the
-.I offset
-argument is \-1, then the current file offset is used and updated.
-.P
-The
-.I flags
-argument contains a bitwise OR of zero or more of the following flags:
-.TP
-.BR RWF_DSYNC " (since Linux 4.7)"
-.\" commit e864f39569f4092c2b2bc72c773b6e486c7e3bd9
-Provide a per-write equivalent of the
-.B O_DSYNC
-.BR open (2)
-flag.
-This flag is meaningful only for
-.BR pwritev2 (),
-and its effect applies only to the data range written by the system call.
-.TP
-.BR RWF_HIPRI " (since Linux 4.6)"
-High priority read/write.
-Allows block-based filesystems to use polling of the device,
-which provides lower latency, but may use additional resources.
-(Currently, this feature is usable only on a file descriptor opened using the
-.B O_DIRECT
-flag.)
-.TP
-.BR RWF_SYNC " (since Linux 4.7)"
-.\" commit e864f39569f4092c2b2bc72c773b6e486c7e3bd9
-Provide a per-write equivalent of the
-.B O_SYNC
-.BR open (2)
-flag.
-This flag is meaningful only for
-.BR pwritev2 (),
-and its effect applies only to the data range written by the system call.
-.TP
-.BR RWF_NOWAIT " (since Linux 4.14)"
-.\" commit 3239d834847627b6634a4139cf1dc58f6f137a46
-.\" commit 91f9943e1c7b6638f27312d03fe71fcc67b23571
-Do not wait for data which is not immediately available.
-If this flag is specified, the
-.BR preadv2 ()
-system call will return instantly if it would have to read data from
-the backing storage or wait for a lock.
-If some data was successfully read, it will return the number of bytes read.
-If no bytes were read, it will return \-1 and set
-.I errno
-to
-.B EAGAIN
-(but see
-.BR BUGS ).
-Currently, this flag is meaningful only for
-.BR preadv2 ().
-.TP
-.BR RWF_APPEND " (since Linux 4.16)"
-.\" commit e1fc742e14e01d84d9693c4aca4ab23da65811fb
-Provide a per-write equivalent of the
-.B O_APPEND
-.BR open (2)
-flag.
-This flag is meaningful only for
-.BR pwritev2 (),
-and its effect applies only to the data range written by the system call.
-The
-.I offset
-argument does not affect the write operation;
-the data is always appended to the end of the file.
-However, if the
-.I offset
-argument is \-1, the current file offset is updated.
-.SH RETURN VALUE
-On success,
-.BR readv (),
-.BR preadv (),
-and
-.BR preadv2 ()
-return the number of bytes read;
-.BR writev (),
-.BR pwritev (),
-and
-.BR pwritev2 ()
-return the number of bytes written.
-.P
-Note that it is not an error for a successful call to transfer fewer bytes
-than requested (see
-.BR read (2)
-and
-.BR write (2)).
-.P
-On error, \-1 is returned, and \fIerrno\fP is set to indicate the error.
-.SH ERRORS
-The errors are as given for
-.BR read (2)
-and
-.BR write (2).
-Furthermore,
-.BR preadv (),
-.BR preadv2 (),
-.BR pwritev (),
-and
-.BR pwritev2 ()
-can also fail for the same reasons as
-.BR lseek (2).
-Additionally, the following errors are defined:
-.TP
-.B EINVAL
-The sum of the
-.I iov_len
-values overflows an
-.I ssize_t
-value.
-.TP
-.B EINVAL
-The vector count,
-.IR iovcnt ,
-is less than zero or greater than the permitted maximum.
-.TP
-.B EOPNOTSUPP
-An unknown flag is specified in \fIflags\fP.
-.SH VERSIONS
-.SS C library/kernel differences
-The raw
-.BR preadv ()
-and
-.BR pwritev ()
-system calls have call signatures that differ slightly from that of the
-corresponding GNU C library wrapper functions shown in the SYNOPSIS.
-The final argument,
-.IR offset ,
-is unpacked by the wrapper functions into two arguments in the system calls:
-.P
-.BI " unsigned long " pos_l ", unsigned long " pos_h
-.P
-These arguments contain, respectively, the low order and high order 32 bits of
-.IR offset .
-.SH STANDARDS
-.TP
-.BR readv ()
-.TQ
-.BR writev ()
-POSIX.1-2008.
-.TP
-.BR preadv ()
-.TQ
-.BR pwritev ()
-BSD.
-.TP
-.BR preadv2 ()
-.TQ
-.BR pwritev2 ()
-Linux.
-.SH HISTORY
-.TP
-.BR readv ()
-.TQ
-.BR writev ()
-POSIX.1-2001,
-4.4BSD (first appeared in 4.2BSD).
-.\" Linux libc5 used \fIsize_t\fP as the type of the \fIiovcnt\fP argument,
-.\" and \fIint\fP as the return type.
-.\" The readv/writev system calls were buggy before Linux 1.3.40.
-.\" (Says release.libc.)
-.P
-.BR preadv (),
-.BR pwritev ():
-Linux 2.6.30,
-glibc 2.10.
-.P
-.BR preadv2 (),
-.BR pwritev2 ():
-Linux 4.6,
-glibc 2.26.
-.SS Historical C library/kernel differences
-To deal with the fact that
-.B IOV_MAX
-was so low on early versions of Linux,
-the glibc wrapper functions for
-.BR readv ()
-and
-.BR writev ()
-did some extra work if they detected that the underlying kernel
-system call failed because this limit was exceeded.
-In the case of
-.BR readv (),
-the wrapper function allocated a temporary buffer large enough
-for all of the items specified by
-.IR iov ,
-passed that buffer in a call to
-.BR read (2),
-copied data from the buffer to the locations specified by the
-.I iov_base
-fields of the elements of
-.IR iov ,
-and then freed the buffer.
-The wrapper function for
-.BR writev ()
-performed the analogous task using a temporary buffer and a call to
-.BR write (2).
-.P
-The need for this extra effort in the glibc wrapper functions
-went away with Linux 2.2 and later.
-However, glibc continued to provide this behavior until glibc 2.10.
-Starting with glibc 2.9,
-the wrapper functions provide this behavior only if the library detects
-that the system is running a Linux kernel older than Linux 2.6.18
-(an arbitrarily selected kernel version).
-And since glibc 2.20
-(which requires a minimum of Linux 2.6.32),
-the glibc wrapper functions always just directly invoke the system calls.
-.SH NOTES
-POSIX.1 allows an implementation to place a limit on
-the number of items that can be passed in
-.IR iov .
-An implementation can advertise its limit by defining
-.B IOV_MAX
-in
-.I <limits.h>
-or at run time via the return value from
-.IR sysconf(_SC_IOV_MAX) .
-On modern Linux systems, the limit is 1024.
-Back in Linux 2.0 days, this limit was 16.
-.\"
-.\"
-.SH BUGS
-Linux 5.9 and Linux 5.10 have a bug where
-.BR preadv2 ()
-with the
-.B RWF_NOWAIT
-flag may return 0 even when not at end of file.
-.\" See
-.\" <https://lore.kernel.org/linux-fsdevel/fea8b16d-5a69-40f9-b123-e84dcd6e8f2e@www.fastmail.com/T/#u>
-.\" The bug was introduced in
-.\" efa8480a831 fs: RWF_NOWAIT should imply IOCB_NOIO
-.\" and fixed in
-.\" 06c0444290 mm/filemap.c: generic_file_buffered_read() now uses find_get_pages_contig
-.SH EXAMPLES
-The following code sample demonstrates the use of
-.BR writev ():
-.P
-.in +4n
-.EX
-char *str0 = "hello ";
-char *str1 = "world\en";
-ssize_t nwritten;
-struct iovec iov[2];
-\&
-iov[0].iov_base = str0;
-iov[0].iov_len = strlen(str0);
-iov[1].iov_base = str1;
-iov[1].iov_len = strlen(str1);
-\&
-nwritten = writev(STDOUT_FILENO, iov, 2);
-.EE
-.in
-.SH SEE ALSO
-.BR pread (2),
-.BR read (2),
-.BR write (2)
diff --git a/man2/reboot.2 b/man2/reboot.2
deleted file mode 100644
index 6d106e3dc..000000000
--- a/man2/reboot.2
+++ /dev/null
@@ -1,241 +0,0 @@
-.\" Copyright (c) 1998 Andries Brouwer (aeb@cwi.nl), 24 September 1998
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\"
-.TH reboot 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-reboot \- reboot or enable/disable Ctrl-Alt-Del
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.RB "/* Since Linux 2.1.30 there are symbolic names " LINUX_REBOOT_*
- for the constants and a fourth argument to the call: */
-.P
-.BR "#include <linux/reboot.h> " \
-"/* Definition of " LINUX_REBOOT_* " constants */"
-.BR "#include <sys/syscall.h> " "/* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_reboot, int " magic ", int " magic2 ", int " op ", void *" arg );
-.P
-/* Under glibc and most alternative libc's (including uclibc, dietlibc,
- musl and a few others), some of the constants involved have gotten
-.RB " symbolic names " RB_* ", and the library call is a 1-argument"
- wrapper around the system call: */
-.P
-.BR "#include <sys/reboot.h> " "/* Definition of " RB_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int reboot(int " op );
-.fi
-.SH DESCRIPTION
-The
-.BR reboot ()
-call reboots the system, or enables/disables the reboot keystroke
-(abbreviated CAD, since the default is Ctrl-Alt-Delete;
-it can be changed using
-.BR loadkeys (1)).
-.P
-This system call fails (with the error
-.BR EINVAL )
-unless
-.I magic
-equals
-.B LINUX_REBOOT_MAGIC1
-(that is, 0xfee1dead) and
-.I magic2
-equals
-.B LINUX_REBOOT_MAGIC2
-(that is, 0x28121969).
-However, since Linux 2.1.17 also
-.B LINUX_REBOOT_MAGIC2A
-(that is, 0x05121996)
-and since Linux 2.1.97 also
-.B LINUX_REBOOT_MAGIC2B
-(that is, 0x16041998)
-and since Linux 2.5.71 also
-.B LINUX_REBOOT_MAGIC2C
-(that is, 0x20112000)
-are permitted as values for
-.IR magic2 .
-(The hexadecimal values of these constants are meaningful.)
-.P
-The
-.I op
-argument can have the following values:
-.TP
-.B LINUX_REBOOT_CMD_CAD_OFF
-.RB ( RB_DISABLE_CAD ,
-0).
-CAD is disabled.
-This means that the CAD keystroke will cause a
-.B SIGINT
-signal to be
-sent to init (process 1), whereupon this process may decide upon a
-proper action (maybe: kill all processes, sync, reboot).
-.TP
-.B LINUX_REBOOT_CMD_CAD_ON
-.RB ( RB_ENABLE_CAD ,
-0x89abcdef).
-CAD is enabled.
-This means that the CAD keystroke will immediately cause
-the action associated with
-.BR LINUX_REBOOT_CMD_RESTART .
-.TP
-.B LINUX_REBOOT_CMD_HALT
-.RB ( RB_HALT_SYSTEM ,
-0xcdef0123; since Linux 1.1.76).
-The message "System halted." is printed, and the system is halted.
-Control is given to the ROM monitor, if there is one.
-If not preceded by a
-.BR sync (2),
-data will be lost.
-.TP
-.B LINUX_REBOOT_CMD_KEXEC
-.RB ( RB_KEXEC ,
-0x45584543; since Linux 2.6.13).
-Execute a kernel that has been loaded earlier with
-.BR kexec_load (2).
-This option is available only if the kernel was configured with
-.BR CONFIG_KEXEC .
-.TP
-.B LINUX_REBOOT_CMD_POWER_OFF
-.RB ( RB_POWER_OFF ,
-0x4321fedc; since Linux 2.1.30).
-The message "Power down." is printed, the system is stopped,
-and all power is removed from the system, if possible.
-If not preceded by a
-.BR sync (2),
-data will be lost.
-.TP
-.B LINUX_REBOOT_CMD_RESTART
-.RB ( RB_AUTOBOOT ,
-0x1234567).
-The message "Restarting system." is printed, and a default
-restart is performed immediately.
-If not preceded by a
-.BR sync (2),
-data will be lost.
-.TP
-.B LINUX_REBOOT_CMD_RESTART2
-(0xa1b2c3d4; since Linux 2.1.30).
-The message "Restarting system with command \[aq]%s\[aq]" is printed,
-and a restart (using the command string given in
-.IR arg )
-is performed immediately.
-If not preceded by a
-.BR sync (2),
-data will be lost.
-.TP
-.B LINUX_REBOOT_CMD_SW_SUSPEND
-.RB ( RB_SW_SUSPEND ,
-0xd000fce1; since Linux 2.5.18).
-The system is suspended (hibernated) to disk.
-This option is available only if the kernel was configured with
-.BR CONFIG_HIBERNATION .
-.P
-Only the superuser may call
-.BR reboot ().
-.P
-The precise effect of the above actions depends on the architecture.
-For the i386 architecture, the additional argument does not do
-anything at present (Linux 2.1.122), but the type of reboot can be
-determined by kernel command-line arguments ("reboot=...") to be
-either warm or cold, and either hard or through the BIOS.
-.\"
-.SS Behavior inside PID namespaces
-.\" commit cf3f89214ef6a33fad60856bc5ffd7bb2fc4709b
-.\" see also commit 923c7538236564c46ee80c253a416705321f13e3
-Since Linux 3.4,
-if
-.BR reboot ()
-is called
-from a PID namespace other than the initial PID namespace
-with one of the
-.I op
-values listed below,
-it performs a "reboot" of that namespace:
-the "init" process of the PID namespace is immediately terminated,
-with the effects described in
-.BR pid_namespaces (7).
-.P
-The values that can be supplied in
-.I op
-when calling
-.BR reboot ()
-in this case are as follows:
-.TP
-.B LINUX_REBOOT_CMD_RESTART
-.TQ
-.B LINUX_REBOOT_CMD_RESTART2
-The "init" process is terminated,
-and
-.BR wait (2)
-in the parent process reports that the child was killed with a
-.B SIGHUP
-signal.
-.TP
-.B LINUX_REBOOT_CMD_POWER_OFF
-.TQ
-.B LINUX_REBOOT_CMD_HALT
-The "init" process is terminated,
-and
-.BR wait (2)
-in the parent process reports that the child was killed with a
-.B SIGINT
-signal.
-.P
-For the other
-.I op
-values,
-.BR reboot ()
-returns \-1 and
-.I errno
-is set to
-.BR EINVAL .
-.SH RETURN VALUE
-For the values of
-.I op
-that stop or restart the system,
-a successful call to
-.BR reboot ()
-does not return.
-For the other
-.I op
-values, zero is returned on success.
-In all cases, \-1 is returned on failure, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Problem with getting user-space data under
-.BR LINUX_REBOOT_CMD_RESTART2 .
-.TP
-.B EINVAL
-Bad magic numbers or
-.IR op .
-.TP
-.B EPERM
-The calling process has insufficient privilege to call
-.BR reboot ();
-the caller must have the
-.B CAP_SYS_BOOT
-capability inside its user namespace.
-.SH STANDARDS
-Linux.
-.SH SEE ALSO
-.BR systemctl (1),
-.BR systemd (1),
-.BR kexec_load (2),
-.BR sync (2),
-.BR bootparam (7),
-.BR capabilities (7),
-.BR ctrlaltdel (8),
-.BR halt (8),
-.BR shutdown (8)
diff --git a/man2/recv.2 b/man2/recv.2
deleted file mode 100644
index 294eca384..000000000
--- a/man2/recv.2
+++ /dev/null
@@ -1,563 +0,0 @@
-.\" Copyright (c) 1983, 1990, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" $Id: recv.2,v 1.3 1999/05/13 11:33:38 freitag Exp $
-.\"
-.\" Modified Sat Jul 24 00:22:20 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Tue Oct 22 17:45:19 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998,1999 by Andi Kleen
-.\" 2001-06-19 corrected SO_EE_OFFENDER, bug report by James Hawtin
-.\"
-.TH recv 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-recv, recvfrom, recvmsg \- receive a message from a socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "ssize_t recv(int " sockfd ", void " buf [. len "], size_t " len ,
-.BI " int " flags );
-.BI "ssize_t recvfrom(int " sockfd ", void " buf "[restrict ." len "], size_t " len ,
-.BI " int " flags ,
-.BI " struct sockaddr *_Nullable restrict " src_addr ,
-.BI " socklen_t *_Nullable restrict " addrlen );
-.BI "ssize_t recvmsg(int " sockfd ", struct msghdr *" msg ", int " flags );
-.fi
-.SH DESCRIPTION
-The
-.BR recv (),
-.BR recvfrom (),
-and
-.BR recvmsg ()
-calls are used to receive messages from a socket.
-They may be used
-to receive data on both connectionless and connection-oriented sockets.
-This page first describes common features of all three system calls,
-and then describes the differences between the calls.
-.P
-The only difference between
-.BR recv ()
-and
-.BR read (2)
-is the presence of
-.IR flags .
-With a zero
-.I flags
-argument,
-.BR recv ()
-is generally equivalent to
-.BR read (2)
-(but see NOTES).
-Also, the following call
-.P
-.in +4n
-.EX
-recv(sockfd, buf, len, flags);
-.EE
-.in
-.P
-is equivalent to
-.P
-.in +4n
-.EX
-recvfrom(sockfd, buf, len, flags, NULL, NULL);
-.EE
-.in
-.P
-All three calls return the length of the message on successful
-completion.
-If a message is too long to fit in the supplied buffer, excess
-bytes may be discarded depending on the type of socket the message is
-received from.
-.P
-If no messages are available at the socket, the receive calls wait for a
-message to arrive, unless the socket is nonblocking (see
-.BR fcntl (2)),
-in which case the value \-1 is returned and
-.I errno
-is set to
-.BR EAGAIN " or " EWOULDBLOCK .
-The receive calls normally return any data available, up to the requested
-amount, rather than waiting for receipt of the full amount requested.
-.P
-An application can use
-.BR select (2),
-.BR poll (2),
-or
-.BR epoll (7)
-to determine when more data arrives on a socket.
-.SS The flags argument
-The
-.I flags
-argument is formed by ORing one or more of the following values:
-.TP
-.BR MSG_CMSG_CLOEXEC " (" recvmsg "() only; since Linux 2.6.23)"
-Set the close-on-exec flag for the file descriptor received
-via a UNIX domain file descriptor using the
-.B SCM_RIGHTS
-operation (described in
-.BR unix (7)).
-This flag is useful for the same reasons as the
-.B O_CLOEXEC
-flag of
-.BR open (2).
-.TP
-.BR MSG_DONTWAIT " (since Linux 2.2)"
-Enables nonblocking operation; if the operation would block,
-the call fails with the error
-.BR EAGAIN " or " EWOULDBLOCK .
-This provides similar behavior to setting the
-.B O_NONBLOCK
-flag (via the
-.BR fcntl (2)
-.B F_SETFL
-operation), but differs in that
-.B MSG_DONTWAIT
-is a per-call option, whereas
-.B O_NONBLOCK
-is a setting on the open file description (see
-.BR open (2)),
-which will affect all threads in the calling process
-as well as other processes that hold file descriptors
-referring to the same open file description.
-.TP
-.BR MSG_ERRQUEUE " (since Linux 2.2)"
-This flag
-specifies that queued errors should be received from the socket error queue.
-The error is passed in
-an ancillary message with a type dependent on the protocol (for IPv4
-.BR IP_RECVERR ).
-The user should supply a buffer of sufficient size.
-See
-.BR cmsg (3)
-and
-.BR ip (7)
-for more information.
-The payload of the original packet that caused the error
-is passed as normal data via
-.IR msg_iov .
-The original destination address of the datagram that caused the error
-is supplied via
-.IR msg_name .
-.IP
-The error is supplied in a
-.I sock_extended_err
-structure:
-.IP
-.in +4n
-.EX
-#define SO_EE_ORIGIN_NONE 0
-#define SO_EE_ORIGIN_LOCAL 1
-#define SO_EE_ORIGIN_ICMP 2
-#define SO_EE_ORIGIN_ICMP6 3
-\&
-struct sock_extended_err
-{
- uint32_t ee_errno; /* Error number */
- uint8_t ee_origin; /* Where the error originated */
- uint8_t ee_type; /* Type */
- uint8_t ee_code; /* Code */
- uint8_t ee_pad; /* Padding */
- uint32_t ee_info; /* Additional information */
- uint32_t ee_data; /* Other data */
- /* More data may follow */
-};
-\&
-struct sockaddr *SO_EE_OFFENDER(struct sock_extended_err *);
-.EE
-.in
-.IP
-.I ee_errno
-contains the
-.I errno
-number of the queued error.
-.I ee_origin
-is the origin code of where the error originated.
-The other fields are protocol-specific.
-The macro
-.B SO_EE_OFFENDER
-returns a pointer to the address of the network object
-where the error originated, given a pointer to the ancillary message.
-If this address is not known, the
-.I sa_family
-member of the
-.I sockaddr
-contains
-.B AF_UNSPEC
-and the other fields of the
-.I sockaddr
-are undefined.
-The payload of the packet that caused the error is passed as normal data.
-.IP
-For local errors, no address is passed (this
-can be checked with the
-.I cmsg_len
-member of the
-.IR cmsghdr ).
-For error receives,
-the
-.B MSG_ERRQUEUE
-flag is set in the
-.IR msghdr .
-After an error has been passed, the pending socket error
-is regenerated based on the next queued error and will be passed
-on the next socket operation.
-.TP
-.B MSG_OOB
-This flag requests receipt of out-of-band data that would not be received
-in the normal data stream.
-Some protocols place expedited data
-at the head of the normal data queue, and thus this flag cannot
-be used with such protocols.
-.TP
-.B MSG_PEEK
-This flag causes the receive operation to
-return data from the beginning of the
-receive queue without removing that data from the queue.
-Thus, a
-subsequent receive call will return the same data.
-.TP
-.BR MSG_TRUNC " (since Linux 2.2)"
-For raw
-.RB ( AF_PACKET ),
-Internet datagram (since Linux 2.4.27/2.6.8),
-netlink (since Linux 2.6.22),
-and UNIX datagram as well as sequenced-packet
-.\" commit 9f6f9af7694ede6314bed281eec74d588ba9474f
-(since Linux 3.4) sockets:
-return the real length of the packet or datagram,
-even when it was longer than the passed buffer.
-.IP
-For use with Internet stream sockets, see
-.BR tcp (7).
-.TP
-.BR MSG_WAITALL " (since Linux 2.2)"
-This flag requests that the operation block until the full request is
-satisfied.
-However, the call may still return less data than requested if
-a signal is caught, an error or disconnect occurs, or the next data to be
-received is of a different type than that returned.
-This flag has no effect for datagram sockets.
-.\"
-.SS recvfrom()
-.BR recvfrom ()
-places the received message into the buffer
-.IR buf .
-The caller must specify the size of the buffer in
-.IR len .
-.P
-If
-.I src_addr
-is not NULL,
-and the underlying protocol provides the source address of the message,
-that source address is placed in the buffer pointed to by
-.IR src_addr .
-.\" (Note: for datagram sockets in both the UNIX and Internet domains,
-.\" .I src_addr
-.\" is filled in.
-.\" .I src_addr
-.\" is also filled in for stream sockets in the UNIX domain, but is not
-.\" filled in for stream sockets in the Internet domain.)
-.\" [The above notes on AF_UNIX and AF_INET sockets apply as at
-.\" Kernel 2.4.18. (MTK, 22 Jul 02)]
-In this case,
-.I addrlen
-is a value-result argument.
-Before the call,
-it should be initialized to the size of the buffer associated with
-.IR src_addr .
-Upon return,
-.I addrlen
-is updated to contain the actual size of the source address.
-The returned address is truncated if the buffer provided is too small;
-in this case,
-.I addrlen
-will return a value greater than was supplied to the call.
-.P
-If the caller is not interested in the source address,
-.I src_addr
-and
-.I addrlen
-should be specified as NULL.
-.\"
-.SS recv()
-The
-.BR recv ()
-call is normally used only on a
-.I connected
-socket (see
-.BR connect (2)).
-It is equivalent to the call:
-.P
-.in +4n
-.EX
-recvfrom(sockfd, buf, len, flags, NULL, NULL);
-.EE
-.in
-.\"
-.SS recvmsg()
-The
-.BR recvmsg ()
-call uses a
-.I msghdr
-structure to minimize the number of directly supplied arguments.
-This structure is defined as follows in
-.IR <sys/socket.h> :
-.P
-.in +4n
-.EX
-struct msghdr {
- void *msg_name; /* Optional address */
- socklen_t msg_namelen; /* Size of address */
- struct iovec *msg_iov; /* Scatter/gather array */
- size_t msg_iovlen; /* # elements in msg_iov */
- void *msg_control; /* Ancillary data, see below */
- size_t msg_controllen; /* Ancillary data buffer len */
- int msg_flags; /* Flags on received message */
-};
-.EE
-.in
-.P
-The
-.I msg_name
-field points to a caller-allocated buffer that is used to
-return the source address if the socket is unconnected.
-The caller should set
-.I msg_namelen
-to the size of this buffer before this call;
-upon return from a successful call,
-.I msg_namelen
-will contain the length of the returned address.
-If the application does not need to know the source address,
-.I msg_name
-can be specified as NULL.
-.P
-The fields
-.I msg_iov
-and
-.I msg_iovlen
-describe scatter-gather locations, as discussed in
-.BR readv (2).
-.P
-The field
-.IR msg_control ,
-which has length
-.IR msg_controllen ,
-points to a buffer for other protocol control-related messages or
-miscellaneous ancillary data.
-When
-.BR recvmsg ()
-is called,
-.I msg_controllen
-should contain the length of the available buffer in
-.IR msg_control ;
-upon return from a successful call it will contain the length
-of the control message sequence.
-.P
-The messages are of the form:
-.P
-.in +4n
-.EX
-struct cmsghdr {
- size_t cmsg_len; /* Data byte count, including header
- (type is socklen_t in POSIX) */
- int cmsg_level; /* Originating protocol */
- int cmsg_type; /* Protocol\-specific type */
-/* followed by
- unsigned char cmsg_data[]; */
-};
-.EE
-.in
-.P
-Ancillary data should be accessed only by the macros defined in
-.BR cmsg (3).
-.P
-As an example, Linux uses this ancillary data mechanism to pass extended
-errors, IP options, or file descriptors over UNIX domain sockets.
-For further information on the use of ancillary data in various
-socket domains, see
-.BR unix (7)
-and
-.BR ip (7).
-.P
-The
-.I msg_flags
-field in the
-.I msghdr
-is set on return of
-.BR recvmsg ().
-It can contain several flags:
-.TP
-.B MSG_EOR
-indicates end-of-record; the data returned completed a record (generally
-used with sockets of type
-.BR SOCK_SEQPACKET ).
-.TP
-.B MSG_TRUNC
-indicates that the trailing portion of a datagram was discarded because the
-datagram was larger than the buffer supplied.
-.TP
-.B MSG_CTRUNC
-indicates that some control data was discarded due to lack of space in the
-buffer for ancillary data.
-.TP
-.B MSG_OOB
-is returned to indicate that expedited or out-of-band data was received.
-.TP
-.B MSG_ERRQUEUE
-indicates that no data was received; an extended error from the socket
-error queue was returned instead.
-.TP
-.BR MSG_CMSG_CLOEXEC " (since Linux 2.6.23)"
-.\" commit 4a19542e5f694cd408a32c3d9dc593ba9366e2d7
-indicates that
-.B MSG_CMSG_CLOEXEC
-was specified in the
-.I flags
-argument of
-.BR recvmsg ().
-.SH RETURN VALUE
-These calls return the number of bytes received, or \-1
-if an error occurred.
-In the event of an error,
-.I errno
-is set to indicate the error.
-.P
-When a stream socket peer has performed an orderly shutdown,
-the return value will be 0 (the traditional "end-of-file" return).
-.P
-Datagram sockets in various domains (e.g., the UNIX and Internet domains)
-permit zero-length datagrams.
-When such a datagram is received, the return value is 0.
-.P
-The value 0 may also be returned if the requested number of bytes
-to receive from a stream socket was 0.
-.SH ERRORS
-These are some standard errors generated by the socket layer.
-Additional errors
-may be generated and returned from the underlying protocol modules;
-see their manual pages.
-.TP
-.BR EAGAIN " or " EWOULDBLOCK
-.\" Actually EAGAIN on Linux
-The socket is marked nonblocking and the receive operation
-would block, or a receive timeout had been set and the timeout expired
-before data was received.
-POSIX.1 allows either error to be returned for this case,
-and does not require these constants to have the same value,
-so a portable application should check for both possibilities.
-.TP
-.B EBADF
-The argument
-.I sockfd
-is an invalid file descriptor.
-.TP
-.B ECONNREFUSED
-A remote host refused to allow the network connection (typically
-because it is not running the requested service).
-.TP
-.B EFAULT
-The receive buffer pointer(s) point outside the process's
-address space.
-.TP
-.B EINTR
-The receive was interrupted by delivery of a signal before
-any data was available; see
-.BR signal (7).
-.TP
-.B EINVAL
-Invalid argument passed.
-.\" e.g., msg_namelen < 0 for recvmsg() or addrlen < 0 for recvfrom()
-.TP
-.B ENOMEM
-Could not allocate memory for
-.BR recvmsg ().
-.TP
-.B ENOTCONN
-The socket is associated with a connection-oriented protocol
-and has not been connected (see
-.BR connect (2)
-and
-.BR accept (2)).
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.SH VERSIONS
-According to POSIX.1,
-.\" POSIX.1-2001, POSIX.1-2008
-the
-.I msg_controllen
-field of the
-.I msghdr
-structure should be typed as
-.IR socklen_t ,
-and the
-.I msg_iovlen
-field should be typed as
-.IR int ,
-but glibc currently types both as
-.IR size_t .
-.\" glibc bug for msg_controllen raised 12 Mar 2006
-.\" http://sourceware.org/bugzilla/show_bug.cgi?id=2448
-.\" The problem is an underlying kernel issue: the size of the
-.\" __kernel_size_t type used to type these fields varies
-.\" across architectures, but socklen_t is always 32 bits,
-.\" as (at least with GCC) is int.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001,
-4.4BSD (first appeared in 4.2BSD).
-.P
-POSIX.1 describes only the
-.BR MSG_OOB ,
-.BR MSG_PEEK ,
-and
-.B MSG_WAITALL
-flags.
-.SH NOTES
-If a zero-length datagram is pending,
-.BR read (2)
-and
-.BR recv ()
-with a
-.I flags
-argument of zero provide different behavior.
-In this circumstance,
-.BR read (2)
-has no effect (the datagram remains pending), while
-.BR recv ()
-consumes the pending datagram.
-.P
-See
-.BR recvmmsg (2)
-for information about a Linux-specific system call
-that can be used to receive multiple datagrams in a single call.
-.SH EXAMPLES
-An example of the use of
-.BR recvfrom ()
-is shown in
-.BR getaddrinfo (3).
-.SH SEE ALSO
-.BR fcntl (2),
-.BR getsockopt (2),
-.BR read (2),
-.BR recvmmsg (2),
-.BR select (2),
-.BR shutdown (2),
-.BR socket (2),
-.BR cmsg (3),
-.BR sockatmark (3),
-.BR ip (7),
-.BR ipv6 (7),
-.BR socket (7),
-.BR tcp (7),
-.BR udp (7),
-.BR unix (7)
diff --git a/man2/recvfrom.2 b/man2/recvfrom.2
deleted file mode 100644
index 13228c391..000000000
--- a/man2/recvfrom.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/recv.2
diff --git a/man2/recvmmsg.2 b/man2/recvmmsg.2
deleted file mode 100644
index bf3c9d4ca..000000000
--- a/man2/recvmmsg.2
+++ /dev/null
@@ -1,276 +0,0 @@
-.\" Copyright (C) 2011 by Andi Kleen <andi@firstfloor.org>
-.\" and Copyright (c) 2011 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Syscall added in following commit
-.\" commit a2e2725541fad72416326798c2d7fa4dafb7d337
-.\" Author: Arnaldo Carvalho de Melo <acme@redhat.com>
-.\" Date: Mon Oct 12 23:40:10 2009 -0700
-.\"
-.TH recvmmsg 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-recvmmsg \- receive multiple messages on a socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sys/socket.h>
-.P
-.BI "int recvmmsg(int " sockfd ", struct mmsghdr *" msgvec \
-", unsigned int " vlen ","
-.BI " int " flags ", struct timespec *" timeout ");"
-.fi
-.SH DESCRIPTION
-The
-.BR recvmmsg ()
-system call is an extension of
-.BR recvmsg (2)
-that allows the caller to receive multiple messages from a socket
-using a single system call.
-(This has performance benefits for some applications.)
-A further extension over
-.BR recvmsg (2)
-is support for a timeout on the receive operation.
-.P
-The
-.I sockfd
-argument is the file descriptor of the socket to receive data from.
-.P
-The
-.I msgvec
-argument is a pointer to an array of
-.I mmsghdr
-structures.
-The size of this array is specified in
-.IR vlen .
-.P
-The
-.I mmsghdr
-structure is defined in
-.I <sys/socket.h>
-as:
-.P
-.in +4n
-.EX
-struct mmsghdr {
- struct msghdr msg_hdr; /* Message header */
- unsigned int msg_len; /* Number of received bytes for header */
-};
-.EE
-.in
-.P
-The
-.I msg_hdr
-field is a
-.I msghdr
-structure, as described in
-.BR recvmsg (2).
-The
-.I msg_len
-field is the number of bytes returned for the message in the entry.
-This field has the same value as the return value of a single
-.BR recvmsg (2)
-on the header.
-.P
-The
-.I flags
-argument contains flags ORed together.
-The flags are the same as documented for
-.BR recvmsg (2),
-with the following addition:
-.TP
-.BR MSG_WAITFORONE " (since Linux 2.6.34)"
-Turns on
-.B MSG_DONTWAIT
-after the first message has been received.
-.P
-The
-.I timeout
-argument points to a
-.I struct timespec
-(see
-.BR clock_gettime (2))
-defining a timeout (seconds plus nanoseconds) for the receive operation
-.RI ( "but see BUGS!" ).
-(This interval will be rounded up to the system clock granularity,
-and kernel scheduling delays mean that the blocking interval
-may overrun by a small amount.)
-If
-.I timeout
-is NULL, then the operation blocks indefinitely.
-.P
-A blocking
-.BR recvmmsg ()
-call blocks until
-.I vlen
-messages have been received
-or until the timeout expires.
-A nonblocking call reads as many messages as are available
-(up to the limit specified by
-.IR vlen )
-and returns immediately.
-.P
-On return from
-.BR recvmmsg (),
-successive elements of
-.I msgvec
-are updated to contain information about each received message:
-.I msg_len
-contains the size of the received message;
-the subfields of
-.I msg_hdr
-are updated as described in
-.BR recvmsg (2).
-The return value of the call indicates the number of elements of
-.I msgvec
-that have been updated.
-.SH RETURN VALUE
-On success,
-.BR recvmmsg ()
-returns the number of messages received in
-.IR msgvec ;
-on error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Errors are as for
-.BR recvmsg (2).
-In addition, the following error can occur:
-.TP
-.B EINVAL
-.I timeout
-is invalid.
-.P
-See also BUGS.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.33,
-glibc 2.12.
-.SH BUGS
-The
-.I timeout
-argument does not work as intended.
-.\" FIXME . https://bugzilla.kernel.org/show_bug.cgi?id=75371
-.\" http://thread.gmane.org/gmane.linux.man/5677
-The timeout is checked only after the receipt of each datagram,
-so that if up to
-.I vlen\-1
-datagrams are received before the timeout expires,
-but then no further datagrams are received, the call will block forever.
-.P
-If an error occurs after at least one message has been received,
-the call succeeds, and returns the number of messages received.
-The error code is expected to be returned on a subsequent call to
-.BR recvmmsg ().
-In the current implementation, however, the error code can be overwritten
-in the meantime by an unrelated network event on a socket,
-for example an incoming ICMP packet.
-.SH EXAMPLES
-The following program uses
-.BR recvmmsg ()
-to receive multiple messages on a socket and stores
-them in multiple buffers.
-The call returns if all buffers are filled or if the
-timeout specified has expired.
-.P
-The following snippet periodically generates UDP datagrams
-containing a random number:
-.P
-.in +4n
-.EX
-.RB "$" " while true; do echo $RANDOM > /dev/udp/127.0.0.1/1234;"
-.B " sleep 0.25; done"
-.EE
-.in
-.P
-These datagrams are read by the example application, which
-can give the following output:
-.P
-.in +4n
-.EX
-.RB "$" " ./a.out"
-5 messages received
-1 11782
-2 11345
-3 304
-4 13514
-5 28421
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (recvmmsg.c)
-.EX
-#define _GNU_SOURCE
-#include <arpa/inet.h>
-#include <netinet/in.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <time.h>
-\&
-int
-main(void)
-{
-#define VLEN 10
-#define BUFSIZE 200
-#define TIMEOUT 1
- int sockfd, retval;
- char bufs[VLEN][BUFSIZE+1];
- struct iovec iovecs[VLEN];
- struct mmsghdr msgs[VLEN];
- struct timespec timeout;
- struct sockaddr_in addr;
-\&
- sockfd = socket(AF_INET, SOCK_DGRAM, 0);
- if (sockfd == \-1) {
- perror("socket()");
- exit(EXIT_FAILURE);
- }
-\&
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr.sin_port = htons(1234);
- if (bind(sockfd, (struct sockaddr *) &addr, sizeof(addr)) == \-1) {
- perror("bind()");
- exit(EXIT_FAILURE);
- }
-\&
- memset(msgs, 0, sizeof(msgs));
- for (size_t i = 0; i < VLEN; i++) {
- iovecs[i].iov_base = bufs[i];
- iovecs[i].iov_len = BUFSIZE;
- msgs[i].msg_hdr.msg_iov = &iovecs[i];
- msgs[i].msg_hdr.msg_iovlen = 1;
- }
-\&
- timeout.tv_sec = TIMEOUT;
- timeout.tv_nsec = 0;
-\&
- retval = recvmmsg(sockfd, msgs, VLEN, 0, &timeout);
- if (retval == \-1) {
- perror("recvmmsg()");
- exit(EXIT_FAILURE);
- }
-\&
- printf("%d messages received\en", retval);
- for (size_t i = 0; i < retval; i++) {
- bufs[i][msgs[i].msg_len] = 0;
- printf("%zu %s", i+1, bufs[i]);
- }
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR clock_gettime (2),
-.BR recvmsg (2),
-.BR sendmmsg (2),
-.BR sendmsg (2),
-.BR socket (2),
-.BR socket (7)
diff --git a/man2/recvmsg.2 b/man2/recvmsg.2
deleted file mode 100644
index 13228c391..000000000
--- a/man2/recvmsg.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/recv.2
diff --git a/man2/remap_file_pages.2 b/man2/remap_file_pages.2
deleted file mode 100644
index 0c87352c0..000000000
--- a/man2/remap_file_pages.2
+++ /dev/null
@@ -1,170 +0,0 @@
-.\" Copyright (C) 2003, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2003-12-10 Initial creation, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2004-10-28 aeb, corrected prototype, prot must be 0
-.\"
-.TH remap_file_pages 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-remap_file_pages \- create a nonlinear file mapping
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sys/mman.h>
-.P
-.BI "[[deprecated]] int remap_file_pages(void " addr [. size "], size_t " size ,
-.BI " int " prot ", size_t " pgoff ", \
-int " flags );
-.fi
-.SH DESCRIPTION
-.BR Note :
-.\" commit 33041a0d76d3c3e0aff28ac95a2ffdedf1282dbc
-.\" http://lwn.net/Articles/597632/
-this system call was marked as deprecated starting with Linux 3.16.
-In Linux 4.0, the implementation was replaced
-.\" commit c8d78c1823f46519473949d33f0d1d33fe21ea16
-by a slower in-kernel emulation.
-Those few applications that use this system call should
-consider migrating to alternatives.
-This change was made because the kernel code for this system call was complex,
-and it is believed to be little used or perhaps even completely unused.
-While it had some use cases in database applications on 32-bit systems,
-those use cases don't exist on 64-bit systems.
-.P
-The
-.BR remap_file_pages ()
-system call is used to create a nonlinear mapping, that is, a mapping
-in which the pages of the file are mapped into a nonsequential order
-in memory.
-The advantage of using
-.BR remap_file_pages ()
-over using repeated calls to
-.BR mmap (2)
-is that the former approach does not require the kernel to create
-additional VMA (Virtual Memory Area) data structures.
-.P
-To create a nonlinear mapping we perform the following steps:
-.TP 3
-1.
-Use
-.BR mmap (2)
-to create a mapping (which is initially linear).
-This mapping must be created with the
-.B MAP_SHARED
-flag.
-.TP
-2.
-Use one or more calls to
-.BR remap_file_pages ()
-to rearrange the correspondence between the pages of the mapping
-and the pages of the file.
-It is possible to map the same page of a file
-into multiple locations within the mapped region.
-.P
-The
-.I pgoff
-and
-.I size
-arguments specify the region of the file that is to be relocated
-within the mapping:
-.I pgoff
-is a file offset in units of the system page size;
-.I size
-is the length of the region in bytes.
-.P
-The
-.I addr
-argument serves two purposes.
-First, it identifies the mapping whose pages we want to rearrange.
-Thus,
-.I addr
-must be an address that falls within
-a region previously mapped by a call to
-.BR mmap (2).
-Second,
-.I addr
-specifies the address at which the file pages
-identified by
-.I pgoff
-and
-.I size
-will be placed.
-.P
-The values specified in
-.I addr
-and
-.I size
-should be multiples of the system page size.
-If they are not, then the kernel rounds
-.I both
-values
-.I down
-to the nearest multiple of the page size.
-.\" This rounding is weird, and not consistent with the treatment of
-.\" the analogous arguments for munmap()/mprotect() and for mlock().
-.\" MTK, 14 Sep 2005
-.P
-The
-.I prot
-argument must be specified as 0.
-.P
-The
-.I flags
-argument has the same meaning as for
-.BR mmap (2),
-but all flags other than
-.B MAP_NONBLOCK
-are ignored.
-.SH RETURN VALUE
-On success,
-.BR remap_file_pages ()
-returns 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I addr
-does not refer to a valid mapping
-created with the
-.B MAP_SHARED
-flag.
-.TP
-.B EINVAL
-.IR addr ,
-.IR size ,
-.IR prot ,
-or
-.I pgoff
-is invalid.
-.\" And possibly others from vma->vm_ops->populate()
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.46,
-glibc 2.3.3.
-.SH NOTES
-Since Linux 2.6.23,
-.\" commit 3ee6dafc677a68e461a7ddafc94a580ebab80735
-.BR remap_file_pages ()
-creates nonlinear mappings only
-on in-memory filesystems such as
-.BR tmpfs (5),
-hugetlbfs or ramfs.
-On filesystems with a backing store,
-.BR remap_file_pages ()
-is not much more efficient than using
-.BR mmap (2)
-to adjust which parts of the file are mapped to which addresses.
-.SH SEE ALSO
-.BR getpagesize (2),
-.BR mmap (2),
-.BR mmap2 (2),
-.BR mprotect (2),
-.BR mremap (2),
-.BR msync (2)
diff --git a/man2/removexattr.2 b/man2/removexattr.2
deleted file mode 100644
index 1d3a731e4..000000000
--- a/man2/removexattr.2
+++ /dev/null
@@ -1,100 +0,0 @@
-.\" Copyright (C) Andreas Gruenbacher, February 2001
-.\" Copyright (C) Silicon Graphics Inc, September 2001
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH removexattr 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-removexattr, lremovexattr, fremovexattr \- remove an extended attribute
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/xattr.h>
-.P
-.BI "int removexattr(const char *" path ", const char *" name );
-.BI "int lremovexattr(const char *" path ", const char *" name );
-.BI "int fremovexattr(int " fd ", const char *" name );
-.fi
-.SH DESCRIPTION
-Extended attributes are
-.IR name : value
-pairs associated with inodes (files, directories, symbolic links, etc.).
-They are extensions to the normal attributes which are associated
-with all inodes in the system (i.e., the
-.BR stat (2)
-data).
-A complete overview of extended attributes concepts can be found in
-.BR xattr (7).
-.P
-.BR removexattr ()
-removes the extended attribute identified by
-.I name
-and associated with the given
-.I path
-in the filesystem.
-.P
-.BR lremovexattr ()
-is identical to
-.BR removexattr (),
-except in the case of a symbolic link, where the extended attribute is
-removed from the link itself, not the file that it refers to.
-.P
-.BR fremovexattr ()
-is identical to
-.BR removexattr (),
-only the extended attribute is removed from the open file referred to by
-.I fd
-(as returned by
-.BR open (2))
-in place of
-.IR path .
-.P
-An extended attribute name is a null-terminated string.
-The
-.I name
-includes a namespace prefix; there may be several, disjoint
-namespaces associated with an individual inode.
-.SH RETURN VALUE
-On success, zero is returned.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B ENODATA
-The named attribute does not exist.
-.\" .RB ( ENOATTR
-.\" is defined to be a synonym for
-.\" .BR ENODATA
-.\" in
-.\" .IR <attr/attributes.h> .)
-.TP
-.B ENOTSUP
-Extended attributes are not supported by the filesystem, or are disabled.
-.P
-In addition, the errors documented in
-.BR stat (2)
-can also occur.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.4,
-glibc 2.3.
-.\" .SH AUTHORS
-.\" Andreas Gruenbacher,
-.\" .RI < a.gruenbacher@computer.org >
-.\" and the SGI XFS development team,
-.\" .RI < linux-xfs@oss.sgi.com >.
-.\" Please send any bug reports or comments to these addresses.
-.SH SEE ALSO
-.BR getfattr (1),
-.BR setfattr (1),
-.BR getxattr (2),
-.BR listxattr (2),
-.BR open (2),
-.BR setxattr (2),
-.BR stat (2),
-.BR symlink (7),
-.BR xattr (7)
diff --git a/man2/rename.2 b/man2/rename.2
deleted file mode 100644
index 617d8c29a..000000000
--- a/man2/rename.2
+++ /dev/null
@@ -1,549 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt;
-.\" and Copyright (C) 1993,1995 Ian Jackson
-.\" and Copyright (C) 2006, 2014 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Sat Jul 24 00:35:52 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Thu Jun 4 12:21:13 1998 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified Thu Mar 3 09:49:35 2005 by Michael Haardt <michael@moria.de>
-.\" 2007-03-25, mtk, added various text to DESCRIPTION.
-.\"
-.TH rename 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-rename, renameat, renameat2 \- change the name or location of a file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <stdio.h>
-.P
-.BI "int rename(const char *" oldpath ", const char *" newpath );
-.P
-.BR "#include <fcntl.h> " "/* Definition of " AT_* " constants */"
-.B #include <stdio.h>
-.P
-.BI "int renameat(int " olddirfd ", const char *" oldpath ,
-.BI " int " newdirfd ", const char *" newpath );
-.BI "int renameat2(int " olddirfd ", const char *" oldpath ,
-.BI " int " newdirfd ", const char *" newpath \
-", unsigned int " flags );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.nf
-.BR renameat ():
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.P
-.BR renameat2 ():
- _GNU_SOURCE
-.fi
-.SH DESCRIPTION
-.BR rename ()
-renames a file, moving it between directories if required.
-Any other hard links to the file (as created using
-.BR link (2))
-are unaffected.
-Open file descriptors for
-.I oldpath
-are also unaffected.
-.P
-Various restrictions determine whether or not the rename operation succeeds:
-see ERRORS below.
-.P
-If
-.I newpath
-already exists, it will be atomically replaced, so that there is
-no point at which another process attempting to access
-.I newpath
-will find it missing.
-However, there will probably be a window in which both
-.I oldpath
-and
-.I newpath
-refer to the file being renamed.
-.P
-If
-.I oldpath
-and
-.I newpath
-are existing hard links referring to the same file, then
-.BR rename ()
-does nothing, and returns a success status.
-.P
-If
-.I newpath
-exists but the operation fails for some reason,
-.BR rename ()
-guarantees to leave an instance of
-.I newpath
-in place.
-.P
-.I oldpath
-can specify a directory.
-In this case,
-.I newpath
-must either not exist, or it must specify an empty directory.
-.P
-If
-.I oldpath
-refers to a symbolic link, the link is renamed; if
-.I newpath
-refers to a symbolic link, the link will be overwritten.
-.SS renameat()
-The
-.BR renameat ()
-system call operates in exactly the same way as
-.BR rename (),
-except for the differences described here.
-.P
-If the pathname given in
-.I oldpath
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I olddirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR rename ()
-for a relative pathname).
-.P
-If
-.I oldpath
-is relative and
-.I olddirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I oldpath
-is interpreted relative to the current working
-directory of the calling process (like
-.BR rename ()).
-.P
-If
-.I oldpath
-is absolute, then
-.I olddirfd
-is ignored.
-.P
-The interpretation of
-.I newpath
-is as for
-.IR oldpath ,
-except that a relative pathname is interpreted relative
-to the directory referred to by the file descriptor
-.IR newdirfd .
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR renameat ().
-.SS renameat2()
-.BR renameat2 ()
-has an additional
-.I flags
-argument.
-A
-.BR renameat2 ()
-call with a zero
-.I flags
-argument is equivalent to
-.BR renameat ().
-.P
-The
-.I flags
-argument is a bit mask consisting of zero or more of the following flags:
-.TP
-.B RENAME_EXCHANGE
-Atomically exchange
-.I oldpath
-and
-.IR newpath .
-Both pathnames must exist
-but may be of different types (e.g., one could be a non-empty directory
-and the other a symbolic link).
-.TP
-.B RENAME_NOREPLACE
-Don't overwrite
-.I newpath
-of the rename.
-Return an error if
-.I newpath
-already exists.
-.IP
-.B RENAME_NOREPLACE
-can't be employed together with
-.BR RENAME_EXCHANGE .
-.IP
-.B RENAME_NOREPLACE
-requires support from the underlying filesystem.
-Support for various filesystems was added as follows:
-.RS
-.IP \[bu] 3
-ext4 (Linux 3.15);
-.\" ext4: commit 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c
-.IP \[bu]
-btrfs, tmpfs, and cifs (Linux 3.17);
-.IP \[bu]
-xfs (Linux 4.0);
-.\" btrfs: commit 80ace85c915d0f41016f82917218997b72431258
-.\" tmpfs: commit 3b69ff51d087d265aa4af3a532fc4f20bf33e718
-.\" cifs: commit 7c33d5972ce382bcc506d16235f1e9b7d22cbef8
-.\"
-.\" gfs2 in Linux 4.2?
-.IP \[bu]
-Support for many other filesystems was added in Linux 4.9, including
-ext2, minix, reiserfs, jfs, vfat, and bpf.
-.\" Also affs, bfs, exofs, hfs, hfsplus, jffs2, logfs, msdos,
-.\" nilfs2, omfs, sysvfs, ubifs, udf, ufs
-.\" hugetlbfs, ramfs
-.\" local filesystems: commit f03b8ad8d38634d13e802165cc15917481b47835
-.\" libfs: commit e0e0be8a835520e2f7c89f214dfda570922a1b90
-.RE
-.TP
-.BR RENAME_WHITEOUT " (since Linux 3.18)"
-.\" commit 0d7a855526dd672e114aff2ac22b60fc6f155b08
-.\" commit 787fb6bc9682ec7c05fb5d9561b57100fbc1cc41
-This operation makes sense only for overlay/union
-filesystem implementations.
-.IP
-Specifying
-.B RENAME_WHITEOUT
-creates a "whiteout" object at the source of
-the rename at the same time as performing the rename.
-The whole operation is atomic,
-so that if the rename succeeds then the whiteout will also have been created.
-.IP
-A "whiteout" is an object that has special meaning in union/overlay
-filesystem constructs.
-In these constructs,
-multiple layers exist and only the top one is ever modified.
-A whiteout on an upper layer will effectively hide a
-matching file in the lower layer,
-making it appear as if the file didn't exist.
-.IP
-When a file that exists on the lower layer is renamed,
-the file is first copied up (if not already on the upper layer)
-and then renamed on the upper, read-write layer.
-At the same time, the source file needs to be "whiteouted"
-(so that the version of the source file in the lower layer
-is rendered invisible).
-The whole operation needs to be done atomically.
-.IP
-When not part of a union/overlay,
-the whiteout appears as a character device with a {0,0} device number.
-.\" https://www.freebsd.org/cgi/man.cgi?query=mount_unionfs&manpath=FreeBSD+11.0-RELEASE
-(Note that other union/overlay implementations may employ different methods
-for storing whiteout entries; specifically, BSD union mount employs
-a separate inode type,
-.BR DT_WHT ,
-which, while supported by some filesystems available in Linux,
-such as CODA and XFS, is ignored by the kernel's whiteout support code,
-as of Linux 4.19, at least.)
-.IP
-.B RENAME_WHITEOUT
-requires the same privileges as creating a device node (i.e., the
-.B CAP_MKNOD
-capability).
-.IP
-.B RENAME_WHITEOUT
-can't be employed together with
-.BR RENAME_EXCHANGE .
-.IP
-.B RENAME_WHITEOUT
-requires support from the underlying filesystem.
-Among the filesystems that support it are
-tmpfs (since Linux 3.18),
-.\" tmpfs: commit 46fdb794e3f52ef18b859ebc92f0a9d7db21c5df
-ext4 (since Linux 3.18),
-.\" ext4: commit cd808deced431b66b5fa4e5c193cb7ec0059eaff
-XFS (since Linux 4.1),
-.\" XFS: commit 7dcf5c3e4527cfa2807567b00387cf2ed5e07f00
-f2fs (since Linux 4.2),
-.\" f2fs: commit 7e01e7ad746bc8198a8b46163ddc73a1c7d22339
-btrfs (since Linux 4.7),
-.\" btrfs: commit cdd1fedf8261cd7a73c0596298902ff4f0f04492
-and ubifs (since Linux 4.9).
-.\" ubifs: commit 9e0a1fff8db56eaaebb74b4a3ef65f86811c4798
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Write permission is denied for the directory containing
-.I oldpath
-or
-.IR newpath ,
-or, search permission is denied for one of the directories
-in the path prefix of
-.I oldpath
-or
-.IR newpath ,
-or
-.I oldpath
-is a directory and does not allow write permission (needed to update
-the
-.I ..
-entry).
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBUSY
-The rename fails because
-.IR oldpath " or " newpath
-is a directory that is in use by some process (perhaps as
-current working directory, or as root directory, or because
-it was open for reading) or is in use by the system
-(for example as a mount point), while the system considers
-this an error.
-(Note that there is no requirement to return
-.B EBUSY
-in such
-cases\[em]there is nothing wrong with doing the rename anyway\[em]but
-it is allowed to return
-.B EBUSY
-if the system cannot otherwise
-handle such situations.)
-.TP
-.B EDQUOT
-The user's quota of disk blocks on the filesystem has been exhausted.
-.TP
-.B EFAULT
-.IR oldpath " or " newpath " points outside your accessible address space."
-.TP
-.B EINVAL
-The new pathname contained a path prefix of the old, or, more generally,
-an attempt was made to make a directory a subdirectory of itself.
-.TP
-.B EISDIR
-.I newpath
-is an existing directory, but
-.I oldpath
-is not a directory.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR oldpath " or " newpath .
-.TP
-.B EMLINK
-.I oldpath
-already has the maximum number of links to it, or
-it was a directory and the directory containing
-.I newpath
-has the maximum number of links.
-.TP
-.B ENAMETOOLONG
-.IR oldpath " or " newpath " was too long."
-.TP
-.B ENOENT
-The link named by
-.I oldpath
-does not exist;
-or, a directory component in
-.I newpath
-does not exist;
-or,
-.I oldpath
-or
-.I newpath
-is an empty string.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOSPC
-The device containing the file has no room for the new directory
-entry.
-.TP
-.B ENOTDIR
-A component used as a directory in
-.IR oldpath " or " newpath
-is not, in fact, a directory.
-Or,
-.I oldpath
-is a directory, and
-.I newpath
-exists but is not a directory.
-.TP
-.BR ENOTEMPTY " or " EEXIST
-.I newpath
-is a nonempty directory, that is, contains entries other than "." and "..".
-.TP
-.BR EPERM " or " EACCES
-The directory containing
-.I oldpath
-has the sticky bit
-.RB ( S_ISVTX )
-set and the process's effective user ID is neither
-the user ID of the file to be deleted nor that of the directory
-containing it, and the process is not privileged
-(Linux: does not have the
-.B CAP_FOWNER
-capability);
-or
-.I newpath
-is an existing file and the directory containing it has the sticky bit set
-and the process's effective user ID is neither the user ID of the file
-to be replaced nor that of the directory containing it,
-and the process is not privileged
-(Linux: does not have the
-.B CAP_FOWNER
-capability);
-or the filesystem containing
-.I oldpath
-does not support renaming of the type requested.
-.TP
-.B EROFS
-The file is on a read-only filesystem.
-.TP
-.B EXDEV
-.IR oldpath " and " newpath
-are not on the same mounted filesystem.
-(Linux permits a filesystem to be mounted at multiple points, but
-.BR rename ()
-does not work across different mount points,
-even if the same filesystem is mounted on both.)
-.P
-The following additional errors can occur for
-.BR renameat ()
-and
-.BR renameat2 ():
-.TP
-.B EBADF
-.I oldpath
-.RI ( newpath )
-is relative but
-.I olddirfd
-.RI ( newdirfd )
-is not a valid file descriptor.
-.TP
-.B ENOTDIR
-.I oldpath
-is relative and
-.I olddirfd
-is a file descriptor referring to a file other than a directory;
-or similar for
-.I newpath
-and
-.IR newdirfd .
-.P
-The following additional errors can occur for
-.BR renameat2 ():
-.TP
-.B EEXIST
-.I flags
-contains
-.B RENAME_NOREPLACE
-and
-.I newpath
-already exists.
-.TP
-.B EINVAL
-An invalid flag was specified in
-.IR flags .
-.TP
-.B EINVAL
-Both
-.B RENAME_NOREPLACE
-and
-.B RENAME_EXCHANGE
-were specified in
-.IR flags .
-.TP
-.B EINVAL
-Both
-.B RENAME_WHITEOUT
-and
-.B RENAME_EXCHANGE
-were specified in
-.IR flags .
-.TP
-.B EINVAL
-The filesystem does not support one of the flags in
-.IR flags .
-.TP
-.B ENOENT
-.I flags
-contains
-.B RENAME_EXCHANGE
-and
-.I newpath
-does not exist.
-.TP
-.B EPERM
-.B RENAME_WHITEOUT
-was specified in
-.IR flags ,
-but the caller does not have the
-.B CAP_MKNOD
-capability.
-.SH STANDARDS
-.TP
-.BR rename ()
-C11, POSIX.1-2008.
-.TP
-.BR renameat ()
-POSIX.1-2008.
-.TP
-.BR renameat2 ()
-Linux.
-.SH HISTORY
-.TP
-.BR rename ()
-4.3BSD, C89, POSIX.1-2001.
-.TP
-.BR renameat ()
-Linux 2.6.16,
-glibc 2.4.
-.TP
-.BR renameat2 ()
-Linux 3.15,
-glibc 2.28.
-.SS glibc notes
-On older kernels where
-.BR renameat ()
-is unavailable, the glibc wrapper function falls back to the use of
-.BR rename ().
-When
-.I oldpath
-and
-.I newpath
-are relative pathnames,
-glibc constructs pathnames based on the symbolic links in
-.I /proc/self/fd
-that correspond to the
-.I olddirfd
-and
-.I newdirfd
-arguments.
-.SH BUGS
-On NFS filesystems, you cannot assume that if the operation
-failed, the file was not renamed.
-If the server does the rename operation
-and then crashes, the retransmitted RPC which will be processed when the
-server is up again causes a failure.
-The application is expected to
-deal with this.
-See
-.BR link (2)
-for a similar problem.
-.SH SEE ALSO
-.BR mv (1),
-.BR rename (1),
-.BR chmod (2),
-.BR link (2),
-.BR symlink (2),
-.BR unlink (2),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/renameat.2 b/man2/renameat.2
deleted file mode 100644
index 9b74442c8..000000000
--- a/man2/renameat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/rename.2
diff --git a/man2/renameat2.2 b/man2/renameat2.2
deleted file mode 100644
index 9b74442c8..000000000
--- a/man2/renameat2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/rename.2
diff --git a/man2/request_key.2 b/man2/request_key.2
deleted file mode 100644
index 53c47e450..000000000
--- a/man2/request_key.2
+++ /dev/null
@@ -1,562 +0,0 @@
-.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
-.\" Written by David Howells (dhowells@redhat.com)
-.\" and Copyright (C) 2016 Michael Kerrisk <mtk.man-pages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH request_key 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-request_key \- request a key from the kernel's key management facility
-.SH LIBRARY
-Linux Key Management Utilities
-.RI ( libkeyutils ", " \-lkeyutils )
-.SH SYNOPSIS
-.nf
-.B #include <keyutils.h>
-.P
-.BI "key_serial_t request_key(const char *" type ", const char *" description ,
-.BI " const char *_Nullable " callout_info ,
-.BI " key_serial_t " dest_keyring );
-.fi
-.SH DESCRIPTION
-.BR request_key ()
-attempts to find a key of the given
-.I type
-with a description (name) that matches the specified
-.IR description .
-If such a key could not be found, then the key is optionally created.
-If the key is found or created,
-.BR request_key ()
-attaches it to the keyring whose ID is specified in
-.I dest_keyring
-and returns the key's serial number.
-.P
-.BR request_key ()
-first recursively searches for a matching key in all of the keyrings
-attached to the calling process.
-The keyrings are searched in the order: thread-specific keyring,
-process-specific keyring, and then session keyring.
-.P
-If
-.BR request_key ()
-is called from a program invoked by
-.BR request_key ()
-on behalf of some other process to generate a key, then the keyrings of that
-other process will be searched next,
-using that other process's user ID, group ID,
-supplementary group IDs, and security context to determine access.
-.\" David Howells: we can then have an arbitrarily long sequence
-.\" of "recursive" request-key upcalls. There is no limit, other
-.\" than number of PIDs, etc.
-.P
-The search of the keyring tree is breadth-first:
-the keys in each keyring searched are checked for a match before any child
-keyrings are recursed into.
-Only keys for which the caller has
-.I search
-permission can be found, and only keyrings for which the caller has
-.I search
-permission may be searched.
-.P
-If the key is not found and
-.I callout_info
-is NULL, then the call fails with the error
-.BR ENOKEY .
-.P
-If the key is not found and
-.I callout_info
-is not NULL, then the kernel attempts to invoke a user-space
-program to instantiate the key.
-The details are given below.
-.P
-The
-.I dest_keyring
-serial number may be that of a valid keyring for which the caller has
-.I write
-permission, or it may be one of the following special keyring IDs:
-.TP
-.B KEY_SPEC_THREAD_KEYRING
-This specifies the caller's thread-specific keyring (see
-.BR thread\-keyring (7)).
-.TP
-.B KEY_SPEC_PROCESS_KEYRING
-This specifies the caller's process-specific keyring (see
-.BR process\-keyring (7)).
-.TP
-.B KEY_SPEC_SESSION_KEYRING
-This specifies the caller's session-specific keyring (see
-.BR session\-keyring (7)).
-.TP
-.B KEY_SPEC_USER_KEYRING
-This specifies the caller's UID-specific keyring (see
-.BR user\-keyring (7)).
-.TP
-.B KEY_SPEC_USER_SESSION_KEYRING
-This specifies the caller's UID-session keyring (see
-.BR user\-session\-keyring (7)).
-.P
-When the
-.I dest_keyring
-is specified as 0
-and no key construction has been performed,
-then no additional linking is done.
-.P
-Otherwise, if
-.I dest_keyring
-is 0 and a new key is constructed, the new key will be linked
-to the "default" keyring.
-More precisely, when the kernel tries to determine to which keyring the
-newly constructed key should be linked,
-it tries the following keyrings,
-beginning with the keyring set via the
-.BR keyctl (2)
-.B KEYCTL_SET_REQKEY_KEYRING
-operation and continuing in the order shown below
-until it finds the first keyring that exists:
-.IP \[bu] 3
-.\" 8bbf4976b59fc9fc2861e79cab7beb3f6d647640
-The requestor keyring
-.RB ( KEY_REQKEY_DEFL_REQUESTOR_KEYRING ,
-since Linux 2.6.29).
-.\" FIXME
-.\" Actually, is the preceding point correct?
-.\" If I understand correctly, we'll only get here if
-.\" 'dest_keyring' is zero, in which case KEY_REQKEY_DEFL_REQUESTOR_KEYRING
-.\" won't refer to a keyring. Have I misunderstood?
-.IP \[bu]
-The thread-specific keyring
-.RB ( KEY_REQKEY_DEFL_THREAD_KEYRING ;
-see
-.BR thread\-keyring (7)).
-.IP \[bu]
-The process-specific keyring
-.RB ( KEY_REQKEY_DEFL_PROCESS_KEYRING ;
-see
-.BR process\-keyring (7)).
-.IP \[bu]
-The session-specific keyring
-.RB ( KEY_REQKEY_DEFL_SESSION_KEYRING ;
-see
-.BR session\-keyring (7)).
-.IP \[bu]
-The session keyring for the process's user ID
-.RB ( KEY_REQKEY_DEFL_USER_SESSION_KEYRING ;
-see
-.BR user\-session\-keyring (7)).
-This keyring is expected to always exist.
-.IP \[bu]
-The UID-specific keyring
-.RB ( KEY_REQKEY_DEFL_USER_KEYRING ;
-see
-.BR user\-keyring (7)).
-This keyring is also expected to always exist.
-.\" mtk: Are there circumstances where the user sessions and UID-specific
-.\" keyrings do not exist?
-.\"
-.\" David Howells:
-.\" The uid keyrings don't exist until someone tries to access them -
-.\" at which point they're both created. When you log in, pam_keyinit
-.\" creates a link to your user keyring in the session keyring it just
-.\" created, thereby creating the user and user-session keyrings.
-.\"
-.\" and David elaborated that "access" means:
-.\"
-.\" It means lookup_user_key() was passed KEY_LOOKUP_CREATE. So:
-.\"
-.\" add_key() - destination keyring
-.\" request_key() - destination keyring
-.\" KEYCTL_GET_KEYRING_ID - if create arg is true
-.\" KEYCTL_CLEAR
-.\" KEYCTL_LINK - both args
-.\" KEYCTL_SEARCH - destination keyring
-.\" KEYCTL_CHOWN
-.\" KEYCTL_SETPERM
-.\" KEYCTL_SET_TIMEOUT
-.\" KEYCTL_INSTANTIATE - destination keyring
-.\" KEYCTL_INSTANTIATE_IOV - destination keyring
-.\" KEYCTL_NEGATE - destination keyring
-.\" KEYCTL_REJECT - destination keyring
-.\" KEYCTL_GET_PERSISTENT - destination keyring
-.\"
-.\" will all create a keyring under some circumstances. Whereas the rest,
-.\" such as KEYCTL_GET_SECURITY, KEYCTL_READ and KEYCTL_REVOKE, won't.
-.P
-If the
-.BR keyctl (2)
-.B KEYCTL_SET_REQKEY_KEYRING
-operation specifies
-.B KEY_REQKEY_DEFL_DEFAULT
-(or no
-.B KEYCTL_SET_REQKEY_KEYRING
-operation is performed),
-then the kernel looks for a keyring
-starting from the beginning of the list.
-.\"
-.SS Requesting user-space instantiation of a key
-If the kernel cannot find a key matching
-.I type
-and
-.IR description ,
-and
-.I callout_info
-is not NULL, then the kernel attempts to invoke a user-space
-program to instantiate a key with the given
-.I type
-and
-.IR description .
-In this case, the following steps are performed:
-.IP (1) 5
-The kernel creates an uninstantiated key, U, with the requested
-.I type
-and
-.IR description .
-.IP (2)
-The kernel creates an authorization key, V,
-.\" struct request_key_auth, defined in security/keys/internal.h
-that refers to the key U and records the facts that the caller of
-.BR request_key ()
-is:
-.RS
-.IP (2.1) 7
-the context in which the key U should be instantiated and secured, and
-.IP (2.2)
-the context from which associated key requests may be satisfied.
-.RE
-.IP
-The authorization key is constructed as follows:
-.RS
-.IP \[bu] 3
-The key type is
-.IR \[dq].request_key_auth\[dq] .
-.IP \[bu]
-The key's UID and GID are the same as the corresponding filesystem IDs
-of the requesting process.
-.IP \[bu]
-The key grants
-.IR view ,
-.IR read ,
-and
-.I search
-permissions to the key possessor as well as
-.I view
-permission for the key user.
-.IP \[bu]
-The description (name) of the key is the hexadecimal
-string representing the ID of the key that is to be instantiated
-in the requesting program.
-.IP \[bu]
-The payload of the key is taken from the data specified in
-.IR callout_info .
-.IP \[bu]
-Internally, the kernel also records the PID of the process that called
-.BR request_key ().
-.RE
-.IP (3)
-The kernel creates a process that executes a user-space service such as
-.BR request\-key (8)
-with a new session keyring that contains a link to the authorization key, V.
-.\" The request\-key(8) program can be invoked in circumstances *other* than
-.\" when triggered by request_key(2). For example, upcalls from places such
-.\" as the DNS resolver.
-.IP
-This program is supplied with the following command-line arguments:
-.RS
-.IP [0] 5
-The string
-.IR \[dq]/sbin/request\-key\[dq] .
-.IP [1]
-The string
-.I \[dq]create\[dq]
-(indicating that a key is to be created).
-.IP [2]
-The ID of the key that is to be instantiated.
-.IP [3]
-The filesystem UID of the caller of
-.BR request_key ().
-.IP [4]
-The filesystem GID of the caller of
-.BR request_key ().
-.IP [5]
-The ID of the thread keyring of the caller of
-.BR request_key ().
-This may be zero if that keyring hasn't been created.
-.IP [6]
-The ID of the process keyring of the caller of
-.BR request_key ().
-This may be zero if that keyring hasn't been created.
-.IP [7]
-The ID of the session keyring of the caller of
-.BR request_key ().
-.RE
-.IP
-.IR Note :
-each of the command-line arguments that is a key ID is encoded in
-.I decimal
-(unlike the key IDs shown in
-.IR /proc/keys ,
-which are shown as hexadecimal values).
-.IP (4)
-The program spawned in the previous step:
-.RS
-.IP \[bu] 3
-Assumes the authority to instantiate the key U using the
-.BR keyctl (2)
-.B KEYCTL_ASSUME_AUTHORITY
-operation (typically via the
-.BR keyctl_assume_authority (3)
-function).
-.IP \[bu]
-Obtains the callout data from the payload of the authorization key V
-(using the
-.BR keyctl (2)
-.B KEYCTL_READ
-operation (or, more commonly, the
-.BR keyctl_read (3)
-function) with a key ID value of
-.BR KEY_SPEC_REQKEY_AUTH_KEY ).
-.IP \[bu]
-Instantiates the key
-(or execs another program that performs that task),
-specifying the payload and destination keyring.
-(The destination keyring that the requestor specified when calling
-.BR request_key ()
-can be accessed using the special key ID
-.BR KEY_SPEC_REQUESTOR_KEYRING .)
-.\" Should an instantiating program be using KEY_SPEC_REQUESTOR_KEYRING?
-.\" I couldn't find a use in the keyutils git repo.
-.\" According to David Howells:
-.\" * This feature is provided, but not used at the moment.
-.\" * A key added to that ring is then owned by the requester
-Instantiation is performed using the
-.BR keyctl (2)
-.B KEYCTL_INSTANTIATE
-operation (or, more commonly, the
-.BR keyctl_instantiate (3)
-function).
-At this point, the
-.BR request_key ()
-call completes, and the requesting program can continue execution.
-.RE
-.P
-If these steps are unsuccessful, then an
-.B ENOKEY
-error will be returned to the caller of
-.BR request_key ()
-and a temporary, negatively instantiated key will be installed
-in the keyring specified by
-.IR dest_keyring .
-This will expire after a few seconds, but will cause subsequent calls to
-.BR request_key ()
-to fail until it does.
-The purpose of this negatively instantiated key is to prevent
-(possibly different) processes making repeated requests
-(that require expensive
-.BR request\-key (8)
-upcalls) for a key that can't (at the moment) be positively instantiated.
-.P
-Once the key has been instantiated, the authorization key
-.RB ( KEY_SPEC_REQKEY_AUTH_KEY )
-is revoked, and the destination keyring
-.RB ( KEY_SPEC_REQUESTOR_KEYRING )
-is no longer accessible from the
-.BR request\-key (8)
-program.
-.P
-If a key is created, then\[em]regardless of whether it is a valid key or
-a negatively instantiated key\[em]it will displace any other key with
-the same type and description from the keyring specified in
-.IR dest_keyring .
-.SH RETURN VALUE
-On success,
-.BR request_key ()
-returns the serial number of the key it found or caused to be created.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The keyring wasn't available for modification by the user.
-.TP
-.B EDQUOT
-The key quota for this user would be exceeded by creating this key or linking
-it to the keyring.
-.TP
-.B EFAULT
-One of
-.IR type ,
-.IR description ,
-or
-.I callout_info
-points outside the process's accessible address space.
-.TP
-.B EINTR
-The request was interrupted by a signal; see
-.BR signal (7).
-.TP
-.B EINVAL
-The size of the string (including the terminating null byte) specified in
-.I type
-or
-.I description
-exceeded the limit (32 bytes and 4096 bytes respectively).
-.TP
-.B EINVAL
-The size of the string (including the terminating null byte) specified in
-.I callout_info
-exceeded the system page size.
-.TP
-.B EKEYEXPIRED
-An expired key was found, but no replacement could be obtained.
-.TP
-.B EKEYREJECTED
-The attempt to generate a new key was rejected.
-.TP
-.B EKEYREVOKED
-A revoked key was found, but no replacement could be obtained.
-.TP
-.B ENOKEY
-No matching key was found.
-.TP
-.B ENOMEM
-Insufficient memory to create a key.
-.TP
-.B EPERM
-The
-.I type
-argument started with a period (\[aq].\[aq]).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.10.
-.P
-The ability to instantiate keys upon request was added
-.\" commit 3e30148c3d524a9c1c63ca28261bc24c457eb07a
-in Linux 2.6.13.
-.SH EXAMPLES
-The program below demonstrates the use of
-.BR request_key ().
-The
-.IR type ,
-.IR description ,
-and
-.I callout_info
-arguments for the system call are taken from the values
-supplied in the command-line arguments.
-The call specifies the session keyring as the target keyring.
-.P
-In order to demonstrate this program,
-we first create a suitable entry in the file
-.IR /etc/request\-key.conf .
-.P
-.in +4n
-.EX
-$ sudo sh
-# \fBecho \[aq]create user mtk:* * /bin/keyctl instantiate %k %c %S\[aq] \e\fP
- \fB> /etc/request\-key.conf\fP
-# \fBexit\fP
-.EE
-.in
-.P
-This entry specifies that when a new "user" key with the prefix
-"mtk:" must be instantiated, that task should be performed via the
-.BR keyctl (1)
-command's
-.B instantiate
-operation.
-The arguments supplied to the
-.B instantiate
-operation are:
-the ID of the uninstantiated key
-.RI ( %k );
-the callout data supplied to the
-.BR request_key ()
-call
-.RI ( %c );
-and the session keyring
-.RI ( %S )
-of the requestor (i.e., the caller of
-.BR request_key ()).
-See
-.BR request\-key.conf (5)
-for details of these
-.I %
-specifiers.
-.P
-Then we run the program and check the contents of
-.I /proc/keys
-to verify that the requested key has been instantiated:
-.P
-.in +4n
-.EX
-$ \fB./t_request_key user mtk:key1 "Payload data"\fP
-$ \fBgrep \[aq]2dddaf50\[aq] /proc/keys\fP
-2dddaf50 I\-\-Q\-\-\- 1 perm 3f010000 1000 1000 user mtk:key1: 12
-.EE
-.in
-.P
-For another example of the use of this program, see
-.BR keyctl (2).
-.SS Program source
-\&
-.\" SRC BEGIN (t_request_key.c)
-.EX
-/* t_request_key.c */
-\&
-#include <keyutils.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-\&
-int
-main(int argc, char *argv[])
-{
- key_serial_t key;
-\&
- if (argc != 4) {
- fprintf(stderr, "Usage: %s type description callout\-data\en",
- argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- key = request_key(argv[1], argv[2], argv[3],
- KEY_SPEC_SESSION_KEYRING);
- if (key == \-1) {
- perror("request_key");
- exit(EXIT_FAILURE);
- }
-\&
- printf("Key ID is %jx\en", (uintmax_t) key);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.ad l
-.nh
-.BR keyctl (1),
-.BR add_key (2),
-.BR keyctl (2),
-.BR keyctl (3),
-.BR capabilities (7),
-.BR keyrings (7),
-.BR keyutils (7),
-.BR persistent\-keyring (7),
-.BR process\-keyring (7),
-.BR session\-keyring (7),
-.BR thread\-keyring (7),
-.BR user\-keyring (7),
-.BR user\-session\-keyring (7),
-.BR request\-key (8)
-.P
-The kernel source files
-.I Documentation/security/keys/core.rst
-and
-.I Documentation/security/keys/request\-key.rst
-(or, before Linux 4.13, in the files
-.\" commit b68101a1e8f0263dbc7b8375d2a7c57c6216fb76
-.I Documentation/security/keys.txt
-and
-.\" commit 3db38ed76890565772fcca3279cc8d454ea6176b
-.IR Documentation/security/keys\-request\-key.txt ).
diff --git a/man2/restart_syscall.2 b/man2/restart_syscall.2
deleted file mode 100644
index c90bb3827..000000000
--- a/man2/restart_syscall.2
+++ /dev/null
@@ -1,123 +0,0 @@
-.\" Copyright (c) 2013 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" http://thread.gmane.org/gmane.linux.kernel/76552/focus=76803
-.\" From: Linus Torvalds <torvalds <at> transmeta.com>
-.\" Subject: Re: [PATCH] compatibility syscall layer (lets try again)
-.\" Newsgroups: gmane.linux.kernel
-.\" Date: 2002-12-05 02:51:12 GMT
-.\"
-.\" See also Section 11.3.3 of Understanding the Linux Kernel, 3rd edition
-.\"
-.TH restart_syscall 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-restart_syscall \- restart a system call after interruption by a stop signal
-.SH SYNOPSIS
-.nf
-.B long restart_syscall(void);
-.fi
-.P
-.IR Note :
-There is no glibc wrapper for this system call; see NOTES.
-.SH DESCRIPTION
-The
-.BR restart_syscall ()
-system call is used to restart certain system calls
-after a process that was stopped by a signal (e.g.,
-.B SIGSTOP
-or
-.BR SIGTSTP )
-is later resumed after receiving a
-.B SIGCONT
-signal.
-This system call is designed only for internal use by the kernel.
-.P
-.BR restart_syscall ()
-is used for restarting only those system calls that,
-when restarted, should adjust their time-related parameters\[em]namely
-.BR poll (2)
-(since Linux 2.6.24),
-.BR nanosleep (2)
-(since Linux 2.6),
-.BR clock_nanosleep (2)
-(since Linux 2.6),
-and
-.BR futex (2),
-when employed with the
-.B FUTEX_WAIT
-(since Linux 2.6.22)
-and
-.B FUTEX_WAIT_BITSET
-(since Linux 2.6.31)
-operations.
-.\" These system calls correspond to the special internal errno value
-.\" ERESTART_RESTARTBLOCK. Each of the system calls has a "restart"
-.\" helper function that is invoked by restart_syscall().
-.\" Notable (as at Linux 3.17) is that poll() has such a "restart"
-.\" function, but ppoll(), select(), and pselect() do not.
-.\" This means that the latter system calls do not take account of the
-.\" time spent in the stopped state when restarting.
-.BR restart_syscall ()
-restarts the interrupted system call with a
-time argument that is suitably adjusted to account for the
-time that has already elapsed (including the time where the process
-was stopped by a signal).
-Without the
-.BR restart_syscall ()
-mechanism, restarting these system calls would not correctly deduct the
-already elapsed time when the process continued execution.
-.SH RETURN VALUE
-The return value of
-.BR restart_syscall ()
-is the return value of whatever system call is being restarted.
-.SH ERRORS
-.I errno
-is set as per the errors for whatever system call is being restarted by
-.BR restart_syscall ().
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.
-.SH NOTES
-There is no glibc wrapper for this system call,
-because it is intended for use only by the kernel and
-should never be called by applications.
-.P
-The kernel uses
-.BR restart_syscall ()
-to ensure that when a system call is restarted
-after a process has been stopped by a signal and then resumed by
-.BR SIGCONT ,
-then the time that the process spent in the stopped state is counted
-against the timeout interval specified in the original system call.
-In the case of system calls that take a timeout argument and
-automatically restart after a stop signal plus
-.BR SIGCONT ,
-but which do not have the
-.BR restart_syscall ()
-mechanism built in, then, after the process resumes execution,
-the time that the process spent in the stopped state is
-.I not
-counted against the timeout value.
-Notable examples of system calls that suffer this problem are
-.BR ppoll (2),
-.BR select (2),
-and
-.BR pselect (2).
-.P
-From user space, the operation of
-.BR restart_syscall ()
-is largely invisible:
-to the process that made the system call that is restarted,
-it appears as though that system call executed and
-returned in the usual fashion.
-.SH SEE ALSO
-.BR sigaction (2),
-.BR sigreturn (2),
-.BR signal (7)
-.\" FIXME . ppoll(2), select(2), and pselect(2)
-.\" should probably get the restart_syscall() treatment:
-.\" If a select() call is suspended by stop-sig+SIGCONT, the time
-.\" spent suspended is *not* deducted when the select() is restarted.
-.\" FIXME . check whether recvmmsg() handles stop-sig+SIGCONT properly.
diff --git a/man2/rmdir.2 b/man2/rmdir.2
deleted file mode 100644
index b3ab4904d..000000000
--- a/man2/rmdir.2
+++ /dev/null
@@ -1,128 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH rmdir 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-rmdir \- delete a directory
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int rmdir(const char *" pathname );
-.fi
-.SH DESCRIPTION
-.BR rmdir ()
-deletes a directory, which must be empty.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Write access to the directory containing
-.I pathname
-was not allowed, or one of the directories in the path prefix of
-.I pathname
-did not allow search permission.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBUSY
-.I pathname
-is currently in use by the system or some process that prevents its
-removal.
-On Linux, this means
-.I pathname
-is currently used as a mount point
-or is the root directory of the calling process.
-.TP
-.B EFAULT
-.IR pathname " points outside your accessible address space."
-.TP
-.B EINVAL
-.I pathname
-has
-.I .
-as last component.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR pathname .
-.TP
-.B ENAMETOOLONG
-.IR pathname " was too long."
-.TP
-.B ENOENT
-A directory component in
-.I pathname
-does not exist or is a dangling symbolic link.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-.IR pathname ,
-or a component used as a directory in
-.IR pathname ,
-is not, in fact, a directory.
-.TP
-.B ENOTEMPTY
-.I pathname
-contains entries other than
-.IR . " and " .. " ;"
-or,
-.I pathname
-has
-.I ..
-as its final component.
-POSIX.1 also allows
-.\" POSIX.1-2001, POSIX.1-2008
-.B EEXIST
-for this condition.
-.TP
-.B EPERM
-The directory containing
-.I pathname
-has the sticky bit
-.RB ( S_ISVTX )
-set and the process's effective user ID is neither the user ID
-of the file to be deleted nor that of the directory containing it,
-and the process is not privileged (Linux: does not have the
-.B CAP_FOWNER
-capability).
-.TP
-.B EPERM
-The filesystem containing
-.I pathname
-does not support the removal of directories.
-.TP
-.B EROFS
-.I pathname
-refers to a directory on a read-only filesystem.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.SH BUGS
-Infelicities in the protocol underlying NFS can cause the unexpected
-disappearance of directories which are still being used.
-.SH SEE ALSO
-.BR rm (1),
-.BR rmdir (1),
-.BR chdir (2),
-.BR chmod (2),
-.BR mkdir (2),
-.BR rename (2),
-.BR unlink (2),
-.BR unlinkat (2)
diff --git a/man2/rt_sigaction.2 b/man2/rt_sigaction.2
deleted file mode 100644
index d642d26cf..000000000
--- a/man2/rt_sigaction.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sigaction.2
diff --git a/man2/rt_sigpending.2 b/man2/rt_sigpending.2
deleted file mode 100644
index 304adff9e..000000000
--- a/man2/rt_sigpending.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sigpending.2
diff --git a/man2/rt_sigprocmask.2 b/man2/rt_sigprocmask.2
deleted file mode 100644
index 5eab7ac86..000000000
--- a/man2/rt_sigprocmask.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sigprocmask.2
diff --git a/man2/rt_sigqueueinfo.2 b/man2/rt_sigqueueinfo.2
deleted file mode 100644
index 59d1d2cbe..000000000
--- a/man2/rt_sigqueueinfo.2
+++ /dev/null
@@ -1,195 +0,0 @@
-.\" Copyright (c) 2002, 2011 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH rt_sigqueueinfo 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-rt_sigqueueinfo, rt_tgsigqueueinfo \- queue a signal and data
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/signal.h>" " /* Definition of " SI_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_rt_sigqueueinfo, pid_t " tgid ,
-.BI " int " sig ", siginfo_t *" info );
-.BI "int syscall(SYS_rt_tgsigqueueinfo, pid_t " tgid ", pid_t " tid ,
-.BI " int " sig ", siginfo_t *" info );
-.fi
-.P
-.IR Note :
-There are no glibc wrappers for these system calls; see NOTES.
-.SH DESCRIPTION
-The
-.BR rt_sigqueueinfo ()
-and
-.BR rt_tgsigqueueinfo ()
-system calls are the low-level interfaces used to send a signal plus data
-to a process or thread.
-The receiver of the signal can obtain the accompanying data
-by establishing a signal handler with the
-.BR sigaction (2)
-.B SA_SIGINFO
-flag.
-.P
-These system calls are not intended for direct application use;
-they are provided to allow the implementation of
-.BR sigqueue (3)
-and
-.BR pthread_sigqueue (3).
-.P
-The
-.BR rt_sigqueueinfo ()
-system call sends the signal
-.I sig
-to the thread group with the ID
-.IR tgid .
-(The term "thread group" is synonymous with "process", and
-.I tgid
-corresponds to the traditional UNIX process ID.)
-The signal will be delivered to an arbitrary member of the thread group
-(i.e., one of the threads that is not currently blocking the signal).
-.P
-The
-.I info
-argument specifies the data to accompany the signal.
-This argument is a pointer to a structure of type
-.IR siginfo_t ,
-described in
-.BR sigaction (2)
-(and defined by including
-.IR <signal.h> ).
-The caller should set the following fields in this structure:
-.TP
-.I si_code
-This should be one of the
-.B SI_*
-codes in the Linux kernel source file
-.IR include/asm\-generic/siginfo.h .
-If the signal is being sent to any process other than the caller itself,
-the following restrictions apply:
-.RS
-.IP \[bu] 3
-The code can't be a value greater than or equal to zero.
-In particular, it can't be
-.BR SI_USER ,
-which is used by the kernel to indicate a signal sent by
-.BR kill (2),
-and nor can it be
-.BR SI_KERNEL ,
-which is used to indicate a signal generated by the kernel.
-.IP \[bu]
-The code can't (since Linux 2.6.39) be
-.BR SI_TKILL ,
-which is used by the kernel to indicate a signal sent using
-.\" tkill(2) or
-.BR tgkill (2).
-.RE
-.TP
-.I si_pid
-This should be set to a process ID,
-typically the process ID of the sender.
-.TP
-.I si_uid
-This should be set to a user ID,
-typically the real user ID of the sender.
-.TP
-.I si_value
-This field contains the user data to accompany the signal.
-For more information, see the description of the last
-.RI ( "union sigval" )
-argument of
-.BR sigqueue (3).
-.P
-Internally, the kernel sets the
-.I si_signo
-field to the value specified in
-.IR sig ,
-so that the receiver of the signal can also obtain
-the signal number via that field.
-.P
-The
-.BR rt_tgsigqueueinfo ()
-system call is like
-.BR rt_sigqueueinfo (),
-but sends the signal and data to the single thread
-specified by the combination of
-.IR tgid ,
-a thread group ID,
-and
-.IR tid ,
-a thread in that thread group.
-.SH RETURN VALUE
-On success, these system calls return 0.
-On error, they return \-1 and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-The limit of signals which may be queued has been reached.
-(See
-.BR signal (7)
-for further information.)
-.TP
-.B EINVAL
-.IR sig ,
-.IR tgid ,
-or
-.I tid
-was invalid.
-.TP
-.B EPERM
-The caller does not have permission to send the signal to the target.
-For the required permissions, see
-.BR kill (2).
-.TP
-.B EPERM
-.I tgid
-specifies a process other than the caller and
-.I info\->si_code
-is invalid.
-.TP
-.B ESRCH
-.BR rt_sigqueueinfo ():
-No thread group matching
-.I tgid
-was found.
-.IP
-.BR rt_tgsigqueueinfo ():
-No thread matching
-.I tgid
-and
-.I tid
-was found.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR rt_sigqueueinfo ()
-Linux 2.2.
-.TP
-.BR rt_tgsigqueueinfo ()
-Linux 2.6.31.
-.SH NOTES
-Since these system calls are not intended for application use,
-there are no glibc wrapper functions; use
-.BR syscall (2)
-in the unlikely case that you want to call them directly.
-.P
-As with
-.BR kill (2),
-the null signal (0) can be used to check if the specified process
-or thread exists.
-.SH SEE ALSO
-.BR kill (2),
-.BR pidfd_send_signal (2),
-.BR sigaction (2),
-.BR sigprocmask (2),
-.BR tgkill (2),
-.BR pthread_sigqueue (3),
-.BR sigqueue (3),
-.BR signal (7)
diff --git a/man2/rt_sigreturn.2 b/man2/rt_sigreturn.2
deleted file mode 100644
index 830b7b99f..000000000
--- a/man2/rt_sigreturn.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sigreturn.2
diff --git a/man2/rt_sigsuspend.2 b/man2/rt_sigsuspend.2
deleted file mode 100644
index 96d99c4c0..000000000
--- a/man2/rt_sigsuspend.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sigsuspend.2
diff --git a/man2/rt_sigtimedwait.2 b/man2/rt_sigtimedwait.2
deleted file mode 100644
index ca098e5fc..000000000
--- a/man2/rt_sigtimedwait.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sigtimedwait.2
diff --git a/man2/rt_tgsigqueueinfo.2 b/man2/rt_tgsigqueueinfo.2
deleted file mode 100644
index 7b6cf6814..000000000
--- a/man2/rt_tgsigqueueinfo.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/rt_sigqueueinfo.2
diff --git a/man2/s390_guarded_storage.2 b/man2/s390_guarded_storage.2
deleted file mode 100644
index dd77e010c..000000000
--- a/man2/s390_guarded_storage.2
+++ /dev/null
@@ -1,162 +0,0 @@
-.\" Copyright (C) 2018 Eugene Syromyatnikov <evgsyr@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH s390_guarded_storage 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-s390_guarded_storage \- operations with z/Architecture guarded storage facility
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <asm/guarded_storage.h> " "/* Definition of " GS_* " constants */"
-.BR "#include <sys/syscall.h> " \
-"/* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_s390_guarded_storage, int " command ,
-.BI " struct gs_cb *" gs_cb );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR s390_guarded_storage (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR s390_guarded_storage ()
-system call enables the use of the Guarded Storage Facility
-(a z/Architecture-specific feature) for user-space processes.
-.P
-.\" The description is based on
-.\" http://www-05.ibm.com/de/linux-on-z-ws-us/agenda/pdfs/8_-_Linux_Whats_New_-_Stefan_Raspl.pdf
-.\" and "z/Architecture Principles of Operation" obtained from
-.\" http://publibfi.boulder.ibm.com/epubs/pdf/dz9zr011.pdf
-The guarded storage facility is a hardware feature that allows marking up to
-64 memory regions (as of z14) as guarded;
-reading a pointer with the newly introduced "Load Guarded" (LGG)
-or "Load Logical and Shift Guarded" (LLGFSG) instruction will cause
-a range check on the loaded value and invoke a (previously set up)
-user-space handler if one of the guarded regions is affected.
-.P
-The
-.\" The command description is copied from v4.12-rc1~139^2~56^2 commit message
-.I command
-argument indicates which function to perform.
-The following commands are supported:
-.TP
-.B GS_ENABLE
-Enable the guarded storage facility for the calling task.
-The initial content of the guarded storage control block will be all zeros.
-After enablement, user-space code can use the "Load Guarded Storage
-Controls" (LGSC) instruction (or the
-.BR load_gs_cb ()
-function wrapper provided in the
-.I asm/guarded_storage.h
-header) to load an arbitrary control block.
-While a task is enabled, the kernel will save and restore the current content
-of the guarded storage registers on context switch.
-.TP
-.B GS_DISABLE
-Disables the use of the guarded storage facility for the calling task.
-The kernel will cease to save and restore the content of the guarded storage
-registers; the task-specific content of these registers is lost.
-.TP
-.B GS_SET_BC_CB
-Set a broadcast guarded storage control block to the one provided in the
-.I gs_cb
-argument.
-This is called per thread and associates a specific guarded storage control
-block with the calling task.
-This control block will be used in the broadcast command
-.BR GS_BROADCAST .
-.TP
-.B GS_CLEAR_BC_CB
-Clears the broadcast guarded storage control block.
-The guarded storage control block will no longer have the association
-established by the
-.B GS_SET_BC_CB
-command.
-.TP
-.B GS_BROADCAST
-Sends a broadcast to all thread siblings of the calling task.
-Every sibling that has established a broadcast guarded storage control block
-will load this control block and will be enabled for guarded storage.
-The broadcast guarded storage control block is consumed; a second broadcast
-without a refresh of the stored control block with
-.B GS_SET_BC_CB
-will not have any effect.
-.P
-The
-.I gs_cb
-argument specifies the address of a guarded storage control block structure
-and is currently used only by the
-.B GS_SET_BC_CB
-command; all other aforementioned commands ignore this argument.
-.SH RETURN VALUE
-On success, the return value of
-.BR s390_guarded_storage ()
-is 0.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I command
-was
-.B GS_SET_BC_CB
-and the copying of the guarded storage control block structure pointed to by the
-.I gs_cb
-argument has failed.
-.TP
-.B EINVAL
-The value provided in the
-.I command
-argument was not valid.
-.TP
-.B ENOMEM
-.I command
-was one of
-.BR GS_ENABLE " or " GS_SET_BC_CB ,
-and the allocation of a new guarded storage control block has failed.
-.TP
-.B EOPNOTSUPP
-The guarded storage facility is not supported by the hardware.
-.SH STANDARDS
-Linux on s390.
-.SH HISTORY
-.\" 916cda1aa1b412d7cf2991c3af7479544942d121, v4.12-rc1~139^2~56^2
-Linux 4.12.
-System z14.
-.SH NOTES
-The description of the guarded storage facility along with related
-instructions and Guarded Storage Control Block and
-Guarded Storage Event Parameter List structure layouts
-is available in "z/Architecture Principles of Operation"
-beginning from the twelfth edition.
-.P
-The
-.I gs_cb
-structure has a field
-.I gsepla
-(Guarded Storage Event Parameter List Address), which is a user-space pointer
-to a Guarded Storage Event Parameter List structure
-(that contains the address
-of the aforementioned event handler in the
-.I gseha
-field), and its layout is available as a
-.B gs_epl
-structure type definition in the
-.I asm/guarded_storage.h
-header.
-.\" .P
-.\" For the example of using the guarded storage facility, see
-.\" .UR https://developer.ibm.com/javasdk/2017/09/25/concurrent-scavenge-using-guarded-storage-facility-works/
-.\" the article with the description of its usage in the Java Garbage Collection
-.\" .UE
-.SH SEE ALSO
-.BR syscall (2)
diff --git a/man2/s390_pci_mmio_read.2 b/man2/s390_pci_mmio_read.2
deleted file mode 100644
index dedc39050..000000000
--- a/man2/s390_pci_mmio_read.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/s390_pci_mmio_write.2
diff --git a/man2/s390_pci_mmio_write.2 b/man2/s390_pci_mmio_write.2
deleted file mode 100644
index 00875f20a..000000000
--- a/man2/s390_pci_mmio_write.2
+++ /dev/null
@@ -1,94 +0,0 @@
-.\" Copyright (c) IBM Corp. 2015
-.\" Author: Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH s390_pci_mmio_write 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-s390_pci_mmio_write, s390_pci_mmio_read \- transfer data to/from PCI
-MMIO memory page
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_s390_pci_mmio_write, unsigned long " mmio_addr ,
-.BI " const void " user_buffer [. length "], \
-size_t " length );
-.BI "int syscall(SYS_s390_pci_mmio_read, unsigned long " mmio_addr ,
-.BI " void " user_buffer [. length "], size_t " length );
-.fi
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR s390_pci_mmio_write ()
-system call writes
-.I length
-bytes of data from the user-space buffer
-.I user_buffer
-to the PCI MMIO memory location specified by
-.IR mmio_addr .
-The
-.BR s390_pci_mmio_read ()
-system call reads
-.I length
-bytes of
-data from the PCI MMIO memory location specified by
-.I mmio_addr
-to the user-space buffer
-.IR user_buffer .
-.P
-These system calls must be used instead of the simple assignment
-or data-transfer operations that are used to access the PCI MMIO
-memory areas mapped to user space on the Linux System z platform.
-The address specified by
-.I mmio_addr
-must belong to a PCI MMIO memory page mapping in the caller's address space,
-and the data being written or read must not cross a page boundary.
-The
-.I length
-value cannot be greater than the system page size.
-.SH RETURN VALUE
-On success,
-.BR s390_pci_mmio_write ()
-and
-.BR s390_pci_mmio_read ()
-return 0.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-The address in
-.I mmio_addr
-is invalid.
-.TP
-.B EFAULT
-.I user_buffer
-does not point to a valid location in the caller's address space.
-.TP
-.B EINVAL
-Invalid
-.I length
-argument.
-.TP
-.B ENODEV
-PCI support is not enabled.
-.TP
-.B ENOMEM
-Insufficient memory.
-.SH STANDARDS
-Linux on s390.
-.SH HISTORY
-Linux 3.19.
-System z EC12.
-.SH SEE ALSO
-.BR syscall (2)
diff --git a/man2/s390_runtime_instr.2 b/man2/s390_runtime_instr.2
deleted file mode 100644
index cb52fd43d..000000000
--- a/man2/s390_runtime_instr.2
+++ /dev/null
@@ -1,104 +0,0 @@
-.\" Copyright (c) IBM Corp. 2012
-.\" Author: Jan Glauber <jang@linux.vnet.ibm.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH s390_runtime_instr 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-s390_runtime_instr \- enable/disable s390 CPU run-time instrumentation
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <asm/runtime_instr.h>" " /* Definition of " S390_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_s390_runtime_instr, int " command ", int " signum );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR s390_runtime_instr (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR s390_runtime_instr ()
-system call starts or stops CPU run-time instrumentation for the
-calling thread.
-.P
-The
-.I command
-argument controls whether run-time instrumentation is started
-.RB ( S390_RUNTIME_INSTR_START ,
-1) or stopped
-.RB ( S390_RUNTIME_INSTR_STOP ,
-2) for the calling thread.
-.P
-The
-.I signum
-argument specifies the number of a real-time signal.
-This argument was used to specify a signal number that should be delivered
-to the thread if the run-time instrumentation buffer was full or if
-the run-time-instrumentation-halted interrupt had occurred.
-This feature was never used,
-and in Linux 4.4 support for this feature was removed;
-.\" commit b38feccd663b55ab07116208b68e1ffc7c3c7e78
-thus, in current kernels, this argument is ignored.
-.SH RETURN VALUE
-On success,
-.BR s390_runtime_instr ()
-returns 0 and enables the thread for
-run-time instrumentation by assigning the thread a default run-time
-instrumentation control block.
-The caller can then read and modify the control block and start the run-time
-instrumentation.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-The value specified in
-.I command
-is not a valid command.
-.TP
-.B EINVAL
-The value specified in
-.I signum
-is not a real-time signal number.
-From Linux 4.4 onwards, the
-.I signum
-argument has no effect,
-so that an invalid signal number will not result in an error.
-.TP
-.B ENOMEM
-Allocating memory for the run-time instrumentation control block failed.
-.TP
-.B EOPNOTSUPP
-The run-time instrumentation facility is not available.
-.SH STANDARDS
-Linux on s390.
-.SH HISTORY
-Linux 3.7.
-System z EC12.
-.SH NOTES
-The
-.I asm/runtime_instr.h
-header file is available
-.\" commit df2f815a7df7edb5335a3bdeee6a8f9f6f9c35c4
-since Linux 4.16.
-.P
-Starting with Linux 4.4,
-support for signalling was removed, as was the check whether
-.I signum
-is a valid real-time signal.
-For backwards compatibility with older kernels, it is recommended to pass
-a valid real-time signal number in
-.I signum
-and install a handler for that signal.
-.SH SEE ALSO
-.BR syscall (2),
-.BR signal (7)
diff --git a/man2/s390_sthyi.2 b/man2/s390_sthyi.2
deleted file mode 100644
index 9e0ebb0b7..000000000
--- a/man2/s390_sthyi.2
+++ /dev/null
@@ -1,133 +0,0 @@
-.\" Copyright IBM Corp. 2017
-.\" Author: QingFeng Hao <haoqf@linux.vnet.ibm.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH s390_sthyi 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-s390_sthyi \- emulate STHYI instruction
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <asm/sthyi.h>" " /* Definition of " STHYI_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_s390_sthyi, unsigned long " function_code ,
-.BI " void *" resp_buffer ", uint64_t *" return_code ,
-.BI " unsigned long " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR s390_sthyi (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR s390_sthyi ()
-system call emulates the STHYI (Store Hypervisor Information) instruction.
-It provides hardware resource information for the machine and its
-virtualization levels.
-This includes CPU type and capacity, as well as the machine model and
-other metrics.
-.P
-The
-.I function_code
-argument indicates which function to perform.
-The following code(s) are supported:
-.TP
-.B STHYI_FC_CP_IFL_CAP
-Return CP (Central Processor) and IFL (Integrated Facility for Linux)
-capacity information.
-.P
-The
-.I resp_buffer
-argument specifies the address of a response buffer.
-When the
-.I function_code
-is
-.BR STHYI_FC_CP_IFL_CAP ,
-the buffer must be one page (4K) in size.
-If the system call returns 0,
-the response buffer will be filled with CPU capacity information.
-Otherwise, the response buffer's content is unchanged.
-.P
-The
-.I return_code
-argument stores the return code of the STHYI instruction,
-using one of the following values:
-.TP
-0
-Success.
-.TP
-4
-Unsupported function code.
-.P
-For further details about
-.IR return_code ,
-.IR function_code ,
-and
-.IR resp_buffer ,
-see the reference given in NOTES.
-.P
-The
-.I flags
-argument is provided to allow for future extensions and currently
-must be set to 0.
-.SH RETURN VALUE
-On success (that is: emulation succeeded), the return value of
-.BR s390_sthyi ()
-matches the condition code of the STHYI instruction, which is a value
-in the range [0..3].
-A return value of 0 indicates that CPU capacity information is stored in
-.IR *resp_buffer .
-A return value of 3 indicates "unsupported function code" and the content of
-.I *resp_buffer
-is unchanged.
-The return values 1 and 2 are reserved.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-The value specified in
-.I resp_buffer
-or
-.I return_code
-is not a valid address.
-.TP
-.B EINVAL
-The value specified in
-.I flags
-is nonzero.
-.TP
-.B ENOMEM
-Allocating memory for handling the CPU capacity information failed.
-.TP
-.B EOPNOTSUPP
-The value specified in
-.I function_code
-is not valid.
-.SH STANDARDS
-Linux on s390.
-.SH HISTORY
-Linux 4.15.
-.SH NOTES
-For details of the STHYI instruction, see
-.UR https://www.ibm.com\:/support\:/knowledgecenter\:/SSB27U_6.3.0\:/com.ibm.zvm.v630.hcpb4\:/hcpb4sth.htm
-the documentation page
-.UE .
-.P
-When the system call interface is used, the response buffer doesn't
-have to fulfill alignment requirements described in the STHYI
-instruction definition.
-.P
-The kernel caches the response (for up to one second, as of Linux 4.16).
-Subsequent system call invocations may return the cached response.
-.SH SEE ALSO
-.BR syscall (2)
diff --git a/man2/sbrk.2 b/man2/sbrk.2
deleted file mode 100644
index a3711a537..000000000
--- a/man2/sbrk.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/brk.2
diff --git a/man2/sched_get_priority_max.2 b/man2/sched_get_priority_max.2
deleted file mode 100644
index c94f11051..000000000
--- a/man2/sched_get_priority_max.2
+++ /dev/null
@@ -1,112 +0,0 @@
-.\" Copyright (C) Tom Bjorkholm & Markus Kuhn, 1996
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 1996-04-01 Tom Bjorkholm <tomb@mydata.se>
-.\" First version written
-.\" 1996-04-10 Markus Kuhn <mskuhn@cip.informatik.uni-erlangen.de>
-.\" revision
-.\"
-.TH sched_get_priority_max 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sched_get_priority_max, sched_get_priority_min \- get static priority range
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sched.h>
-.P
-.BI "int sched_get_priority_max(int " policy );
-.BI "int sched_get_priority_min(int " policy );
-.fi
-.SH DESCRIPTION
-.BR sched_get_priority_max ()
-returns the maximum priority value that can be used with the
-scheduling algorithm identified by
-.IR policy .
-.BR sched_get_priority_min ()
-returns the minimum priority value that can be used with the
-scheduling algorithm identified by
-.IR policy .
-Supported
-.I policy
-values are
-.BR SCHED_FIFO ,
-.BR SCHED_RR ,
-.BR SCHED_OTHER ,
-.BR SCHED_BATCH ,
-.BR SCHED_IDLE ,
-and
-.BR SCHED_DEADLINE .
-Further details about these policies can be found in
-.BR sched (7).
-.P
-Processes with numerically higher priority values are scheduled before
-processes with numerically lower priority values.
-Thus, the value
-returned by
-.BR sched_get_priority_max ()
-will be greater than the
-value returned by
-.BR sched_get_priority_min ().
-.P
-Linux allows the static priority range 1 to 99 for the
-.B SCHED_FIFO
-and
-.B SCHED_RR
-policies, and the priority 0 for the remaining policies.
-Scheduling priority ranges for the various policies
-are not alterable.
-.P
-The range of scheduling priorities may vary on other POSIX systems,
-thus it is a good idea for portable applications to use a virtual
-priority range and map it to the interval given by
-.BR sched_get_priority_max ()
-and
-.BR sched_get_priority_min ().
-POSIX.1 requires
-.\" POSIX.1-2001, POSIX.1-2008 (XBD 2.8.4)
-a spread of at least 32 between the maximum and the minimum values for
-.B SCHED_FIFO
-and
-.BR SCHED_RR .
-.P
-POSIX systems on which
-.BR sched_get_priority_max ()
-and
-.BR sched_get_priority_min ()
-are available define
-.B _POSIX_PRIORITY_SCHEDULING
-in
-.IR <unistd.h> .
-.SH RETURN VALUE
-On success,
-.BR sched_get_priority_max ()
-and
-.BR sched_get_priority_min ()
-return the maximum/minimum priority value for the named scheduling
-policy.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-The argument
-.I policy
-does not identify a defined scheduling policy.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.SH SEE ALSO
-.ad l
-.nh
-.BR sched_getaffinity (2),
-.BR sched_getparam (2),
-.BR sched_getscheduler (2),
-.BR sched_setaffinity (2),
-.BR sched_setparam (2),
-.BR sched_setscheduler (2),
-.BR sched (7)
diff --git a/man2/sched_get_priority_min.2 b/man2/sched_get_priority_min.2
deleted file mode 100644
index 17b99f0e3..000000000
--- a/man2/sched_get_priority_min.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sched_get_priority_max.2
diff --git a/man2/sched_getaffinity.2 b/man2/sched_getaffinity.2
deleted file mode 100644
index f376c11f4..000000000
--- a/man2/sched_getaffinity.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sched_setaffinity.2
diff --git a/man2/sched_getattr.2 b/man2/sched_getattr.2
deleted file mode 100644
index cb2c346ee..000000000
--- a/man2/sched_getattr.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sched_setattr.2
diff --git a/man2/sched_getparam.2 b/man2/sched_getparam.2
deleted file mode 100644
index d39facd91..000000000
--- a/man2/sched_getparam.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sched_setparam.2
diff --git a/man2/sched_getscheduler.2 b/man2/sched_getscheduler.2
deleted file mode 100644
index 13aa82793..000000000
--- a/man2/sched_getscheduler.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sched_setscheduler.2
diff --git a/man2/sched_rr_get_interval.2 b/man2/sched_rr_get_interval.2
deleted file mode 100644
index 3b70285a9..000000000
--- a/man2/sched_rr_get_interval.2
+++ /dev/null
@@ -1,110 +0,0 @@
-.\" Copyright (C) Tom Bjorkholm & Markus Kuhn, 1996
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 1996-04-01 Tom Bjorkholm <tomb@mydata.se>
-.\" First version written
-.\" 1996-04-10 Markus Kuhn <mskuhn@cip.informatik.uni-erlangen.de>
-.\" revision
-.\"
-.TH sched_rr_get_interval 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sched_rr_get_interval \- get the SCHED_RR interval for the named process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sched.h>
-.P
-.BI "int sched_rr_get_interval(pid_t " pid ", struct timespec *" tp );
-.fi
-.SH DESCRIPTION
-.BR sched_rr_get_interval ()
-writes into the
-.BR timespec (3)
-structure pointed to by
-.I tp
-the round-robin time quantum for the process identified by
-.IR pid .
-The specified process should be running under the
-.B SCHED_RR
-scheduling policy.
-.P
-If
-.I pid
-is zero, the time quantum for the calling process is written into
-.IR *tp .
-.\" FIXME . On Linux, sched_rr_get_interval()
-.\" returns the timeslice for SCHED_OTHER processes -- this timeslice
-.\" is influenced by the nice value.
-.\" For SCHED_FIFO processes, this always returns 0.
-.\"
-.\" The round-robin time quantum value is not alterable under Linux
-.\" 1.3.81.
-.\"
-.SH RETURN VALUE
-On success,
-.BR sched_rr_get_interval ()
-returns 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Problem with copying information to user space.
-.TP
-.B EINVAL
-Invalid pid.
-.TP
-.B ENOSYS
-The system call is not yet implemented (only on rather old kernels).
-.TP
-.B ESRCH
-Could not find a process with the ID
-.IR pid .
-.SH VERSIONS
-.SS Linux
-Linux 3.9 added
-.\" commit ce0dbbbb30aee6a835511d5be446462388ba9eee
-a new mechanism for adjusting (and viewing) the
-.B SCHED_RR
-quantum: the
-.I /proc/sys/kernel/sched_rr_timeslice_ms
-file exposes the quantum as a millisecond value, whose default is 100.
-Writing 0 to this file resets the quantum to the default value.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.SS Linux
-POSIX does not specify any mechanism for controlling the size of the
-round-robin time quantum.
-Older Linux kernels provide a (nonportable) method of doing this.
-The quantum can be controlled by adjusting the process's nice value (see
-.BR setpriority (2)).
-Assigning a negative (i.e., high) nice value results in a longer quantum;
-assigning a positive (i.e., low) nice value results in a shorter quantum.
-The default quantum is 0.1 seconds;
-the degree to which changing the nice value affects the
-quantum has varied somewhat across kernel versions.
-This method of adjusting the quantum was removed
-.\" commit a4ec24b48ddef1e93f7578be53270f0b95ad666c
-starting with Linux 2.6.24.
-.SH NOTES
-POSIX systems on which
-.BR sched_rr_get_interval ()
-is available define
-.B _POSIX_PRIORITY_SCHEDULING
-in
-.IR <unistd.h> .
-.\" .SH BUGS
-.\" As of Linux 1.3.81
-.\" .BR sched_rr_get_interval ()
-.\" returns with error
-.\" ENOSYS, because SCHED_RR has not yet been fully implemented and tested
-.\" properly.
-.SH SEE ALSO
-.BR timespec (3),
-.BR sched (7)
diff --git a/man2/sched_setaffinity.2 b/man2/sched_setaffinity.2
deleted file mode 100644
index 7f87fabb1..000000000
--- a/man2/sched_setaffinity.2
+++ /dev/null
@@ -1,427 +0,0 @@
-.\" Copyright (C) 2002 Robert Love
-.\" and Copyright (C) 2006, 2015 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 2002-11-19 Robert Love <rml@tech9.net> - initial version
-.\" 2004-04-20 mtk - fixed description of return value
-.\" 2004-04-22 aeb - added glibc prototype history
-.\" 2005-05-03 mtk - noted that sched_setaffinity may cause thread
-.\" migration and that CPU affinity is a per-thread attribute.
-.\" 2006-02-03 mtk -- Major rewrite
-.\" 2008-11-12, mtk, removed CPU_*() macro descriptions to a
-.\" separate CPU_SET(3) page.
-.\"
-.TH sched_setaffinity 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sched_setaffinity, sched_getaffinity \- \
-set and get a thread's CPU affinity mask
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sched.h>
-.P
-.BI "int sched_setaffinity(pid_t " pid ", size_t " cpusetsize ,
-.BI " const cpu_set_t *" mask );
-.BI "int sched_getaffinity(pid_t " pid ", size_t " cpusetsize ,
-.BI " cpu_set_t *" mask );
-.fi
-.SH DESCRIPTION
-A thread's CPU affinity mask determines the set of CPUs on which
-it is eligible to run.
-On a multiprocessor system, setting the CPU affinity mask
-can be used to obtain performance benefits.
-For example,
-by dedicating one CPU to a particular thread
-(i.e., setting the affinity mask of that thread to specify a single CPU,
-and setting the affinity mask of all other threads to exclude that CPU),
-it is possible to ensure maximum execution speed for that thread.
-Restricting a thread to run on a single CPU also avoids
-the performance cost caused by the cache invalidation that occurs
-when a thread ceases to execute on one CPU and then
-recommences execution on a different CPU.
-.P
-A CPU affinity mask is represented by the
-.I cpu_set_t
-structure, a "CPU set", pointed to by
-.IR mask .
-A set of macros for manipulating CPU sets is described in
-.BR CPU_SET (3).
-.P
-.BR sched_setaffinity ()
-sets the CPU affinity mask of the thread whose ID is
-.I pid
-to the value specified by
-.IR mask .
-If
-.I pid
-is zero, then the calling thread is used.
-The argument
-.I cpusetsize
-is the length (in bytes) of the data pointed to by
-.IR mask .
-Normally this argument would be specified as
-.IR "sizeof(cpu_set_t)" .
-.P
-If the thread specified by
-.I pid
-is not currently running on one of the CPUs specified in
-.IR mask ,
-then that thread is migrated to one of the CPUs specified in
-.IR mask .
-.P
-.BR sched_getaffinity ()
-writes the affinity mask of the thread whose ID is
-.I pid
-into the
-.I cpu_set_t
-structure pointed to by
-.IR mask .
-The
-.I cpusetsize
-argument specifies the size (in bytes) of
-.IR mask .
-If
-.I pid
-is zero, then the mask of the calling thread is returned.
-.SH RETURN VALUE
-On success,
-.BR sched_setaffinity ()
-and
-.BR sched_getaffinity ()
-return 0 (but see "C library/kernel differences" below,
-which notes that the underlying
-.BR sched_getaffinity ()
-differs in its return value).
-On failure, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-A supplied memory address was invalid.
-.TP
-.B EINVAL
-The affinity bit mask
-.I mask
-contains no processors that are currently physically on the system
-and permitted to the thread according to any restrictions that
-may be imposed by
-.I cpuset
-cgroups or the "cpuset" mechanism described in
-.BR cpuset (7).
-.TP
-.B EINVAL
-.RB ( sched_getaffinity ()
-and, before Linux 2.6.9,
-.BR sched_setaffinity ())
-.I cpusetsize
-is smaller than the size of the affinity mask used by the kernel.
-.TP
-.B EPERM
-.RB ( sched_setaffinity ())
-The calling thread does not have appropriate privileges.
-The caller needs an effective user ID equal to the real user ID
-or effective user ID of the thread identified by
-.IR pid ,
-or it must possess the
-.B CAP_SYS_NICE
-capability in the user namespace of the thread
-.IR pid .
-.TP
-.B ESRCH
-The thread whose ID is \fIpid\fP could not be found.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.8,
-glibc 2.3.
-.P
-Initially, the glibc interfaces included a
-.I cpusetsize
-argument, typed as
-.IR "unsigned int" .
-In glibc 2.3.3, the
-.I cpusetsize
-argument was removed, but was then restored in glibc 2.3.4, with type
-.IR size_t .
-.SH NOTES
-After a call to
-.BR sched_setaffinity (),
-the set of CPUs on which the thread will actually run is
-the intersection of the set specified in the
-.I mask
-argument and the set of CPUs actually present on the system.
-The system may further restrict the set of CPUs on which the thread
-runs if the "cpuset" mechanism described in
-.BR cpuset (7)
-is being used.
-These restrictions on the actual set of CPUs on which the thread
-will run are silently imposed by the kernel.
-.P
-There are various ways of determining the number of CPUs
-available on the system, including: inspecting the contents of
-.IR /proc/cpuinfo ;
-using
-.BR sysconf (3)
-to obtain the values of the
-.B _SC_NPROCESSORS_CONF
-and
-.B _SC_NPROCESSORS_ONLN
-parameters; and inspecting the list of CPU directories under
-.IR /sys/devices/system/cpu/ .
-.P
-.BR sched (7)
-has a description of the Linux scheduling scheme.
-.P
-The affinity mask is a per-thread attribute that can be
-adjusted independently for each of the threads in a thread group.
-The value returned from a call to
-.BR gettid (2)
-can be passed in the argument
-.IR pid .
-Specifying
-.I pid
-as 0 will set the attribute for the calling thread,
-and passing the value returned from a call to
-.BR getpid (2)
-will set the attribute for the main thread of the thread group.
-(If you are using the POSIX threads API, then use
-.BR pthread_setaffinity_np (3)
-instead of
-.BR sched_setaffinity ().)
-.P
-The
-.I isolcpus
-boot option can be used to isolate one or more CPUs at boot time,
-so that no processes are scheduled onto those CPUs.
-Following the use of this boot option,
-the only way to schedule processes onto the isolated CPUs is via
-.BR sched_setaffinity ()
-or the
-.BR cpuset (7)
-mechanism.
-For further information, see the kernel source file
-.IR Documentation/admin\-guide/kernel\-parameters.txt .
-As noted in that file,
-.I isolcpus
-is the preferred mechanism of isolating CPUs
-(versus the alternative of manually setting the CPU affinity
-of all processes on the system).
-.P
-A child created via
-.BR fork (2)
-inherits its parent's CPU affinity mask.
-The affinity mask is preserved across an
-.BR execve (2).
-.SS C library/kernel differences
-This manual page describes the glibc interface for the CPU affinity calls.
-The actual system call interface is slightly different, with the
-.I mask
-being typed as
-.IR "unsigned long\ *" ,
-reflecting the fact that the underlying implementation of CPU
-sets is a simple bit mask.
-.P
-On success, the raw
-.BR sched_getaffinity ()
-system call returns the number of bytes copied into the
-.I mask
-buffer;
-this will be the minimum of
-.I cpusetsize
-and the size (in bytes) of the
-.I cpumask_t
-data type that is used internally by the kernel to
-represent the CPU set bit mask.
-.SS Handling systems with large CPU affinity masks
-The underlying system calls (which represent CPU masks as bit masks of type
-.IR "unsigned long\ *" )
-impose no restriction on the size of the CPU mask.
-However, the
-.I cpu_set_t
-data type used by glibc has a fixed size of 128 bytes,
-meaning that the maximum CPU number that can be represented is 1023.
-.\" FIXME . See https://sourceware.org/bugzilla/show_bug.cgi?id=15630
-.\" and https://sourceware.org/ml/libc-alpha/2013-07/msg00288.html
-If the kernel CPU affinity mask is larger than 1024 bits,
-then calls of the form:
-.P
-.in +4n
-.EX
-sched_getaffinity(pid, sizeof(cpu_set_t), &mask);
-.EE
-.in
-.P
-fail with the error
-.BR EINVAL ,
-the error produced by the underlying system call for the case where the
-.I mask
-size specified in
-.I cpusetsize
-is smaller than the size of the affinity mask used by the kernel.
-(Depending on the system CPU topology, the kernel affinity mask can
-be substantially larger than the number of active CPUs in the system.)
-.P
-When working on systems with large kernel CPU affinity masks,
-one must dynamically allocate the
-.I mask
-argument (see
-.BR CPU_ALLOC (3)).
-Currently, the only way to do this is by probing for the size
-of the required mask using
-.BR sched_getaffinity ()
-calls with increasing mask sizes (until the call does not fail with the error
-.BR EINVAL ).
-.P
-Be aware that
-.BR CPU_ALLOC (3)
-may allocate a slightly larger CPU set than requested
-(because CPU sets are implemented as bit masks allocated in units of
-.IR sizeof(long) ).
-Consequently,
-.BR sched_getaffinity ()
-can set bits beyond the requested allocation size, because the kernel
-sees a few additional bits.
-Therefore, the caller should iterate over the bits in the returned set,
-counting those which are set, and stop upon reaching the value returned by
-.BR CPU_COUNT (3)
-(rather than iterating over the number of bits
-requested to be allocated).
-.SH EXAMPLES
-The program below creates a child process.
-The parent and child then each assign themselves to a specified CPU
-and execute identical loops that consume some CPU time.
-Before terminating, the parent waits for the child to complete.
-The program takes three command-line arguments:
-the CPU number for the parent,
-the CPU number for the child,
-and the number of loop iterations that both processes should perform.
-.P
-As the sample runs below demonstrate, the amount of real and CPU time
-consumed when running the program will depend on intra-core caching effects
-and whether the processes are using the same CPU.
-.P
-We first employ
-.BR lscpu (1)
-to determine that this (x86)
-system has two cores, each with two CPUs:
-.P
-.in +4n
-.EX
-$ \fBlscpu | egrep \-i \[aq]core.*:|socket\[aq]\fP
-Thread(s) per core: 2
-Core(s) per socket: 2
-Socket(s): 1
-.EE
-.in
-.P
-We then time the operation of the example program for three cases:
-both processes running on the same CPU;
-both processes running on different CPUs on the same core;
-and both processes running on different CPUs on different cores.
-.P
-.in +4n
-.EX
-$ \fBtime \-p ./a.out 0 0 100000000\fP
-real 14.75
-user 3.02
-sys 11.73
-$ \fBtime \-p ./a.out 0 1 100000000\fP
-real 11.52
-user 3.98
-sys 19.06
-$ \fBtime \-p ./a.out 0 3 100000000\fP
-real 7.89
-user 3.29
-sys 12.07
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (sched_setaffinity.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/wait.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int parentCPU, childCPU;
- cpu_set_t set;
- unsigned int nloops;
-\&
- if (argc != 4) {
- fprintf(stderr, "Usage: %s parent\-cpu child\-cpu num\-loops\en",
- argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- parentCPU = atoi(argv[1]);
- childCPU = atoi(argv[2]);
- nloops = atoi(argv[3]);
-\&
- CPU_ZERO(&set);
-\&
- switch (fork()) {
- case \-1: /* Error */
- err(EXIT_FAILURE, "fork");
-\&
- case 0: /* Child */
- CPU_SET(childCPU, &set);
-\&
- if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
- err(EXIT_FAILURE, "sched_setaffinity");
-\&
- for (unsigned int j = 0; j < nloops; j++)
- getppid();
-\&
- exit(EXIT_SUCCESS);
-\&
- default: /* Parent */
- CPU_SET(parentCPU, &set);
-\&
- if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
- err(EXIT_FAILURE, "sched_setaffinity");
-\&
- for (unsigned int j = 0; j < nloops; j++)
- getppid();
-\&
- wait(NULL); /* Wait for child to terminate */
- exit(EXIT_SUCCESS);
- }
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.ad l
-.nh
-.BR lscpu (1),
-.BR nproc (1),
-.BR taskset (1),
-.BR clone (2),
-.BR getcpu (2),
-.BR getpriority (2),
-.BR gettid (2),
-.BR nice (2),
-.BR sched_get_priority_max (2),
-.BR sched_get_priority_min (2),
-.BR sched_getscheduler (2),
-.BR sched_setscheduler (2),
-.BR setpriority (2),
-.BR CPU_SET (3),
-.BR get_nprocs (3),
-.BR pthread_setaffinity_np (3),
-.BR sched_getcpu (3),
-.BR capabilities (7),
-.BR cpuset (7),
-.BR sched (7),
-.BR numactl (8)
diff --git a/man2/sched_setattr.2 b/man2/sched_setattr.2
deleted file mode 100644
index 5d7061bd6..000000000
--- a/man2/sched_setattr.2
+++ /dev/null
@@ -1,447 +0,0 @@
-.\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (C) 2014 Peter Zijlstra <peterz@infradead.org>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH sched_setattr 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sched_setattr, sched_getattr \-
-set and get scheduling policy and attributes
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sched.h>" " /* Definition of " SCHED_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_sched_setattr, pid_t " pid ", struct sched_attr *" attr ,
-.BI " unsigned int " flags );
-.BI "int syscall(SYS_sched_getattr, pid_t " pid ", struct sched_attr *" attr ,
-.BI " unsigned int " size ", unsigned int " flags );
-.fi
-.\" FIXME . Add feature test macro requirements
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.SS sched_setattr()
-The
-.BR sched_setattr ()
-system call sets the scheduling policy and
-associated attributes for the thread whose ID is specified in
-.IR pid .
-If
-.I pid
-equals zero,
-the scheduling policy and attributes of the calling thread will be set.
-.P
-Currently, Linux supports the following "normal"
-(i.e., non-real-time) scheduling policies as values that may be specified in
-.IR policy :
-.TP 14
-.B SCHED_OTHER
-the standard round-robin time-sharing policy;
-.\" In the 2.6 kernel sources, SCHED_OTHER is actually called
-.\" SCHED_NORMAL.
-.TP
-.B SCHED_BATCH
-for "batch" style execution of processes; and
-.TP
-.B SCHED_IDLE
-for running
-.I very
-low priority background jobs.
-.P
-Various "real-time" policies are also supported,
-for special time-critical applications that need precise control over
-the way in which runnable threads are selected for execution.
-For the rules governing when a process may use these policies, see
-.BR sched (7).
-The real-time policies that may be specified in
-.I policy
-are:
-.TP 14
-.B SCHED_FIFO
-a first-in, first-out policy; and
-.TP
-.B SCHED_RR
-a round-robin policy.
-.P
-Linux also provides the following policy:
-.TP 14
-.B SCHED_DEADLINE
-a deadline scheduling policy; see
-.BR sched (7)
-for details.
-.P
-The
-.I attr
-argument is a pointer to a structure that defines
-the new scheduling policy and attributes for the specified thread.
-This structure has the following form:
-.P
-.in +4n
-.EX
-struct sched_attr {
- u32 size; /* Size of this structure */
- u32 sched_policy; /* Policy (SCHED_*) */
- u64 sched_flags; /* Flags */
- s32 sched_nice; /* Nice value (SCHED_OTHER,
- SCHED_BATCH) */
- u32 sched_priority; /* Static priority (SCHED_FIFO,
- SCHED_RR) */
- /* Remaining fields are for SCHED_DEADLINE */
- u64 sched_runtime;
- u64 sched_deadline;
- u64 sched_period;
-};
-.EE
-.in
-.P
-The fields of the
-.I sched_attr
-structure are as follows:
-.TP
-.I size
-This field should be set to the size of the structure in bytes, as in
-.IR "sizeof(struct sched_attr)" .
-If the provided structure is smaller than the kernel structure,
-any additional fields are assumed to be '0'.
-If the provided structure is larger than the kernel structure,
-the kernel verifies that all additional fields are 0;
-if they are not,
-.BR sched_setattr ()
-fails with the error
-.B E2BIG
-and updates
-.I size
-to contain the size of the kernel structure.
-.IP
-The above behavior when the size of the user-space
-.I sched_attr
-structure does not match the size of the kernel structure
-allows for future extensibility of the interface.
-Malformed applications that pass oversize structures
-won't break in the future if the size of the kernel
-.I sched_attr
-structure is increased.
-In the future,
-it could also allow applications that know about a larger user-space
-.I sched_attr
-structure to determine whether they are running on an older kernel
-that does not support the larger structure.
-.TP
-.I sched_policy
-This field specifies the scheduling policy, as one of the
-.B SCHED_*
-values listed above.
-.TP
-.I sched_flags
-This field contains zero or more of the following flags
-that are ORed together to control scheduling behavior:
-.RS
-.TP
-.B SCHED_FLAG_RESET_ON_FORK
-Children created by
-.BR fork (2)
-do not inherit privileged scheduling policies.
-See
-.BR sched (7)
-for details.
-.TP
-.BR SCHED_FLAG_RECLAIM " (since Linux 4.13)"
-.\" 2d4283e9d583a3ee8cfb1cbb9c1270614df4c29d
-This flag allows a
-.B SCHED_DEADLINE
-thread to reclaim bandwidth unused by other real-time threads.
-.\" Bandwidth reclaim is done via the GRUB algorithm; see
-.\" Documentation/scheduler/sched-deadline.txt
-.TP
-.BR SCHED_FLAG_DL_OVERRUN " (since Linux 4.16)"
-.\" commit 34be39305a77b8b1ec9f279163c7cdb6cc719b91
-This flag allows an application to get informed about run-time overruns in
-.B SCHED_DEADLINE
-threads.
-Such overruns may be caused by (for example) coarse execution time accounting
-or incorrect parameter assignment.
-Notification takes the form of a
-.B SIGXCPU
-signal which is generated on each overrun.
-.IP
-This
-.B SIGXCPU
-signal is
-.I process-directed
-(see
-.BR signal (7))
-rather than thread-directed.
-This is probably a bug.
-On the one hand,
-.BR sched_setattr ()
-is being used to set a per-thread attribute.
-On the other hand, if the process-directed signal is delivered to
-a thread inside the process other than the one that had a run-time overrun,
-the application has no way of knowing which thread overran.
-.RE
-.TP
-.I sched_nice
-This field specifies the nice value to be set when specifying
-.I sched_policy
-as
-.B SCHED_OTHER
-or
-.BR SCHED_BATCH .
-The nice value is a number in the range \-20 (high priority)
-to +19 (low priority); see
-.BR sched (7).
-.TP
-.I sched_priority
-This field specifies the static priority to be set when specifying
-.I sched_policy
-as
-.B SCHED_FIFO
-or
-.BR SCHED_RR .
-The allowed range of priorities for these policies can be determined using
-.BR sched_get_priority_min (2)
-and
-.BR sched_get_priority_max (2).
-For other policies, this field must be specified as 0.
-.TP
-.I sched_runtime
-This field specifies the "Runtime" parameter for deadline scheduling.
-The value is expressed in nanoseconds.
-This field, and the next two fields,
-are used only for
-.B SCHED_DEADLINE
-scheduling; for further details, see
-.BR sched (7).
-.TP
-.I sched_deadline
-This field specifies the "Deadline" parameter for deadline scheduling.
-The value is expressed in nanoseconds.
-.TP
-.I sched_period
-This field specifies the "Period" parameter for deadline scheduling.
-The value is expressed in nanoseconds.
-.P
-The
-.I flags
-argument is provided to allow for future extensions to the interface;
-in the current implementation it must be specified as 0.
-.\"
-.\"
-.SS sched_getattr()
-The
-.BR sched_getattr ()
-system call fetches the scheduling policy and the
-associated attributes for the thread whose ID is specified in
-.IR pid .
-If
-.I pid
-equals zero,
-the scheduling policy and attributes of the calling thread
-will be retrieved.
-.P
-The
-.I size
-argument should be set to the size of the
-.I sched_attr
-structure as known to user space.
-The value must be at least as large as the size of the initially published
-.I sched_attr
-structure, or the call fails with the error
-.BR EINVAL .
-.P
-The retrieved scheduling attributes are placed in the fields of the
-.I sched_attr
-structure pointed to by
-.IR attr .
-The kernel sets
-.I attr.size
-to the size of its
-.I sched_attr
-structure.
-.P
-If the caller-provided
-.I attr
-buffer is larger than the kernel's
-.I sched_attr
-structure,
-the additional bytes in the user-space structure are not touched.
-If the caller-provided structure is smaller than the kernel
-.I sched_attr
-structure, the kernel will silently not return any values which would be stored
-outside the provided space.
-As with
-.BR sched_setattr (),
-these semantics allow for future extensibility of the interface.
-.P
-The
-.I flags
-argument is provided to allow for future extensions to the interface;
-in the current implementation it must be specified as 0.
-.SH RETURN VALUE
-On success,
-.BR sched_setattr ()
-and
-.BR sched_getattr ()
-return 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.BR sched_getattr ()
-and
-.BR sched_setattr ()
-can both fail for the following reasons:
-.TP
-.B EINVAL
-.I attr
-is NULL; or
-.I pid
-is negative; or
-.I flags
-is not zero.
-.TP
-.B ESRCH
-The thread whose ID is
-.I pid
-could not be found.
-.P
-In addition,
-.BR sched_getattr ()
-can fail for the following reasons:
-.TP
-.B E2BIG
-The buffer specified by
-.I size
-and
-.I attr
-is too small.
-.TP
-.B EINVAL
-.I size
-is invalid; that is, it is smaller than the initial version of the
-.I sched_attr
-structure (48 bytes) or larger than the system page size.
-.P
-In addition,
-.BR sched_setattr ()
-can fail for the following reasons:
-.TP
-.B E2BIG
-The buffer specified by
-.I size
-and
-.I attr
-is larger than the kernel structure,
-and one or more of the excess bytes is nonzero.
-.TP
-.B EBUSY
-.B SCHED_DEADLINE
-admission control failure, see
-.BR sched (7).
-.TP
-.B EINVAL
-.I attr.sched_policy
-is not one of the recognized policies;
-.I attr.sched_flags
-contains a flag other than
-.BR SCHED_FLAG_RESET_ON_FORK ;
-or
-.I attr.sched_priority
-is invalid; or
-.I attr.sched_policy
-is
-.B SCHED_DEADLINE
-and the deadline scheduling parameters in
-.I attr
-are invalid.
-.TP
-.B EPERM
-The caller does not have appropriate privileges.
-.TP
-.B EPERM
-The CPU affinity mask of the thread specified by
-.I pid
-does not include all CPUs in the system
-(see
-.BR sched_setaffinity (2)).
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.14.
-.\" FIXME . Add glibc version
-.SH NOTES
-glibc does not provide wrappers for these system calls; call them using
-.BR syscall (2).
-.P
-.BR sched_setattr ()
-provides a superset of the functionality of
-.BR sched_setscheduler (2),
-.BR sched_setparam (2),
-.BR nice (2),
-and (other than the ability to set the priority of all processes
-belonging to a specified user or all processes in a specified group)
-.BR setpriority (2).
-Analogously,
-.BR sched_getattr ()
-provides a superset of the functionality of
-.BR sched_getscheduler (2),
-.BR sched_getparam (2),
-and (partially)
-.BR getpriority (2).
-.SH BUGS
-In Linux versions up to
-.\" FIXME . patch sent to Peter Zijlstra
-3.15,
-.BR sched_setattr ()
-failed with the error
-.B EFAULT
-instead of
-.B E2BIG
-for the case described in ERRORS.
-.P
-Up to Linux 5.3,
-.BR sched_getattr ()
-failed with the error
-.B EFBIG
-if the in-kernel
-.I sched_attr
-structure was larger than the
-.I size
-passed by user space.
-.\" In Linux versions up to 3.15,
-.\" FIXME . patch from Peter Zijlstra pending
-.\" .BR sched_setattr ()
-.\" allowed a negative
-.\" .I attr.sched_policy
-.\" value.
-.SH SEE ALSO
-.ad l
-.nh
-.BR chrt (1),
-.BR nice (2),
-.BR sched_get_priority_max (2),
-.BR sched_get_priority_min (2),
-.BR sched_getaffinity (2),
-.BR sched_getparam (2),
-.BR sched_getscheduler (2),
-.BR sched_rr_get_interval (2),
-.BR sched_setaffinity (2),
-.BR sched_setparam (2),
-.BR sched_setscheduler (2),
-.BR sched_yield (2),
-.BR setpriority (2),
-.BR pthread_getschedparam (3),
-.BR pthread_setschedparam (3),
-.BR pthread_setschedprio (3),
-.BR capabilities (7),
-.BR cpuset (7),
-.BR sched (7)
-.ad
diff --git a/man2/sched_setparam.2 b/man2/sched_setparam.2
deleted file mode 100644
index 20aff7aa1..000000000
--- a/man2/sched_setparam.2
+++ /dev/null
@@ -1,121 +0,0 @@
-.\" Copyright (C) Tom Bjorkholm & Markus Kuhn, 1996
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 1996-04-01 Tom Bjorkholm <tomb@mydata.se>
-.\" First version written
-.\" 1996-04-10 Markus Kuhn <mskuhn@cip.informatik.uni-erlangen.de>
-.\" revision
-.\" Modified 2004-05-27 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH sched_setparam 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sched_setparam, sched_getparam \- set and get scheduling parameters
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sched.h>
-.P
-.BI "int sched_setparam(pid_t " pid ", const struct sched_param *" param );
-.BI "int sched_getparam(pid_t " pid ", struct sched_param *" param );
-.P
-\fBstruct sched_param {
- ...
- int \fIsched_priority\fB;
- ...
-};
-.fi
-.SH DESCRIPTION
-.BR sched_setparam ()
-sets the scheduling parameters associated with the scheduling policy
-for the thread whose thread ID is specified in \fIpid\fP.
-If \fIpid\fP is zero, then
-the parameters of the calling thread are set.
-The interpretation of
-the argument \fIparam\fP depends on the scheduling
-policy of the thread identified by
-.IR pid .
-See
-.BR sched (7)
-for a description of the scheduling policies supported under Linux.
-.P
-.BR sched_getparam ()
-retrieves the scheduling parameters for the
-thread identified by \fIpid\fP.
-If \fIpid\fP is zero, then the parameters
-of the calling thread are retrieved.
-.P
-.BR sched_setparam ()
-checks the validity of \fIparam\fP for the scheduling policy of the
-thread.
-The value \fIparam\->sched_priority\fP must lie within the
-range given by
-.BR sched_get_priority_min (2)
-and
-.BR sched_get_priority_max (2).
-.P
-For a discussion of the privileges and resource limits related to
-scheduling priority and policy, see
-.BR sched (7).
-.P
-POSIX systems on which
-.BR sched_setparam ()
-and
-.BR sched_getparam ()
-are available define
-.B _POSIX_PRIORITY_SCHEDULING
-in \fI<unistd.h>\fP.
-.SH RETURN VALUE
-On success,
-.BR sched_setparam ()
-and
-.BR sched_getparam ()
-return 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-Invalid arguments:
-.I param
-is NULL or
-.I pid
-is negative.
-.TP
-.B EINVAL
-.RB ( sched_setparam ())
-The argument \fIparam\fP does not make sense for the current
-scheduling policy.
-.TP
-.B EPERM
-.RB ( sched_setparam ())
-The caller does not have appropriate privileges
-(Linux: does not have the
-.B CAP_SYS_NICE
-capability).
-.TP
-.B ESRCH
-The thread whose ID is \fIpid\fP could not be found.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.SH SEE ALSO
-.ad l
-.nh
-.BR getpriority (2),
-.BR gettid (2),
-.BR nice (2),
-.BR sched_get_priority_max (2),
-.BR sched_get_priority_min (2),
-.BR sched_getaffinity (2),
-.BR sched_getscheduler (2),
-.BR sched_setaffinity (2),
-.BR sched_setattr (2),
-.BR sched_setscheduler (2),
-.BR setpriority (2),
-.BR capabilities (7),
-.BR sched (7)
diff --git a/man2/sched_setscheduler.2 b/man2/sched_setscheduler.2
deleted file mode 100644
index b22dd76d5..000000000
--- a/man2/sched_setscheduler.2
+++ /dev/null
@@ -1,232 +0,0 @@
-.\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\"
-.TH sched_setscheduler 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sched_setscheduler, sched_getscheduler \-
-set and get scheduling policy/parameters
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sched.h>
-.P
-.BI "int sched_setscheduler(pid_t " pid ", int " policy ,
-.BI " const struct sched_param *" param );
-.BI "int sched_getscheduler(pid_t " pid );
-.fi
-.SH DESCRIPTION
-The
-.BR sched_setscheduler ()
-system call
-sets both the scheduling policy and parameters for the
-thread whose ID is specified in \fIpid\fP.
-If \fIpid\fP equals zero, the
-scheduling policy and parameters of the calling thread will be set.
-.P
-The scheduling parameters are specified in the
-.I param
-argument, which is a pointer to a structure of the following form:
-.P
-.in +4n
-.EX
-struct sched_param {
- ...
- int sched_priority;
- ...
-};
-.EE
-.in
-.P
-In the current implementation, the structure contains only one field,
-.IR sched_priority .
-The interpretation of
-.I param
-depends on the selected policy.
-.P
-Currently, Linux supports the following "normal"
-(i.e., non-real-time) scheduling policies as values that may be specified in
-.IR policy :
-.TP 14
-.B SCHED_OTHER
-the standard round-robin time-sharing policy;
-.\" In the 2.6 kernel sources, SCHED_OTHER is actually called
-.\" SCHED_NORMAL.
-.TP
-.B SCHED_BATCH
-for "batch" style execution of processes; and
-.TP
-.B SCHED_IDLE
-for running
-.I very
-low priority background jobs.
-.P
-For each of the above policies,
-.I param\->sched_priority
-must be 0.
-.P
-Various "real-time" policies are also supported,
-for special time-critical applications that need precise control over
-the way in which runnable threads are selected for execution.
-For the rules governing when a process may use these policies, see
-.BR sched (7).
-The real-time policies that may be specified in
-.I policy
-are:
-.TP 14
-.B SCHED_FIFO
-a first-in, first-out policy; and
-.TP
-.B SCHED_RR
-a round-robin policy.
-.P
-For each of the above policies,
-.I param\->sched_priority
-specifies a scheduling priority for the thread.
-This is a number in the range returned by calling
-.BR sched_get_priority_min (2)
-and
-.BR sched_get_priority_max (2)
-with the specified
-.IR policy .
-On Linux, these system calls return, respectively, 1 and 99.
-.P
-Since Linux 2.6.32, the
-.B SCHED_RESET_ON_FORK
-flag can be ORed in
-.I policy
-when calling
-.BR sched_setscheduler ().
-As a result of including this flag, children created by
-.BR fork (2)
-do not inherit privileged scheduling policies.
-See
-.BR sched (7)
-for details.
-.P
-.BR sched_getscheduler ()
-returns the current scheduling policy of the thread
-identified by \fIpid\fP.
-If \fIpid\fP equals zero, the policy of the
-calling thread will be retrieved.
-.SH RETURN VALUE
-On success,
-.BR sched_setscheduler ()
-returns zero.
-On success,
-.BR sched_getscheduler ()
-returns the policy for the thread (a nonnegative integer).
-On error, both calls return \-1, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-Invalid arguments:
-.I pid
-is negative or
-.I param
-is NULL.
-.TP
-.B EINVAL
-.RB ( sched_setscheduler ())
-.I policy
-is not one of the recognized policies.
-.TP
-.B EINVAL
-.RB ( sched_setscheduler ())
-.I param
-does not make sense for the specified
-.IR policy .
-.TP
-.B EPERM
-The calling thread does not have appropriate privileges.
-.TP
-.B ESRCH
-The thread whose ID is \fIpid\fP could not be found.
-.SH VERSIONS
-POSIX.1 does not detail the permissions that an unprivileged
-thread requires in order to call
-.BR sched_setscheduler (),
-and details vary across systems.
-For example, the Solaris 7 manual page says that
-the real or effective user ID of the caller must
-match the real user ID or the saved set-user-ID of the target.
-.P
-The scheduling policy and parameters are in fact per-thread
-attributes on Linux.
-The value returned from a call to
-.BR gettid (2)
-can be passed in the argument
-.IR pid .
-Specifying
-.I pid
-as 0 will operate on the attributes of the calling thread,
-and passing the value returned from a call to
-.BR getpid (2)
-will operate on the attributes of the main thread of the thread group.
-(If you are using the POSIX threads API, then use
-.BR pthread_setschedparam (3),
-.BR pthread_getschedparam (3),
-and
-.BR pthread_setschedprio (3),
-instead of the
-.BR sched_* (2)
-system calls.)
-.SH STANDARDS
-POSIX.1-2008 (but see BUGS below).
-.P
-.B SCHED_BATCH
-and
-.B SCHED_IDLE
-are Linux-specific.
-.SH HISTORY
-POSIX.1-2001.
-.SH NOTES
-Further details of the semantics of all of the above "normal"
-and "real-time" scheduling policies can be found in the
-.BR sched (7)
-manual page.
-That page also describes an additional policy,
-.BR SCHED_DEADLINE ,
-which is settable only via
-.BR sched_setattr (2).
-.P
-POSIX systems on which
-.BR sched_setscheduler ()
-and
-.BR sched_getscheduler ()
-are available define
-.B _POSIX_PRIORITY_SCHEDULING
-in \fI<unistd.h>\fP.
-.SH BUGS
-POSIX.1 says that on success,
-.BR sched_setscheduler ()
-should return the previous scheduling policy.
-Linux
-.BR sched_setscheduler ()
-does not conform to this requirement,
-since it always returns 0 on success.
-.SH SEE ALSO
-.ad l
-.nh
-.BR chrt (1),
-.BR nice (2),
-.BR sched_get_priority_max (2),
-.BR sched_get_priority_min (2),
-.BR sched_getaffinity (2),
-.BR sched_getattr (2),
-.BR sched_getparam (2),
-.BR sched_rr_get_interval (2),
-.BR sched_setaffinity (2),
-.BR sched_setattr (2),
-.BR sched_setparam (2),
-.BR sched_yield (2),
-.BR setpriority (2),
-.BR capabilities (7),
-.BR cpuset (7),
-.BR sched (7)
-.ad
diff --git a/man2/sched_yield.2 b/man2/sched_yield.2
deleted file mode 100644
index 4a926a747..000000000
--- a/man2/sched_yield.2
+++ /dev/null
@@ -1,76 +0,0 @@
-.\" Copyright (C) Tom Bjorkholm & Markus Kuhn, 1996
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" 1996-04-01 Tom Bjorkholm <tomb@mydata.se>
-.\" First version written
-.\" 1996-04-10 Markus Kuhn <mskuhn@cip.informatik.uni-erlangen.de>
-.\" revision
-.\"
-.TH sched_yield 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sched_yield \- yield the processor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sched.h>
-.P
-.B int sched_yield(void);
-.fi
-.SH DESCRIPTION
-.BR sched_yield ()
-causes the calling thread to relinquish the CPU.
-The thread is moved to the end of the queue for its static
-priority and a new thread gets to run.
-.SH RETURN VALUE
-On success,
-.BR sched_yield ()
-returns 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-In the Linux implementation,
-.BR sched_yield ()
-always succeeds.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001 (but optional).
-POSIX.1-2008.
-.P
-Before POSIX.1-2008,
-systems on which
-.BR sched_yield ()
-is available defined
-.B _POSIX_PRIORITY_SCHEDULING
-in
-.IR <unistd.h> .
-.SH CAVEATS
-.BR sched_yield ()
-is intended for use with real-time scheduling policies (i.e.,
-.B SCHED_FIFO
-or
-.BR SCHED_RR ).
-Use of
-.BR sched_yield ()
-with nondeterministic scheduling policies such as
-.B SCHED_OTHER
-is unspecified and very likely means your application design is broken.
-.P
-If the calling thread is the only thread in the highest
-priority list at that time,
-it will continue to run after a call to
-.BR sched_yield ().
-.P
-Avoid calling
-.BR sched_yield ()
-unnecessarily or inappropriately
-(e.g., when resources needed by other
-schedulable threads are still held by the caller),
-since doing so will result in unnecessary context switches,
-which will degrade system performance.
-.SH SEE ALSO
-.BR sched (7)
diff --git a/man2/seccomp.2 b/man2/seccomp.2
deleted file mode 100644
index 25376c3bf..000000000
--- a/man2/seccomp.2
+++ /dev/null
@@ -1,1245 +0,0 @@
-.\" Copyright (C) 2014 Kees Cook <keescook@chromium.org>
-.\" and Copyright (C) 2012 Will Drewry <wad@chromium.org>
-.\" and Copyright (C) 2008, 2014,2017 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (C) 2017 Tyler Hicks <tyhicks@canonical.com>
-.\" and Copyright (C) 2020 Tycho Andersen <tycho@tycho.ws>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH seccomp 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-seccomp \- operate on Secure Computing state of the process
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/seccomp.h>" " /* Definition of " SECCOMP_* " constants */"
-.BR "#include <linux/filter.h>" " /* Definition of " "struct sock_fprog" " */"
-.BR "#include <linux/audit.h>" " /* Definition of " AUDIT_* " constants */"
-.BR "#include <linux/signal.h>" " /* Definition of " SIG* " constants */"
-.BR "#include <sys/ptrace.h>" " /* Definition of " PTRACE_* " constants */"
-.\" Kees Cook noted: Anything that uses SECCOMP_RET_TRACE returns will
-.\" need <sys/ptrace.h>
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_seccomp, unsigned int " operation ", unsigned int " flags ,
-.BI " void *" args );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR seccomp (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR seccomp ()
-system call operates on the Secure Computing (seccomp) state of the
-calling process.
-.P
-Currently, Linux supports the following
-.I operation
-values:
-.TP
-.B SECCOMP_SET_MODE_STRICT
-The only system calls that the calling thread is permitted to make are
-.BR read (2),
-.BR write (2),
-.BR _exit (2)
-(but not
-.BR exit_group (2)),
-and
-.BR sigreturn (2).
-Other system calls result in the termination of the calling thread,
-or termination of the entire process with the
-.B SIGKILL
-signal when there is only one thread.
-Strict secure computing mode is useful for number-crunching
-applications that may need to execute untrusted byte code, perhaps
-obtained by reading from a pipe or socket.
-.IP
-Note that although the calling thread can no longer call
-.BR sigprocmask (2),
-it can use
-.BR sigreturn (2)
-to block all signals apart from
-.B SIGKILL
-and
-.BR SIGSTOP .
-This means that
-.BR alarm (2)
-(for example) is not sufficient for restricting the process's execution time.
-Instead, to reliably terminate the process,
-.B SIGKILL
-must be used.
-This can be done by using
-.BR timer_create (2)
-with
-.B SIGEV_SIGNAL
-and
-.I sigev_signo
-set to
-.BR SIGKILL ,
-or by using
-.BR setrlimit (2)
-to set the hard limit for
-.BR RLIMIT_CPU .
-.IP
-This operation is available only if the kernel is configured with
-.B CONFIG_SECCOMP
-enabled.
-.IP
-The value of
-.I flags
-must be 0, and
-.I args
-must be NULL.
-.IP
-This operation is functionally identical to the call:
-.IP
-.in +4n
-.EX
-prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT);
-.EE
-.in
-.TP
-.B SECCOMP_SET_MODE_FILTER
-The system calls allowed are defined by a pointer to a Berkeley Packet
-Filter (BPF) passed via
-.IR args .
-This argument is a pointer to a
-.IR "struct\~sock_fprog" ;
-it can be designed to filter arbitrary system calls and system call
-arguments.
-If the filter is invalid,
-.BR seccomp ()
-fails, returning
-.B EINVAL
-in
-.IR errno .
-.IP
-If
-.BR fork (2)
-or
-.BR clone (2)
-is allowed by the filter, any child processes will be constrained to
-the same system call filters as the parent.
-If
-.BR execve (2)
-is allowed,
-the existing filters will be preserved across a call to
-.BR execve (2).
-.IP
-In order to use the
-.B SECCOMP_SET_MODE_FILTER
-operation, either the calling thread must have the
-.B CAP_SYS_ADMIN
-capability in its user namespace, or the thread must already have the
-.I no_new_privs
-bit set.
-If that bit was not already set by an ancestor of this thread,
-the thread must make the following call:
-.IP
-.in +4n
-.EX
-prctl(PR_SET_NO_NEW_PRIVS, 1);
-.EE
-.in
-.IP
-Otherwise, the
-.B SECCOMP_SET_MODE_FILTER
-operation fails and returns
-.B EACCES
-in
-.IR errno .
-This requirement ensures that an unprivileged process cannot apply
-a malicious filter and then invoke a set-user-ID or
-other privileged program using
-.BR execve (2),
-thus potentially compromising that program.
-(Such a malicious filter might, for example, cause an attempt to use
-.BR setuid (2)
-to set the caller's user IDs to nonzero values to instead
-return 0 without actually making the system call.
-Thus, the program might be tricked into retaining superuser privileges
-in circumstances where it is possible to influence it to do
-dangerous things because it did not actually drop privileges.)
-.IP
-If
-.BR prctl (2)
-or
-.BR seccomp ()
-is allowed by the attached filter, further filters may be added.
-This will increase evaluation time, but allows for further reduction of
-the attack surface during execution of a thread.
-.IP
-The
-.B SECCOMP_SET_MODE_FILTER
-operation is available only if the kernel is configured with
-.B CONFIG_SECCOMP_FILTER
-enabled.
-.IP
-When
-.I flags
-is 0, this operation is functionally identical to the call:
-.IP
-.in +4n
-.EX
-prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, args);
-.EE
-.in
-.IP
-The recognized
-.I flags
-are:
-.RS
-.TP
-.BR SECCOMP_FILTER_FLAG_LOG " (since Linux 4.14)"
-.\" commit e66a39977985b1e69e17c4042cb290768eca9b02
-All filter return actions except
-.B SECCOMP_RET_ALLOW
-should be logged.
-An administrator may override this filter flag by preventing specific
-actions from being logged via the
-.I /proc/sys/kernel/seccomp/actions_logged
-file.
-.TP
-.BR SECCOMP_FILTER_FLAG_NEW_LISTENER " (since Linux 5.0)"
-.\" commit 6a21cc50f0c7f87dae5259f6cfefe024412313f6
-After successfully installing the filter program,
-return a new user-space notification file descriptor.
-(The close-on-exec flag is set for the file descriptor.)
-When the filter returns
-.B SECCOMP_RET_USER_NOTIF
-a notification will be sent to this file descriptor.
-.IP
-At most one seccomp filter using the
-.B SECCOMP_FILTER_FLAG_NEW_LISTENER
-flag can be installed for a thread.
-.IP
-See
-.BR seccomp_unotify (2)
-for further details.
-.TP
-.BR SECCOMP_FILTER_FLAG_SPEC_ALLOW " (since Linux 4.17)"
-.\" commit 00a02d0c502a06d15e07b857f8ff921e3e402675
-Disable Speculative Store Bypass mitigation.
-.TP
-.B SECCOMP_FILTER_FLAG_TSYNC
-When adding a new filter, synchronize all other threads of the calling
-process to the same seccomp filter tree.
-A "filter tree" is the ordered list of filters attached to a thread.
-(Attaching identical filters in separate
-.BR seccomp ()
-calls results in different filters from this perspective.)
-.IP
-If any thread cannot synchronize to the same filter tree,
-the call will not attach the new seccomp filter,
-and will fail, returning the first thread ID found that cannot synchronize.
-Synchronization will fail if another thread in the same process is in
-.B SECCOMP_MODE_STRICT
-or if it has attached new seccomp filters to itself,
-diverging from the calling thread's filter tree.
-.RE
-.TP
-.BR SECCOMP_GET_ACTION_AVAIL " (since Linux 4.14)"
-.\" commit d612b1fd8010d0d67b5287fe146b8b55bcbb8655
-Test to see if an action is supported by the kernel.
-This operation is helpful to confirm that the kernel knows
-of a more recently added filter return action
-since the kernel treats all unknown actions as
-.BR SECCOMP_RET_KILL_PROCESS .
-.IP
-The value of
-.I flags
-must be 0, and
-.I args
-must be a pointer to an unsigned 32-bit filter return action.
-.TP
-.BR SECCOMP_GET_NOTIF_SIZES " (since Linux 5.0)"
-.\" commit 6a21cc50f0c7f87dae5259f6cfefe024412313f6
-Get the sizes of the seccomp user-space notification structures.
-Since these structures may evolve and grow over time,
-this command can be used to determine how
-much memory to allocate for sending and receiving notifications.
-.IP
-The value of
-.I flags
-must be 0, and
-.I args
-must be a pointer to a
-.IR "struct seccomp_notif_sizes" ,
-which has the following form:
-.IP
-.EX
-struct seccomp_notif_sizes {
- __u16 seccomp_notif; /* Size of notification structure */
- __u16 seccomp_notif_resp; /* Size of response structure */
- __u16 seccomp_data; /* Size of \[aq]struct seccomp_data\[aq] */
-};
-.EE
-.IP
-See
-.BR seccomp_unotify (2)
-for further details.
-.\"
-.SS Filters
-When adding filters via
-.BR SECCOMP_SET_MODE_FILTER ,
-.I args
-points to a filter program:
-.P
-.in +4n
-.EX
-struct sock_fprog {
- unsigned short len; /* Number of BPF instructions */
- struct sock_filter *filter; /* Pointer to array of
- BPF instructions */
-};
-.EE
-.in
-.P
-Each program must contain one or more BPF instructions:
-.P
-.in +4n
-.EX
-struct sock_filter { /* Filter block */
- __u16 code; /* Actual filter code */
- __u8 jt; /* Jump true */
- __u8 jf; /* Jump false */
- __u32 k; /* Generic multiuse field */
-};
-.EE
-.in
-.P
-When executing the instructions, the BPF program operates on the
-system call information made available (i.e., use the
-.B BPF_ABS
-addressing mode) as a (read-only)
-.\" Quoting Kees Cook:
-.\" If BPF even allows changing the data, it's not copied back to
-.\" the syscall when it runs. Anything wanting to do things like
-.\" that would need to use ptrace to catch the call and directly
-.\" modify the registers before continuing with the call.
-buffer of the following form:
-.P
-.in +4n
-.EX
-struct seccomp_data {
- int nr; /* System call number */
- __u32 arch; /* AUDIT_ARCH_* value
- (see <linux/audit.h>) */
- __u64 instruction_pointer; /* CPU instruction pointer */
- __u64 args[6]; /* Up to 6 system call arguments */
-};
-.EE
-.in
-.P
-Because numbering of system calls varies between architectures and
-some architectures (e.g., x86-64) allow user-space code to use
-the calling conventions of multiple architectures
-(and the convention being used may vary over the life of a process that uses
-.BR execve (2)
-to execute binaries that employ the different conventions),
-it is usually necessary to verify the value of the
-.I arch
-field.
-.P
-It is strongly recommended to use an allow-list approach whenever
-possible because such an approach is more robust and simple.
-A deny-list will have to be updated whenever a potentially
-dangerous system call is added (or a dangerous flag or option if those
-are deny-listed), and it is often possible to alter the
-representation of a value without altering its meaning, leading to
-a deny-list bypass.
-See also
-.I Caveats
-below.
-.P
-The
-.I arch
-field is not unique for all calling conventions.
-The x86-64 ABI and the x32 ABI both use
-.B AUDIT_ARCH_X86_64
-as
-.IR arch ,
-and they run on the same processors.
-Instead, the mask
-.B __X32_SYSCALL_BIT
-is used on the system call number to tell the two ABIs apart.
-.\" As noted by Dave Drysdale in a note at the end of
-.\" https://lwn.net/Articles/604515/
-.\" One additional detail to point out for the x32 ABI case:
-.\" the syscall number gets a high bit set (__X32_SYSCALL_BIT),
-.\" to mark it as an x32 call.
-.\"
-.\" If x32 support is included in the kernel, then __SYSCALL_MASK
-.\" will have a value that is not all-ones, and this will trigger
-.\" an extra instruction in system_call to mask off the extra bit,
-.\" so that the syscall table indexing still works.
-.P
-This means that a policy must either deny all syscalls with
-.B __X32_SYSCALL_BIT
-or it must recognize syscalls with and without
-.B __X32_SYSCALL_BIT
-set.
-A list of system calls to be denied based on
-.I nr
-that does not also contain
-.I nr
-values with
-.B __X32_SYSCALL_BIT
-set can be bypassed by a malicious program that sets
-.BR __X32_SYSCALL_BIT .
-.P
-Additionally, kernels prior to Linux 5.4 incorrectly permitted
-.I nr
-in the ranges 512-547 as well as the corresponding non-x32 syscalls ORed
-with
-.BR __X32_SYSCALL_BIT .
-For example,
-.I nr
-== 521 and
-.I nr
-== (101 |
-.BR __X32_SYSCALL_BIT )
-would result in invocations of
-.BR ptrace (2)
-with potentially confused x32-vs-x86_64 semantics in the kernel.
-Policies intended to work on kernels before Linux 5.4 must ensure that they
-deny or otherwise correctly handle these system calls.
-On Linux 5.4 and newer,
-.\" commit 6365b842aae4490ebfafadfc6bb27a6d3cc54757
-such system calls will fail with the error
-.BR ENOSYS ,
-without doing anything.
-.P
-The
-.I instruction_pointer
-field provides the address of the machine-language instruction that
-performed the system call.
-This might be useful in conjunction with the use of
-.IR /proc/ pid /maps
-to perform checks based on which region (mapping) of the program
-made the system call.
-(Probably, it is wise to lock down the
-.BR mmap (2)
-and
-.BR mprotect (2)
-system calls to prevent the program from subverting such checks.)
-.P
-When checking values from
-.IR args ,
-keep in mind that arguments are often
-silently truncated before being processed, but after the seccomp check.
-For example, this happens if the i386 ABI is used on an
-x86-64 kernel: although the kernel will normally not look beyond
-the 32 lowest bits of the arguments, the values of the full
-64-bit registers will be present in the seccomp data.
-A less surprising example is that if the x86-64 ABI is used to perform
-a system call that takes an argument of type
-.IR int ,
-the more-significant half of the argument register is ignored by
-the system call, but visible in the seccomp data.
-.P
-A seccomp filter returns a 32-bit value consisting of two parts:
-the most significant 16 bits
-(corresponding to the mask defined by the constant
-.BR SECCOMP_RET_ACTION_FULL )
-contain one of the "action" values listed below;
-the least significant 16 bits (defined by the constant
-.BR SECCOMP_RET_DATA )
-are "data" to be associated with this return value.
-.P
-If multiple filters exist, they are \fIall\fP executed,
-in reverse order of their addition to the filter tree\[em]that is,
-the most recently installed filter is executed first.
-(Note that all filters will be called
-even if one of the earlier filters returns
-.BR SECCOMP_RET_KILL .
-This is done to simplify the kernel code and to provide a
-tiny speed-up in the execution of sets of filters by
-avoiding a check for this uncommon case.)
-.\" From an Aug 2015 conversation with Kees Cook where I asked why *all*
-.\" filters are applied even if one of the early filters returns
-.\" SECCOMP_RET_KILL:
-.\"
-.\" It's just because it would be an optimization that would only speed up
-.\" the RET_KILL case, but it's the uncommon one and the one that doesn't
-.\" benefit meaningfully from such a change (you need to kill the process
-.\" really quickly?). We would speed up killing a program at the (albeit
-.\" tiny) expense to all other filtered programs. Best to keep the filter
-.\" execution logic clear, simple, and as fast as possible for all
-.\" filters.
-The return value for the evaluation of a given system call is the first-seen
-action value of highest precedence (along with its accompanying data)
-returned by execution of all of the filters.
-.P
-In decreasing order of precedence,
-the action values that may be returned by a seccomp filter are:
-.TP
-.BR SECCOMP_RET_KILL_PROCESS " (since Linux 4.14)"
-.\" commit 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945
-.\" commit 0466bdb99e8744bc9befa8d62a317f0fd7fd7421
-This value results in immediate termination of the process,
-with a core dump.
-The system call is not executed.
-By contrast with
-.B SECCOMP_RET_KILL_THREAD
-below, all threads in the thread group are terminated.
-(For a discussion of thread groups, see the description of the
-.B CLONE_THREAD
-flag in
-.BR clone (2).)
-.IP
-The process terminates
-.I "as though"
-killed by a
-.B SIGSYS
-signal.
-Even if a signal handler has been registered for
-.BR SIGSYS ,
-the handler will be ignored in this case and the process always terminates.
-To a parent process that is waiting on this process (using
-.BR waitpid (2)
-or similar), the returned
-.I wstatus
-will indicate that its child was terminated as though by a
-.B SIGSYS
-signal.
-.TP
-.BR SECCOMP_RET_KILL_THREAD " (or " SECCOMP_RET_KILL )
-This value results in immediate termination of the thread
-that made the system call.
-The system call is not executed.
-Other threads in the same thread group will continue to execute.
-.IP
-The thread terminates
-.I "as though"
-killed by a
-.B SIGSYS
-signal.
-See
-.B SECCOMP_RET_KILL_PROCESS
-above.
-.IP
-.\" See these commits:
-.\" seccomp: dump core when using SECCOMP_RET_KILL
-.\" (b25e67161c295c98acda92123b2dd1e7d8642901)
-.\" seccomp: Only dump core when single-threaded
-.\" (d7276e321ff8a53106a59c85ca46d03e34288893)
-Before Linux 4.11,
-any process terminated in this way would not trigger a coredump
-(even though
-.B SIGSYS
-is documented in
-.BR signal (7)
-as having a default action of termination with a core dump).
-Since Linux 4.11,
-a single-threaded process will dump core if terminated in this way.
-.IP
-With the addition of
-.B SECCOMP_RET_KILL_PROCESS
-in Linux 4.14,
-.B SECCOMP_RET_KILL_THREAD
-was added as a synonym for
-.BR SECCOMP_RET_KILL ,
-in order to more clearly distinguish the two actions.
-.IP
-.BR Note :
-the use of
-.B SECCOMP_RET_KILL_THREAD
-to kill a single thread in a multithreaded process is likely to leave the
-process in a permanently inconsistent and possibly corrupt state.
-.TP
-.B SECCOMP_RET_TRAP
-This value results in the kernel sending a thread-directed
-.B SIGSYS
-signal to the triggering thread.
-(The system call is not executed.)
-Various fields will be set in the
-.I siginfo_t
-structure (see
-.BR sigaction (2))
-associated with the signal:
-.RS
-.IP \[bu] 3
-.I si_signo
-will contain
-.BR SIGSYS .
-.IP \[bu]
-.I si_call_addr
-will show the address of the system call instruction.
-.IP \[bu]
-.I si_syscall
-and
-.I si_arch
-will indicate which system call was attempted.
-.IP \[bu]
-.I si_code
-will contain
-.BR SYS_SECCOMP .
-.IP \[bu]
-.I si_errno
-will contain the
-.B SECCOMP_RET_DATA
-portion of the filter return value.
-.RE
-.IP
-The program counter will be as though the system call happened
-(i.e., the program counter will not point to the system call instruction).
-The return value register will contain an architecture\-dependent value;
-if resuming execution, set it to something appropriate for the system call.
-(The architecture dependency is because replacing it with
-.B ENOSYS
-could overwrite some useful information.)
-.TP
-.B SECCOMP_RET_ERRNO
-This value results in the
-.B SECCOMP_RET_DATA
-portion of the filter's return value being passed to user space as the
-.I errno
-value without executing the system call.
-.TP
-.BR SECCOMP_RET_USER_NOTIF " (since Linux 5.0)"
-.\" commit 6a21cc50f0c7f87dae5259f6cfefe024412313f6
-Forward the system call to an attached user-space supervisor
-process to allow that process to decide what to do with the system call.
-If there is no attached supervisor (either
-because the filter was not installed with the
-.B SECCOMP_FILTER_FLAG_NEW_LISTENER
-flag or because the file descriptor was closed), the filter returns
-.B ENOSYS
-(similar to what happens when a filter returns
-.B SECCOMP_RET_TRACE
-and there is no tracer).
-See
-.BR seccomp_unotify (2)
-for further details.
-.IP
-Note that the supervisor process will not be notified
-if another filter returns an action value with a precedence greater than
-.BR SECCOMP_RET_USER_NOTIF .
-.TP
-.B SECCOMP_RET_TRACE
-When returned, this value will cause the kernel to attempt to notify a
-.BR ptrace (2)-based
-tracer prior to executing the system call.
-If there is no tracer present,
-the system call is not executed and returns a failure status with
-.I errno
-set to
-.BR ENOSYS .
-.IP
-A tracer will be notified if it requests
-.B PTRACE_O_TRACESECCOMP
-using
-.IR ptrace(PTRACE_SETOPTIONS) .
-The tracer will be notified of a
-.B PTRACE_EVENT_SECCOMP
-and the
-.B SECCOMP_RET_DATA
-portion of the filter's return value will be available to the tracer via
-.BR PTRACE_GETEVENTMSG .
-.IP
-The tracer can skip the system call by changing the system call number
-to \-1.
-Alternatively, the tracer can change the system call
-requested by changing the system call to a valid system call number.
-If the tracer asks to skip the system call, then the system call will
-appear to return the value that the tracer puts in the return value register.
-.IP
-.\" This was changed in ce6526e8afa4.
-.\" A related hole, using PTRACE_SYSCALL instead of SECCOMP_RET_TRACE, was
-.\" changed in arch-specific commits, e.g. 93e35efb8de4 for X86 and
-.\" 0f3912fd934c for ARM.
-Before Linux 4.8, the seccomp check will not be run again after the tracer is
-notified.
-(This means that, on older kernels, seccomp-based sandboxes
-.B "must not"
-allow use of
-.BR ptrace (2)\[em]even
-of other
-sandboxed processes\[em]without extreme care;
-ptracers can use this mechanism to escape from the seccomp sandbox.)
-.IP
-Note that a tracer process will not be notified
-if another filter returns an action value with a precedence greater than
-.BR SECCOMP_RET_TRACE .
-.TP
-.BR SECCOMP_RET_LOG " (since Linux 4.14)"
-.\" commit 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4
-This value results in the system call being executed after
-the filter return action is logged.
-An administrator may override the logging of this action via
-the
-.I /proc/sys/kernel/seccomp/actions_logged
-file.
-.TP
-.B SECCOMP_RET_ALLOW
-This value results in the system call being executed.
-.P
-If an action value other than one of the above is specified,
-then the filter action is treated as either
-.B SECCOMP_RET_KILL_PROCESS
-(since Linux 4.14)
-.\" commit 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945
-or
-.B SECCOMP_RET_KILL_THREAD
-(in Linux 4.13 and earlier).
-.\"
-.SS /proc interfaces
-The files in the directory
-.I /proc/sys/kernel/seccomp
-provide additional seccomp information and configuration:
-.TP
-.IR actions_avail " (since Linux 4.14)"
-.\" commit 8e5f1ad116df6b0de65eac458d5e7c318d1c05af
-A read-only ordered list of seccomp filter return actions in string form.
-The ordering, from left-to-right, is in decreasing order of precedence.
-The list represents the set of seccomp filter return actions
-supported by the kernel.
-.TP
-.IR actions_logged " (since Linux 4.14)"
-.\" commit 0ddec0fc8900201c0897b87b762b7c420436662f
-A read-write ordered list of seccomp filter return actions that
-are allowed to be logged.
-Writes to the file do not need to be in ordered form but reads from
-the file will be ordered in the same way as the
-.I actions_avail
-file.
-.IP
-It is important to note that the value of
-.I actions_logged
-does not prevent certain filter return actions from being logged when
-the audit subsystem is configured to audit a task.
-If the action is not found in the
-.I actions_logged
-file, the final decision on whether to audit the action for that task is
-ultimately left up to the audit subsystem to decide for all filter return
-actions other than
-.BR SECCOMP_RET_ALLOW .
-.IP
-The "allow" string is not accepted in the
-.I actions_logged
-file as it is not possible to log
-.B SECCOMP_RET_ALLOW
-actions.
-Attempting to write "allow" to the file will fail with the error
-.BR EINVAL .
-.\"
-.SS Audit logging of seccomp actions
-.\" commit 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4
-Since Linux 4.14, the kernel provides the facility to log the
-actions returned by seccomp filters in the audit log.
-The kernel makes the decision to log an action based on
-the action type, whether or not the action is present in the
-.I actions_logged
-file, and whether kernel auditing is enabled
-(e.g., via the kernel boot option
-.IR audit=1 ).
-.\" or auditing could be enabled via the netlink API (AUDIT_SET)
-The rules are as follows:
-.IP \[bu] 3
-If the action is
-.BR SECCOMP_RET_ALLOW ,
-the action is not logged.
-.IP \[bu]
-Otherwise, if the action is either
-.B SECCOMP_RET_KILL_PROCESS
-or
-.BR SECCOMP_RET_KILL_THREAD ,
-and that action appears in the
-.I actions_logged
-file, the action is logged.
-.IP \[bu]
-Otherwise, if the filter has requested logging (the
-.B SECCOMP_FILTER_FLAG_LOG
-flag)
-and the action appears in the
-.I actions_logged
-file, the action is logged.
-.IP \[bu]
-Otherwise, if kernel auditing is enabled and the process is being audited
-.RB ( autrace (8)),
-the action is logged.
-.IP \[bu]
-Otherwise, the action is not logged.
-.SH RETURN VALUE
-On success,
-.BR seccomp ()
-returns 0.
-On error, if
-.B SECCOMP_FILTER_FLAG_TSYNC
-was used,
-the return value is the ID of the thread
-that caused the synchronization failure.
-(This ID is a kernel thread ID of the type returned by
-.BR clone (2)
-and
-.BR gettid (2).)
-On other errors, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.BR seccomp ()
-can fail for the following reasons:
-.TP
-.B EACCES
-The caller did not have the
-.B CAP_SYS_ADMIN
-capability in its user namespace, or had not set
-.I no_new_privs
-before using
-.BR SECCOMP_SET_MODE_FILTER .
-.TP
-.B EBUSY
-While installing a new filter, the
-.B SECCOMP_FILTER_FLAG_NEW_LISTENER
-flag was specified,
-but a previous filter had already been installed with that flag.
-.TP
-.B EFAULT
-.I args
-was not a valid address.
-.TP
-.B EINVAL
-.I operation
-is unknown or is not supported by this kernel version or configuration.
-.TP
-.B EINVAL
-The specified
-.I flags
-are invalid for the given
-.IR operation .
-.TP
-.B EINVAL
-.I operation
-included
-.BR BPF_ABS ,
-but the specified offset was not aligned to a 32-bit boundary or exceeded
-.IR "sizeof(struct\~seccomp_data)" .
-.TP
-.B EINVAL
-.\" See kernel/seccomp.c::seccomp_may_assign_mode() in Linux 3.18 sources
-A secure computing mode has already been set, and
-.I operation
-differs from the existing setting.
-.TP
-.B EINVAL
-.I operation
-specified
-.BR SECCOMP_SET_MODE_FILTER ,
-but the filter program pointed to by
-.I args
-was not valid or the length of the filter program was zero or exceeded
-.B BPF_MAXINSNS
-(4096) instructions.
-.TP
-.B ENOMEM
-Out of memory.
-.TP
-.B ENOMEM
-.\" ENOMEM in kernel/seccomp.c::seccomp_attach_filter() in Linux 3.18 sources
-The total length of all filter programs attached
-to the calling thread would exceed
-.B MAX_INSNS_PER_PATH
-(32768) instructions.
-Note that for the purposes of calculating this limit,
-each already existing filter program incurs an
-overhead penalty of 4 instructions.
-.TP
-.B EOPNOTSUPP
-.I operation
-specified
-.BR SECCOMP_GET_ACTION_AVAIL ,
-but the kernel does not support the filter return action specified by
-.IR args .
-.TP
-.B ESRCH
-Another thread caused a failure during thread sync, but its ID could not
-be determined.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.17.
-.\" FIXME . Add glibc version
-.SH NOTES
-Rather than hand-coding seccomp filters as shown in the example below,
-you may prefer to employ the
-.I libseccomp
-library, which provides a front-end for generating seccomp filters.
-.P
-The
-.I Seccomp
-field of the
-.IR /proc/ pid /status
-file provides a method of viewing the seccomp mode of a process; see
-.BR proc (5).
-.P
-.BR seccomp ()
-provides a superset of the functionality provided by the
-.BR prctl (2)
-.B PR_SET_SECCOMP
-operation (which does not support
-.IR flags ).
-.P
-Since Linux 4.4, the
-.BR ptrace (2)
-.B PTRACE_SECCOMP_GET_FILTER
-operation can be used to dump a process's seccomp filters.
-.\"
-.SS Architecture support for seccomp BPF
-Architecture support for seccomp BPF filtering
-.\" Check by grepping for HAVE_ARCH_SECCOMP_FILTER in Kconfig files in
-.\" kernel source. Last checked in Linux 4.16-rc source.
-is available on the following architectures:
-.IP \[bu] 3
-x86-64, i386, x32 (since Linux 3.5)
-.PD 0
-.IP \[bu]
-ARM (since Linux 3.8)
-.IP \[bu]
-s390 (since Linux 3.8)
-.IP \[bu]
-MIPS (since Linux 3.16)
-.IP \[bu]
-ARM-64 (since Linux 3.19)
-.IP \[bu]
-PowerPC (since Linux 4.3)
-.IP \[bu]
-Tile (since Linux 4.3)
-.IP \[bu]
-PA-RISC (since Linux 4.6)
-.\" User mode Linux since Linux 4.6
-.PD
-.\"
-.SS Caveats
-There are various subtleties to consider when applying seccomp filters
-to a program, including the following:
-.IP \[bu] 3
-Some traditional system calls have user-space implementations in the
-.BR vdso (7)
-on many architectures.
-Notable examples include
-.BR clock_gettime (2),
-.BR gettimeofday (2),
-and
-.BR time (2).
-On such architectures,
-seccomp filtering for these system calls will have no effect.
-(However, there are cases where the
-.BR vdso (7)
-implementations may fall back to invoking the true system call,
-in which case seccomp filters would see the system call.)
-.IP \[bu]
-Seccomp filtering is based on system call numbers.
-However, applications typically do not directly invoke system calls,
-but instead call wrapper functions in the C library which
-in turn invoke the system calls.
-Consequently, one must be aware of the following:
-.RS
-.IP \[bu] 3
-The glibc wrappers for some traditional system calls may actually
-employ system calls with different names in the kernel.
-For example, the
-.BR exit (2)
-wrapper function actually employs the
-.BR exit_group (2)
-system call, and the
-.BR fork (2)
-wrapper function actually calls
-.BR clone (2).
-.IP \[bu]
-The behavior of wrapper functions may vary across architectures,
-according to the range of system calls provided on those architectures.
-In other words, the same wrapper function may invoke
-different system calls on different architectures.
-.IP \[bu]
-Finally, the behavior of wrapper functions can change across glibc versions.
-For example, in older versions, the glibc wrapper function for
-.BR open (2)
-invoked the system call of the same name,
-but starting in glibc 2.26, the implementation switched to calling
-.BR openat (2)
-on all architectures.
-.RE
-.P
-The consequence of the above points is that it may be necessary
-to filter for a system call other than might be expected.
-Various manual pages in Section 2 provide helpful details
-about the differences between wrapper functions and
-the underlying system calls in subsections entitled
-.IR "C library/kernel differences" .
-.P
-Furthermore, note that the application of seccomp filters
-even risks causing bugs in an application,
-when the filters cause unexpected failures for legitimate operations
-that the application might need to perform.
-Such bugs may not easily be discovered when testing the seccomp
-filters if the bugs occur in rarely used application code paths.
-.\"
-.SS Seccomp-specific BPF details
-Note the following BPF details specific to seccomp filters:
-.IP \[bu] 3
-The
-.B BPF_H
-and
-.B BPF_B
-size modifiers are not supported: all operations must load and store
-(4-byte) words
-.RB ( BPF_W ).
-.IP \[bu]
-To access the contents of the
-.I seccomp_data
-buffer, use the
-.B BPF_ABS
-addressing mode modifier.
-.IP \[bu]
-The
-.B BPF_LEN
-addressing mode modifier yields an immediate mode operand
-whose value is the size of the
-.I seccomp_data
-buffer.
-.SH EXAMPLES
-The program below accepts four or more arguments.
-The first three arguments are a system call number,
-a numeric architecture identifier, and an error number.
-The program uses these values to construct a BPF filter
-that is used at run time to perform the following checks:
-.IP \[bu] 3
-If the program is not running on the specified architecture,
-the BPF filter causes system calls to fail with the error
-.BR ENOSYS .
-.IP \[bu]
-If the program attempts to execute the system call with the specified number,
-the BPF filter causes the system call to fail, with
-.I errno
-being set to the specified error number.
-.P
-The remaining command-line arguments specify
-the pathname and additional arguments of a program
-that the example program should attempt to execute using
-.BR execv (3)
-(a library function that employs the
-.BR execve (2)
-system call).
-Some example runs of the program are shown below.
-.P
-First, we display the architecture that we are running on (x86-64)
-and then construct a shell function that looks up system call
-numbers on this architecture:
-.P
-.in +4n
-.EX
-$ \fBuname \-m\fP
-x86_64
-$ \fBsyscall_nr() {
- cat /usr/src/linux/arch/x86/syscalls/syscall_64.tbl | \e
- awk \[aq]$2 != "x32" && $3 == "\[aq]$1\[aq]" { print $1 }\[aq]
-}\fP
-.EE
-.in
-.P
-When the BPF filter rejects a system call (the second case above),
-it causes the system call to fail with the error number
-specified on the command line.
-In the experiments shown here, we'll use error number 99:
-.P
-.in +4n
-.EX
-$ \fBerrno 99\fP
-EADDRNOTAVAIL 99 Cannot assign requested address
-.EE
-.in
-.P
-In the following example, we attempt to run the command
-.BR whoami (1),
-but the BPF filter rejects the
-.BR execve (2)
-system call, so that the command is not even executed:
-.P
-.in +4n
-.EX
-$ \fBsyscall_nr execve\fP
-59
-$ \fB./a.out\fP
-Usage: ./a.out <syscall_nr> <arch> <errno> <prog> [<args>]
-Hint for <arch>: AUDIT_ARCH_I386: 0x40000003
- AUDIT_ARCH_X86_64: 0xC000003E
-$ \fB./a.out 59 0xC000003E 99 /bin/whoami\fP
-execv: Cannot assign requested address
-.EE
-.in
-.P
-In the next example, the BPF filter rejects the
-.BR write (2)
-system call, so that, although it is successfully started, the
-.BR whoami (1)
-command is not able to write output:
-.P
-.in +4n
-.EX
-$ \fBsyscall_nr write\fP
-1
-$ \fB./a.out 1 0xC000003E 99 /bin/whoami\fP
-.EE
-.in
-.P
-In the final example,
-the BPF filter rejects a system call that is not used by the
-.BR whoami (1)
-command, so it is able to successfully execute and produce output:
-.P
-.in +4n
-.EX
-$ \fBsyscall_nr preadv\fP
-295
-$ \fB./a.out 295 0xC000003E 99 /bin/whoami\fP
-cecilia
-.EE
-.in
-.SS Program source
-.\" SRC BEGIN (seccomp.c)
-.EX
-#include <linux/audit.h>
-#include <linux/filter.h>
-#include <linux/seccomp.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/prctl.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-#define X32_SYSCALL_BIT 0x40000000
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-\&
-static int
-install_filter(int syscall_nr, unsigned int t_arch, int f_errno)
-{
- unsigned int upper_nr_limit = 0xffffffff;
-\&
- /* Assume that AUDIT_ARCH_X86_64 means the normal x86\-64 ABI
- (in the x32 ABI, all system calls have bit 30 set in the
- \[aq]nr\[aq] field, meaning the numbers are >= X32_SYSCALL_BIT). */
- if (t_arch == AUDIT_ARCH_X86_64)
- upper_nr_limit = X32_SYSCALL_BIT \- 1;
-\&
- struct sock_filter filter[] = {
- /* [0] Load architecture from \[aq]seccomp_data\[aq] buffer into
- accumulator. */
- BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
- (offsetof(struct seccomp_data, arch))),
-\&
- /* [1] Jump forward 5 instructions if architecture does not
- match \[aq]t_arch\[aq]. */
- BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, t_arch, 0, 5),
-\&
- /* [2] Load system call number from \[aq]seccomp_data\[aq] buffer into
- accumulator. */
- BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
- (offsetof(struct seccomp_data, nr))),
-\&
- /* [3] Check ABI \- only needed for x86\-64 in deny\-list use
- cases. Use BPF_JGT instead of checking against the bit
- mask to avoid having to reload the syscall number. */
- BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, upper_nr_limit, 3, 0),
-\&
- /* [4] Jump forward 1 instruction if system call number
- does not match \[aq]syscall_nr\[aq]. */
- BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, syscall_nr, 0, 1),
-\&
- /* [5] Matching architecture and system call: don\[aq]t execute
- the system call, and return \[aq]f_errno\[aq] in \[aq]errno\[aq]. */
- BPF_STMT(BPF_RET | BPF_K,
- SECCOMP_RET_ERRNO | (f_errno & SECCOMP_RET_DATA)),
-\&
- /* [6] Destination of system call number mismatch: allow other
- system calls. */
- BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
-\&
- /* [7] Destination of architecture mismatch: kill process. */
- BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
- };
-\&
- struct sock_fprog prog = {
- .len = ARRAY_SIZE(filter),
- .filter = filter,
- };
-\&
- if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog)) {
- perror("seccomp");
- return 1;
- }
-\&
- return 0;
-}
-\&
-int
-main(int argc, char *argv[])
-{
- if (argc < 5) {
- fprintf(stderr, "Usage: "
- "%s <syscall_nr> <arch> <errno> <prog> [<args>]\en"
- "Hint for <arch>: AUDIT_ARCH_I386: 0x%X\en"
- " AUDIT_ARCH_X86_64: 0x%X\en"
- "\en", argv[0], AUDIT_ARCH_I386, AUDIT_ARCH_X86_64);
- exit(EXIT_FAILURE);
- }
-\&
- if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
- perror("prctl");
- exit(EXIT_FAILURE);
- }
-\&
- if (install_filter(strtol(argv[1], NULL, 0),
- strtoul(argv[2], NULL, 0),
- strtol(argv[3], NULL, 0)))
- exit(EXIT_FAILURE);
-\&
- execv(argv[4], &argv[4]);
- perror("execv");
- exit(EXIT_FAILURE);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR bpfc (1),
-.BR strace (1),
-.BR bpf (2),
-.BR prctl (2),
-.BR ptrace (2),
-.BR seccomp_unotify (2),
-.BR sigaction (2),
-.BR proc (5),
-.BR signal (7),
-.BR socket (7)
-.P
-Various pages from the
-.I libseccomp
-library, including:
-.BR scmp_sys_resolver (1),
-.BR seccomp_export_bpf (3),
-.BR seccomp_init (3),
-.BR seccomp_load (3),
-and
-.BR seccomp_rule_add (3).
-.P
-The kernel source files
-.I Documentation/networking/filter.txt
-and
-.I Documentation/userspace\-api/seccomp_filter.rst
-.\" commit c061f33f35be0ccc80f4b8e0aea5dfd2ed7e01a3
-(or
-.I Documentation/prctl/seccomp_filter.txt
-before Linux 4.13).
-.P
-McCanne, S.\& and Jacobson, V.\& (1992)
-.IR "The BSD Packet Filter: A New Architecture for User-level Packet Capture" ,
-Proceedings of the USENIX Winter 1993 Conference
-.UR http://www.tcpdump.org/papers/bpf\-usenix93.pdf
-.UE
diff --git a/man2/seccomp_unotify.2 b/man2/seccomp_unotify.2
deleted file mode 100644
index f11dabf3a..000000000
--- a/man2/seccomp_unotify.2
+++ /dev/null
@@ -1,2011 +0,0 @@
-.\" Copyright (C) 2020 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH seccomp_unotify 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-seccomp_unotify \- Seccomp user-space notification mechanism
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <linux/seccomp.h>
-.B #include <linux/filter.h>
-.B #include <linux/audit.h>
-.P
-.BI "int seccomp(unsigned int " operation ", unsigned int " flags \
-", void *" args );
-.P
-.B #include <sys/ioctl.h>
-.P
-.BI "int ioctl(int " fd ", SECCOMP_IOCTL_NOTIF_RECV,"
-.BI " struct seccomp_notif *" req );
-.BI "int ioctl(int " fd ", SECCOMP_IOCTL_NOTIF_SEND,"
-.BI " struct seccomp_notif_resp *" resp );
-.BI "int ioctl(int " fd ", SECCOMP_IOCTL_NOTIF_ID_VALID, __u64 *" id );
-.BI "int ioctl(int " fd ", SECCOMP_IOCTL_NOTIF_ADDFD,"
-.BI " struct seccomp_notif_addfd *" addfd );
-.fi
-.SH DESCRIPTION
-This page describes the user-space notification mechanism provided by the
-Secure Computing (seccomp) facility.
-As well as the use of the
-.B SECCOMP_FILTER_FLAG_NEW_LISTENER
-flag, the
-.B SECCOMP_RET_USER_NOTIF
-action value, and the
-.B SECCOMP_GET_NOTIF_SIZES
-operation described in
-.BR seccomp (2),
-this mechanism involves the use of a number of related
-.BR ioctl (2)
-operations (described below).
-.\"
-.SS Overview
-In conventional usage of a seccomp filter,
-the decision about how to treat a system call is made by the filter itself.
-By contrast, the user-space notification mechanism allows
-the seccomp filter to delegate
-the handling of the system call to another user-space process.
-Note that this mechanism is explicitly
-.B not
-intended as a method implementing security policy; see NOTES.
-.P
-In the discussion that follows,
-the thread(s) on which the seccomp filter is installed is (are)
-referred to as the
-.IR target ,
-and the process that is notified by the user-space notification
-mechanism is referred to as the
-.IR supervisor .
-.P
-A suitably privileged supervisor can use the user-space notification
-mechanism to perform actions on behalf of the target.
-The advantage of the user-space notification mechanism is that
-the supervisor will
-usually be able to retrieve information about the target and the
-performed system call that the seccomp filter itself cannot.
-(A seccomp filter is limited in the information it can obtain and
-the actions that it can perform because it
-is running on a virtual machine inside the kernel.)
-.P
-An overview of the steps performed by the target and the supervisor
-is as follows:
-.\"-------------------------------------
-.IP (1) 5
-The target establishes a seccomp filter in the usual manner,
-but with two differences:
-.RS
-.IP \[bu] 3
-The
-.BR seccomp (2)
-.I flags
-argument includes the flag
-.BR SECCOMP_FILTER_FLAG_NEW_LISTENER .
-Consequently, the return value of the (successful)
-.BR seccomp (2)
-call is a new "listening"
-file descriptor that can be used to receive notifications.
-Only one "listening" seccomp filter can be installed for a thread.
-.\" FIXME
-.\" Is the last sentence above correct?
-.\"
-.\" Kees Cook (25 Oct 2020) notes:
-.\"
-.\" I like this limitation, but I expect that it'll need to change in the
-.\" future. Even with LSMs, we see the need for arbitrary stacking, and the
-.\" idea of there being only 1 supervisor will eventually break down. Right
-.\" now there is only 1 because only container managers are using this
-.\" feature. But if some daemon starts using it to isolate some thread,
-.\" suddenly it might break if a container manager is trying to listen to it
-.\" too, etc. I expect it won't be needed soon, but I do think it'll change.
-.\"
-.IP \[bu]
-In cases where it is appropriate, the seccomp filter returns the action value
-.BR SECCOMP_RET_USER_NOTIF .
-This return value will trigger a notification event.
-.RE
-.\"-------------------------------------
-.IP (2)
-In order that the supervisor can obtain notifications
-using the listening file descriptor,
-(a duplicate of) that file descriptor must be passed from
-the target to the supervisor.
-One way in which this could be done is by passing the file descriptor
-over a UNIX domain socket connection between the target and the supervisor
-(using the
-.B SCM_RIGHTS
-ancillary message type described in
-.BR unix (7)).
-Another way to do this is through the use of
-.BR pidfd_getfd (2).
-.\" Jann Horn:
-.\" Instead of using unix domain sockets to send the fd to the
-.\" parent, I think you could also use clone3() with
-.\" flags==CLONE_FILES|SIGCHLD, dup2() the seccomp fd to an fd
-.\" that was reserved in the parent, call unshare(CLONE_FILES)
-.\" in the child after setting up the seccomp fd, and wake
-.\" up the parent with something like pthread_cond_signal()?
-.\" I'm not sure whether that'd look better or worse in the
-.\" end though, so maybe just ignore this comment.
-.\"-------------------------------------
-.IP (3)
-The supervisor will receive notification events
-on the listening file descriptor.
-These events are returned as structures of type
-.IR seccomp_notif .
-Because this structure and its size may evolve over kernel versions,
-the supervisor must first determine the size of this structure
-using the
-.BR seccomp (2)
-.B SECCOMP_GET_NOTIF_SIZES
-operation, which returns a structure of type
-.IR seccomp_notif_sizes .
-The supervisor allocates a buffer of size
-.I seccomp_notif_sizes.seccomp_notif
-bytes to receive notification events.
-In addition, the supervisor allocates another buffer of size
-.I seccomp_notif_sizes.seccomp_notif_resp
-bytes for the response (a
-.I struct seccomp_notif_resp
-structure)
-that it will provide to the kernel (and thus the target).
-.\"-------------------------------------
-.IP (4)
-The target then performs its workload,
-which includes system calls that will be controlled by the seccomp filter.
-Whenever one of these system calls causes the filter to return the
-.B SECCOMP_RET_USER_NOTIF
-action value, the kernel does
-.I not
-(yet) execute the system call;
-instead, execution of the target is temporarily blocked inside
-the kernel (in a sleep state that is interruptible by signals)
-and a notification event is generated on the listening file descriptor.
-.\"-------------------------------------
-.IP (5)
-The supervisor can now repeatedly monitor the
-listening file descriptor for
-.BR SECCOMP_RET_USER_NOTIF -triggered
-events.
-To do this, the supervisor uses the
-.B SECCOMP_IOCTL_NOTIF_RECV
-.BR ioctl (2)
-operation to read information about a notification event;
-this operation blocks until an event is available.
-The operation returns a
-.I seccomp_notif
-structure containing information about the system call
-that is being attempted by the target.
-(As described in NOTES,
-the file descriptor can also be monitored with
-.BR select (2),
-.BR poll (2),
-or
-.BR epoll (7).)
-.\" FIXME
-.\" Christian Brauner:
-.\"
-.\" Do we support O_NONBLOCK with SECCOMP_IOCTL_NOTIF_RECV and if
-.\" not should we?
-.\"
-.\" Michael Kerrisk:
-.\"
-.\" A quick test suggests that O_NONBLOCK has no effect on the blocking
-.\" behavior of SECCOMP_IOCTL_NOTIF_RECV.
-.
-.\"-------------------------------------
-.IP (6)
-The
-.I seccomp_notif
-structure returned by the
-.B SECCOMP_IOCTL_NOTIF_RECV
-operation includes the same information (a
-.I seccomp_data
-structure) that was passed to the seccomp filter.
-This information allows the supervisor to discover the system call number and
-the arguments for the target's system call.
-In addition, the notification event contains the ID of the thread
-that triggered the notification and a unique cookie value that
-is used in subsequent
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-and
-.B SECCOMP_IOCTL_NOTIF_SEND
-operations.
-.IP
-The information in the notification can be used to discover the
-values of pointer arguments for the target's system call.
-(This is something that can't be done from within a seccomp filter.)
-One way in which the supervisor can do this is to open the corresponding
-.IR /proc/ tid /mem
-file (see
-.BR proc (5))
-and read bytes from the location that corresponds to one of
-the pointer arguments whose value is supplied in the notification event.
-.\" Tycho Andersen mentioned that there are alternatives to /proc/PID/mem,
-.\" such as ptrace() and /proc/PID/map_files
-(The supervisor must be careful to avoid
-a race condition that can occur when doing this;
-see the description of the
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-.BR ioctl (2)
-operation below.)
-In addition,
-the supervisor can access other system information that is visible
-in user space but which is not accessible from a seccomp filter.
-.\"-------------------------------------
-.IP (7)
-Having obtained information as per the previous step,
-the supervisor may then choose to perform an action in response
-to the target's system call
-(which, as noted above, is not executed when the seccomp filter returns the
-.B SECCOMP_RET_USER_NOTIF
-action value).
-.IP
-One example use case here relates to containers.
-The target may be located inside a container where
-it does not have sufficient capabilities to mount a filesystem
-in the container's mount namespace.
-However, the supervisor may be a more privileged process that
-does have sufficient capabilities to perform the mount operation.
-.\"-------------------------------------
-.IP (8)
-The supervisor then sends a response to the notification.
-The information in this response is used by the kernel to construct
-a return value for the target's system call and provide
-a value that will be assigned to the
-.I errno
-variable of the target.
-.IP
-The response is sent using the
-.B SECCOMP_IOCTL_NOTIF_SEND
-.BR ioctl (2)
-operation, which is used to transmit a
-.I seccomp_notif_resp
-structure to the kernel.
-This structure must include the cookie value that the supervisor
-obtained in the
-.I seccomp_notif
-structure returned by the
-.B SECCOMP_IOCTL_NOTIF_RECV
-operation;
-the cookie allows the kernel to associate the response with the target.
-.\"-------------------------------------
-.IP (9)
-Once the notification has been sent,
-the system call in the target thread unblocks,
-returning the information that was provided by the supervisor
-in the notification response.
-.\"-------------------------------------
-.P
-As a variation on the last two steps,
-the supervisor can send a response that tells the kernel that it
-should execute the target thread's system call; see the discussion of
-.BR SECCOMP_USER_NOTIF_FLAG_CONTINUE ,
-below.
-.\"
-.SH IOCTL OPERATIONS
-The following
-.BR ioctl (2)
-operations are supported by the seccomp user-space
-notification file descriptor.
-For each of these operations, the first (file descriptor) argument of
-.BR ioctl (2)
-is the listening file descriptor returned by a call to
-.BR seccomp (2)
-with the
-.B SECCOMP_FILTER_FLAG_NEW_LISTENER
-flag.
-.\"
-.SS SECCOMP_IOCTL_NOTIF_RECV
-The
-.B SECCOMP_IOCTL_NOTIF_RECV
-operation (available since Linux 5.0) is used to obtain a user-space
-notification event.
-If no such event is currently pending,
-the operation blocks until an event occurs.
-The third
-.BR ioctl (2)
-argument is a pointer to a structure of the following form
-which contains information about the event.
-This structure must be zeroed out before the call.
-.P
-.in +4n
-.EX
-struct seccomp_notif {
- __u64 id; /* Cookie */
- __u32 pid; /* TID of target thread */
- __u32 flags; /* Currently unused (0) */
- struct seccomp_data data; /* See seccomp(2) */
-};
-.EE
-.in
-.P
-The fields in this structure are as follows:
-.TP
-.I id
-This is a cookie for the notification.
-Each such cookie is guaranteed to be unique for the corresponding
-seccomp filter.
-.RS
-.IP \[bu] 3
-The cookie can be used with the
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-.BR ioctl (2)
-operation described below.
-.IP \[bu]
-When returning a notification response to the kernel,
-the supervisor must include the cookie value in the
-.I seccomp_notif_resp
-structure that is specified as the argument of the
-.B SECCOMP_IOCTL_NOTIF_SEND
-operation.
-.RE
-.TP
-.I pid
-This is the thread ID of the target thread that triggered
-the notification event.
-.TP
-.I flags
-This is a bit mask of flags providing further information on the event.
-In the current implementation, this field is always zero.
-.TP
-.I data
-This is a
-.I seccomp_data
-structure containing information about the system call that
-triggered the notification.
-This is the same structure that is passed to the seccomp filter.
-See
-.BR seccomp (2)
-for details of this structure.
-.P
-On success, this operation returns 0; on failure, \-1 is returned, and
-.I errno
-is set to indicate the cause of the error.
-This operation can fail with the following errors:
-.TP
-.BR EINVAL " (since Linux 5.5)"
-.\" commit 2882d53c9c6f3b8311d225062522f03772cf0179
-The
-.I seccomp_notif
-structure that was passed to the call contained nonzero fields.
-.TP
-.B ENOENT
-The target thread was killed by a signal as the notification information
-was being generated,
-or the target's (blocked) system call was interrupted by a signal handler.
-.\" FIXME
-.\" From my experiments,
-.\" it appears that if a SECCOMP_IOCTL_NOTIF_RECV is done after
-.\" the target thread terminates, then the ioctl() simply
-.\" blocks (rather than returning an error to indicate that the
-.\" target no longer exists).
-.\"
-.\" I found that surprising, and it required some contortions in
-.\" the example program. It was not possible to code my SIGCHLD
-.\" handler (which reaps the zombie when the worker/target
-.\" terminates) to simply set a flag checked in the main
-.\" handleNotifications() loop, since this created an
-.\" unavoidable race where the child might terminate just after
-.\" I had checked the flag, but before I blocked (forever!) in the
-.\" SECCOMP_IOCTL_NOTIF_RECV operation. Instead, I had to code
-.\" the signal handler to simply call _exit(2) in order to
-.\" terminate the parent process (the supervisor).
-.\"
-.\" Is this expected behavior? It seems to me rather
-.\" desirable that SECCOMP_IOCTL_NOTIF_RECV should give an error
-.\" if the target has terminated.
-.\"
-.\" Jann posted a patch to rectify this, but there was no response
-.\" (Lore link: https://bit.ly/3jvUBxk) to his question about fixing
-.\" this issue. (I've tried building with the patch, but encountered
-.\" an issue with the target process entering D state after a signal.)
-.\"
-.\" For now, this behavior is documented in BUGS.
-.\"
-.\" Kees Cook commented: Let's change [this] ASAP!
-.\"
-.SS SECCOMP_IOCTL_NOTIF_ID_VALID
-The
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-operation (available since Linux 5.0) is used to check that a notification ID
-returned by an earlier
-.B SECCOMP_IOCTL_NOTIF_RECV
-operation is still valid
-(i.e., that the target still exists and its system call
-is still blocked waiting for a response).
-.P
-The third
-.BR ioctl (2)
-argument is a pointer to the cookie
-.RI ( id )
-returned by the
-.B SECCOMP_IOCTL_NOTIF_RECV
-operation.
-.P
-This operation is necessary to avoid race conditions that can occur when the
-.I pid
-returned by the
-.B SECCOMP_IOCTL_NOTIF_RECV
-operation terminates, and that process ID is reused by another process.
-An example of this kind of race is the following:
-.IP (1) 5
-A notification is generated on the listening file descriptor.
-The returned
-.I seccomp_notif
-contains the TID of the target thread (in the
-.I pid
-field of the structure).
-.IP (2)
-The target terminates.
-.IP (3)
-Another thread or process is created on the system that by chance reuses the
-TID that was freed when the target terminated.
-.IP (4)
-The supervisor
-.BR open (2)s
-the
-.IR /proc/ tid /mem
-file for the TID obtained in step 1, with the intention of (say)
-inspecting the memory location(s) that contain the argument(s) of
-the system call that triggered the notification in step 1.
-.P
-In the above scenario, the risk is that the supervisor may try
-to access the memory of a process other than the target.
-This race can be avoided by following the call to
-.BR open (2)
-with a
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-operation to verify that the process that generated the notification
-is still alive.
-(Note that if the target terminates after the latter step,
-a subsequent
-.BR read (2)
-from the file descriptor may return 0, indicating end of file.)
-.\" Jann Horn:
-.\" the PID can be reused, but the /proc/$pid directory is
-.\" internally not associated with the numeric PID, but,
-.\" conceptually speaking, with a specific incarnation of the
-.\" PID, or something like that. (Actually, it is associated
-.\" with the "struct pid", which is not reused, instead of the
-.\" numeric PID.
-.P
-See NOTES for a discussion of other cases where
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-checks must be performed.
-.P
-On success (i.e., the notification ID is still valid),
-this operation returns 0.
-On failure (i.e., the notification ID is no longer valid),
-\-1 is returned, and
-.I errno
-is set to
-.BR ENOENT .
-.\"
-.SS SECCOMP_IOCTL_NOTIF_SEND
-The
-.B SECCOMP_IOCTL_NOTIF_SEND
-operation (available since Linux 5.0)
-is used to send a notification response back to the kernel.
-The third
-.BR ioctl (2)
-argument of this operation is a pointer to a structure of the following form:
-.P
-.in +4n
-.EX
-struct seccomp_notif_resp {
- __u64 id; /* Cookie value */
- __s64 val; /* Success return value */
- __s32 error; /* 0 (success) or negative error number */
- __u32 flags; /* See below */
-};
-.EE
-.in
-.P
-The fields of this structure are as follows:
-.TP
-.I id
-This is the cookie value that was obtained using the
-.B SECCOMP_IOCTL_NOTIF_RECV
-operation.
-This cookie value allows the kernel to correctly associate this response
-with the system call that triggered the user-space notification.
-.TP
-.I val
-This is the value that will be used for a spoofed
-success return for the target's system call; see below.
-.TP
-.I error
-This is the value that will be used as the error number
-.RI ( errno )
-for a spoofed error return for the target's system call; see below.
-.TP
-.I flags
-This is a bit mask that includes zero or more of the following flags:
-.RS
-.TP
-.BR SECCOMP_USER_NOTIF_FLAG_CONTINUE " (since Linux 5.5)"
-Tell the kernel to execute the target's system call.
-.\" commit fb3c5386b382d4097476ce9647260fc89b34afdb
-.RE
-.P
-Two kinds of response are possible:
-.IP \[bu] 3
-A response to the kernel telling it to execute the
-target's system call.
-In this case, the
-.I flags
-field includes
-.B SECCOMP_USER_NOTIF_FLAG_CONTINUE
-and the
-.I error
-and
-.I val
-fields must be zero.
-.IP
-This kind of response can be useful in cases where the supervisor needs
-to do deeper analysis of the target's system call than is possible
-from a seccomp filter (e.g., examining the values of pointer arguments),
-and, having decided that the system call does not require emulation
-by the supervisor, the supervisor wants the system call to
-be executed normally in the target.
-.IP
-The
-.B SECCOMP_USER_NOTIF_FLAG_CONTINUE
-flag should be used with caution; see NOTES.
-.IP \[bu]
-A spoofed return value for the target's system call.
-In this case, the kernel does not execute the target's system call,
-instead causing the system call to return a spoofed value as specified by
-fields of the
-.I seccomp_notif_resp
-structure.
-The supervisor should set the fields of this structure as follows:
-.RS
-.IP \[bu] 3
-.I flags
-does not contain
-.BR SECCOMP_USER_NOTIF_FLAG_CONTINUE .
-.IP \[bu]
-.I error
-is set either to 0 for a spoofed "success" return or to a negative
-error number for a spoofed "failure" return.
-In the former case, the kernel causes the target's system call
-to return the value specified in the
-.I val
-field.
-In the latter case, the kernel causes the target's system call
-to return \-1, and
-.I errno
-is assigned the negated
-.I error
-value.
-.IP \[bu]
-.I val
-is set to a value that will be used as the return value for a spoofed
-"success" return for the target's system call.
-The value in this field is ignored if the
-.I error
-field contains a nonzero value.
-.\" FIXME
-.\" Kees Cook suggested:
-.\"
-.\" Strictly speaking, this is architecture specific, but
-.\" all architectures do it this way. Should seccomp enforce
-.\" val == 0 when err != 0 ?
-.\"
-.\" Christian Brauner
-.\"
-.\" Feels like it should, at least for the SEND ioctl where we already
-.\" verify that val and err are both 0 when CONTINUE is specified (as you
-.\" pointed out correctly above).
-.RE
-.P
-On success, this operation returns 0; on failure, \-1 is returned, and
-.I errno
-is set to indicate the cause of the error.
-This operation can fail with the following errors:
-.TP
-.B EINPROGRESS
-A response to this notification has already been sent.
-.TP
-.B EINVAL
-An invalid value was specified in the
-.I flags
-field.
-.TP
-.B EINVAL
-The
-.I flags
-field contained
-.BR SECCOMP_USER_NOTIF_FLAG_CONTINUE ,
-and the
-.I error
-or
-.I val
-field was not zero.
-.TP
-.B ENOENT
-The blocked system call in the target
-has been interrupted by a signal handler
-or the target has terminated.
-.\" Jann Horn notes:
-.\" you could also get this [ENOENT] if a response has already
-.\" been sent, instead of EINPROGRESS - the only difference is
-.\" whether the target thread has picked up the response yet
-.\"
-.SS SECCOMP_IOCTL_NOTIF_ADDFD
-The
-.B SECCOMP_IOCTL_NOTIF_ADDFD
-operation (available since Linux 5.9)
-allows the supervisor to install a file descriptor
-into the target's file descriptor table.
-Much like the use of
-.B SCM_RIGHTS
-messages described in
-.BR unix (7),
-this operation is semantically equivalent to duplicating
-a file descriptor from the supervisor's file descriptor table
-into the target's file descriptor table.
-.P
-The
-.B SECCOMP_IOCTL_NOTIF_ADDFD
-operation permits the supervisor to emulate a target system call (such as
-.BR socket (2)
-or
-.BR openat (2))
-that generates a file descriptor.
-The supervisor can perform the system call that generates
-the file descriptor (and associated open file description)
-and then use this operation to allocate
-a file descriptor that refers to the same open file description in the target.
-(For an explanation of open file descriptions, see
-.BR open (2).)
-.P
-Once this operation has been performed,
-the supervisor can close its copy of the file descriptor.
-.P
-In the target,
-the received file descriptor is subject to the same
-Linux Security Module (LSM) checks as are applied to a file descriptor
-that is received in an
-.B SCM_RIGHTS
-ancillary message.
-If the file descriptor refers to a socket,
-it inherits the cgroup version 1 network controller settings
-.RI ( classid
-and
-.IR netprioidx )
-of the target.
-.P
-The third
-.BR ioctl (2)
-argument is a pointer to a structure of the following form:
-.P
-.in +4n
-.EX
-struct seccomp_notif_addfd {
- __u64 id; /* Cookie value */
- __u32 flags; /* Flags */
- __u32 srcfd; /* Local file descriptor number */
- __u32 newfd; /* 0 or desired file descriptor
- number in target */
- __u32 newfd_flags; /* Flags to set on target file
- descriptor */
-};
-.EE
-.in
-.P
-The fields in this structure are as follows:
-.TP
-.I id
-This field should be set to the notification ID
-(cookie value) that was obtained via
-.BR SECCOMP_IOCTL_NOTIF_RECV .
-.TP
-.I flags
-This field is a bit mask of flags that modify the behavior of the operation.
-Currently, the following flags are supported:
-.RS
-.TP
-.B SECCOMP_ADDFD_FLAG_SETFD
-When allocating the file descriptor in the target,
-use the file descriptor number specified in the
-.I newfd
-field.
-.TP
-.BR SECCOMP_ADDFD_FLAG_SEND " (since Linux 5.14)"
-.\" commit 0ae71c7720e3ae3aabd2e8a072d27f7bd173d25c
-Perform the equivalent of
-.B SECCOMP_IOCTL_NOTIF_ADDFD
-plus
-.B SECCOMP_IOCTL_NOTIF_SEND
-as an atomic operation.
-On successful invocation, the target process's
-.I errno
-will be 0
-and the return value will be the file descriptor number
-that was allocated in the target.
-If allocating the file descriptor in the target fails,
-the target's system call continues to be blocked
-until a successful response is sent.
-.RE
-.TP
-.I srcfd
-This field should be set to the number of the file descriptor
-in the supervisor that is to be duplicated.
-.TP
-.I newfd
-This field determines which file descriptor number is allocated in the target.
-If the
-.B SECCOMP_ADDFD_FLAG_SETFD
-flag is set,
-then this field specifies which file descriptor number should be allocated.
-If this file descriptor number is already open in the target,
-it is atomically closed and reused.
-If the descriptor duplication fails due to an LSM check, or if
-.I srcfd
-is not a valid file descriptor,
-the file descriptor
-.I newfd
-will not be closed in the target process.
-.IP
-If the
-.B SECCOMP_ADDFD_FLAG_SETFD
-flag is not set, then this field must be 0,
-and the kernel allocates the lowest unused file descriptor number
-in the target.
-.TP
-.I newfd_flags
-This field is a bit mask specifying flags that should be set on
-the file descriptor that is received in the target process.
-Currently, only the following flag is implemented:
-.RS
-.TP
-.B O_CLOEXEC
-Set the close-on-exec flag on the received file descriptor.
-.RE
-.P
-On success, this
-.BR ioctl (2)
-call returns the number of the file descriptor that was allocated
-in the target.
-Assuming that the emulated system call is one that returns
-a file descriptor as its function result (e.g.,
-.BR socket (2)),
-this value can be used as the return value
-.RI ( resp.val )
-that is supplied in the response that is subsequently sent with the
-.B SECCOMP_IOCTL_NOTIF_SEND
-operation.
-.P
-On error, \-1 is returned and
-.I errno
-is set to indicate the cause of the error.
-.P
-This operation can fail with the following errors:
-.TP
-.B EBADF
-Allocating the file descriptor in the target would cause the target's
-.B RLIMIT_NOFILE
-limit to be exceeded (see
-.BR getrlimit (2)).
-.TP
-.B EBUSY
-If the flag
-.B SECCOMP_ADDFD_FLAG_SEND
-is used, this means the operation can't proceed until other
-.B SECCOMP_IOCTL_NOTIF_ADDFD
-requests are processed.
-.TP
-.B EINPROGRESS
-The user-space notification specified in the
-.I id
-field exists but has not yet been fetched (by a
-.BR SECCOMP_IOCTL_NOTIF_RECV )
-or has already been responded to (by a
-.BR SECCOMP_IOCTL_NOTIF_SEND ).
-.TP
-.B EINVAL
-An invalid flag was specified in the
-.I flags
-or
-.I newfd_flags
-field, or the
-.I newfd
-field is nonzero and the
-.B SECCOMP_ADDFD_FLAG_SETFD
-flag was not specified in the
-.I flags
-field.
-.TP
-.B EMFILE
-The file descriptor number specified in
-.I newfd
-exceeds the limit specified in
-.IR /proc/sys/fs/nr_open .
-.TP
-.B ENOENT
-The blocked system call in the target
-has been interrupted by a signal handler
-or the target has terminated.
-.P
-Here is some sample code (with error handling omitted) that uses the
-.B SECCOMP_IOCTL_NOTIF_ADDFD
-operation (here, to emulate a call to
-.BR openat (2)):
-.P
-.EX
-.in +4n
-int fd, targetFd;
-\&
-fd = openat(req\->data.args[0], path, req\->data.args[2],
- req\->data.args[3]);
-\&
-struct seccomp_notif_addfd addfd;
-addfd.id = req\->id; /* Cookie from SECCOMP_IOCTL_NOTIF_RECV */
-addfd.srcfd = fd;
-addfd.newfd = 0;
-addfd.flags = 0;
-addfd.newfd_flags = O_CLOEXEC;
-\&
-targetFd = ioctl(notifyFd, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
-\&
-close(fd); /* No longer needed in supervisor */
-\&
-struct seccomp_notif_resp *resp;
- /* Code to allocate 'resp' omitted */
-resp\->id = req\->id;
-resp\->error = 0; /* "Success" */
-resp\->val = targetFd;
-resp\->flags = 0;
-ioctl(notifyFd, SECCOMP_IOCTL_NOTIF_SEND, resp);
-.in
-.EE
-.SH NOTES
-One example use case for the user-space notification
-mechanism is to allow a container manager
-(a process which is typically running with more privilege than
-the processes inside the container)
-to mount block devices or create device nodes for the container.
-The mount use case provides an example of where the
-.B SECCOMP_USER_NOTIF_FLAG_CONTINUE
-flag (passed in a
-.B SECCOMP_IOCTL_NOTIF_SEND
-.BR ioctl (2)
-operation) is useful.
-Upon receiving a notification for the
-.BR mount (2)
-system call, the container manager (the "supervisor") can distinguish
-a request to mount a block filesystem
-(which would not be possible for a "target" process inside the container)
-and mount that filesystem.
-If, on the other hand, the container manager detects that the operation
-could be performed by the process inside the container
-(e.g., a mount of a
-.BR tmpfs (5)
-filesystem), it can notify the kernel that the target process's
-.BR mount (2)
-system call can continue.
-.\"
-.SS select()/poll()/epoll semantics
-The file descriptor returned when
-.BR seccomp (2)
-is employed with the
-.B SECCOMP_FILTER_FLAG_NEW_LISTENER
-flag can be monitored using
-.BR poll (2),
-.BR epoll (7),
-and
-.BR select (2).
-These interfaces indicate that the file descriptor is ready as follows:
-.IP \[bu] 3
-When a notification is pending,
-these interfaces indicate that the file descriptor is readable.
-Following such an indication, a subsequent
-.B SECCOMP_IOCTL_NOTIF_RECV
-.BR ioctl (2)
-will not block, returning either information about a notification
-or else failing with the error
-.B ENOENT
-if the target has been killed by a signal or its system call
-has been interrupted by a signal handler.
-.IP \[bu]
-After the notification has been received (i.e., by the
-.B SECCOMP_IOCTL_NOTIF_RECV
-.BR ioctl (2)
-operation), these interfaces indicate that the file descriptor is writable,
-meaning that a notification response can be sent using the
-.B SECCOMP_IOCTL_NOTIF_SEND
-.BR ioctl (2)
-operation.
-.IP \[bu]
-After the last thread using the filter has terminated and been reaped using
-.BR waitpid (2)
-(or similar),
-the file descriptor indicates an end-of-file condition (readable in
-.BR select (2);
-.BR POLLHUP / EPOLLHUP
-in
-.BR poll (2)/
-.BR epoll_wait (2)).
-.SS Design goals; use of SECCOMP_USER_NOTIF_FLAG_CONTINUE
-The intent of the user-space notification feature is
-to allow system calls to be performed on behalf of the target.
-The target's system call should either be handled by the supervisor or
-allowed to continue normally in the kernel (where standard security
-policies will be applied).
-.P
-.BR "Note well" :
-this mechanism must not be used to make security policy decisions
-about the system call,
-which would be inherently race-prone for reasons described next.
-.P
-The
-.B SECCOMP_USER_NOTIF_FLAG_CONTINUE
-flag must be used with caution.
-If set by the supervisor, the target's system call will continue.
-However, there is a time-of-check, time-of-use race here,
-since an attacker could exploit the interval of time where the target is
-blocked waiting on the "continue" response to do things such as
-rewriting the system call arguments.
-.P
-Note furthermore that a user-space notifier can be bypassed if
-the existing filters allow the use of
-.BR seccomp (2)
-or
-.BR prctl (2)
-to install a filter that returns an action value with a higher precedence than
-.B SECCOMP_RET_USER_NOTIF
-(see
-.BR seccomp (2)).
-.P
-It should thus be absolutely clear that the
-seccomp user-space notification mechanism
-.B can not
-be used to implement a security policy!
-It should only ever be used in scenarios where a more privileged process
-supervises the system calls of a lesser privileged target to
-get around kernel-enforced security restrictions when
-the supervisor deems this safe.
-In other words,
-in order to continue a system call, the supervisor should be sure that
-another security mechanism or the kernel itself will sufficiently block
-the system call if its arguments are rewritten to something unsafe.
-.\"
-.SS Caveats regarding the use of \fI/proc/\fPtid\fI/mem\fP
-The discussion above noted the need to use the
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-.BR ioctl (2)
-when opening the
-.IR /proc/ tid /mem
-file of the target
-to avoid the possibility of accessing the memory of the wrong process
-in the event that the target terminates and its ID
-is recycled by another (unrelated) thread.
-However, the use of this
-.BR ioctl (2)
-operation is also necessary in other situations,
-as explained in the following paragraphs.
-.P
-Consider the following scenario, where the supervisor
-tries to read the pathname argument of a target's blocked
-.BR mount (2)
-system call:
-.IP (1) 5
-From one of its functions
-.RI ( func() ),
-the target calls
-.BR mount (2),
-which triggers a user-space notification and causes the target to block.
-.IP (2)
-The supervisor receives the notification, opens
-.IR /proc/ tid /mem ,
-and (successfully) performs the
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-check.
-.IP (3)
-The target receives a signal, which causes the
-.BR mount (2)
-to abort.
-.IP (4)
-The signal handler executes in the target, and returns.
-.IP (5)
-Upon return from the handler, the execution of
-.I func()
-resumes, and it returns (and perhaps other functions are called,
-overwriting the memory that had been used for the stack frame of
-.IR func() ).
-.IP (6)
-Using the address provided in the notification information,
-the supervisor reads from the target's memory location that used to
-contain the pathname.
-.IP (7)
-The supervisor now calls
-.BR mount (2)
-with some arbitrary bytes obtained in the previous step.
-.P
-The conclusion from the above scenario is this:
-since the target's blocked system call may be interrupted by a signal handler,
-the supervisor must be written to expect that the
-target may abandon its system call at
-.B any
-time;
-in such an event, any information that the supervisor obtained from
-the target's memory must be considered invalid.
-.P
-To prevent such scenarios,
-every read from the target's memory must be separated from use of
-the bytes so obtained by a
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-check.
-In the above example, the check would be placed between the two final steps.
-An example of such a check is shown in EXAMPLES.
-.P
-Following on from the above, it should be clear that
-a write by the supervisor into the target's memory can
-.B never
-be considered safe.
-.\"
-.SS Caveats regarding blocking system calls
-Suppose that the target performs a blocking system call (e.g.,
-.BR accept (2))
-that the supervisor should handle.
-The supervisor might then in turn execute the same blocking system call.
-.P
-In this scenario,
-it is important to note that if the target's system call is now
-interrupted by a signal, the supervisor is
-.I not
-informed of this.
-If the supervisor does not take suitable steps to
-actively discover that the target's system call has been canceled,
-various difficulties can occur.
-Taking the example of
-.BR accept (2),
-the supervisor might remain blocked in its
-.BR accept (2)
-holding a port number that the target
-(which, after the interruption by the signal handler,
-perhaps closed its listening socket) might expect to be able to reuse in a
-.BR bind (2)
-call.
-.P
-Therefore, when the supervisor wishes to emulate a blocking system call,
-it must do so in such a way that it gets informed if the target's
-system call is interrupted by a signal handler.
-For example, if the supervisor itself executes the same
-blocking system call, then it could employ a separate thread
-that uses the
-.B SECCOMP_IOCTL_NOTIF_ID_VALID
-operation to check if the target is still blocked in its system call.
-Alternatively, in the
-.BR accept (2)
-example, the supervisor might use
-.BR poll (2)
-to monitor both the notification file descriptor
-(so as to discover when the target's
-.BR accept (2)
-call has been interrupted) and the listening file descriptor
-(so as to know when a connection is available).
-.P
-If the target's system call is interrupted,
-the supervisor must take care to release resources (e.g., file descriptors)
-that it acquired on behalf of the target.
-.\"
-.SS Interaction with SA_RESTART signal handlers
-Consider the following scenario:
-.IP (1) 5
-The target process has used
-.BR sigaction (2)
-to install a signal handler with the
-.B SA_RESTART
-flag.
-.IP (2)
-The target has made a system call that triggered a seccomp
-user-space notification and the target is currently blocked
-until the supervisor sends a notification response.
-.IP (3)
-A signal is delivered to the target and the signal handler is executed.
-.IP (4)
-When (if) the supervisor attempts to send a notification response, the
-.B SECCOMP_IOCTL_NOTIF_SEND
-.BR ioctl (2)
-operation will fail with the
-.B ENOENT
-error.
-.P
-In this scenario, the kernel will restart the target's system call.
-Consequently, the supervisor will receive another user-space notification.
-Thus, depending on how many times the blocked system call
-is interrupted by a signal handler,
-the supervisor may receive multiple notifications for
-the same instance of a system call in the target.
-.P
-One oddity is that system call restarting as described in this scenario
-will occur even for the blocking system calls listed in
-.BR signal (7)
-that would
-.B never
-normally be restarted by the
-.B SA_RESTART
-flag.
-.\" FIXME
-.\" About the above, Kees Cook commented:
-.\"
-.\" Does this need fixing? I imagine the correct behavior for this case
-.\" would be a response to _SEND of EINPROGRESS and the target would see
-.\" EINTR normally?
-.\"
-.\" I mean, it's not like seccomp doesn't already expose weirdness with
-.\" syscall restarts. Not even arm64 compat agrees[3] with arm32 in this
-.\" regard. :(
-.
-.\" FIXME
-.\" Michael Kerrisk:
-.\" I wonder about the effect of this oddity for system calls that
-.\" are normally nonrestartable because they have timeouts. My
-.\" understanding is that the kernel doesn't restart those system
-.\" calls because it's impossible for the kernel to restart the call
-.\" with the right timeout value. I wonder what happens when those
-.\" system calls are restarted in the scenario we're discussing.
-.P
-Furthermore, if the supervisor response is a file descriptor
-added with
-.BR SECCOMP_IOCTL_NOTIF_ADDFD ,
-then the flag
-.B SECCOMP_ADDFD_FLAG_SEND
-can be used to atomically add the file descriptor and return that value,
-making sure no file descriptors are inadvertently leaked into the target.
-.SH BUGS
-If a
-.B SECCOMP_IOCTL_NOTIF_RECV
-.BR ioctl (2)
-operation
-.\" or a poll/epoll/select
-is performed after the target terminates, then the
-.BR ioctl (2)
-call simply blocks (rather than returning an error to indicate that the
-target no longer exists).
-.\" FIXME
-.\" Comment from Kees Cook:
-.\"
-.\" I want this fixed. It caused me no end of pain when building the
-.\" selftests, and ended up spawning my implementing a global test timeout
-.\" in kselftest. :P Before the usage counter refactor, there was no sane
-.\" way to deal with this, but now I think we're close.
-.\"
-.SH EXAMPLES
-The (somewhat contrived) program shown below demonstrates the use of
-the interfaces described in this page.
-The program creates a child process that serves as the "target" process.
-The child process installs a seccomp filter that returns the
-.B SECCOMP_RET_USER_NOTIF
-action value if a call is made to
-.BR mkdir (2).
-The child process then calls
-.BR mkdir (2)
-once for each of the supplied command-line arguments,
-and reports the result returned by the call.
-After processing all arguments, the child process terminates.
-.P
-The parent process acts as the supervisor, listening for the notifications
-that are generated when the target process calls
-.BR mkdir (2).
-When such a notification occurs,
-the supervisor examines the memory of the target process (using
-.IR /proc/ pid /mem )
-to discover the pathname argument that was supplied to the
-.BR mkdir (2)
-call, and performs one of the following actions:
-.IP \[bu] 3
-If the pathname begins with the prefix "/tmp/",
-then the supervisor attempts to create the specified directory,
-and then spoofs a return for the target process based on the return
-value of the supervisor's
-.BR mkdir (2)
-call.
-In the event that that call succeeds,
-the spoofed success return value is the length of the pathname.
-.IP \[bu]
-If the pathname begins with "./" (i.e., it is a relative pathname),
-the supervisor sends a
-.B SECCOMP_USER_NOTIF_FLAG_CONTINUE
-response to the kernel to say that the kernel should execute
-the target process's
-.BR mkdir (2)
-call.
-.IP \[bu]
-If the pathname begins with some other prefix,
-the supervisor spoofs an error return for the target process,
-so that the target process's
-.BR mkdir (2)
-call appears to fail with the error
-.B EOPNOTSUPP
-("Operation not supported").
-Additionally, if the specified pathname is exactly "/bye",
-then the supervisor terminates.
-.P
-This program can be used to demonstrate various aspects of the
-behavior of the seccomp user-space notification mechanism.
-To aid such demonstrations,
-the program logs various messages to show the operation
-of the target process (lines prefixed "T:") and the supervisor
-(indented lines prefixed "S:").
-.P
-In the following example, the target attempts to create the directory
-.IR /tmp/x .
-Upon receiving the notification, the supervisor creates the directory on the
-target's behalf,
-and spoofs a success return to be received by the target process's
-.BR mkdir (2)
-call.
-.P
-.in +4n
-.EX
-$ \fB./seccomp_unotify /tmp/x\fP
-T: PID = 23168
-\&
-T: about to mkdir("/tmp/x")
- S: got notification (ID 0x17445c4a0f4e0e3c) for PID 23168
- S: executing: mkdir("/tmp/x", 0700)
- S: success! spoofed return = 6
- S: sending response (flags = 0; val = 6; error = 0)
-T: SUCCESS: mkdir(2) returned 6
-\&
-T: terminating
- S: target has terminated; bye
-.EE
-.in
-.P
-In the above output, note that the spoofed return value seen by the target
-process is 6 (the length of the pathname
-.IR /tmp/x ),
-whereas a normal
-.BR mkdir (2)
-call returns 0 on success.
-.P
-In the next example, the target attempts to create a directory using the
-relative pathname
-.IR ./sub .
-Since this pathname starts with "./",
-the supervisor sends a
-.B SECCOMP_USER_NOTIF_FLAG_CONTINUE
-response to the kernel,
-and the kernel then (successfully) executes the target process's
-.BR mkdir (2)
-call.
-.P
-.in +4n
-.EX
-$ \fB./seccomp_unotify ./sub\fP
-T: PID = 23204
-\&
-T: about to mkdir("./sub")
- S: got notification (ID 0xddb16abe25b4c12) for PID 23204
- S: target can execute system call
- S: sending response (flags = 0x1; val = 0; error = 0)
-T: SUCCESS: mkdir(2) returned 0
-\&
-T: terminating
- S: target has terminated; bye
-.EE
-.in
-.P
-If the target process attempts to create a directory with
-a pathname that doesn't start with "./" and doesn't begin with the prefix
-"/tmp/", then the supervisor spoofs an error return
-.RB ( EOPNOTSUPP ,
-"Operation not supported")
-for the target's
-.BR mkdir (2)
-call (which is not executed):
-.P
-.in +4n
-.EX
-$ \fB./seccomp_unotify /xxx\fP
-T: PID = 23178
-\&
-T: about to mkdir("/xxx")
- S: got notification (ID 0xe7dc095d1c524e80) for PID 23178
- S: spoofing error response (Operation not supported)
- S: sending response (flags = 0; val = 0; error = \-95)
-T: ERROR: mkdir(2): Operation not supported
-\&
-T: terminating
- S: target has terminated; bye
-.EE
-.in
-.P
-In the next example,
-the target process attempts to create a directory with the pathname
-.IR /tmp/nosuchdir/b .
-Upon receiving the notification,
-the supervisor attempts to create that directory, but the
-.BR mkdir (2)
-call fails because the directory
-.I /tmp/nosuchdir
-does not exist.
-Consequently, the supervisor spoofs an error return that passes the error
-that it received back to the target process's
-.BR mkdir (2)
-call.
-.P
-.in +4n
-.EX
-$ \fB./seccomp_unotify /tmp/nosuchdir/b\fP
-T: PID = 23199
-\&
-T: about to mkdir("/tmp/nosuchdir/b")
- S: got notification (ID 0x8744454293506046) for PID 23199
- S: executing: mkdir("/tmp/nosuchdir/b", 0700)
- S: failure! (errno = 2; No such file or directory)
- S: sending response (flags = 0; val = 0; error = \-2)
-T: ERROR: mkdir(2): No such file or directory
-\&
-T: terminating
- S: target has terminated; bye
-.EE
-.in
-.P
-If the supervisor receives a notification and sees that the
-argument of the target's
-.BR mkdir (2)
-is the string "/bye", then (as well as spoofing an
-.B EOPNOTSUPP
-error), the supervisor terminates.
-If the target process subsequently executes another
-.BR mkdir (2)
-that triggers its seccomp filter to return the
-.B SECCOMP_RET_USER_NOTIF
-action value, then the kernel causes the target process's system call to
-fail with the error
-.B ENOSYS
-("Function not implemented").
-This is demonstrated by the following example:
-.P
-.in +4n
-.EX
-$ \fB./seccomp_unotify /bye /tmp/y\fP
-T: PID = 23185
-\&
-T: about to mkdir("/bye")
- S: got notification (ID 0xa81236b1d2f7b0f4) for PID 23185
- S: spoofing error response (Operation not supported)
- S: sending response (flags = 0; val = 0; error = \-95)
- S: terminating **********
-T: ERROR: mkdir(2): Operation not supported
-\&
-T: about to mkdir("/tmp/y")
-T: ERROR: mkdir(2): Function not implemented
-\&
-T: terminating
-.EE
-.in
-.\"
-.SS Program source
-.\" SRC BEGIN (seccomp_unotify.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <linux/audit.h>
-#include <linux/filter.h>
-#include <linux/seccomp.h>
-#include <signal.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/prctl.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#include <sys/un.h>
-#include <unistd.h>
-\&
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-\&
-/* Send the file descriptor \[aq]fd\[aq] over the connected UNIX domain socket
- \[aq]sockfd\[aq]. Returns 0 on success, or \-1 on error. */
-\&
-static int
-sendfd(int sockfd, int fd)
-{
- int data;
- struct iovec iov;
- struct msghdr msgh;
- struct cmsghdr *cmsgp;
-\&
- /* Allocate a char array of suitable size to hold the ancillary data.
- However, since this buffer is in reality a \[aq]struct cmsghdr\[aq], use a
- union to ensure that it is suitably aligned. */
- union {
- char buf[CMSG_SPACE(sizeof(int))];
- /* Space large enough to hold an \[aq]int\[aq] */
- struct cmsghdr align;
- } controlMsg;
-\&
- /* The \[aq]msg_name\[aq] field can be used to specify the address of the
- destination socket when sending a datagram. However, we do not
- need to use this field because \[aq]sockfd\[aq] is a connected socket. */
-\&
- msgh.msg_name = NULL;
- msgh.msg_namelen = 0;
-\&
- /* On Linux, we must transmit at least one byte of real data in
- order to send ancillary data. We transmit an arbitrary integer
- whose value is ignored by recvfd(). */
-\&
- msgh.msg_iov = &iov;
- msgh.msg_iovlen = 1;
- iov.iov_base = &data;
- iov.iov_len = sizeof(int);
- data = 12345;
-\&
- /* Set \[aq]msghdr\[aq] fields that describe ancillary data */
-\&
- msgh.msg_control = controlMsg.buf;
- msgh.msg_controllen = sizeof(controlMsg.buf);
-\&
- /* Set up ancillary data describing file descriptor to send */
-\&
- cmsgp = CMSG_FIRSTHDR(&msgh);
- cmsgp\->cmsg_level = SOL_SOCKET;
- cmsgp\->cmsg_type = SCM_RIGHTS;
- cmsgp\->cmsg_len = CMSG_LEN(sizeof(int));
- memcpy(CMSG_DATA(cmsgp), &fd, sizeof(int));
-\&
- /* Send real plus ancillary data */
-\&
- if (sendmsg(sockfd, &msgh, 0) == \-1)
- return \-1;
-\&
- return 0;
-}
-\&
-/* Receive a file descriptor on a connected UNIX domain socket. Returns
- the received file descriptor on success, or \-1 on error. */
-\&
-static int
-recvfd(int sockfd)
-{
- int data, fd;
- ssize_t nr;
- struct iovec iov;
- struct msghdr msgh;
-\&
- /* Allocate a char buffer for the ancillary data. See the comments
- in sendfd() */
- union {
- char buf[CMSG_SPACE(sizeof(int))];
- struct cmsghdr align;
- } controlMsg;
- struct cmsghdr *cmsgp;
-\&
- /* The \[aq]msg_name\[aq] field can be used to obtain the address of the
- sending socket. However, we do not need this information. */
-\&
- msgh.msg_name = NULL;
- msgh.msg_namelen = 0;
-\&
- /* Specify buffer for receiving real data */
-\&
- msgh.msg_iov = &iov;
- msgh.msg_iovlen = 1;
- iov.iov_base = &data; /* Real data is an \[aq]int\[aq] */
- iov.iov_len = sizeof(int);
-\&
- /* Set \[aq]msghdr\[aq] fields that describe ancillary data */
-\&
- msgh.msg_control = controlMsg.buf;
- msgh.msg_controllen = sizeof(controlMsg.buf);
-\&
- /* Receive real plus ancillary data; real data is ignored */
-\&
- nr = recvmsg(sockfd, &msgh, 0);
- if (nr == \-1)
- return \-1;
-\&
- cmsgp = CMSG_FIRSTHDR(&msgh);
-\&
- /* Check the validity of the \[aq]cmsghdr\[aq] */
-\&
- if (cmsgp == NULL
- || cmsgp\->cmsg_len != CMSG_LEN(sizeof(int))
- || cmsgp\->cmsg_level != SOL_SOCKET
- || cmsgp\->cmsg_type != SCM_RIGHTS)
- {
- errno = EINVAL;
- return \-1;
- }
-\&
- /* Return the received file descriptor to our caller */
-\&
- memcpy(&fd, CMSG_DATA(cmsgp), sizeof(int));
- return fd;
-}
-\&
-static void
-sigchldHandler(int sig)
-{
- char msg[] = "\etS: target has terminated; bye\en";
-\&
- write(STDOUT_FILENO, msg, sizeof(msg) \- 1);
- _exit(EXIT_SUCCESS);
-}
-\&
-static int
-seccomp(unsigned int operation, unsigned int flags, void *args)
-{
- return syscall(SYS_seccomp, operation, flags, args);
-}
-\&
-/* The following is the x86\-64\-specific BPF boilerplate code for checking
- that the BPF program is running on the right architecture + ABI. At
- completion of these instructions, the accumulator contains the system
- call number. */
-\&
-/* For the x32 ABI, all system call numbers have bit 30 set */
-\&
-#define X32_SYSCALL_BIT 0x40000000
-\&
-#define X86_64_CHECK_ARCH_AND_LOAD_SYSCALL_NR \e
- BPF_STMT(BPF_LD | BPF_W | BPF_ABS, \e
- (offsetof(struct seccomp_data, arch))), \e
- BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 2), \e
- BPF_STMT(BPF_LD | BPF_W | BPF_ABS, \e
- (offsetof(struct seccomp_data, nr))), \e
- BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, X32_SYSCALL_BIT, 0, 1), \e
- BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS)
-\&
-/* installNotifyFilter() installs a seccomp filter that generates
- user\-space notifications (SECCOMP_RET_USER_NOTIF) when the process
- calls mkdir(2); the filter allows all other system calls.
-\&
- The function return value is a file descriptor from which the
- user\-space notifications can be fetched. */
-\&
-static int
-installNotifyFilter(void)
-{
- int notifyFd;
-\&
- struct sock_filter filter[] = {
- X86_64_CHECK_ARCH_AND_LOAD_SYSCALL_NR,
-\&
- /* mkdir() triggers notification to user\-space supervisor */
-\&
- BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_mkdir, 0, 1),
- BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
-\&
- /* Every other system call is allowed */
-\&
- BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
- };
-\&
- struct sock_fprog prog = {
- .len = ARRAY_SIZE(filter),
- .filter = filter,
- };
-\&
- /* Install the filter with the SECCOMP_FILTER_FLAG_NEW_LISTENER flag;
- as a result, seccomp() returns a notification file descriptor. */
-\&
- notifyFd = seccomp(SECCOMP_SET_MODE_FILTER,
- SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog);
- if (notifyFd == \-1)
- err(EXIT_FAILURE, "seccomp\-install\-notify\-filter");
-\&
- return notifyFd;
-}
-\&
-/* Close a pair of sockets created by socketpair() */
-\&
-static void
-closeSocketPair(int sockPair[2])
-{
- if (close(sockPair[0]) == \-1)
- err(EXIT_FAILURE, "closeSocketPair\-close\-0");
- if (close(sockPair[1]) == \-1)
- err(EXIT_FAILURE, "closeSocketPair\-close\-1");
-}
-\&
-/* Implementation of the target process; create a child process that:
-\&
- (1) installs a seccomp filter with the
- SECCOMP_FILTER_FLAG_NEW_LISTENER flag;
- (2) writes the seccomp notification file descriptor returned from
- the previous step onto the UNIX domain socket, \[aq]sockPair[0]\[aq];
- (3) calls mkdir(2) for each element of \[aq]argv\[aq].
-\&
- The function return value in the parent is the PID of the child
- process; the child does not return from this function. */
-\&
-static pid_t
-targetProcess(int sockPair[2], char *argv[])
-{
- int notifyFd, s;
- pid_t targetPid;
-\&
- targetPid = fork();
-\&
- if (targetPid == \-1)
- err(EXIT_FAILURE, "fork");
-\&
- if (targetPid > 0) /* In parent, return PID of child */
- return targetPid;
-\&
- /* Child falls through to here */
-\&
- printf("T: PID = %ld\en", (long) getpid());
-\&
- /* Install seccomp filter(s) */
-\&
- if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
- err(EXIT_FAILURE, "prctl");
-\&
- notifyFd = installNotifyFilter();
-\&
- /* Pass the notification file descriptor to the supervisor process over
- a UNIX domain socket */
-\&
- if (sendfd(sockPair[0], notifyFd) == \-1)
- err(EXIT_FAILURE, "sendfd");
-\&
- /* Notification and socket FDs are no longer needed in target */
-\&
- if (close(notifyFd) == \-1)
- err(EXIT_FAILURE, "close\-target\-notify\-fd");
-\&
- closeSocketPair(sockPair);
-\&
- /* Perform a mkdir() call for each of the command\-line arguments */
-\&
- for (char **ap = argv; *ap != NULL; ap++) {
- printf("\enT: about to mkdir(\e"%s\e")\en", *ap);
-\&
- s = mkdir(*ap, 0700);
- if (s == \-1)
- perror("T: ERROR: mkdir(2)");
- else
- printf("T: SUCCESS: mkdir(2) returned %d\en", s);
- }
-\&
- printf("\enT: terminating\en");
- exit(EXIT_SUCCESS);
-}
-\&
-/* Check that the notification ID provided by a SECCOMP_IOCTL_NOTIF_RECV
- operation is still valid. It will no longer be valid if the target
- process has terminated or is no longer blocked in the system call that
- generated the notification (because it was interrupted by a signal).
-\&
- This operation can be used when doing such things as accessing
- /proc/PID files in the target process in order to avoid TOCTOU race
- conditions where the PID that is returned by SECCOMP_IOCTL_NOTIF_RECV
- terminates and is reused by another process. */
-\&
-static bool
-cookieIsValid(int notifyFd, uint64_t id)
-{
- return ioctl(notifyFd, SECCOMP_IOCTL_NOTIF_ID_VALID, &id) == 0;
-}
-\&
-/* Access the memory of the target process in order to fetch the
- pathname referred to by the system call argument \[aq]argNum\[aq] in
- \[aq]req\->data.args[]\[aq]. The pathname is returned in \[aq]path\[aq],
- a buffer of \[aq]len\[aq] bytes allocated by the caller.
-\&
- Returns true if the pathname is successfully fetched, and false
- otherwise. For possible causes of failure, see the comments below. */
-\&
-static bool
-getTargetPathname(struct seccomp_notif *req, int notifyFd,
- int argNum, char *path, size_t len)
-{
- int procMemFd;
- char procMemPath[PATH_MAX];
- ssize_t nread;
-\&
- snprintf(procMemPath, sizeof(procMemPath), "/proc/%d/mem", req\->pid);
-\&
- procMemFd = open(procMemPath, O_RDONLY | O_CLOEXEC);
- if (procMemFd == \-1)
- return false;
-\&
- /* Check that the process whose info we are accessing is still alive
- and blocked in the system call that caused the notification.
- If the SECCOMP_IOCTL_NOTIF_ID_VALID operation (performed in
- cookieIsValid()) succeeded, we know that the /proc/PID/mem file
- descriptor that we opened corresponded to the process for which we
- received a notification. If that process subsequently terminates,
- then read() on that file descriptor will return 0 (EOF). */
-\&
- if (!cookieIsValid(notifyFd, req\->id)) {
- close(procMemFd);
- return false;
- }
-\&
- /* Read bytes at the location containing the pathname argument */
-\&
- nread = pread(procMemFd, path, len, req\->data.args[argNum]);
-\&
- close(procMemFd);
-\&
- if (nread <= 0)
- return false;
-\&
- /* Once again check that the notification ID is still valid. The
- case we are particularly concerned about here is that just
- before we fetched the pathname, the target\[aq]s blocked system
- call was interrupted by a signal handler, and after the handler
- returned, the target carried on execution (past the interrupted
- system call). In that case, we have no guarantees about what we
- are reading, since the target\[aq]s memory may have been arbitrarily
- changed by subsequent operations. */
-\&
- if (!cookieIsValid(notifyFd, req\->id)) {
- perror("\etS: notification ID check failed!!!");
- return false;
- }
-\&
- /* Even if the target\[aq]s system call was not interrupted by a signal,
- we have no guarantees about what was in the memory of the target
- process. (The memory may have been modified by another thread, or
- even by an external attacking process.) We therefore treat the
- buffer returned by pread() as untrusted input. The buffer should
- contain a terminating null byte; if not, then we will trigger an
- error for the target process. */
-\&
- if (strnlen(path, nread) < nread)
- return true;
-\&
- return false;
-}
-\&
-/* Allocate buffers for the seccomp user\-space notification request and
- response structures. It is the caller\[aq]s responsibility to free the
- buffers returned via \[aq]req\[aq] and \[aq]resp\[aq]. */
-\&
-static void
-allocSeccompNotifBuffers(struct seccomp_notif **req,
- struct seccomp_notif_resp **resp,
- struct seccomp_notif_sizes *sizes)
-{
- size_t resp_size;
-\&
- /* Discover the sizes of the structures that are used to receive
- notifications and send notification responses, and allocate
- buffers of those sizes. */
-\&
- if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, sizes) == \-1)
- err(EXIT_FAILURE, "seccomp\-SECCOMP_GET_NOTIF_SIZES");
-\&
- *req = malloc(sizes\->seccomp_notif);
- if (*req == NULL)
- err(EXIT_FAILURE, "malloc\-seccomp_notif");
-\&
- /* When allocating the response buffer, we must allow for the fact
- that the user\-space binary may have been built with user\-space
- headers where \[aq]struct seccomp_notif_resp\[aq] is bigger than the
- response buffer expected by the (older) kernel. Therefore, we
- allocate a buffer that is the maximum of the two sizes. This
- ensures that if the supervisor places bytes into the response
- structure that are past the response size that the kernel expects,
- then the supervisor is not touching an invalid memory location. */
-\&
- resp_size = sizes\->seccomp_notif_resp;
- if (sizeof(struct seccomp_notif_resp) > resp_size)
- resp_size = sizeof(struct seccomp_notif_resp);
-\&
- *resp = malloc(resp_size);
- if (*resp == NULL)
- err(EXIT_FAILURE, "malloc\-seccomp_notif_resp");
-\&
-}
-\&
-/* Handle notifications that arrive via the SECCOMP_RET_USER_NOTIF file
- descriptor, \[aq]notifyFd\[aq]. */
-\&
-static void
-handleNotifications(int notifyFd)
-{
- bool pathOK;
- char path[PATH_MAX];
- struct seccomp_notif *req;
- struct seccomp_notif_resp *resp;
- struct seccomp_notif_sizes sizes;
-\&
- allocSeccompNotifBuffers(&req, &resp, &sizes);
-\&
- /* Loop handling notifications */
-\&
- for (;;) {
-\&
- /* Wait for next notification, returning info in \[aq]*req\[aq] */
-\&
- memset(req, 0, sizes.seccomp_notif);
- if (ioctl(notifyFd, SECCOMP_IOCTL_NOTIF_RECV, req) == \-1) {
- if (errno == EINTR)
- continue;
- err(EXIT_FAILURE, "\etS: ioctl\-SECCOMP_IOCTL_NOTIF_RECV");
- }
-\&
- printf("\etS: got notification (ID %#llx) for PID %d\en",
- req\->id, req\->pid);
-\&
- /* The only system call that can generate a notification event
- is mkdir(2). Nevertheless, we check that the notified system
- call is indeed mkdir() as kind of future\-proofing of this
- code in case the seccomp filter is later modified to
- generate notifications for other system calls. */
-\&
- if (req\->data.nr != SYS_mkdir) {
- printf("\etS: notification contained unexpected "
- "system call number; bye!!!\en");
- exit(EXIT_FAILURE);
- }
-\&
- pathOK = getTargetPathname(req, notifyFd, 0, path, sizeof(path));
-\&
- /* Prepopulate some fields of the response */
-\&
- resp\->id = req\->id; /* Response includes notification ID */
- resp\->flags = 0;
- resp\->val = 0;
-\&
- /* If getTargetPathname() failed, trigger an EINVAL error
- response (sending this response may yield an error if the
- failure occurred because the notification ID was no longer
- valid); if the directory is in /tmp, then create it on behalf
- of the target; if the pathname starts with \[aq]./\[aq], tell the
- kernel to let the target process execute the mkdir();
- otherwise, give an error for a directory pathname in any other
- location. */
-\&
- if (!pathOK) {
- resp\->error = \-EINVAL;
- printf("\etS: spoofing error for invalid pathname (%s)\en",
- strerror(\-resp\->error));
- } else if (strncmp(path, "/tmp/", strlen("/tmp/")) == 0) {
- printf("\etS: executing: mkdir(\e"%s\e", %#llo)\en",
- path, req\->data.args[1]);
-\&
- if (mkdir(path, req\->data.args[1]) == 0) {
- resp\->error = 0; /* "Success" */
- resp\->val = strlen(path); /* Used as return value of
- mkdir() in target */
- printf("\etS: success! spoofed return = %lld\en",
- resp\->val);
- } else {
-\&
- /* If mkdir() failed in the supervisor, pass the error
- back to the target */
-\&
- resp\->error = \-errno;
- printf("\etS: failure! (errno = %d; %s)\en", errno,
- strerror(errno));
- }
- } else if (strncmp(path, "./", strlen("./")) == 0) {
- resp\->error = resp\->val = 0;
- resp\->flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
- printf("\etS: target can execute system call\en");
- } else {
- resp\->error = \-EOPNOTSUPP;
- printf("\etS: spoofing error response (%s)\en",
- strerror(\-resp\->error));
- }
-\&
- /* Send a response to the notification */
-\&
- printf("\etS: sending response "
- "(flags = %#x; val = %lld; error = %d)\en",
- resp\->flags, resp\->val, resp\->error);
-\&
- if (ioctl(notifyFd, SECCOMP_IOCTL_NOTIF_SEND, resp) == \-1) {
- if (errno == ENOENT)
- printf("\etS: response failed with ENOENT; "
- "perhaps target process\[aq]s syscall was "
- "interrupted by a signal?\en");
- else
- perror("ioctl\-SECCOMP_IOCTL_NOTIF_SEND");
- }
-\&
- /* If the pathname is just "/bye", then the supervisor breaks out
- of the loop and terminates. This allows us to see what happens
- if the target process makes further calls to mkdir(2). */
-\&
- if (strcmp(path, "/bye") == 0)
- break;
- }
-\&
- free(req);
- free(resp);
- printf("\etS: terminating **********\en");
- exit(EXIT_FAILURE);
-}
-\&
-/* Implementation of the supervisor process:
-\&
- (1) obtains the notification file descriptor from \[aq]sockPair[1]\[aq]
- (2) handles notifications that arrive on that file descriptor. */
-\&
-static void
-supervisor(int sockPair[2])
-{
- int notifyFd;
-\&
- notifyFd = recvfd(sockPair[1]);
-\&
- if (notifyFd == \-1)
- err(EXIT_FAILURE, "recvfd");
-\&
- closeSocketPair(sockPair); /* We no longer need the socket pair */
-\&
- handleNotifications(notifyFd);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int sockPair[2];
- struct sigaction sa;
-\&
- setbuf(stdout, NULL);
-\&
- if (argc < 2) {
- fprintf(stderr, "At least one pathname argument is required\en");
- exit(EXIT_FAILURE);
- }
-\&
- /* Create a UNIX domain socket that is used to pass the seccomp
- notification file descriptor from the target process to the
- supervisor process. */
-\&
- if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockPair) == \-1)
- err(EXIT_FAILURE, "socketpair");
-\&
- /* Create a child process\-\-the "target"\-\-that installs seccomp
- filtering. The target process writes the seccomp notification
- file descriptor onto \[aq]sockPair[0]\[aq] and then calls mkdir(2) for
- each directory in the command\-line arguments. */
-\&
- (void) targetProcess(sockPair, &argv[optind]);
-\&
- /* Catch SIGCHLD when the target terminates, so that the
- supervisor can also terminate. */
-\&
- sa.sa_handler = sigchldHandler;
- sa.sa_flags = 0;
- sigemptyset(&sa.sa_mask);
- if (sigaction(SIGCHLD, &sa, NULL) == \-1)
- err(EXIT_FAILURE, "sigaction");
-\&
- supervisor(sockPair);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR ioctl (2),
-.BR pidfd_getfd (2),
-.BR pidfd_open (2),
-.BR seccomp (2)
-.P
-A further example program can be found in the kernel source file
-.IR samples/seccomp/user-trap.c .
diff --git a/man2/security.2 b/man2/security.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/security.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/select.2 b/man2/select.2
deleted file mode 100644
index bbf8ea9e0..000000000
--- a/man2/select.2
+++ /dev/null
@@ -1,765 +0,0 @@
-.\" This manpage is copyright (C) 1992 Drew Eckhardt,
-.\" copyright (C) 1995 Michael Shields,
-.\" copyright (C) 2001 Paul Sheer,
-.\" copyright (C) 2006, 2019 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-05-18 by Jim Van Zandt <jrv@vanzandt.mv.com>
-.\" Sun Feb 11 14:07:00 MET 1996 Martin Schulze <joey@linux.de>
-.\" * layout slightly modified
-.\"
-.\" Modified Mon Oct 21 23:05:29 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Thu Feb 24 01:41:09 CET 2000 by aeb
-.\" Modified Thu Feb 9 22:32:09 CET 2001 by bert hubert <ahu@ds9a.nl>, aeb
-.\" Modified Mon Nov 11 14:35:00 PST 2002 by Ben Woodard <ben@zork.net>
-.\" 2005-03-11, mtk, modified pselect() text (it is now a system
-.\" call in Linux 2.6.16.
-.\"
-.TH select 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-select, pselect, FD_CLR, FD_ISSET, FD_SET, FD_ZERO, fd_set \-
-synchronous I/O multiplexing
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/select.h>
-.P
-.BR typedef " /* ... */ " fd_set;
-.P
-.BI "int select(int " nfds ", fd_set *_Nullable restrict " readfds ,
-.BI " fd_set *_Nullable restrict " writefds ,
-.BI " fd_set *_Nullable restrict " exceptfds ,
-.BI " struct timeval *_Nullable restrict " timeout );
-.P
-.BI "void FD_CLR(int " fd ", fd_set *" set );
-.BI "int FD_ISSET(int " fd ", fd_set *" set );
-.BI "void FD_SET(int " fd ", fd_set *" set );
-.BI "void FD_ZERO(fd_set *" set );
-.P
-.BI "int pselect(int " nfds ", fd_set *_Nullable restrict " readfds ,
-.BI " fd_set *_Nullable restrict " writefds ,
-.BI " fd_set *_Nullable restrict " exceptfds ,
-.BI " const struct timespec *_Nullable restrict " timeout ,
-.BI " const sigset_t *_Nullable restrict " sigmask );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR pselect ():
-.nf
- _POSIX_C_SOURCE >= 200112L
-.fi
-.SH DESCRIPTION
-.BR "WARNING" :
-.BR select ()
-can monitor only file descriptor numbers that are less than
-.B FD_SETSIZE
-(1024)\[em]an unreasonably low limit for many modern applications\[em]and
-this limitation will not change.
-All modern applications should instead use
-.BR poll (2)
-or
-.BR epoll (7),
-which do not suffer this limitation.
-.P
-.BR select ()
-allows a program to monitor multiple file descriptors,
-waiting until one or more of the file descriptors become "ready"
-for some class of I/O operation (e.g., input possible).
-A file descriptor is considered ready if it is possible to
-perform a corresponding I/O operation (e.g.,
-.BR read (2),
-or a sufficiently small
-.BR write (2))
-without blocking.
-.\"
-.SS fd_set
-A structure type that can represent a set of file descriptors.
-According to POSIX,
-the maximum number of file descriptors in an
-.I fd_set
-structure is the value of the macro
-.BR FD_SETSIZE .
-.\"
-.SS File descriptor sets
-The principal arguments of
-.BR select ()
-are three "sets" of file descriptors (declared with the type
-.IR fd_set ),
-which allow the caller to wait for three classes of events
-on the specified set of file descriptors.
-Each of the
-.I fd_set
-arguments may be specified as NULL if no file descriptors are
-to be watched for the corresponding class of events.
-.P
-.BR "Note well" :
-Upon return, each of the file descriptor sets is modified in place
-to indicate which file descriptors are currently "ready".
-Thus, if using
-.BR select ()
-within a loop, the sets \fImust be reinitialized\fP before each call.
-.P
-The contents of a file descriptor set can be manipulated
-using the following macros:
-.TP
-.BR FD_ZERO ()
-This macro clears (removes all file descriptors from)
-.IR set .
-It should be employed as the first step in initializing a file descriptor set.
-.TP
-.BR FD_SET ()
-This macro adds the file descriptor
-.I fd
-to
-.IR set .
-Adding a file descriptor that is already present in the set is a no-op,
-and does not produce an error.
-.TP
-.BR FD_CLR ()
-This macro removes the file descriptor
-.I fd
-from
-.IR set .
-Removing a file descriptor that is not present in the set is a no-op,
-and does not produce an error.
-.TP
-.BR FD_ISSET ()
-.BR select ()
-modifies the contents of the sets according to the rules
-described below.
-After calling
-.BR select (),
-the
-.BR FD_ISSET ()
-macro
-can be used to test if a file descriptor is still present in a set.
-.BR FD_ISSET ()
-returns nonzero if the file descriptor
-.I fd
-is present in
-.IR set ,
-and zero if it is not.
-.\"
-.SS Arguments
-The arguments of
-.BR select ()
-are as follows:
-.TP
-.I readfds
-The file descriptors in this set are watched to see if they are
-ready for reading.
-A file descriptor is ready for reading if a read operation will not
-block; in particular, a file descriptor is also ready on end-of-file.
-.IP
-After
-.BR select ()
-has returned, \fIreadfds\fP will be
-cleared of all file descriptors except for those that are ready for reading.
-.TP
-.I writefds
-The file descriptors in this set are watched to see if they are
-ready for writing.
-A file descriptor is ready for writing if a write operation will not block.
-However, even if a file descriptor is indicated as writable,
-a large write may still block.
-.IP
-After
-.BR select ()
-has returned, \fIwritefds\fP will be
-cleared of all file descriptors except for those that are ready for writing.
-.TP
-.I exceptfds
-The file descriptors in this set are watched for "exceptional conditions".
-For examples of some exceptional conditions, see the discussion of
-.B POLLPRI
-in
-.BR poll (2).
-.IP
-After
-.BR select ()
-has returned,
-\fIexceptfds\fP will be cleared of all file descriptors except for those
-for which an exceptional condition has occurred.
-.TP
-.I nfds
-This argument should be set to the highest-numbered file descriptor in any
-of the three sets, plus 1.
-The indicated file descriptors in each set are checked, up to this limit
-(but see BUGS).
-.TP
-.I timeout
-The
-.I timeout
-argument is a
-.I timeval
-structure (shown below) that specifies the interval that
-.BR select ()
-should block waiting for a file descriptor to become ready.
-The call will block until either:
-.RS
-.IP \[bu] 3
-a file descriptor becomes ready;
-.IP \[bu]
-the call is interrupted by a signal handler; or
-.IP \[bu]
-the timeout expires.
-.RE
-.IP
-Note that the
-.I timeout
-interval will be rounded up to the system clock granularity,
-and kernel scheduling delays mean that the blocking interval
-may overrun by a small amount.
-.IP
-If both fields of the
-.I timeval
-structure are zero, then
-.BR select ()
-returns immediately.
-(This is useful for polling.)
-.IP
-If
-.I timeout
-is specified as NULL,
-.BR select ()
-blocks indefinitely waiting for a file descriptor to become ready.
-.\"
-.SS pselect()
-The
-.BR pselect ()
-system call allows an application to safely wait until either
-a file descriptor becomes ready or until a signal is caught.
-.P
-The operation of
-.BR select ()
-and
-.BR pselect ()
-is identical, other than these three differences:
-.IP \[bu] 3
-.BR select ()
-uses a timeout that is a
-.I struct timeval
-(with seconds and microseconds), while
-.BR pselect ()
-uses a
-.I struct timespec
-(with seconds and nanoseconds).
-.IP \[bu]
-.BR select ()
-may update the
-.I timeout
-argument to indicate how much time was left.
-.BR pselect ()
-does not change this argument.
-.IP \[bu]
-.BR select ()
-has no
-.I sigmask
-argument, and behaves as
-.BR pselect ()
-called with NULL
-.IR sigmask .
-.P
-.I sigmask
-is a pointer to a signal mask (see
-.BR sigprocmask (2));
-if it is not NULL, then
-.BR pselect ()
-first replaces the current signal mask by the one pointed to by
-.IR sigmask ,
-then does the "select" function, and then restores the original
-signal mask.
-(If
-.I sigmask
-is NULL,
-the signal mask is not modified during the
-.BR pselect ()
-call.)
-.P
-Other than the difference in the precision of the
-.I timeout
-argument, the following
-.BR pselect ()
-call:
-.P
-.in +4n
-.EX
-ready = pselect(nfds, &readfds, &writefds, &exceptfds,
- timeout, &sigmask);
-.EE
-.in
-.P
-is equivalent to
-.I atomically
-executing the following calls:
-.P
-.in +4n
-.EX
-sigset_t origmask;
-\&
-pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
-ready = select(nfds, &readfds, &writefds, &exceptfds, timeout);
-pthread_sigmask(SIG_SETMASK, &origmask, NULL);
-.EE
-.in
-.P
-The reason that
-.BR pselect ()
-is needed is that if one wants to wait for either a signal
-or for a file descriptor to become ready, then
-an atomic test is needed to prevent race conditions.
-(Suppose the signal handler sets a global flag and
-returns.
-Then a test of this global flag followed by a call of
-.BR select ()
-could hang indefinitely if the signal arrived just after the test
-but just before the call.
-By contrast,
-.BR pselect ()
-allows one to first block signals, handle the signals that have come in,
-then call
-.BR pselect ()
-with the desired
-.IR sigmask ,
-avoiding the race.)
-.SS The timeout
-The
-.I timeout
-argument for
-.BR select ()
-is a structure of the following type:
-.P
-.in +4n
-.EX
-struct timeval {
- time_t tv_sec; /* seconds */
- suseconds_t tv_usec; /* microseconds */
-};
-.EE
-.in
-.P
-The corresponding argument for
-.BR pselect ()
-is a
-.BR timespec (3)
-structure.
-.P
-On Linux,
-.BR select ()
-modifies
-.I timeout
-to reflect the amount of time not slept; most other implementations
-do not do this.
-(POSIX.1 permits either behavior.)
-This causes problems both when Linux code which reads
-.I timeout
-is ported to other operating systems, and when code is ported to Linux
-that reuses a \fIstruct timeval\fP for multiple
-.BR select ()s
-in a loop without reinitializing it.
-Consider
-.I timeout
-to be undefined after
-.BR select ()
-returns.
-.\" .P - it is rumored that:
-.\" On BSD, when a timeout occurs, the file descriptor bits are not changed.
-.\" - it is certainly true that:
-.\" Linux follows SUSv2 and sets the bit masks to zero upon a timeout.
-.SH RETURN VALUE
-On success,
-.BR select ()
-and
-.BR pselect ()
-return the number of file descriptors contained in the three returned
-descriptor sets (that is, the total number of bits that are set in
-.IR readfds ,
-.IR writefds ,
-.IR exceptfds ).
-The return value may be zero if the timeout expired before any
-file descriptors became ready.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error;
-the file descriptor sets are unmodified,
-and
-.I timeout
-becomes undefined.
-.SH ERRORS
-.TP
-.B EBADF
-An invalid file descriptor was given in one of the sets.
-(Perhaps a file descriptor that was already closed,
-or one on which an error has occurred.)
-However, see BUGS.
-.TP
-.B EINTR
-A signal was caught; see
-.BR signal (7).
-.TP
-.B EINVAL
-.I nfds
-is negative or exceeds the
-.B RLIMIT_NOFILE
-resource limit (see
-.BR getrlimit (2)).
-.TP
-.B EINVAL
-The value contained within
-.I timeout
-is invalid.
-.TP
-.B ENOMEM
-Unable to allocate memory for internal tables.
-.SH VERSIONS
-On some other UNIX systems,
-.\" Darwin, according to a report by Jeremy Sequoia, relayed by Josh Triplett
-.BR select ()
-can fail with the error
-.B EAGAIN
-if the system fails to allocate kernel-internal resources, rather than
-.B ENOMEM
-as Linux does.
-POSIX specifies this error for
-.BR poll (2),
-but not for
-.BR select ().
-Portable programs may wish to check for
-.B EAGAIN
-and loop, just as with
-.BR EINTR .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR select ()
-POSIX.1-2001, 4.4BSD (first appeared in 4.2BSD).
-.IP
-Generally portable to/from
-non-BSD systems supporting clones of the BSD socket layer (including
-System\ V variants).
-However, note that the System\ V variant typically
-sets the timeout variable before returning, but the BSD variant does not.
-.TP
-.BR pselect ()
-Linux 2.6.16.
-POSIX.1g, POSIX.1-2001.
-.IP
-Prior to this,
-it was emulated in glibc (but see BUGS).
-.TP
-.B fd_set
-POSIX.1-2001.
-.SH NOTES
-The following header also provides the
-.I fd_set
-type:
-.IR <sys/time.h> .
-.P
-An
-.I fd_set
-is a fixed-size buffer.
-Executing
-.BR FD_CLR ()
-or
-.BR FD_SET ()
-with a value of
-.I fd
-that is negative or is equal to or larger than
-.B FD_SETSIZE
-will result
-in undefined behavior.
-Moreover, POSIX requires
-.I fd
-to be a valid file descriptor.
-.P
-The operation of
-.BR select ()
-and
-.BR pselect ()
-is not affected by the
-.B O_NONBLOCK
-flag.
-.\"
-.SS The self-pipe trick
-On systems that lack
-.BR pselect (),
-reliable (and more portable) signal trapping can be achieved
-using the self-pipe trick.
-In this technique,
-a signal handler writes a byte to a pipe whose other end
-is monitored by
-.BR select ()
-in the main program.
-(To avoid possibly blocking when writing to a pipe that may be full
-or reading from a pipe that may be empty,
-nonblocking I/O is used when reading from and writing to the pipe.)
-.\"
-.SS Emulating usleep(3)
-Before the advent of
-.BR usleep (3),
-some code employed a call to
-.BR select ()
-with all three sets empty,
-.I nfds
-zero, and a non-NULL
-.I timeout
-as a fairly portable way to sleep with subsecond precision.
-.\"
-.SS Correspondence between select() and poll() notifications
-Within the Linux kernel source,
-.\" fs/select.c
-we find the following definitions which show the correspondence
-between the readable, writable, and exceptional condition notifications of
-.BR select ()
-and the event notifications provided by
-.BR poll (2)
-and
-.BR epoll (7):
-.P
-.in +4n
-.EX
-#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
- EPOLLHUP | EPOLLERR)
- /* Ready for reading */
-#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT |
- EPOLLERR)
- /* Ready for writing */
-#define POLLEX_SET (EPOLLPRI)
- /* Exceptional condition */
-.EE
-.in
-.\"
-.SS Multithreaded applications
-If a file descriptor being monitored by
-.BR select ()
-is closed in another thread, the result is unspecified.
-On some UNIX systems,
-.BR select ()
-unblocks and returns, with an indication that the file descriptor is ready
-(a subsequent I/O operation will likely fail with an error,
-unless another process reopens the file descriptor between the time
-.BR select ()
-returned and the I/O operation is performed).
-On Linux (and some other systems),
-closing the file descriptor in another thread has no effect on
-.BR select ().
-In summary, any application that relies on a particular behavior
-in this scenario must be considered buggy.
-.\"
-.SS C library/kernel differences
-The Linux kernel allows file descriptor sets of arbitrary size,
-determining the length of the sets to be checked from the value of
-.IR nfds .
-However, in the glibc implementation, the
-.I fd_set
-type is fixed in size.
-See also BUGS.
-.P
-The
-.BR pselect ()
-interface described in this page is implemented by glibc.
-The underlying Linux system call is named
-.BR pselect6 ().
-This system call has somewhat different behavior from the glibc
-wrapper function.
-.P
-The Linux
-.BR pselect6 ()
-system call modifies its
-.I timeout
-argument.
-However, the glibc wrapper function hides this behavior
-by using a local variable for the timeout argument that
-is passed to the system call.
-Thus, the glibc
-.BR pselect ()
-function does not modify its
-.I timeout
-argument;
-this is the behavior required by POSIX.1-2001.
-.P
-The final argument of the
-.BR pselect6 ()
-system call is not a
-.I "sigset_t\ *"
-pointer, but is instead a structure of the form:
-.P
-.in +4n
-.EX
-struct {
- const kernel_sigset_t *ss; /* Pointer to signal set */
- size_t ss_len; /* Size (in bytes) of object
- pointed to by \[aq]ss\[aq] */
-};
-.EE
-.in
-.P
-This allows the system call to obtain both
-a pointer to the signal set and its size,
-while allowing for the fact that most architectures
-support a maximum of 6 arguments to a system call.
-See
-.BR sigprocmask (2)
-for a discussion of the difference between the kernel and libc
-notion of the signal set.
-.\"
-.SS Historical glibc details
-glibc 2.0 provided an incorrect version of
-.BR pselect ()
-that did not take a
-.I sigmask
-argument.
-.P
-From glibc 2.1 to glibc 2.2.1,
-one must define
-.B _GNU_SOURCE
-in order to obtain the declaration of
-.BR pselect ()
-from
-.IR <sys/select.h> .
-.SH BUGS
-POSIX allows an implementation to define an upper limit,
-advertised via the constant
-.BR FD_SETSIZE ,
-on the range of file descriptors that can be specified
-in a file descriptor set.
-The Linux kernel imposes no fixed limit, but the glibc implementation makes
-.I fd_set
-a fixed-size type, with
-.B FD_SETSIZE
-defined as 1024, and the
-.BR FD_* ()
-macros operating according to that limit.
-To monitor file descriptors greater than 1023, use
-.BR poll (2)
-or
-.BR epoll (7)
-instead.
-.P
-The implementation of the
-.I fd_set
-arguments as value-result arguments is a design error that is avoided in
-.BR poll (2)
-and
-.BR epoll (7).
-.P
-According to POSIX,
-.BR select ()
-should check all specified file descriptors in the three file descriptor sets,
-up to the limit
-.IR nfds\-1 .
-However, the current implementation ignores any file descriptor in
-these sets that is greater than the maximum file descriptor number
-that the process currently has open.
-According to POSIX, any such file descriptor that is specified in one
-of the sets should result in the error
-.BR EBADF .
-.P
-Starting with glibc 2.1, glibc provided an emulation of
-.BR pselect ()
-that was implemented using
-.BR sigprocmask (2)
-and
-.BR select ().
-This implementation remained vulnerable to the very race condition that
-.BR pselect ()
-was designed to prevent.
-Modern versions of glibc use the (race-free)
-.BR pselect ()
-system call on kernels where it is provided.
-.P
-On Linux,
-.BR select ()
-may report a socket file descriptor as "ready for reading", while
-nevertheless a subsequent read blocks.
-This could for example
-happen when data has arrived but upon examination has the wrong
-checksum and is discarded.
-There may be other circumstances
-in which a file descriptor is spuriously reported as ready.
-.\" Stevens discusses a case where accept can block after select
-.\" returns successfully because of an intervening RST from the client.
-Thus it may be safer to use
-.B O_NONBLOCK
-on sockets that should not block.
-.\" Maybe the kernel should have returned EIO in such a situation?
-.P
-On Linux,
-.BR select ()
-also modifies
-.I timeout
-if the call is interrupted by a signal handler (i.e., the
-.B EINTR
-error return).
-This is not permitted by POSIX.1.
-The Linux
-.BR pselect ()
-system call has the same behavior,
-but the glibc wrapper hides this behavior by internally copying the
-.I timeout
-to a local variable and passing that variable to the system call.
-.SH EXAMPLES
-.\" SRC BEGIN (select.c)
-.EX
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/select.h>
-\&
-int
-main(void)
-{
- int retval;
- fd_set rfds;
- struct timeval tv;
-\&
- /* Watch stdin (fd 0) to see when it has input. */
-\&
- FD_ZERO(&rfds);
- FD_SET(0, &rfds);
-\&
- /* Wait up to five seconds. */
-\&
- tv.tv_sec = 5;
- tv.tv_usec = 0;
-\&
- retval = select(1, &rfds, NULL, NULL, &tv);
- /* Don\[aq]t rely on the value of tv now! */
-\&
- if (retval == \-1)
- perror("select()");
- else if (retval)
- printf("Data is available now.\en");
- /* FD_ISSET(0, &rfds) will be true. */
- else
- printf("No data within five seconds.\en");
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR accept (2),
-.BR connect (2),
-.BR poll (2),
-.BR read (2),
-.BR recv (2),
-.BR restart_syscall (2),
-.BR send (2),
-.BR sigprocmask (2),
-.BR write (2),
-.BR timespec (3),
-.BR epoll (7),
-.BR time (7)
-.P
-For a tutorial with discussion and examples, see
-.BR select_tut (2).
diff --git a/man2/select_tut.2 b/man2/select_tut.2
deleted file mode 100644
index 59c241973..000000000
--- a/man2/select_tut.2
+++ /dev/null
@@ -1,638 +0,0 @@
-.\" This manpage is copyright (C) 2001 Paul Sheer.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" very minor changes, aeb
-.\"
-.\" Modified 5 June 2002, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2006-05-13, mtk, removed much material that is redundant with select.2
-.\" various other changes
-.\" 2008-01-26, mtk, substantial changes and rewrites
-.\"
-.TH SELECT_TUT 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-select, pselect \- synchronous I/O multiplexing
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-See
-.BR select (2)
-.SH DESCRIPTION
-The
-.BR select ()
-and
-.BR pselect ()
-system calls are used to efficiently monitor multiple file descriptors,
-to see if any of them is, or becomes, "ready";
-that is, to see whether I/O becomes possible,
-or an "exceptional condition" has occurred on any of the file descriptors.
-.P
-This page provides background and tutorial information
-on the use of these system calls.
-For details of the arguments and semantics of
-.BR select ()
-and
-.BR pselect (),
-see
-.BR select (2).
-.\"
-.SS Combining signal and data events
-.BR pselect ()
-is useful if you are waiting for a signal as well as
-for file descriptor(s) to become ready for I/O.
-Programs that receive signals
-normally use the signal handler only to raise a global flag.
-The global flag will indicate that the event must be processed
-in the main loop of the program.
-A signal will cause the
-.BR select ()
-(or
-.BR pselect ())
-call to return with \fIerrno\fP set to \fBEINTR\fP.
-This behavior is essential so that signals can be processed
-in the main loop of the program, otherwise
-.BR select ()
-would block indefinitely.
-.P
-Now, somewhere
-in the main loop will be a conditional to check the global flag.
-So we must ask:
-what if a signal arrives after the conditional, but before the
-.BR select ()
-call?
-The answer is that
-.BR select ()
-would block indefinitely, even though an event is actually pending.
-This race condition is solved by the
-.BR pselect ()
-call.
-This call can be used to set the signal mask to a set of signals
-that are to be received only within the
-.BR pselect ()
-call.
-For instance, let us say that the event in question
-was the exit of a child process.
-Before the start of the main loop, we
-would block \fBSIGCHLD\fP using
-.BR sigprocmask (2).
-Our
-.BR pselect ()
-call would enable
-.B SIGCHLD
-by using an empty signal mask.
-Our program would look like:
-.P
-.EX
-static volatile sig_atomic_t got_SIGCHLD = 0;
-\&
-static void
-child_sig_handler(int sig)
-{
- got_SIGCHLD = 1;
-}
-\&
-int
-main(int argc, char *argv[])
-{
- sigset_t sigmask, empty_mask;
- struct sigaction sa;
- fd_set readfds, writefds, exceptfds;
- int r;
-\&
- sigemptyset(&sigmask);
- sigaddset(&sigmask, SIGCHLD);
- if (sigprocmask(SIG_BLOCK, &sigmask, NULL) == \-1) {
- perror("sigprocmask");
- exit(EXIT_FAILURE);
- }
-\&
- sa.sa_flags = 0;
- sa.sa_handler = child_sig_handler;
- sigemptyset(&sa.sa_mask);
- if (sigaction(SIGCHLD, &sa, NULL) == \-1) {
- perror("sigaction");
- exit(EXIT_FAILURE);
- }
-\&
- sigemptyset(&empty_mask);
-\&
- for (;;) { /* main loop */
- /* Initialize readfds, writefds, and exceptfds
- before the pselect() call. (Code omitted.) */
-\&
- r = pselect(nfds, &readfds, &writefds, &exceptfds,
- NULL, &empty_mask);
- if (r == \-1 && errno != EINTR) {
- /* Handle error */
- }
-\&
- if (got_SIGCHLD) {
- got_SIGCHLD = 0;
-\&
- /* Handle signalled event here; e.g., wait() for all
- terminated children. (Code omitted.) */
- }
-\&
- /* main body of program */
- }
-}
-.EE
-.SS Practical
-So what is the point of
-.BR select ()?
-Can't I just read and write to my file descriptors whenever I want?
-The point of
-.BR select ()
-is that it watches
-multiple descriptors at the same time and properly puts the process to
-sleep if there is no activity.
-UNIX programmers often find
-themselves in a position where they have to handle I/O from more than one
-file descriptor where the data flow may be intermittent.
-If you were to merely create a sequence of
-.BR read (2)
-and
-.BR write (2)
-calls, you would
-find that one of your calls may block waiting for data from/to a file
-descriptor, while another file descriptor is unused though ready for I/O.
-.BR select ()
-efficiently copes with this situation.
-.SS Select law
-Many people who try to use
-.BR select ()
-come across behavior that is
-difficult to understand and produces nonportable or borderline results.
-For instance, the example program below is carefully written not to
-block at any point, even though it does not set its file descriptors to
-nonblocking mode.
-It is easy to introduce
-subtle errors that will remove the advantage of using
-.BR select (),
-so here is a list of essentials to watch for when using
-.BR select ().
-.TP 4
-1.
-You should always try to use
-.BR select ()
-without a timeout.
-Your program
-should have nothing to do if there is no data available.
-Code that
-depends on timeouts is not usually portable and is difficult to debug.
-.TP
-2.
-The value \fInfds\fP must be properly calculated for efficiency as
-explained above.
-.TP
-3.
-No file descriptor must be added to any set if you do not intend
-to check its result after the
-.BR select ()
-call, and respond appropriately.
-See next rule.
-.TP
-4.
-After
-.BR select ()
-returns, all file descriptors in all sets
-should be checked to see if they are ready.
-.TP
-5.
-The functions
-.BR read (2),
-.BR recv (2),
-.BR write (2),
-and
-.BR send (2)
-do \fInot\fP necessarily read/write the full amount of data
-that you have requested.
-If they do read/write the full amount, it's
-because you have a low traffic load and a fast stream.
-This is not always going to be the case.
-You should cope with the case of your
-functions managing to send or receive only a single byte.
-.TP
-6.
-Never read/write only a single byte at a time unless you are really
-sure that you have a small amount of data to process.
-It is extremely
-inefficient not to read/write as much data as you can buffer each time.
-The buffers in the example below are 1024 bytes although they could
-easily be made larger.
-.TP
-7.
-Calls to
-.BR read (2),
-.BR recv (2),
-.BR write (2),
-.BR send (2),
-and
-.BR select ()
-can fail with the error
-\fBEINTR\fP,
-and calls to
-.BR read (2),
-.BR recv (2),
-.BR write (2),
-and
-.BR send (2)
-can fail with
-.I errno
-set to \fBEAGAIN\fP (\fBEWOULDBLOCK\fP).
-These results must be properly managed (not done properly in the example below).
-If your program is not going to receive any signals, then
-it is unlikely you will get \fBEINTR\fP.
-If your program does not set nonblocking I/O,
-you will not get \fBEAGAIN\fP.
-.\" Nonetheless, you should still cope with these errors for completeness.
-.TP
-8.
-Never call
-.BR read (2),
-.BR recv (2),
-.BR write (2),
-or
-.BR send (2)
-with a buffer length of zero.
-.TP
-9.
-If the functions
-.BR read (2),
-.BR recv (2),
-.BR write (2),
-and
-.BR send (2)
-fail with errors other than those listed in \fB7.\fP,
-or one of the input functions returns 0, indicating end of file,
-then you should \fInot\fP pass that file descriptor to
-.BR select ()
-again.
-In the example below,
-I close the file descriptor immediately, and then set it to \-1
-to prevent it being included in a set.
-.TP
-10.
-The timeout value must be initialized with each new call to
-.BR select (),
-since some operating systems modify the structure.
-.BR pselect ()
-however does not modify its timeout structure.
-.TP
-11.
-Since
-.BR select ()
-modifies its file descriptor sets,
-if the call is being used in a loop,
-then the sets must be reinitialized before each call.
-.\" "I have heard" does not fill me with confidence, and doesn't
-.\" belong in a man page, so I've commented this point out.
-.\" .TP
-.\" 11.
-.\" I have heard that the Windows socket layer does not cope with OOB data
-.\" properly.
-.\" It also does not cope with
-.\" .BR select ()
-.\" calls when no file descriptors are set at all.
-.\" Having no file descriptors set is a useful
-.\" way to sleep the process with subsecond precision by using the timeout.
-.\" (See further on.)
-.SH RETURN VALUE
-See
-.BR select (2).
-.SH NOTES
-Generally speaking,
-all operating systems that support sockets also support
-.BR select ().
-.BR select ()
-can be used to solve
-many problems in a portable and efficient way that naive programmers try
-to solve in a more complicated manner using
-threads, forking, IPCs, signals, memory sharing, and so on.
-.P
-The
-.BR poll (2)
-system call has the same functionality as
-.BR select (),
-and is somewhat more efficient when monitoring sparse
-file descriptor sets.
-It is nowadays widely available, but historically was less portable than
-.BR select ().
-.P
-The Linux-specific
-.BR epoll (7)
-API provides an interface that is more efficient than
-.BR select (2)
-and
-.BR poll (2)
-when monitoring large numbers of file descriptors.
-.SH EXAMPLES
-Here is an example that better demonstrates the true utility of
-.BR select ().
-The listing below is a TCP forwarding program that forwards
-from one TCP port to another.
-.P
-.\" SRC BEGIN (select.c)
-.EX
-#include <arpa/inet.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/select.h>
-#include <sys/socket.h>
-#include <unistd.h>
-\&
-static int forward_port;
-\&
-#undef max
-#define max(x, y) ((x) > (y) ? (x) : (y))
-\&
-static int
-listen_socket(int listen_port)
-{
- int lfd;
- int yes;
- struct sockaddr_in addr;
-\&
- lfd = socket(AF_INET, SOCK_STREAM, 0);
- if (lfd == \-1) {
- perror("socket");
- return \-1;
- }
-\&
- yes = 1;
- if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR,
- &yes, sizeof(yes)) == \-1)
- {
- perror("setsockopt");
- close(lfd);
- return \-1;
- }
-\&
- memset(&addr, 0, sizeof(addr));
- addr.sin_port = htons(listen_port);
- addr.sin_family = AF_INET;
- if (bind(lfd, (struct sockaddr *) &addr, sizeof(addr)) == \-1) {
- perror("bind");
- close(lfd);
- return \-1;
- }
-\&
- printf("accepting connections on port %d\en", listen_port);
- listen(lfd, 10);
- return lfd;
-}
-\&
-static int
-connect_socket(int connect_port, char *address)
-{
- int cfd;
- struct sockaddr_in addr;
-\&
- cfd = socket(AF_INET, SOCK_STREAM, 0);
- if (cfd == \-1) {
- perror("socket");
- return \-1;
- }
-\&
- memset(&addr, 0, sizeof(addr));
- addr.sin_port = htons(connect_port);
- addr.sin_family = AF_INET;
-\&
- if (!inet_aton(address, (struct in_addr *) &addr.sin_addr.s_addr)) {
- fprintf(stderr, "inet_aton(): bad IP address format\en");
- close(cfd);
- return \-1;
- }
-\&
- if (connect(cfd, (struct sockaddr *) &addr, sizeof(addr)) == \-1) {
- perror("connect()");
- shutdown(cfd, SHUT_RDWR);
- close(cfd);
- return \-1;
- }
- return cfd;
-}
-\&
-#define SHUT_FD1 do { \e
- if (fd1 >= 0) { \e
- shutdown(fd1, SHUT_RDWR); \e
- close(fd1); \e
- fd1 = \-1; \e
- } \e
- } while (0)
-\&
-#define SHUT_FD2 do { \e
- if (fd2 >= 0) { \e
- shutdown(fd2, SHUT_RDWR); \e
- close(fd2); \e
- fd2 = \-1; \e
- } \e
- } while (0)
-\&
-#define BUF_SIZE 1024
-\&
-int
-main(int argc, char *argv[])
-{
- int h;
- int ready, nfds;
- int fd1 = \-1, fd2 = \-1;
- int buf1_avail = 0, buf1_written = 0;
- int buf2_avail = 0, buf2_written = 0;
- char buf1[BUF_SIZE], buf2[BUF_SIZE];
- fd_set readfds, writefds, exceptfds;
- ssize_t nbytes;
-\&
- if (argc != 4) {
- fprintf(stderr, "Usage\en\etfwd <listen\-port> "
- "<forward\-to\-port> <forward\-to\-ip\-address>\en");
- exit(EXIT_FAILURE);
- }
-\&
- signal(SIGPIPE, SIG_IGN);
-\&
- forward_port = atoi(argv[2]);
-\&
- h = listen_socket(atoi(argv[1]));
- if (h == \-1)
- exit(EXIT_FAILURE);
-\&
- for (;;) {
- nfds = 0;
-\&
- FD_ZERO(&readfds);
- FD_ZERO(&writefds);
- FD_ZERO(&exceptfds);
- FD_SET(h, &readfds);
- nfds = max(nfds, h);
-\&
- if (fd1 > 0 && buf1_avail < BUF_SIZE)
- FD_SET(fd1, &readfds);
- /* Note: nfds is updated below, when fd1 is added to
- exceptfds. */
- if (fd2 > 0 && buf2_avail < BUF_SIZE)
- FD_SET(fd2, &readfds);
-\&
- if (fd1 > 0 && buf2_avail \- buf2_written > 0)
- FD_SET(fd1, &writefds);
- if (fd2 > 0 && buf1_avail \- buf1_written > 0)
- FD_SET(fd2, &writefds);
-\&
- if (fd1 > 0) {
- FD_SET(fd1, &exceptfds);
- nfds = max(nfds, fd1);
- }
- if (fd2 > 0) {
- FD_SET(fd2, &exceptfds);
- nfds = max(nfds, fd2);
- }
-\&
- ready = select(nfds + 1, &readfds, &writefds, &exceptfds, NULL);
-\&
- if (ready == \-1 && errno == EINTR)
- continue;
-\&
- if (ready == \-1) {
- perror("select()");
- exit(EXIT_FAILURE);
- }
-\&
- if (FD_ISSET(h, &readfds)) {
- socklen_t addrlen;
- struct sockaddr_in client_addr;
- int fd;
-\&
- addrlen = sizeof(client_addr);
- memset(&client_addr, 0, addrlen);
- fd = accept(h, (struct sockaddr *) &client_addr, &addrlen);
- if (fd == \-1) {
- perror("accept()");
- } else {
- SHUT_FD1;
- SHUT_FD2;
- buf1_avail = buf1_written = 0;
- buf2_avail = buf2_written = 0;
- fd1 = fd;
- fd2 = connect_socket(forward_port, argv[3]);
- if (fd2 == \-1)
- SHUT_FD1;
- else
- printf("connect from %s\en",
- inet_ntoa(client_addr.sin_addr));
-\&
- /* Skip any events on the old, closed file
- descriptors. */
-\&
- continue;
- }
- }
-\&
- /* NB: read OOB data before normal reads. */
-\&
- if (fd1 > 0 && FD_ISSET(fd1, &exceptfds)) {
- char c;
-\&
- nbytes = recv(fd1, &c, 1, MSG_OOB);
- if (nbytes < 1)
- SHUT_FD1;
- else
- send(fd2, &c, 1, MSG_OOB);
- }
- if (fd2 > 0 && FD_ISSET(fd2, &exceptfds)) {
- char c;
-\&
- nbytes = recv(fd2, &c, 1, MSG_OOB);
- if (nbytes < 1)
- SHUT_FD2;
- else
- send(fd1, &c, 1, MSG_OOB);
- }
- if (fd1 > 0 && FD_ISSET(fd1, &readfds)) {
- nbytes = read(fd1, buf1 + buf1_avail,
- BUF_SIZE \- buf1_avail);
- if (nbytes < 1)
- SHUT_FD1;
- else
- buf1_avail += nbytes;
- }
- if (fd2 > 0 && FD_ISSET(fd2, &readfds)) {
- nbytes = read(fd2, buf2 + buf2_avail,
- BUF_SIZE \- buf2_avail);
- if (nbytes < 1)
- SHUT_FD2;
- else
- buf2_avail += nbytes;
- }
- if (fd1 > 0 && FD_ISSET(fd1, &writefds) && buf2_avail > 0) {
- nbytes = write(fd1, buf2 + buf2_written,
- buf2_avail \- buf2_written);
- if (nbytes < 1)
- SHUT_FD1;
- else
- buf2_written += nbytes;
- }
- if (fd2 > 0 && FD_ISSET(fd2, &writefds) && buf1_avail > 0) {
- nbytes = write(fd2, buf1 + buf1_written,
- buf1_avail \- buf1_written);
- if (nbytes < 1)
- SHUT_FD2;
- else
- buf1_written += nbytes;
- }
-\&
- /* Check if write data has caught read data. */
-\&
- if (buf1_written == buf1_avail)
- buf1_written = buf1_avail = 0;
- if (buf2_written == buf2_avail)
- buf2_written = buf2_avail = 0;
-\&
- /* One side has closed the connection, keep
- writing to the other side until empty. */
-\&
- if (fd1 < 0 && buf1_avail \- buf1_written == 0)
- SHUT_FD2;
- if (fd2 < 0 && buf2_avail \- buf2_written == 0)
- SHUT_FD1;
- }
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.P
-The above program properly forwards most kinds of TCP connections
-including OOB signal data transmitted by \fBtelnet\fP servers.
-It handles the tricky problem of having data flow in both directions
-simultaneously.
-You might think it more efficient to use a
-.BR fork (2)
-call and devote a process to each stream.
-This becomes more tricky than you might suspect.
-Another idea is to set nonblocking I/O using
-.BR fcntl (2).
-This also has its problems because you end up using
-inefficient timeouts.
-.P
-The program does not handle more than one connection at a
-time, although it could easily be extended to do this with a linked list
-of buffers\[em]one for each connection.
-At the moment, new
-connections cause the current connection to be dropped.
-.SH SEE ALSO
-.BR accept (2),
-.BR connect (2),
-.BR poll (2),
-.BR read (2),
-.BR recv (2),
-.BR select (2),
-.BR send (2),
-.BR sigprocmask (2),
-.BR write (2),
-.BR epoll (7)
-.\" .SH AUTHORS
-.\" This man page was written by Paul Sheer.
diff --git a/man2/semctl.2 b/man2/semctl.2
deleted file mode 100644
index 243919c73..000000000
--- a/man2/semctl.2
+++ /dev/null
@@ -1,623 +0,0 @@
-'\" t
-.\" Copyright 1993 Giorgio Ciucci (giorgio@crcc.it)
-.\" and Copyright 2004, 2005 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Tue Oct 22 17:53:56 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Fri Jun 19 10:59:15 1998 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified Sun Feb 18 01:59:29 2001 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 20 Dec 2001, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 21 Dec 2001, aeb
-.\" Modified 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on CAP_IPC_OWNER requirement
-.\" Modified 17 Jun 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on CAP_SYS_ADMIN requirement for IPC_SET and IPC_RMID
-.\" Modified, 11 Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Rewrote semun text
-.\" Added semid_ds and ipc_perm structure definitions
-.\" 2005-08-02, mtk: Added IPC_INFO, SEM_INFO, SEM_STAT descriptions.
-.\" 2018-03-20, dbueso: Added SEM_STAT_ANY description.
-.\"
-.TH semctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-semctl \- System V semaphore control operations
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/sem.h>
-.P
-.BI "int semctl(int " semid ", int " semnum ", int " op ", ...);"
-.fi
-.SH DESCRIPTION
-.BR semctl ()
-performs the control operation specified by
-.I op
-on the System\ V semaphore set identified by
-.IR semid ,
-or on the
-.IR semnum -th
-semaphore of that set.
-(The semaphores in a set are numbered starting at 0.)
-.P
-This function has three or four arguments, depending on
-.IR op .
-When there are four, the fourth has the type
-.IR "union semun" .
-The \fIcalling program\fP must define this union as follows:
-.P
-.in +4n
-.EX
-union semun {
- int val; /* Value for SETVAL */
- struct semid_ds *buf; /* Buffer for IPC_STAT, IPC_SET */
- unsigned short *array; /* Array for GETALL, SETALL */
- struct seminfo *__buf; /* Buffer for IPC_INFO
- (Linux\-specific) */
-};
-.EE
-.in
-.P
-The
-.I semid_ds
-data structure is defined in \fI<sys/sem.h>\fP as follows:
-.P
-.in +4n
-.EX
-struct semid_ds {
- struct ipc_perm sem_perm; /* Ownership and permissions */
- time_t sem_otime; /* Last semop time */
- time_t sem_ctime; /* Creation time/time of last
- modification via semctl() */
- unsigned long sem_nsems; /* No. of semaphores in set */
-};
-.EE
-.in
-.P
-The fields of the
-.I semid_ds
-structure are as follows:
-.TP 11
-.I sem_perm
-This is an
-.I ipc_perm
-structure (see below) that specifies the access permissions on the semaphore
-set.
-.TP
-.I sem_otime
-Time of last
-.BR semop (2)
-system call.
-.TP
-.I sem_ctime
-Time of creation of semaphore set or time of last
-.BR semctl ()
-.BR IPC_SET ,
-.BR SETVAL ,
-or
-.B SETALL
-operation.
-.TP
-.I sem_nsems
-Number of semaphores in the set.
-Each semaphore of the set is referenced by a nonnegative integer
-ranging from
-.B 0
-to
-.IR sem_nsems\-1 .
-.P
-The
-.I ipc_perm
-structure is defined as follows
-(the highlighted fields are settable using
-.BR IPC_SET ):
-.P
-.in +4n
-.EX
-struct ipc_perm {
- key_t __key; /* Key supplied to semget(2) */
- uid_t \fBuid\fP; /* Effective UID of owner */
- gid_t \fBgid\fP; /* Effective GID of owner */
- uid_t cuid; /* Effective UID of creator */
- gid_t cgid; /* Effective GID of creator */
- unsigned short \fBmode\fP; /* Permissions */
- unsigned short __seq; /* Sequence number */
-};
-.EE
-.in
-.P
-The least significant 9 bits of the
-.I mode
-field of the
-.I ipc_perm
-structure define the access permissions for the semaphore set.
-The permission bits are as follows:
-.TS
-l l.
-0400 Read by user
-0200 Write by user
-0040 Read by group
-0020 Write by group
-0004 Read by others
-0002 Write by others
-.TE
-.P
-In effect, "write" means "alter" for a semaphore set.
-Bits 0100, 0010, and 0001 (the execute bits) are unused by the system.
-.P
-Valid values for
-.I op
-are:
-.TP
-.B IPC_STAT
-Copy information from the kernel data structure associated with
-.I semid
-into the
-.I semid_ds
-structure pointed to by
-.IR arg.buf .
-The argument
-.I semnum
-is ignored.
-The calling process must have read permission on the semaphore set.
-.TP
-.B IPC_SET
-Write the values of some members of the
-.I semid_ds
-structure pointed to by
-.I arg.buf
-to the kernel data structure associated with this semaphore set,
-updating also its
-.I sem_ctime
-member.
-.IP
-The following members of the structure are updated:
-.IR sem_perm.uid ,
-.IR sem_perm.gid ,
-and (the least significant 9 bits of)
-.IR sem_perm.mode .
-.IP
-The effective UID of the calling process must match the owner
-.RI ( sem_perm.uid )
-or creator
-.RI ( sem_perm.cuid )
-of the semaphore set, or the caller must be privileged.
-The argument
-.I semnum
-is ignored.
-.TP
-.B IPC_RMID
-Immediately remove the semaphore set,
-awakening all processes blocked in
-.BR semop (2)
-calls on the set (with an error return and
-.I errno
-set to
-.BR EIDRM ).
-The effective user ID of the calling process must
-match the creator or owner of the semaphore set,
-or the caller must be privileged.
-The argument
-.I semnum
-is ignored.
-.TP
-.BR IPC_INFO " (Linux\-specific)"
-Return information about system-wide semaphore limits and
-parameters in the structure pointed to by
-.IR arg.__buf .
-This structure is of type
-.IR seminfo ,
-defined in
-.I <sys/sem.h>
-if the
-.B _GNU_SOURCE
-feature test macro is defined:
-.IP
-.in +4n
-.EX
-struct seminfo {
- int semmap; /* Number of entries in semaphore
- map; unused within kernel */
- int semmni; /* Maximum number of semaphore sets */
- int semmns; /* Maximum number of semaphores in all
- semaphore sets */
- int semmnu; /* System\-wide maximum number of undo
- structures; unused within kernel */
- int semmsl; /* Maximum number of semaphores in a
- set */
- int semopm; /* Maximum number of operations for
- semop(2) */
- int semume; /* Maximum number of undo entries per
- process; unused within kernel */
- int semusz; /* Size of struct sem_undo */
- int semvmx; /* Maximum semaphore value */
- int semaem; /* Max. value that can be recorded for
- semaphore adjustment (SEM_UNDO) */
-};
-.EE
-.in
-.IP
-The
-.IR semmsl ,
-.IR semmns ,
-.IR semopm ,
-and
-.I semmni
-settings can be changed via
-.IR /proc/sys/kernel/sem ;
-see
-.BR proc (5)
-for details.
-.TP
-.BR SEM_INFO " (Linux-specific)"
-Return a
-.I seminfo
-structure containing the same information as for
-.BR IPC_INFO ,
-except that the following fields are returned with information
-about system resources consumed by semaphores: the
-.I semusz
-field returns the number of semaphore sets that currently exist
-on the system; and the
-.I semaem
-field returns the total number of semaphores in all semaphore sets
-on the system.
-.TP
-.BR SEM_STAT " (Linux-specific)"
-Return a
-.I semid_ds
-structure as for
-.BR IPC_STAT .
-However, the
-.I semid
-argument is not a semaphore identifier, but instead an index into
-the kernel's internal array that maintains information about
-all semaphore sets on the system.
-.TP
-.BR SEM_STAT_ANY " (Linux-specific, since Linux 4.17)"
-Return a
-.I semid_ds
-structure as for
-.BR SEM_STAT .
-However,
-.I sem_perm.mode
-is not checked for read access for
-.I semid
-meaning that any user can employ this operation (just as any user may read
-.I /proc/sysvipc/sem
-to obtain the same information).
-.TP
-.B GETALL
-Return
-.B semval
-(i.e., the current value)
-for all semaphores of the set into
-.IR arg.array .
-The argument
-.I semnum
-is ignored.
-The calling process must have read permission on the semaphore set.
-.TP
-.B GETNCNT
-Return the
-.B semncnt
-value for the
-.IR semnum \-th
-semaphore of the set
-(i.e., the number of processes waiting for the semaphore's value to increase).
-The calling process must have read permission on the semaphore set.
-.TP
-.B GETPID
-Return the
-.B sempid
-value for the
-.IR semnum \-th
-semaphore of the set.
-This is the PID of the process that last performed an operation on
-that semaphore (but see NOTES).
-The calling process must have read permission on the semaphore set.
-.TP
-.B GETVAL
-Return
-.B semval
-(i.e., the semaphore value) for the
-.IR semnum \-th
-semaphore of the set.
-The calling process must have read permission on the semaphore set.
-.TP
-.B GETZCNT
-Return the
-.B semzcnt
-value for the
-.IR semnum \-th
-semaphore of the set
-(i.e., the number of processes waiting for the semaphore value to become 0).
-The calling process must have read permission on the semaphore set.
-.TP
-.B SETALL
-Set the
-.B semval
-values for all semaphores of the set using
-.IR arg.array ,
-updating also the
-.I sem_ctime
-member of the
-.I semid_ds
-structure associated with the set.
-Undo entries (see
-.BR semop (2))
-are cleared for altered semaphores in all processes.
-If the changes to semaphore values would permit blocked
-.BR semop (2)
-calls in other processes to proceed, then those processes are woken up.
-The argument
-.I semnum
-is ignored.
-The calling process must have alter (write) permission on
-the semaphore set.
-.TP
-.B SETVAL
-Set the semaphore value
-.RB ( semval )
-to
-.I arg.val
-for the
-.IR semnum \-th
-semaphore of the set, updating also the
-.I sem_ctime
-member of the
-.I semid_ds
-structure associated with the set.
-Undo entries are cleared for altered semaphores in all processes.
-If the changes to semaphore values would permit blocked
-.BR semop (2)
-calls in other processes to proceed, then those processes are woken up.
-The calling process must have alter permission on the semaphore set.
-.SH RETURN VALUE
-On success,
-.BR semctl ()
-returns a nonnegative value depending on
-.I op
-as follows:
-.TP
-.B GETNCNT
-the value of
-.BR semncnt .
-.TP
-.B GETPID
-the value of
-.BR sempid .
-.TP
-.B GETVAL
-the value of
-.BR semval .
-.TP
-.B GETZCNT
-the value of
-.BR semzcnt .
-.TP
-.B IPC_INFO
-the index of the highest used entry in the
-kernel's internal array recording information about all
-semaphore sets.
-(This information can be used with repeated
-.B SEM_STAT
-or
-.B SEM_STAT_ANY
-operations to obtain information about all semaphore sets on the system.)
-.TP
-.B SEM_INFO
-as for
-.BR IPC_INFO .
-.TP
-.B SEM_STAT
-the identifier of the semaphore set whose index was given in
-.IR semid .
-.TP
-.B SEM_STAT_ANY
-as for
-.BR SEM_STAT .
-.P
-All other
-.I op
-values return 0 on success.
-.P
-On failure,
-.BR semctl ()
-returns \-1 and sets
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The argument
-.I op
-has one of the values
-.BR GETALL ,
-.BR GETPID ,
-.BR GETVAL ,
-.BR GETNCNT ,
-.BR GETZCNT ,
-.BR IPC_STAT ,
-.BR SEM_STAT ,
-.BR SEM_STAT_ANY ,
-.BR SETALL ,
-or
-.B SETVAL
-and the calling process does not have the required
-permissions on the semaphore set and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EFAULT
-The address pointed to by
-.I arg.buf
-or
-.I arg.array
-isn't accessible.
-.TP
-.B EIDRM
-The semaphore set was removed.
-.TP
-.B EINVAL
-Invalid value for
-.I op
-or
-.IR semid .
-Or: for a
-.B SEM_STAT
-operation, the index value specified in
-.I semid
-referred to an array slot that is currently unused.
-.TP
-.B EPERM
-The argument
-.I op
-has the value
-.B IPC_SET
-or
-.B IPC_RMID
-but the effective user ID of the calling process is not the creator
-(as found in
-.IR sem_perm.cuid )
-or the owner
-(as found in
-.IR sem_perm.uid )
-of the semaphore set,
-and the process does not have the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.B ERANGE
-The argument
-.I op
-has the value
-.B SETALL
-or
-.B SETVAL
-and the value to which
-.B semval
-is to be set (for some semaphore of the set) is less than 0
-or greater than the implementation limit
-.BR SEMVMX .
-.SH VERSIONS
-POSIX.1 specifies the
-.\" POSIX.1-2001, POSIX.1-2008
-.I sem_nsems
-field of the
-.I semid_ds
-structure as having the type
-.IR "unsigned\ short" ,
-and the field is so defined on most other systems.
-It was also so defined on Linux 2.2 and earlier,
-but, since Linux 2.4, the field has the type
-.IR "unsigned\ long" .
-.\"
-.SS The sempid value
-POSIX.1 defines
-.I sempid
-as the "process ID of [the] last operation" on a semaphore,
-and explicitly notes that this value is set by a successful
-.BR semop (2)
-call, with the implication that no other interface affects the
-.I sempid
-value.
-.P
-While some implementations conform to the behavior specified in POSIX.1,
-others do not.
-(The fault here probably lies with POSIX.1 inasmuch as it likely failed
-to capture the full range of existing implementation behaviors.)
-Various other implementations
-.\" At least OpenSolaris (and, one supposes, older Solaris) and Darwin
-also update
-.I sempid
-for the other operations that update the value of a semaphore: the
-.B SETVAL
-and
-.B SETALL
-operations, as well as the semaphore adjustments performed
-on process termination as a consequence of the use of the
-.B SEM_UNDO
-flag (see
-.BR semop (2)).
-.P
-Linux also updates
-.I sempid
-for
-.B SETVAL
-operations and semaphore adjustments.
-However, somewhat inconsistently, up to and including Linux 4.5,
-the kernel did not update
-.I sempid
-for
-.B SETALL
-operations.
-This was rectified
-.\" commit a5f4db877177d2a3d7ae62a7bac3a5a27e083d7f
-in Linux 4.6.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.\" SVr4 documents more error conditions EINVAL and EOVERFLOW.
-.P
-Various fields in a \fIstruct semid_ds\fP were typed as
-.I short
-under Linux 2.2
-and have become
-.I long
-under Linux 2.4.
-To take advantage of this,
-a recompilation under glibc-2.1.91 or later should suffice.
-(The kernel distinguishes old and new calls by an
-.B IPC_64
-flag in
-.IR op .)
-.P
-In some earlier versions of glibc, the
-.I semun
-union was defined in \fI<sys/sem.h>\fP, but POSIX.1 requires
-.\" POSIX.1-2001, POSIX.1-2008
-that the caller define this union.
-On versions of glibc where this union is \fInot\fP defined,
-the macro
-.B _SEM_SEMUN_UNDEFINED
-is defined in \fI<sys/sem.h>\fP.
-.SH NOTES
-The
-.BR IPC_INFO ,
-.BR SEM_STAT ,
-and
-.B SEM_INFO
-operations are used by the
-.BR ipcs (1)
-program to provide information on allocated resources.
-In the future these may be modified or moved to a
-.I /proc
-filesystem interface.
-.P
-The following system limit on semaphore sets affects a
-.BR semctl ()
-call:
-.TP
-.B SEMVMX
-Maximum value for
-.BR semval :
-implementation dependent (32767).
-.P
-For greater portability, it is best to always call
-.BR semctl ()
-with four arguments.
-.SH EXAMPLES
-See
-.BR shmop (2).
-.SH SEE ALSO
-.BR ipc (2),
-.BR semget (2),
-.BR semop (2),
-.BR capabilities (7),
-.BR sem_overview (7),
-.BR sysvipc (7)
diff --git a/man2/semget.2 b/man2/semget.2
deleted file mode 100644
index 167957b3a..000000000
--- a/man2/semget.2
+++ /dev/null
@@ -1,434 +0,0 @@
-.\" Copyright 1993 Giorgio Ciucci (giorgio@crcc.it)
-.\" and Copyright (C) 2020 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Tue Oct 22 17:54:56 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1 Jan 2002, Martin Schulze <joey@infodrom.org>
-.\" Modified 4 Jan 2002, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\" Modified, 11 Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Added notes on /proc files
-.\" Rewrote BUGS note about semget()'s failure to initialize
-.\" semaphore values
-.\"
-.TH semget 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-semget \- get a System V semaphore set identifier
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/sem.h>
-.fi
-.P
-.BI "int semget(key_t " key ,
-.BI "int " nsems ,
-.BI "int " semflg );
-.SH DESCRIPTION
-The
-.BR semget ()
-system call returns the System\ V semaphore set identifier
-associated with the argument
-.IR key .
-It may be used either to obtain the identifier of a previously created
-semaphore set (when
-.I semflg
-is zero and
-.I key
-does not have the value
-.BR IPC_PRIVATE ),
-or to create a new set.
-.P
-A new set of
-.I nsems
-semaphores is created if
-.I key
-has the value
-.B IPC_PRIVATE
-or if no existing semaphore set is associated with
-.I key
-and
-.B IPC_CREAT
-is specified in
-.IR semflg .
-.P
-If
-.I semflg
-specifies both
-.B IPC_CREAT
-and
-.B IPC_EXCL
-and a semaphore set already exists for
-.IR key ,
-then
-.BR semget ()
-fails with
-.I errno
-set to
-.BR EEXIST .
-(This is analogous to the effect of the combination
-.B O_CREAT | O_EXCL
-for
-.BR open (2).)
-.P
-Upon creation, the least significant 9 bits of the argument
-.I semflg
-define the permissions (for owner, group, and others)
-for the semaphore set.
-These bits have the same format, and the same
-meaning, as the
-.I mode
-argument of
-.BR open (2)
-(though the execute permissions are
-not meaningful for semaphores, and write permissions mean permission
-to alter semaphore values).
-.P
-When creating a new semaphore set,
-.BR semget ()
-initializes the set's associated data structure,
-.I semid_ds
-(see
-.BR semctl (2)),
-as follows:
-.IP \[bu] 3
-.I sem_perm.cuid
-and
-.I sem_perm.uid
-are set to the effective user ID of the calling process.
-.IP \[bu]
-.I sem_perm.cgid
-and
-.I sem_perm.gid
-are set to the effective group ID of the calling process.
-.IP \[bu]
-The least significant 9 bits of
-.I sem_perm.mode
-are set to the least significant 9 bits of
-.IR semflg .
-.IP \[bu]
-.I sem_nsems
-is set to the value of
-.IR nsems .
-.IP \[bu]
-.I sem_otime
-is set to 0.
-.IP \[bu]
-.I sem_ctime
-is set to the current time.
-.P
-The argument
-.I nsems
-can be 0
-(a don't care)
-when a semaphore set is not being created.
-Otherwise,
-.I nsems
-must be greater than 0
-and less than or equal to the maximum number of semaphores per semaphore set
-.RB ( SEMMSL ).
-.P
-If the semaphore set already exists, the permissions are
-verified.
-.\" and a check is made to see if it is marked for destruction.
-.SH RETURN VALUE
-On success,
-.BR semget ()
-returns the semaphore set identifier (a nonnegative integer).
-On failure, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-A semaphore set exists for
-.IR key ,
-but the calling process does not have permission to access the set,
-and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EEXIST
-.B IPC_CREAT
-and
-.B IPC_EXCL
-were specified in
-.IR semflg ,
-but a semaphore set already exists for
-.IR key .
-.\" .TP
-.\" .B EIDRM
-.\" The semaphore set is marked to be deleted.
-.TP
-.B EINVAL
-.I nsems
-is less than 0 or greater than the limit on the number
-of semaphores per semaphore set
-.RB ( SEMMSL ).
-.TP
-.B EINVAL
-A semaphore set corresponding to
-.I key
-already exists, but
-.I nsems
-is larger than the number of semaphores in that set.
-.TP
-.B ENOENT
-No semaphore set exists for
-.I key
-and
-.I semflg
-did not specify
-.BR IPC_CREAT .
-.TP
-.B ENOMEM
-A semaphore set has to be created but the system does not have
-enough memory for the new data structure.
-.TP
-.B ENOSPC
-A semaphore set has to be created but the system limit for the maximum
-number of semaphore sets
-.RB ( SEMMNI ),
-or the system wide maximum number of semaphores
-.RB ( SEMMNS ),
-would be exceeded.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-SVr4, POSIX.1-2001.
-.\" SVr4 documents additional error conditions EFBIG, E2BIG, EAGAIN,
-.\" ERANGE, EFAULT.
-.SH NOTES
-.B IPC_PRIVATE
-isn't a flag field but a
-.I key_t
-type.
-If this special value is used for
-.IR key ,
-the system call ignores all but the least significant 9 bits of
-.I semflg
-and creates a new semaphore set (on success).
-.\"
-.SS Semaphore initialization
-The values of the semaphores in a newly created set are indeterminate.
-(POSIX.1-2001 and POSIX.1-2008 are explicit on this point,
-although POSIX.1-2008 notes that a future version of the standard
-may require an implementation to initialize the semaphores to 0.)
-Although Linux, like many other implementations,
-initializes the semaphore values to 0,
-a portable application cannot rely on this:
-it should explicitly initialize the semaphores to the desired values.
-.\" In truth, every one of the many implementations that I've tested sets
-.\" the values to zero, but I suppose there is/was some obscure
-.\" implementation out there that does not.
-.P
-Initialization can be done using a
-.BR semctl (2)
-.B SETVAL
-or
-.B SETALL
-operation.
-Where multiple peers do not know who will be the first to
-initialize the set, checking for a nonzero
-.I sem_otime
-in the associated data structure retrieved by a
-.BR semctl (2)
-.B IPC_STAT
-operation can be used to avoid races.
-.\"
-.SS Semaphore limits
-The following limits on semaphore set resources affect the
-.BR semget ()
-call:
-.TP
-.B SEMMNI
-System-wide limit on the number of semaphore sets.
-Before Linux 3.19,
-the default value for this limit was 128.
-Since Linux 3.19,
-.\" commit e843e7d2c88b7db107a86bd2c7145dc715c058f4
-the default value is 32,000.
-On Linux, this limit can be read and modified via the fourth field of
-.IR /proc/sys/kernel/sem .
-.\" This /proc file is not available in Linux 2.2 and earlier -- MTK
-.TP
-.B SEMMSL
-Maximum number of semaphores per semaphore ID.
-Before Linux 3.19,
-the default value for this limit was 250.
-Since Linux 3.19,
-.\" commit e843e7d2c88b7db107a86bd2c7145dc715c058f4
-the default value is 32,000.
-On Linux, this limit can be read and modified via the first field of
-.IR /proc/sys/kernel/sem .
-.TP
-.B SEMMNS
-System-wide limit on the number of semaphores: policy dependent
-(on Linux, this limit can be read and modified via the second field of
-.IR /proc/sys/kernel/sem ).
-Note that the number of semaphores system-wide
-is also limited by the product of
-.B SEMMSL
-and
-.BR SEMMNI .
-.SH BUGS
-The name choice
-.B IPC_PRIVATE
-was perhaps unfortunate;
-.B IPC_NEW
-would more clearly show its function.
-.SH EXAMPLES
-The program shown below uses
-.BR semget ()
-to create a new semaphore set or retrieve the ID of an existing set.
-It generates the
-.I key
-for
-.BR semget ()
-using
-.BR ftok (3).
-The first two command-line arguments are used as the
-.I pathname
-and
-.I proj_id
-arguments for
-.BR ftok (3).
-The third command-line argument is an integer that specifies the
-.I nsems
-argument for
-.BR semget ().
-Command-line options can be used to specify the
-.B IPC_CREAT
-.RI ( \-c )
-and
-.B IPC_EXCL
-.RI ( \-x )
-flags for the call to
-.BR semget ().
-The usage of this program is demonstrated below.
-.P
-We first create two files that will be used to generate keys using
-.BR ftok (3),
-create two semaphore sets using those files, and then list the sets using
-.BR ipcs (1):
-.P
-.in +4n
-.EX
-$ \fBtouch mykey mykey2\fP
-$ \fB./t_semget \-c mykey p 1\fP
-ID = 9
-$ \fB./t_semget \-c mykey2 p 2\fP
-ID = 10
-$ \fBipcs \-s\fP
-\&
-\-\-\-\-\-\- Semaphore Arrays \-\-\-\-\-\-\-\-
-key semid owner perms nsems
-0x7004136d 9 mtk 600 1
-0x70041368 10 mtk 600 2
-.EE
-.in
-.P
-Next, we demonstrate that when
-.BR semget ()
-is given the same
-.I key
-(as generated by the same arguments to
-.BR ftok (3)),
-it returns the ID of the already existing semaphore set:
-.P
-.in +4n
-.EX
-$ \fB./t_semget \-c mykey p 1\fP
-ID = 9
-.EE
-.in
-.P
-Finally, we demonstrate the kind of collision that can occur when
-.BR ftok (3)
-is given different
-.I pathname
-arguments that have the same inode number:
-.P
-.in +4n
-.EX
-$ \fBln mykey link\fP
-$ \fBls \-i1 link mykey\fP
-2233197 link
-2233197 mykey
-$ \fB./t_semget link p 1\fP # Generates same key as \[aq]mykey\[aq]
-ID = 9
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (t_semget.c)
-.EX
-/* t_semget.c
-\&
- Licensed under GNU General Public License v2 or later.
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ipc.h>
-#include <sys/sem.h>
-#include <unistd.h>
-\&
-static void
-usage(const char *pname)
-{
- fprintf(stderr, "Usage: %s [\-cx] pathname proj\-id num\-sems\en",
- pname);
- fprintf(stderr, " \-c Use IPC_CREAT flag\en");
- fprintf(stderr, " \-x Use IPC_EXCL flag\en");
- exit(EXIT_FAILURE);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int semid, nsems, flags, opt;
- key_t key;
-\&
- flags = 0;
- while ((opt = getopt(argc, argv, "cx")) != \-1) {
- switch (opt) {
- case \[aq]c\[aq]: flags |= IPC_CREAT; break;
- case \[aq]x\[aq]: flags |= IPC_EXCL; break;
- default: usage(argv[0]);
- }
- }
-\&
- if (argc != optind + 3)
- usage(argv[0]);
-\&
- key = ftok(argv[optind], argv[optind + 1][0]);
- if (key == \-1) {
- perror("ftok");
- exit(EXIT_FAILURE);
- }
-\&
- nsems = atoi(argv[optind + 2]);
-\&
- semid = semget(key, nsems, flags | 0600);
- if (semid == \-1) {
- perror("semget");
- exit(EXIT_FAILURE);
- }
-\&
- printf("ID = %d\en", semid);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR semctl (2),
-.BR semop (2),
-.BR ftok (3),
-.BR capabilities (7),
-.BR sem_overview (7),
-.BR sysvipc (7)
diff --git a/man2/semop.2 b/man2/semop.2
deleted file mode 100644
index 92bfec80a..000000000
--- a/man2/semop.2
+++ /dev/null
@@ -1,523 +0,0 @@
-.\" Copyright 1993 Giorgio Ciucci (giorgio@crcc.it)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1996-10-22, Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2002-01-08, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2003-04-28, Ernie Petrides <petrides@redhat.com>
-.\" Modified 2004-05-27, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified, 11 Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Added notes on /proc files
-.\" 2005-04-08, mtk, Noted kernel version numbers for semtimedop()
-.\" 2007-07-09, mtk, Added an EXAMPLE code segment.
-.\"
-.TH semop 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-semop, semtimedop \- System V semaphore operations
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/sem.h>
-.P
-.BI "int semop(int " semid ", struct sembuf *" sops ", size_t " nsops );
-.BI "int semtimedop(int " semid ", struct sembuf *" sops ", size_t " nsops ,
-.BI " const struct timespec *_Nullable " timeout );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR semtimedop ():
-.nf
- _GNU_SOURCE
-.fi
-.SH DESCRIPTION
-Each semaphore in a System\ V semaphore set
-has the following associated values:
-.P
-.in +4n
-.EX
-unsigned short semval; /* semaphore value */
-unsigned short semzcnt; /* # waiting for zero */
-unsigned short semncnt; /* # waiting for increase */
-pid_t sempid; /* PID of process that last
- modified the semaphore value */
-.EE
-.in
-.P
-.BR semop ()
-performs operations on selected semaphores in the set indicated by
-.IR semid .
-Each of the
-.I nsops
-elements in the array pointed to by
-.I sops
-is a structure that
-specifies an operation to be performed on a single semaphore.
-The elements of this structure are of type
-.IR "struct sembuf" ,
-containing the following members:
-.P
-.in +4n
-.EX
-unsigned short sem_num; /* semaphore number */
-short sem_op; /* semaphore operation */
-short sem_flg; /* operation flags */
-.EE
-.in
-.P
-Flags recognized in
-.I sem_flg
-are
-.B IPC_NOWAIT
-and
-.BR SEM_UNDO .
-If an operation specifies
-.BR SEM_UNDO ,
-it will be automatically undone when the process terminates.
-.P
-The set of operations contained in
-.I sops
-is performed in
-.IR "array order" ,
-and
-.IR atomically ,
-that is, the operations are performed either as a complete unit,
-or not at all.
-The behavior of the system call if not all operations can be
-performed immediately depends on the presence of the
-.B IPC_NOWAIT
-flag in the individual
-.I sem_flg
-fields, as noted below.
-.P
-Each operation is performed on the
-.IR sem_num \-th
-semaphore of the semaphore set, where the first semaphore of the set
-is numbered 0.
-There are three types of operation, distinguished by the value of
-.IR sem_op .
-.P
-If
-.I sem_op
-is a positive integer, the operation adds this value to
-the semaphore value
-.RI ( semval ).
-Furthermore, if
-.B SEM_UNDO
-is specified for this operation, the system subtracts the value
-.I sem_op
-from the semaphore adjustment
-.RI ( semadj )
-value for this semaphore.
-This operation can always proceed\[em]it never forces a thread to wait.
-The calling process must have alter permission on the semaphore set.
-.P
-If
-.I sem_op
-is zero, the process must have read permission on the semaphore
-set.
-This is a "wait-for-zero" operation: if
-.I semval
-is zero, the operation can immediately proceed.
-Otherwise, if
-.B IPC_NOWAIT
-is specified in
-.IR sem_flg ,
-.BR semop ()
-fails with
-.I errno
-set to
-.B EAGAIN
-(and none of the operations in
-.I sops
-is performed).
-Otherwise,
-.I semzcnt
-(the count of threads waiting until this semaphore's value becomes zero)
-is incremented by one and the thread sleeps until
-one of the following occurs:
-.IP \[bu] 3
-.I semval
-becomes 0, at which time the value of
-.I semzcnt
-is decremented.
-.IP \[bu]
-The semaphore set
-is removed:
-.BR semop ()
-fails, with
-.I errno
-set to
-.BR EIDRM .
-.IP \[bu]
-The calling thread catches a signal:
-the value of
-.I semzcnt
-is decremented and
-.BR semop ()
-fails, with
-.I errno
-set to
-.BR EINTR .
-.P
-If
-.I sem_op
-is less than zero, the process must have alter permission on the
-semaphore set.
-If
-.I semval
-is greater than or equal to the absolute value of
-.IR sem_op ,
-the operation can proceed immediately:
-the absolute value of
-.I sem_op
-is subtracted from
-.IR semval ,
-and, if
-.B SEM_UNDO
-is specified for this operation, the system adds the absolute value of
-.I sem_op
-to the semaphore adjustment
-.RI ( semadj )
-value for this semaphore.
-If the absolute value of
-.I sem_op
-is greater than
-.IR semval ,
-and
-.B IPC_NOWAIT
-is specified in
-.IR sem_flg ,
-.BR semop ()
-fails, with
-.I errno
-set to
-.B EAGAIN
-(and none of the operations in
-.I sops
-is performed).
-Otherwise,
-.I semncnt
-(the counter of threads waiting for this semaphore's value to increase)
-is incremented by one and the thread sleeps until
-one of the following occurs:
-.IP \[bu] 3
-.I semval
-becomes greater than or equal to the absolute value of
-.IR sem_op :
-the operation now proceeds, as described above.
-.IP \[bu]
-The semaphore set is removed from the system:
-.BR semop ()
-fails, with
-.I errno
-set to
-.BR EIDRM .
-.IP \[bu]
-The calling thread catches a signal:
-the value of
-.I semncnt
-is decremented and
-.BR semop ()
-fails, with
-.I errno
-set to
-.BR EINTR .
-.P
-On successful completion, the
-.I sempid
-value for each semaphore specified in the array pointed to by
-.I sops
-is set to the caller's process ID.
-In addition, the
-.I sem_otime
-.\" and
-.\" .I sem_ctime
-is set to the current time.
-.SS semtimedop()
-.BR semtimedop ()
-behaves identically to
-.BR semop ()
-except that in those cases where the calling thread would sleep,
-the duration of that sleep is limited by the amount of elapsed
-time specified by the
-.I timespec
-structure whose address is passed in the
-.I timeout
-argument.
-(This sleep interval will be rounded up to the system clock granularity,
-and kernel scheduling delays mean that the interval
-may overrun by a small amount.)
-If the specified time limit has been reached,
-.BR semtimedop ()
-fails with
-.I errno
-set to
-.B EAGAIN
-(and none of the operations in
-.I sops
-is performed).
-If the
-.I timeout
-argument is NULL,
-then
-.BR semtimedop ()
-behaves exactly like
-.BR semop ().
-.P
-Note that if
-.BR semtimedop ()
-is interrupted by a signal, causing the call to fail with the error
-.BR EINTR ,
-the contents of
-.I timeout
-are left unchanged.
-.SH RETURN VALUE
-On success,
-.BR semop ()
-and
-.BR semtimedop ()
-return 0.
-On failure, they return \-1, and set
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B E2BIG
-The argument
-.I nsops
-is greater than
-.BR SEMOPM ,
-the maximum number of operations allowed per system
-call.
-.TP
-.B EACCES
-The calling process does not have the permissions required
-to perform the specified semaphore operations,
-and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EAGAIN
-An operation could not proceed immediately and either
-.B IPC_NOWAIT
-was specified in
-.I sem_flg
-or the time limit specified in
-.I timeout
-expired.
-.TP
-.B EFAULT
-An address specified in either the
-.I sops
-or the
-.I timeout
-argument isn't accessible.
-.TP
-.B EFBIG
-For some operation the value of
-.I sem_num
-is less than 0 or greater than or equal to the number
-of semaphores in the set.
-.TP
-.B EIDRM
-The semaphore set was removed.
-.TP
-.B EINTR
-While blocked in this system call, the thread caught a signal; see
-.BR signal (7).
-.TP
-.B EINVAL
-The semaphore set doesn't exist, or
-.I semid
-is less than zero, or
-.I nsops
-has a nonpositive value.
-.TP
-.B ENOMEM
-The
-.I sem_flg
-of some operation specified
-.B SEM_UNDO
-and the system does not have enough memory to allocate the undo
-structure.
-.TP
-.B ERANGE
-For some operation
-.I sem_op+semval
-is greater than
-.BR SEMVMX ,
-the implementation dependent maximum value for
-.IR semval .
-.SH STANDARDS
-POSIX.1-2008.
-.SH VERSIONS
-Linux 2.5.52 (backported into Linux 2.4.22),
-glibc 2.3.3.
-POSIX.1-2001, SVr4.
-.\" SVr4 documents additional error conditions EINVAL, EFBIG, ENOSPC.
-.SH NOTES
-The
-.I sem_undo
-structures of a process aren't inherited by the child produced by
-.BR fork (2),
-but they are inherited across an
-.BR execve (2)
-system call.
-.P
-.BR semop ()
-is never automatically restarted after being interrupted by a signal handler,
-regardless of the setting of the
-.B SA_RESTART
-flag when establishing a signal handler.
-.P
-A semaphore adjustment
-.RI ( semadj )
-value is a per-process, per-semaphore integer that is the negated sum
-of all operations performed on a semaphore specifying the
-.B SEM_UNDO
-flag.
-Each process has a list of
-.I semadj
-values\[em]one value for each semaphore on which it has operated using
-.BR SEM_UNDO .
-When a process terminates, each of its per-semaphore
-.I semadj
-values is added to the corresponding semaphore,
-thus undoing the effect of that process's operations on the semaphore
-(but see BUGS below).
-When a semaphore's value is directly set using the
-.B SETVAL
-or
-.B SETALL
-request to
-.BR semctl (2),
-the corresponding
-.I semadj
-values in all processes are cleared.
-The
-.BR clone (2)
-.B CLONE_SYSVSEM
-flag allows more than one process to share a
-.I semadj
-list; see
-.BR clone (2)
-for details.
-.P
-The \fIsemval\fP, \fIsempid\fP, \fIsemzcnt\fP, and \fIsemncnt\fP values
-for a semaphore can all be retrieved using appropriate
-.BR semctl (2)
-calls.
-.SS Semaphore limits
-The following limits on semaphore set resources affect the
-.BR semop ()
-call:
-.TP
-.B SEMOPM
-Maximum number of operations allowed for one
-.BR semop ()
-call.
-Before Linux 3.19,
-.\" commit e843e7d2c88b7db107a86bd2c7145dc715c058f4
-the default value for this limit was 32.
-Since Linux 3.19, the default value is 500.
-On Linux, this limit can be read and modified via the third field of
-.IR /proc/sys/kernel/sem .
-.\" This /proc file is not available in Linux 2.2 and earlier -- MTK
-.IR Note :
-this limit should not be raised above 1000,
-.\" See comment in Linux 3.19 source file include/uapi/linux/sem.h
-because of the risk that
-.BR semop ()
-fails due to kernel memory fragmentation when allocating memory to copy the
-.I sops
-array.
-.TP
-.B SEMVMX
-Maximum allowable value for
-.IR semval :
-implementation dependent (32767).
-.P
-The implementation has no intrinsic limits for
-the adjust on exit maximum value
-.RB ( SEMAEM ),
-the system wide maximum number of undo structures
-.RB ( SEMMNU )
-and the per-process maximum number of undo entries system parameters.
-.SH BUGS
-When a process terminates, its set of associated
-.I semadj
-structures is used to undo the effect of all of the
-semaphore operations it performed with the
-.B SEM_UNDO
-flag.
-This raises a difficulty: if one (or more) of these semaphore adjustments
-would result in an attempt to decrease a semaphore's value below zero,
-what should an implementation do?
-One possible approach would be to block until all the semaphore
-adjustments could be performed.
-This is however undesirable since it could force process termination to
-block for arbitrarily long periods.
-Another possibility is that such semaphore adjustments could be ignored
-altogether (somewhat analogously to failing when
-.B IPC_NOWAIT
-is specified for a semaphore operation).
-Linux adopts a third approach: decreasing the semaphore value
-as far as possible (i.e., to zero) and allowing process
-termination to proceed immediately.
-.P
-In Linux 2.6.x, x <= 10, there is a bug that in some circumstances
-prevents a thread that is waiting for a semaphore value to become
-zero from being woken up when the value does actually become zero.
-This bug is fixed in Linux 2.6.11.
-.\" The bug report:
-.\" http://marc.theaimsgroup.com/?l=linux-kernel&m=110260821123863&w=2
-.\" the fix:
-.\" http://marc.theaimsgroup.com/?l=linux-kernel&m=110261701025794&w=2
-.SH EXAMPLES
-The following code segment uses
-.BR semop ()
-to atomically wait for the value of semaphore 0 to become zero,
-and then increment the semaphore value by one.
-.P
-.in +4n
-.EX
-struct sembuf sops[2];
-int semid;
-\&
-/* Code to set \fIsemid\fP omitted */
-\&
-sops[0].sem_num = 0; /* Operate on semaphore 0 */
-sops[0].sem_op = 0; /* Wait for value to equal 0 */
-sops[0].sem_flg = 0;
-\&
-sops[1].sem_num = 0; /* Operate on semaphore 0 */
-sops[1].sem_op = 1; /* Increment value by one */
-sops[1].sem_flg = 0;
-\&
-if (semop(semid, sops, 2) == \-1) {
- perror("semop");
- exit(EXIT_FAILURE);
-}
-.EE
-.in
-.P
-A further example of the use of
-.BR semop ()
-can be found in
-.BR shmop (2).
-.SH SEE ALSO
-.BR clone (2),
-.BR semctl (2),
-.BR semget (2),
-.BR sigaction (2),
-.BR capabilities (7),
-.BR sem_overview (7),
-.BR sysvipc (7),
-.BR time (7)
diff --git a/man2/semtimedop.2 b/man2/semtimedop.2
deleted file mode 100644
index 8a4061874..000000000
--- a/man2/semtimedop.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/semop.2
diff --git a/man2/send.2 b/man2/send.2
deleted file mode 100644
index f3de06e4c..000000000
--- a/man2/send.2
+++ /dev/null
@@ -1,506 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Oct 1998 by Andi Kleen
-.\" Modified Oct 2003 by aeb
-.\" Modified 2004-07-01 by mtk
-.\"
-.TH send 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-send, sendto, sendmsg \- send a message on a socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "ssize_t send(int " sockfd ", const void " buf [. len "], size_t " len \
-", int " flags );
-.BI "ssize_t sendto(int " sockfd ", const void " buf [. len "], size_t " len \
-", int " flags ,
-.BI " const struct sockaddr *" dest_addr ", socklen_t " addrlen );
-.BI "ssize_t sendmsg(int " sockfd ", const struct msghdr *" msg \
-", int " flags );
-.fi
-.SH DESCRIPTION
-The system calls
-.BR send (),
-.BR sendto (),
-and
-.BR sendmsg ()
-are used to transmit a message to another socket.
-.P
-The
-.BR send ()
-call may be used only when the socket is in a
-.I connected
-state (so that the intended recipient is known).
-The only difference between
-.BR send ()
-and
-.BR write (2)
-is the presence of
-.IR flags .
-With a zero
-.I flags
-argument,
-.BR send ()
-is equivalent to
-.BR write (2).
-Also, the following call
-.P
-.in +4n
-.EX
-send(sockfd, buf, len, flags);
-.EE
-.in
-.P
-is equivalent to
-.P
-.in +4n
-.EX
-sendto(sockfd, buf, len, flags, NULL, 0);
-.EE
-.in
-.P
-The argument
-.I sockfd
-is the file descriptor of the sending socket.
-.P
-If
-.BR sendto ()
-is used on a connection-mode
-.RB ( SOCK_STREAM ,
-.BR SOCK_SEQPACKET )
-socket, the arguments
-.I dest_addr
-and
-.I addrlen
-are ignored (and the error
-.B EISCONN
-may be returned when they are
-not NULL and 0), and the error
-.B ENOTCONN
-is returned when the socket was not actually connected.
-Otherwise, the address of the target is given by
-.I dest_addr
-with
-.I addrlen
-specifying its size.
-For
-.BR sendmsg (),
-the address of the target is given by
-.IR msg.msg_name ,
-with
-.I msg.msg_namelen
-specifying its size.
-.P
-For
-.BR send ()
-and
-.BR sendto (),
-the message is found in
-.I buf
-and has length
-.IR len .
-For
-.BR sendmsg (),
-the message is pointed to by the elements of the array
-.IR msg.msg_iov .
-The
-.BR sendmsg ()
-call also allows sending ancillary data (also known as control information).
-.P
-If the message is too long to pass atomically through the
-underlying protocol, the error
-.B EMSGSIZE
-is returned, and the message is not transmitted.
-.P
-No indication of failure to deliver is implicit in a
-.BR send ().
-Locally detected errors are indicated by a return value of \-1.
-.P
-When the message does not fit into the send buffer of the socket,
-.BR send ()
-normally blocks, unless the socket has been placed in nonblocking I/O
-mode.
-In nonblocking mode it would fail with the error
-.B EAGAIN
-or
-.B EWOULDBLOCK
-in this case.
-The
-.BR select (2)
-call may be used to determine when it is possible to send more data.
-.SS The flags argument
-The
-.I flags
-argument is the bitwise OR
-of zero or more of the following flags.
-.\" FIXME . ? document MSG_PROXY (which went away in Linux 2.3.15)
-.TP
-.BR MSG_CONFIRM " (since Linux 2.3.15)"
-Tell the link layer that forward progress happened: you got a successful
-reply from the other side.
-If the link layer doesn't get this
-it will regularly reprobe the neighbor (e.g., via a unicast ARP).
-Valid only on
-.B SOCK_DGRAM
-and
-.B SOCK_RAW
-sockets and currently implemented only for IPv4 and IPv6.
-See
-.BR arp (7)
-for details.
-.TP
-.B MSG_DONTROUTE
-Don't use a gateway to send out the packet, send to hosts only on
-directly connected networks.
-This is usually used only
-by diagnostic or routing programs.
-This is defined only for protocol
-families that route; packet sockets don't.
-.TP
-.BR MSG_DONTWAIT " (since Linux 2.2)"
-Enables nonblocking operation; if the operation would block,
-.B EAGAIN
-or
-.B EWOULDBLOCK
-is returned.
-This provides similar behavior to setting the
-.B O_NONBLOCK
-flag (via the
-.BR fcntl (2)
-.B F_SETFL
-operation), but differs in that
-.B MSG_DONTWAIT
-is a per-call option, whereas
-.B O_NONBLOCK
-is a setting on the open file description (see
-.BR open (2)),
-which will affect all threads in the calling process
-as well as other processes that hold file descriptors
-referring to the same open file description.
-.TP
-.BR MSG_EOR " (since Linux 2.2)"
-Terminates a record (when this notion is supported, as for sockets of type
-.BR SOCK_SEQPACKET ).
-.TP
-.BR MSG_MORE " (since Linux 2.4.4)"
-The caller has more data to send.
-This flag is used with TCP sockets to obtain the same effect
-as the
-.B TCP_CORK
-socket option (see
-.BR tcp (7)),
-with the difference that this flag can be set on a per-call basis.
-.IP
-Since Linux 2.6, this flag is also supported for UDP sockets, and informs
-the kernel to package all of the data sent in calls with this flag set
-into a single datagram which is transmitted only when a call is performed
-that does not specify this flag.
-(See also the
-.B UDP_CORK
-socket option described in
-.BR udp (7).)
-.TP
-.BR MSG_NOSIGNAL " (since Linux 2.2)"
-Don't generate a
-.B SIGPIPE
-signal if the peer on a stream-oriented socket has closed the connection.
-The
-.B EPIPE
-error is still returned.
-This provides similar behavior to using
-.BR sigaction (2)
-to ignore
-.BR SIGPIPE ,
-but, whereas
-.B MSG_NOSIGNAL
-is a per-call feature,
-ignoring
-.B SIGPIPE
-sets a process attribute that affects all threads in the process.
-.TP
-.B MSG_OOB
-Sends
-.I out-of-band
-data on sockets that support this notion (e.g., of type
-.BR SOCK_STREAM );
-the underlying protocol must also support
-.I out-of-band
-data.
-.TP
-.BR MSG_FASTOPEN " (since Linux 3.7)"
-Attempts TCP Fast Open (RFC7413) and sends data in the SYN like a
-combination of
-.BR connect (2)
-and
-.BR write (2),
-by performing an implicit
-.BR connect (2)
-operation.
-It blocks until the data is buffered and the handshake has completed.
-For a non-blocking socket,
-it returns the number of bytes buffered and sent in the SYN packet.
-If the cookie is not available locally,
-it returns
-.BR EINPROGRESS ,
-and sends a SYN with a Fast Open cookie request automatically.
-The caller needs to write the data again when the socket is connected.
-On errors,
-it sets the same
-.I errno
-as
-.BR connect (2)
-if the handshake fails.
-This flag requires enabling TCP Fast Open client support on sysctl
-.IR net.ipv4.tcp_fastopen .
-.IP
-Refer to
-.B TCP_FASTOPEN_CONNECT
-socket option in
-.BR tcp (7)
-for an alternative approach.
-.SS sendmsg()
-The definition of the
-.I msghdr
-structure employed by
-.BR sendmsg ()
-is as follows:
-.P
-.in +4n
-.EX
-struct msghdr {
- void *msg_name; /* Optional address */
- socklen_t msg_namelen; /* Size of address */
- struct iovec *msg_iov; /* Scatter/gather array */
- size_t msg_iovlen; /* # elements in msg_iov */
- void *msg_control; /* Ancillary data, see below */
- size_t msg_controllen; /* Ancillary data buffer len */
- int msg_flags; /* Flags (unused) */
-};
-.EE
-.in
-.P
-The
-.I msg_name
-field is used on an unconnected socket to specify the target
-address for a datagram.
-It points to a buffer containing the address; the
-.I msg_namelen
-field should be set to the size of the address.
-For a connected socket, these fields should be specified as NULL and 0,
-respectively.
-.P
-The
-.I msg_iov
-and
-.I msg_iovlen
-fields specify scatter-gather locations, as for
-.BR writev (2).
-.P
-You may send control information (ancillary data) using the
-.I msg_control
-and
-.I msg_controllen
-members.
-The maximum control buffer length the kernel can process is limited
-per socket by the value in
-.IR /proc/sys/net/core/optmem_max ;
-see
-.BR socket (7).
-For further information on the use of ancillary data in various
-socket domains, see
-.BR unix (7)
-and
-.BR ip (7).
-.P
-The
-.I msg_flags
-field is ignored.
-.\" Still to be documented:
-.\" Send file descriptors and user credentials using the
-.\" msg_control* fields.
-.SH RETURN VALUE
-On success, these calls return the number of bytes sent.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-These are some standard errors generated by the socket layer.
-Additional errors
-may be generated and returned from the underlying protocol modules;
-see their respective manual pages.
-.TP
-.B EACCES
-(For UNIX domain sockets, which are identified by pathname)
-Write permission is denied on the destination socket file,
-or search permission is denied for one of the directories
-in the path prefix.
-(See
-.BR path_resolution (7).)
-.IP
-(For UDP sockets) An attempt was made to send to a
-network/broadcast address as though it was a unicast address.
-.TP
-.BR EAGAIN " or " EWOULDBLOCK
-.\" Actually EAGAIN on Linux
-The socket is marked nonblocking and the requested operation
-would block.
-POSIX.1-2001 allows either error to be returned for this case,
-and does not require these constants to have the same value,
-so a portable application should check for both possibilities.
-.TP
-.B EAGAIN
-(Internet domain datagram sockets)
-The socket referred to by
-.I sockfd
-had not previously been bound to an address and,
-upon attempting to bind it to an ephemeral port,
-it was determined that all port numbers in the ephemeral port range
-are currently in use.
-See the discussion of
-.I /proc/sys/net/ipv4/ip_local_port_range
-in
-.BR ip (7).
-.TP
-.B EALREADY
-Another Fast Open is in progress.
-.TP
-.B EBADF
-.I sockfd
-is not a valid open file descriptor.
-.TP
-.B ECONNRESET
-Connection reset by peer.
-.TP
-.B EDESTADDRREQ
-The socket is not connection-mode, and no peer address is set.
-.TP
-.B EFAULT
-An invalid user space address was specified for an argument.
-.TP
-.B EINTR
-A signal occurred before any data was transmitted; see
-.BR signal (7).
-.TP
-.B EINVAL
-Invalid argument passed.
-.TP
-.B EISCONN
-The connection-mode socket was connected already but a
-recipient was specified.
-(Now either this error is returned, or the recipient specification
-is ignored.)
-.TP
-.B EMSGSIZE
-The socket type
-.\" (e.g., SOCK_DGRAM )
-requires that the message be sent atomically, and the size
-of the message to be sent made this impossible.
-.TP
-.B ENOBUFS
-The output queue for a network interface was full.
-This generally indicates that the interface has stopped sending,
-but may be caused by transient congestion.
-(Normally, this does not occur in Linux.
-Packets are just silently dropped
-when a device queue overflows.)
-.TP
-.B ENOMEM
-No memory available.
-.TP
-.B ENOTCONN
-The socket is not connected, and no target has been given.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.TP
-.B EOPNOTSUPP
-Some bit in the
-.I flags
-argument is inappropriate for the socket type.
-.TP
-.B EPIPE
-The local end has been shut down on a connection oriented socket.
-In this case, the process
-will also receive a
-.B SIGPIPE
-unless
-.B MSG_NOSIGNAL
-is set.
-.SH VERSIONS
-According to POSIX.1-2001, the
-.I msg_controllen
-field of the
-.I msghdr
-structure should be typed as
-.IR socklen_t ,
-and the
-.I msg_iovlen
-field should be typed as
-.IR int ,
-but glibc currently types both as
-.IR size_t .
-.\" glibc bug for msg_controllen raised 12 Mar 2006
-.\" http://sourceware.org/bugzilla/show_bug.cgi?id=2448
-.\" The problem is an underlying kernel issue: the size of the
-.\" __kernel_size_t type used to type these fields varies
-.\" across architectures, but socklen_t is always 32 bits,
-.\" as (at least with GCC) is int.
-.SH STANDARDS
-POSIX.1-2008.
-.P
-.B MSG_CONFIRM
-is a Linux extension.
-.SH HISTORY
-4.4BSD, SVr4, POSIX.1-2001.
-(These interfaces first appeared in 4.2BSD.)
-.P
-POSIX.1-2001 describes only the
-.B MSG_OOB
-and
-.B MSG_EOR
-flags.
-POSIX.1-2008 adds a specification of
-.BR MSG_NOSIGNAL .
-.SH NOTES
-See
-.BR sendmmsg (2)
-for information about a Linux-specific system call
-that can be used to transmit multiple datagrams in a single call.
-.SH BUGS
-Linux may return
-.B EPIPE
-instead of
-.BR ENOTCONN .
-.SH EXAMPLES
-An example of the use of
-.BR sendto ()
-is shown in
-.BR getaddrinfo (3).
-.SH SEE ALSO
-.BR fcntl (2),
-.BR getsockopt (2),
-.BR recv (2),
-.BR select (2),
-.BR sendfile (2),
-.BR sendmmsg (2),
-.BR shutdown (2),
-.BR socket (2),
-.BR write (2),
-.BR cmsg (3),
-.BR ip (7),
-.BR ipv6 (7),
-.BR socket (7),
-.BR tcp (7),
-.BR udp (7),
-.BR unix (7)
diff --git a/man2/sendfile.2 b/man2/sendfile.2
deleted file mode 100644
index a385949c1..000000000
--- a/man2/sendfile.2
+++ /dev/null
@@ -1,245 +0,0 @@
-.\" SPDX-License-Identifier: Linux-man-pages-1-para
-.\"
-.\" This man page is Copyright (C) 1998 Pawel Krawczyk.
-.\"
-.\" $Id: sendfile.2,v 1.5 1999/05/18 11:54:11 freitag Exp $
-.\" 2000-11-19 bert hubert <ahu@ds9a.nl>: in_fd cannot be socket
-.\"
-.\" 2004-12-17, mtk
-.\" updated description of in_fd and out_fd for 2.6
-.\" Various wording and formatting changes
-.\"
-.\" 2005-03-31 Martin Pool <mbp@sourcefrog.net> mmap() improvements
-.\"
-.TH sendfile 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sendfile \- transfer data between file descriptors
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/sendfile.h>
-.P
-.BI "ssize_t sendfile(int" " out_fd" ", int" " in_fd" ", \
-off_t *_Nullable " offset ,
-.BI " size_t" " count" );
-.\" The below is too ugly. Comments about glibc versions belong
-.\" in the notes, not in the header.
-.\"
-.\" .B #include <features.h>
-.\" .B #if (__GLIBC__==2 && __GLIBC_MINOR__>=1) || __GLIBC__>2
-.\" .B #include <sys/sendfile.h>
-.\" #else
-.\" .B #include <sys/types.h>
-.\" .B /* No system prototype before glibc 2.1. */
-.\" .BI "ssize_t sendfile(int" " out_fd" ", int" " in_fd" ", off_t *" \
-.\" offset ", size_t" " count" )
-.\" .B #endif
-.\"
-.fi
-.SH DESCRIPTION
-.BR sendfile ()
-copies data between one file descriptor and another.
-Because this copying is done within the kernel,
-.BR sendfile ()
-is more efficient than the combination of
-.BR read (2)
-and
-.BR write (2),
-which would require transferring data to and from user space.
-.P
-.I in_fd
-should be a file descriptor opened for reading and
-.I out_fd
-should be a descriptor opened for writing.
-.P
-If
-.I offset
-is not NULL, then it points
-to a variable holding the file offset from which
-.BR sendfile ()
-will start reading data from
-.IR in_fd .
-When
-.BR sendfile ()
-returns, this variable
-will be set to the offset of the byte following the last byte that was read.
-If
-.I offset
-is not NULL, then
-.BR sendfile ()
-does not modify the file offset of
-.IR in_fd ;
-otherwise the file offset is adjusted to reflect
-the number of bytes read from
-.IR in_fd .
-.P
-If
-.I offset
-is NULL, then data will be read from
-.I in_fd
-starting at the file offset,
-and the file offset will be updated by the call.
-.P
-.I count
-is the number of bytes to copy between the file descriptors.
-.P
-The
-.I in_fd
-argument must correspond to a file which supports
-.BR mmap (2)-like
-operations
-(i.e., it cannot be a socket).
-Except since Linux 5.12
-.\" commit b964bf53e540262f2d12672b3cca10842c0172e7
-and if
-.I out_fd
-is a pipe, in which case
-.BR sendfile ()
-desugars to a
-.BR splice (2)
-and its restrictions apply.
-.P
-Before Linux 2.6.33,
-.I out_fd
-must refer to a socket.
-Since Linux 2.6.33 it can be any file.
-If it's seekable, then
-.BR sendfile ()
-changes the file offset appropriately.
-.SH RETURN VALUE
-If the transfer was successful, the number of bytes written to
-.I out_fd
-is returned.
-Note that a successful call to
-.BR sendfile ()
-may write fewer bytes than requested;
-the caller should be prepared to retry the call if there were unsent bytes.
-See also NOTES.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-Nonblocking I/O has been selected using
-.B O_NONBLOCK
-and the write would block.
-.TP
-.B EBADF
-The input file was not opened for reading or the output file
-was not opened for writing.
-.TP
-.B EFAULT
-Bad address.
-.TP
-.B EINVAL
-Descriptor is not valid or locked, or an
-.BR mmap (2)-like
-operation is not available for
-.IR in_fd ,
-or
-.I count
-is negative.
-.TP
-.B EINVAL
-.I out_fd
-has the
-.B O_APPEND
-flag set.
-This is not currently supported by
-.BR sendfile ().
-.TP
-.B EIO
-Unspecified error while reading from
-.IR in_fd .
-.TP
-.B ENOMEM
-Insufficient memory to read from
-.IR in_fd .
-.TP
-.B EOVERFLOW
-.I count
-is too large; the operation would result in exceeding the maximum size of either
-the input file or the output file.
-.TP
-.B ESPIPE
-.I offset
-is not NULL but the input file is not seekable.
-.SH VERSIONS
-Other UNIX systems implement
-.BR sendfile ()
-with different semantics and prototypes.
-It should not be used in portable programs.
-.SH STANDARDS
-None.
-.SH HISTORY
-Linux 2.2,
-glibc 2.1.
-.P
-In Linux 2.4 and earlier,
-.I out_fd
-could also refer to a regular file;
-this possibility went away in the Linux 2.6.x kernel series,
-but was restored in Linux 2.6.33.
-.P
-The original Linux
-.BR sendfile ()
-system call was not designed to handle large file offsets.
-Consequently, Linux 2.4 added
-.BR sendfile64 (),
-with a wider type for the
-.I offset
-argument.
-The glibc
-.BR sendfile ()
-wrapper function transparently deals with the kernel differences.
-.SH NOTES
-.BR sendfile ()
-will transfer at most 0x7ffff000 (2,147,479,552) bytes,
-returning the number of bytes actually transferred.
-.\" commit e28cc71572da38a5a12c1cfe4d7032017adccf69
-(This is true on both 32-bit and 64-bit systems.)
-.P
-If you plan to use
-.BR sendfile ()
-for sending files to a TCP socket, but need
-to send some header data in front of the file contents, you will find
-it useful to employ the
-.B TCP_CORK
-option, described in
-.BR tcp (7),
-to minimize the number of packets and to tune performance.
-.P
-Applications may wish to fall back to
-.BR read (2)
-and
-.BR write (2)
-in the case where
-.BR sendfile ()
-fails with
-.B EINVAL
-or
-.BR ENOSYS .
-.P
-If
-.I out_fd
-refers to a socket or pipe with zero-copy support, callers must ensure the
-transferred portions of the file referred to by
-.I in_fd
-remain unmodified until the reader on the other end of
-.I out_fd
-has consumed the transferred data.
-.P
-The Linux-specific
-.BR splice (2)
-call supports transferring data between arbitrary file descriptors
-provided one (or both) of them is a pipe.
-.SH SEE ALSO
-.BR copy_file_range (2),
-.BR mmap (2),
-.BR open (2),
-.BR socket (2),
-.BR splice (2)
diff --git a/man2/sendfile64.2 b/man2/sendfile64.2
deleted file mode 100644
index 888077b1a..000000000
--- a/man2/sendfile64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sendfile.2
diff --git a/man2/sendmmsg.2 b/man2/sendmmsg.2
deleted file mode 100644
index bc541aae5..000000000
--- a/man2/sendmmsg.2
+++ /dev/null
@@ -1,232 +0,0 @@
-.\" Copyright (c) 2012 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" with some material from a draft by
-.\" Stephan Mueller <stephan.mueller@atsec.com>
-.\" in turn based on Andi Kleen's recvmmsg.2 page.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH sendmmsg 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sendmmsg \- send multiple messages on a socket
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sys/socket.h>
-.P
-.BI "int sendmmsg(int " sockfd ", struct mmsghdr *" msgvec \
-", unsigned int " vlen ","
-.BI " int " flags ");"
-.fi
-.SH DESCRIPTION
-The
-.BR sendmmsg ()
-system call is an extension of
-.BR sendmsg (2)
-that allows the caller to transmit multiple messages on a socket
-using a single system call.
-(This has performance benefits for some applications.)
-.\" See commit 228e548e602061b08ee8e8966f567c12aa079682
-.P
-The
-.I sockfd
-argument is the file descriptor of the socket
-on which data is to be transmitted.
-.P
-The
-.I msgvec
-argument is a pointer to an array of
-.I mmsghdr
-structures.
-The size of this array is specified in
-.IR vlen .
-.P
-The
-.I mmsghdr
-structure is defined in
-.I <sys/socket.h>
-as:
-.P
-.in +4n
-.EX
-struct mmsghdr {
- struct msghdr msg_hdr; /* Message header */
- unsigned int msg_len; /* Number of bytes transmitted */
-};
-.EE
-.in
-.P
-The
-.I msg_hdr
-field is a
-.I msghdr
-structure, as described in
-.BR sendmsg (2).
-The
-.I msg_len
-field is used to return the number of bytes sent from the message in
-.I msg_hdr
-(i.e., the same as the return value from a single
-.BR sendmsg (2)
-call).
-.P
-The
-.I flags
-argument contains flags ORed together.
-The flags are the same as for
-.BR sendmsg (2).
-.P
-A blocking
-.BR sendmmsg ()
-call blocks until
-.I vlen
-messages have been sent.
-A nonblocking call sends as many messages as possible
-(up to the limit specified by
-.IR vlen )
-and returns immediately.
-.P
-On return from
-.BR sendmmsg (),
-the
-.I msg_len
-fields of successive elements of
-.I msgvec
-are updated to contain the number of bytes transmitted from the corresponding
-.IR msg_hdr .
-The return value of the call indicates the number of elements of
-.I msgvec
-that have been updated.
-.SH RETURN VALUE
-On success,
-.BR sendmmsg ()
-returns the number of messages sent from
-.IR msgvec ;
-if this is less than
-.IR vlen ,
-the caller can retry with a further
-.BR sendmmsg ()
-call to send the remaining messages.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-Errors are as for
-.BR sendmsg (2).
-An error is returned only if no datagrams could be sent.
-See also BUGS.
-.\" commit 728ffb86f10873aaf4abd26dde691ee40ae731fe
-.\" ... only return an error if no datagrams could be sent.
-.\" If less than the requested number of messages were sent, the application
-.\" must retry starting at the first failed one and if the problem is
-.\" persistent the error will be returned.
-.\"
-.\" This matches the behavior of other syscalls like read/write - it
-.\" is not an error if less than the requested number of elements are sent.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 3.0,
-glibc 2.14.
-.SH NOTES
-The value specified in
-.I vlen
-is capped to
-.B UIO_MAXIOV
-(1024).
-.\" commit 98382f419f32d2c12d021943b87dea555677144b
-.\" net: Cap number of elements for sendmmsg
-.\"
-.\" To limit the amount of time we can spend in sendmmsg, cap the
-.\" number of elements to UIO_MAXIOV (currently 1024).
-.\"
-.\" For error handling an application using sendmmsg needs to retry at
-.\" the first unsent message, so capping is simpler and requires less
-.\" application logic than returning EINVAL.
-.SH BUGS
-If an error occurs after at least one message has been sent,
-the call succeeds, and returns the number of messages sent.
-The error code is lost.
-The caller can retry the transmission,
-starting at the first failed message, but there is no guarantee that,
-if an error is returned, it will be the same as the one that was lost
-on the previous call.
-.SH EXAMPLES
-The example below uses
-.BR sendmmsg ()
-to send
-.I onetwo
-and
-.I three
-in two distinct UDP datagrams using one system call.
-The contents of the first datagram originate from a pair of buffers.
-.P
-.\" SRC BEGIN (sendmmsg.c)
-.EX
-#define _GNU_SOURCE
-#include <arpa/inet.h>
-#include <netinet/in.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-\&
-int
-main(void)
-{
- int retval;
- int sockfd;
- struct iovec msg1[2], msg2;
- struct mmsghdr msg[2];
- struct sockaddr_in addr;
-\&
- sockfd = socket(AF_INET, SOCK_DGRAM, 0);
- if (sockfd == \-1) {
- perror("socket()");
- exit(EXIT_FAILURE);
- }
-\&
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr.sin_port = htons(1234);
- if (connect(sockfd, (struct sockaddr *) &addr, sizeof(addr)) == \-1) {
- perror("connect()");
- exit(EXIT_FAILURE);
- }
-\&
- memset(msg1, 0, sizeof(msg1));
- msg1[0].iov_base = "one";
- msg1[0].iov_len = 3;
- msg1[1].iov_base = "two";
- msg1[1].iov_len = 3;
-\&
- memset(&msg2, 0, sizeof(msg2));
- msg2.iov_base = "three";
- msg2.iov_len = 5;
-\&
- memset(msg, 0, sizeof(msg));
- msg[0].msg_hdr.msg_iov = msg1;
- msg[0].msg_hdr.msg_iovlen = 2;
-\&
- msg[1].msg_hdr.msg_iov = &msg2;
- msg[1].msg_hdr.msg_iovlen = 1;
-\&
- retval = sendmmsg(sockfd, msg, 2, 0);
- if (retval == \-1)
- perror("sendmmsg()");
- else
- printf("%d messages sent\en", retval);
-\&
- exit(0);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR recvmmsg (2),
-.BR sendmsg (2),
-.BR socket (2),
-.BR socket (7)
diff --git a/man2/sendmsg.2 b/man2/sendmsg.2
deleted file mode 100644
index 9a61b33bb..000000000
--- a/man2/sendmsg.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/send.2
diff --git a/man2/sendto.2 b/man2/sendto.2
deleted file mode 100644
index 9a61b33bb..000000000
--- a/man2/sendto.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/send.2
diff --git a/man2/set_mempolicy.2 b/man2/set_mempolicy.2
deleted file mode 100644
index f1f225e32..000000000
--- a/man2/set_mempolicy.2
+++ /dev/null
@@ -1,343 +0,0 @@
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft-var
-.\"
-.\" Copyright 2003,2004 Andi Kleen, SuSE Labs.
-.\" and Copyright 2007 Lee Schermerhorn, Hewlett Packard
-.\"
-.\" 2006-02-03, mtk, substantial wording changes and other improvements
-.\" 2007-08-27, Lee Schermerhorn <Lee.Schermerhorn@hp.com>
-.\" more precise specification of behavior.
-.\"
-.TH set_mempolicy 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-set_mempolicy \- set default NUMA memory policy for a thread and its children
-.SH LIBRARY
-NUMA (Non-Uniform Memory Access) policy library
-.RI ( libnuma ", " \-lnuma )
-.SH SYNOPSIS
-.nf
-.B "#include <numaif.h>"
-.P
-.BI "long set_mempolicy(int " mode ", const unsigned long *" nodemask ,
-.BI " unsigned long " maxnode );
-.fi
-.SH DESCRIPTION
-.BR set_mempolicy ()
-sets the NUMA memory policy of the calling thread,
-which consists of a policy mode and zero or more nodes,
-to the values specified by the
-.IR mode ,
-.IR nodemask ,
-and
-.I maxnode
-arguments.
-.P
-A NUMA machine has different
-memory controllers with different distances to specific CPUs.
-The memory policy defines from which node memory is allocated for
-the thread.
-.P
-This system call defines the default policy for the thread.
-The thread policy governs allocation of pages in the process's
-address space outside of memory ranges
-controlled by a more specific policy set by
-.BR mbind (2).
-The thread default policy also controls allocation of any pages for
-memory-mapped files mapped using the
-.BR mmap (2)
-call with the
-.B MAP_PRIVATE
-flag and that are only read (loaded) from by the thread
-and of memory-mapped files mapped using the
-.BR mmap (2)
-call with the
-.B MAP_SHARED
-flag, regardless of the access type.
-The policy is applied only when a new page is allocated
-for the thread.
-For anonymous memory this is when the page is first
-touched by the thread.
-.P
-The
-.I mode
-argument must specify one of
-.BR MPOL_DEFAULT ,
-.BR MPOL_BIND ,
-.BR MPOL_INTERLEAVE ,
-.BR MPOL_WEIGHTED_INTERLEAVE ,
-.BR MPOL_PREFERRED ,
-or
-.B MPOL_LOCAL
-(which are described in detail below).
-All modes except
-.B MPOL_DEFAULT
-require the caller to specify the node or nodes to which the mode applies,
-via the
-.I nodemask
-argument.
-.P
-The
-.I mode
-argument may also include an optional
-.IR "mode flag" .
-The supported
-.I "mode flags"
-are:
-.TP
-.BR MPOL_F_NUMA_BALANCING " (since Linux 5.12)"
-.\" commit bda420b985054a3badafef23807c4b4fa38a3dff
-When
-.I mode
-is
-.BR MPOL_BIND ,
-enable the kernel NUMA balancing for the task if it is supported by the kernel.
-If the flag isn't supported by the kernel, or is used with
-.I mode
-other than
-.BR MPOL_BIND ,
-\-1 is returned and
-.I errno
-is set to
-.BR EINVAL .
-.TP
-.BR MPOL_F_RELATIVE_NODES " (since Linux 2.6.26)"
-A nonempty
-.I nodemask
-specifies node IDs that are relative to the
-set of node IDs allowed by the process's current cpuset.
-.TP
-.BR MPOL_F_STATIC_NODES " (since Linux 2.6.26)"
-A nonempty
-.I nodemask
-specifies physical node IDs.
-Linux will not remap the
-.I nodemask
-when the process moves to a different cpuset context,
-nor when the set of nodes allowed by the process's
-current cpuset context changes.
-.P
-.I nodemask
-points to a bit mask of node IDs that contains up to
-.I maxnode
-bits.
-The bit mask size is rounded to the next multiple of
-.IR "sizeof(unsigned long)" ,
-but the kernel will use bits only up to
-.IR maxnode .
-A NULL value of
-.I nodemask
-or a
-.I maxnode
-value of zero specifies the empty set of nodes.
-If the value of
-.I maxnode
-is zero,
-the
-.I nodemask
-argument is ignored.
-.P
-Where a
-.I nodemask
-is required, it must contain at least one node that is on-line,
-allowed by the process's current cpuset context,
-(unless the
-.B MPOL_F_STATIC_NODES
-mode flag is specified),
-and contains memory.
-If the
-.B MPOL_F_STATIC_NODES
-flag is set in
-.I mode
-and a required
-.I nodemask
-contains no nodes that are allowed by the process's current cpuset context,
-the memory policy reverts to
-.IR "local allocation" .
-This effectively overrides the specified policy until the process's
-cpuset context includes one or more of the nodes specified by
-.IR nodemask .
-.P
-The
-.I mode
-argument must include one of the following values:
-.TP
-.B MPOL_DEFAULT
-This mode specifies that any nondefault thread memory policy be removed,
-so that the memory policy "falls back" to the system default policy.
-The system default policy is "local allocation"\[em]that is,
-allocate memory on the node of the CPU that triggered the allocation.
-.I nodemask
-must be specified as NULL.
-If the "local node" contains no free memory, the system will
-attempt to allocate memory from a "near by" node.
-.TP
-.B MPOL_BIND
-This mode defines a strict policy that restricts memory allocation to the
-nodes specified in
-.IR nodemask .
-If
-.I nodemask
-specifies more than one node, page allocations will come from
-the node with the lowest numeric node ID first, until that node
-contains no free memory.
-Allocations will then come from the node with the next highest
-node ID specified in
-.I nodemask
-and so forth, until none of the specified nodes contain free memory.
-Pages will not be allocated from any node not specified in the
-.IR nodemask .
-.TP
-.B MPOL_INTERLEAVE
-This mode interleaves page allocations across the nodes specified in
-.I nodemask
-in numeric node ID order.
-This optimizes for bandwidth instead of latency
-by spreading out pages and memory accesses to those pages across
-multiple nodes.
-However, accesses to a single page will still be limited to
-the memory bandwidth of a single node.
-.\" NOTE: the following sentence doesn't make sense in the context
-.\" of set_mempolicy() -- no memory area specified.
-.\" To be effective the memory area should be fairly large,
-.\" at least 1 MB or bigger.
-.TP
-.BR MPOL_WEIGHTED_INTERLEAVE " (since Linux 6.9)"
-.\" commit fa3bea4e1f8202d787709b7e3654eb0a99aed758
-This mode interleaves page allocations across the nodes specified in
-.I nodemask
-according to the weights in
-.IR /sys/kernel/mm/mempolicy/weighted_interleave .
-For example, if bits 0, 2, and 5 are set in
-.IR nodemask ,
-and the contents of
-.IR /sys/kernel/mm/mempolicy/weighted_interleave/node0 ,
-.IR /sys/ .\|.\|. /node2 ,
-and
-.IR /sys/ .\|.\|. /node5
-are 4, 7, and 9, respectively,
-then pages in this region will be allocated on nodes 0, 2, and 5
-in a 4:7:9 ratio.
-.TP
-.B MPOL_PREFERRED
-This mode sets the preferred node for allocation.
-The kernel will try to allocate pages from this node first
-and fall back to "near by" nodes if the preferred node is low on free
-memory.
-If
-.I nodemask
-specifies more than one node ID, the first node in the
-mask will be selected as the preferred node.
-If the
-.I nodemask
-and
-.I maxnode
-arguments specify the empty set, then the policy
-specifies "local allocation"
-(like the system default policy discussed above).
-.TP
-.BR MPOL_LOCAL " (since Linux 3.8)"
-.\" commit 479e2802d09f1e18a97262c4c6f8f17ae5884bd8
-.\" commit f2a07f40dbc603c15f8b06e6ec7f768af67b424f
-This mode specifies "local allocation"; the memory is allocated on
-the node of the CPU that triggered the allocation (the "local node").
-The
-.I nodemask
-and
-.I maxnode
-arguments must specify the empty set.
-If the "local node" is low on free memory,
-the kernel will try to allocate memory from other nodes.
-The kernel will allocate memory from the "local node"
-whenever memory for this node is available.
-If the "local node" is not allowed by the process's current cpuset context,
-the kernel will try to allocate memory from other nodes.
-The kernel will allocate memory from the "local node" whenever
-it becomes allowed by the process's current cpuset context.
-.P
-The thread memory policy is preserved across an
-.BR execve (2),
-and is inherited by child threads created using
-.BR fork (2)
-or
-.BR clone (2).
-.SH RETURN VALUE
-On success,
-.BR set_mempolicy ()
-returns 0;
-on error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Part or all of the memory range specified by
-.I nodemask
-and
-.I maxnode
-points outside your accessible address space.
-.TP
-.B EINVAL
-.I mode
-is invalid.
-Or,
-.I mode
-is
-.B MPOL_DEFAULT
-and
-.I nodemask
-is nonempty,
-or
-.I mode
-is
-.B MPOL_BIND
-or
-.B MPOL_INTERLEAVE
-and
-.I nodemask
-is empty.
-Or,
-.I maxnode
-specifies more than a page worth of bits.
-Or,
-.I nodemask
-specifies one or more node IDs that are
-greater than the maximum supported node ID.
-Or, none of the node IDs specified by
-.I nodemask
-are on-line and allowed by the process's current cpuset context,
-or none of the specified nodes contain memory.
-Or, the
-.I mode
-argument specified both
-.B MPOL_F_STATIC_NODES
-and
-.BR MPOL_F_RELATIVE_NODES .
-Or, the
-.B MPOL_F_NUMA_BALANCING
-flag isn't supported by the kernel, or is used with
-.I mode
-other than
-.BR MPOL_BIND .
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.7.
-.SH NOTES
-Memory policy is not remembered if the page is swapped out.
-When such a page is paged back in, it will use the policy of
-the thread or memory range that is in effect at the time the
-page is allocated.
-.P
-For information on library support, see
-.BR numa (7).
-.SH SEE ALSO
-.BR get_mempolicy (2),
-.BR getcpu (2),
-.BR mbind (2),
-.BR mmap (2),
-.BR numa (3),
-.BR cpuset (7),
-.BR numa (7),
-.BR numactl (8)
diff --git a/man2/set_robust_list.2 b/man2/set_robust_list.2
deleted file mode 100644
index a38aa23ed..000000000
--- a/man2/set_robust_list.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/get_robust_list.2
diff --git a/man2/set_thread_area.2 b/man2/set_thread_area.2
deleted file mode 100644
index feef67c4c..000000000
--- a/man2/set_thread_area.2
+++ /dev/null
@@ -1,229 +0,0 @@
-.\" Copyright (C) 2003 Free Software Foundation, Inc.
-.\" Copyright (C) 2015 Andrew Lutomirski
-.\" Author: Kent Yoder
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.TH set_thread_area 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-get_thread_area, set_thread_area \- manipulate thread-local storage information
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.B #if defined __i386__ || defined __x86_64__
-.BR "# include <asm/ldt.h>" " /* Definition of " "struct user_desc" " */"
-.P
-.BI "int syscall(SYS_get_thread_area, struct user_desc *" u_info );
-.BI "int syscall(SYS_set_thread_area, struct user_desc *" u_info );
-.P
-.B #elif defined __m68k__
-.P
-.B "int syscall(SYS_get_thread_area);"
-.BI "int syscall(SYS_set_thread_area, unsigned long " tp );
-.P
-.B #elif defined __mips__ || defined __csky__
-.P
-.BI "int syscall(SYS_set_thread_area, unsigned long " addr );
-.P
-.B #endif
-.fi
-.P
-.IR Note :
-glibc provides no wrappers for these system calls,
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-These calls provide architecture-specific support for a thread-local storage
-implementation.
-At the moment,
-.BR set_thread_area ()
-is available on m68k, MIPS, C-SKY, and x86 (both 32-bit and 64-bit variants);
-.BR get_thread_area ()
-is available on m68k and x86.
-.P
-On m68k, MIPS and C-SKY,
-.BR set_thread_area ()
-allows storing an arbitrary pointer (provided in the
-.B tp
-argument on m68k and in the
-.B addr
-argument on MIPS and C-SKY)
-in the kernel data structure associated with the calling thread;
-this pointer can later be retrieved using
-.BR get_thread_area ()
-(see also NOTES
-for information regarding obtaining the thread pointer on MIPS).
-.P
-On x86, Linux dedicates three global descriptor table (GDT) entries for
-thread-local storage.
-For more information about the GDT, see the
-Intel Software Developer's Manual or the AMD Architecture Programming Manual.
-.P
-Both of these system calls take an argument that is a pointer
-to a structure of the following type:
-.P
-.in +4n
-.EX
-struct user_desc {
- unsigned int entry_number;
- unsigned int base_addr;
- unsigned int limit;
- unsigned int seg_32bit:1;
- unsigned int contents:2;
- unsigned int read_exec_only:1;
- unsigned int limit_in_pages:1;
- unsigned int seg_not_present:1;
- unsigned int useable:1;
-#ifdef __x86_64__
- unsigned int lm:1;
-#endif
-};
-.EE
-.in
-.P
-.BR get_thread_area ()
-reads the GDT entry indicated by
-.I u_info\->entry_number
-and fills in the rest of the fields in
-.IR u_info .
-.P
-.BR set_thread_area ()
-sets a TLS entry in the GDT.
-.P
-The TLS array entry set by
-.BR set_thread_area ()
-corresponds to the value of
-.I u_info\->entry_number
-passed in by the user.
-If this value is in bounds,
-.BR set_thread_area ()
-writes the TLS descriptor pointed to by
-.I u_info
-into the thread's TLS array.
-.P
-When
-.BR set_thread_area ()
-is passed an
-.I entry_number
-of \-1, it searches for a free TLS entry.
-If
-.BR set_thread_area ()
-finds a free TLS entry, the value of
-.I u_info\->entry_number
-is set upon return to show which entry was changed.
-.P
-A
-.I user_desc
-is considered "empty" if
-.I read_exec_only
-and
-.I seg_not_present
-are set to 1 and all of the other fields are 0.
-If an "empty" descriptor is passed to
-.BR set_thread_area (),
-the corresponding TLS entry will be cleared.
-See BUGS for additional details.
-.P
-Since Linux 3.19,
-.BR set_thread_area ()
-cannot be used to write non-present segments, 16-bit segments, or code
-segments, although clearing a segment is still acceptable.
-.SH RETURN VALUE
-On x86, these system calls
-return 0 on success, and \-1 on failure, with
-.I errno
-set to indicate the error.
-.P
-On C-SKY, MIPS and m68k,
-.BR set_thread_area ()
-always returns 0.
-On m68k,
-.BR get_thread_area ()
-returns the thread area pointer value
-(previously set via
-.BR set_thread_area ()).
-.SH ERRORS
-.TP
-.B EFAULT
-\fIu_info\fP is an invalid pointer.
-.TP
-.B EINVAL
-\fIu_info\->entry_number\fP is out of bounds.
-.TP
-.B ENOSYS
-.BR get_thread_area ()
-or
-.BR set_thread_area ()
-was invoked as a 64-bit system call.
-.TP
-.B ESRCH
-.RB ( set_thread_area ())
-A free TLS entry could not be located.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR set_thread_area ()
-Linux 2.5.29.
-.TP
-.BR get_thread_area ()
-Linux 2.5.32.
-.SH NOTES
-These system calls are generally intended for use only by threading libraries.
-.P
-.BR arch_prctl (2)
-can interfere with
-.BR set_thread_area ()
-on x86.
-See
-.BR arch_prctl (2)
-for more details.
-This is not normally a problem, as
-.BR arch_prctl (2)
-is normally used only by 64-bit programs.
-.P
-On MIPS, the current value of the thread area pointer can be obtained
-using the instruction:
-.P
-.in +4n
-.EX
-rdhwr dest, $29
-.EE
-.in
-.P
-This instruction traps and is handled by the kernel.
-.SH BUGS
-On 64-bit kernels before Linux 3.19,
-.\" commit e30ab185c490e9a9381385529e0fd32f0a399495
-one of the padding bits in
-.IR user_desc ,
-if set, would prevent the descriptor from being considered empty (see
-.BR modify_ldt (2)).
-As a result, the only reliable way to clear a TLS entry is to use
-.BR memset (3)
-to zero the entire
-.I user_desc
-structure, including padding bits, and then to set the
-.I read_exec_only
-and
-.I seg_not_present
-bits.
-Since Linux 3.19, a
-.I user_desc
-consisting entirely of zeros except for
-.I entry_number
-will also be interpreted as a request to clear a TLS entry, but this
-behaved differently on older kernels.
-.P
-Prior to Linux 3.19, the DS and ES segment registers must not reference
-TLS entries.
-.SH SEE ALSO
-.BR arch_prctl (2),
-.BR modify_ldt (2),
-.BR ptrace (2)
-.RB ( PTRACE_GET_THREAD_AREA " and " PTRACE_SET_THREAD_AREA )
diff --git a/man2/set_tid_address.2 b/man2/set_tid_address.2
deleted file mode 100644
index c60144913..000000000
--- a/man2/set_tid_address.2
+++ /dev/null
@@ -1,97 +0,0 @@
-.\" Copyright (C) 2004 Andries Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH set_tid_address 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-set_tid_address \- set pointer to thread ID
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "pid_t syscall(SYS_set_tid_address, int *" tidptr );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR set_tid_address (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-For each thread, the kernel maintains two attributes (addresses) called
-.I set_child_tid
-and
-.IR clear_child_tid .
-These two attributes contain the value NULL by default.
-.TP
-.I set_child_tid
-If a thread is started using
-.BR clone (2)
-with the
-.B CLONE_CHILD_SETTID
-flag,
-.I set_child_tid
-is set to the value passed in the
-.I ctid
-argument of that system call.
-.IP
-When
-.I set_child_tid
-is set, the very first thing the new thread does
-is to write its thread ID at this address.
-.TP
-.I clear_child_tid
-If a thread is started using
-.BR clone (2)
-with the
-.B CLONE_CHILD_CLEARTID
-flag,
-.I clear_child_tid
-is set to the value passed in the
-.I ctid
-argument of that system call.
-.P
-The system call
-.BR set_tid_address ()
-sets the
-.I clear_child_tid
-value for the calling thread to
-.IR tidptr .
-.P
-When a thread whose
-.I clear_child_tid
-is not NULL terminates, then,
-if the thread is sharing memory with other threads,
-0 is written at the address specified in
-.I clear_child_tid
-and the kernel performs the following operation:
-.P
-.in +4n
-.EX
-futex(clear_child_tid, FUTEX_WAKE, 1, NULL, NULL, 0);
-.EE
-.in
-.P
-The effect of this operation is to wake a single thread that
-is performing a futex wait on the memory location.
-Errors from the futex wake operation are ignored.
-.SH RETURN VALUE
-.BR set_tid_address ()
-always returns the caller's thread ID.
-.SH ERRORS
-.BR set_tid_address ()
-always succeeds.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.5.48.
-.P
-Details as given here are valid since Linux 2.5.49.
-.SH SEE ALSO
-.BR clone (2),
-.BR futex (2),
-.BR gettid (2)
diff --git a/man2/setdomainname.2 b/man2/setdomainname.2
deleted file mode 100644
index 1c1594cbe..000000000
--- a/man2/setdomainname.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getdomainname.2
diff --git a/man2/setegid.2 b/man2/setegid.2
deleted file mode 100644
index 85032b503..000000000
--- a/man2/setegid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/seteuid.2
diff --git a/man2/seteuid.2 b/man2/seteuid.2
deleted file mode 100644
index 0eae04f79..000000000
--- a/man2/seteuid.2
+++ /dev/null
@@ -1,134 +0,0 @@
-.\" Copyright (C) 2001 Andries Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" [should really be seteuid.3]
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\"
-.TH seteuid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-seteuid, setegid \- set effective user or group ID
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int seteuid(uid_t " euid );
-.BI "int setegid(gid_t " egid );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR seteuid (),
-.BR setegid ():
-.nf
- _POSIX_C_SOURCE >= 200112L
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-.BR seteuid ()
-sets the effective user ID of the calling process.
-Unprivileged processes may only set the effective user ID to the
-real user ID, the effective user ID or the saved set-user-ID.
-.P
-Precisely the same holds for
-.BR setegid ()
-with "group" instead of "user".
-.\" When
-.\" .I euid
-.\" equals \-1, nothing is changed.
-.\" (This is an artifact of the implementation in glibc of seteuid()
-.\" using setresuid(2).)
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-.IR Note :
-there are cases where
-.BR seteuid ()
-can fail even when the caller is UID 0;
-it is a grave security error to omit checking for a failure return from
-.BR seteuid ().
-.SH ERRORS
-.TP
-.B EINVAL
-The target user or group ID is not valid in this user namespace.
-.TP
-.B EPERM
-In the case of
-.BR seteuid ():
-the calling process is not privileged (does not have the
-.B CAP_SETUID
-capability in its user namespace) and
-.I euid
-does not match the current real user ID, current effective user ID,
-or current saved set-user-ID.
-.IP
-In the case of
-.BR setegid ():
-the calling process is not privileged (does not have the
-.B CAP_SETGID
-capability in its user namespace) and
-.I egid
-does not match the current real group ID, current effective group ID,
-or current saved set-group-ID.
-.SH VERSIONS
-Setting the effective user (group) ID to the
-saved set-user-ID (saved set-group-ID) is
-possible since Linux 1.1.37 (1.1.38).
-On an arbitrary system one should check
-.BR _POSIX_SAVED_IDS .
-.P
-Under glibc 2.0,
-.BI seteuid( euid )
-is equivalent to
-.BI setreuid(\-1, " euid" )
-and hence may change the saved set-user-ID.
-Under glibc 2.1 and later, it is equivalent to
-.BI setresuid(\-1, " euid" ", \-1)"
-and hence does not change the saved set-user-ID.
-Analogous remarks hold for
-.BR setegid (),
-with the difference that the change in implementation from
-.BI setregid(\-1, " egid" )
-to
-.BI setresgid(\-1, " egid" ", \-1)"
-occurred in glibc 2.2 or 2.3 (depending on the hardware architecture).
-.P
-According to POSIX.1,
-.BR seteuid ()
-.RB ( setegid ())
-need not permit
-.I euid
-.RI ( egid )
-to be the same value as the current effective user (group) ID,
-and some implementations do not permit this.
-.SS C library/kernel differences
-On Linux,
-.BR seteuid ()
-and
-.BR setegid ()
-are implemented as library functions that call, respectively,
-.BR setresuid (2)
-and
-.BR setresgid (2).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.3BSD.
-.SH SEE ALSO
-.BR geteuid (2),
-.BR setresuid (2),
-.BR setreuid (2),
-.BR setuid (2),
-.BR capabilities (7),
-.BR credentials (7),
-.BR user_namespaces (7)
diff --git a/man2/setfsgid.2 b/man2/setfsgid.2
deleted file mode 100644
index 3d02a65a8..000000000
--- a/man2/setfsgid.2
+++ /dev/null
@@ -1,109 +0,0 @@
-.\" Copyright (C) 1995, Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\" and Copyright (C) 2019, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Created 1995-08-06 Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\" Modified 2000-07-01 aeb
-.\" Modified 2002-07-23 aeb
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\"
-.TH setfsgid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setfsgid \- set group identity used for filesystem checks
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/fsuid.h>
-.P
-.BI "[[deprecated]] int setfsgid(gid_t " fsgid );
-.fi
-.SH DESCRIPTION
-On Linux, a process has both a filesystem group ID and an effective group ID.
-The (Linux-specific) filesystem group ID is used
-for permissions checking when accessing filesystem objects,
-while the effective group ID is used for some other kinds
-of permissions checks (see
-.BR credentials (7)).
-.P
-Normally, the value of the process's filesystem group ID
-is the same as the value of its effective group ID.
-This is so, because whenever a process's effective group ID is changed,
-the kernel also changes the filesystem group ID to be the same as
-the new value of the effective group ID.
-A process can cause the value of its filesystem group ID to diverge
-from its effective group ID by using
-.BR setfsgid ()
-to change its filesystem group ID to the value given in
-.IR fsgid .
-.P
-.BR setfsgid ()
-will succeed only if the caller is the superuser or if
-.I fsgid
-matches either the caller's real group ID, effective group ID,
-saved set-group-ID, or current filesystem group ID.
-.SH RETURN VALUE
-On both success and failure,
-this call returns the previous filesystem group ID of the caller.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 1.2.
-.\" Linux 1.1.44
-.\" and in libc since libc 4.7.6.
-.SS C library/kernel differences
-In glibc 2.15 and earlier,
-when the wrapper for this system call determines that the argument can't be
-passed to the kernel without integer truncation (because the kernel
-is old and does not support 32-bit group IDs),
-it will return \-1 and set \fIerrno\fP to
-.B EINVAL
-without attempting
-the system call.
-.SH NOTES
-The filesystem group ID concept and the
-.BR setfsgid ()
-system call were invented for historical reasons that are
-no longer applicable on modern Linux kernels.
-See
-.BR setfsuid (2)
-for a discussion of why the use of both
-.BR setfsuid (2)
-and
-.BR setfsgid ()
-is nowadays unneeded.
-.P
-The original Linux
-.BR setfsgid ()
-system call supported only 16-bit group IDs.
-Subsequently, Linux 2.4 added
-.BR setfsgid32 ()
-supporting 32-bit IDs.
-The glibc
-.BR setfsgid ()
-wrapper function transparently deals with the variation across kernel versions.
-.SH BUGS
-No error indications of any kind are returned to the caller,
-and the fact that both successful and unsuccessful calls return
-the same value makes it impossible to directly determine
-whether the call succeeded or failed.
-Instead, the caller must resort to looking at the return value
-from a further call such as
-.I setfsgid(\-1)
-(which will always fail), in order to determine if a preceding call to
-.BR setfsgid ()
-changed the filesystem group ID.
-At the very
-least,
-.B EPERM
-should be returned when the call fails (because the caller lacks the
-.B CAP_SETGID
-capability).
-.SH SEE ALSO
-.BR kill (2),
-.BR setfsuid (2),
-.BR capabilities (7),
-.BR credentials (7)
diff --git a/man2/setfsgid32.2 b/man2/setfsgid32.2
deleted file mode 100644
index fdb8bdcce..000000000
--- a/man2/setfsgid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setfsgid.2
diff --git a/man2/setfsuid.2 b/man2/setfsuid.2
deleted file mode 100644
index c6e75dafa..000000000
--- a/man2/setfsuid.2
+++ /dev/null
@@ -1,127 +0,0 @@
-.\" Copyright (C) 1995, Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\" and Copyright (C) 2013, 2019, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Created 1995-08-06 Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\" Modified 2000-07-01 aeb
-.\" Modified 2002-07-23 aeb
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\"
-.TH setfsuid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setfsuid \- set user identity used for filesystem checks
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/fsuid.h>
-.P
-.BI "[[deprecated]] int setfsuid(uid_t " fsuid );
-.fi
-.SH DESCRIPTION
-On Linux, a process has both a filesystem user ID and an effective user ID.
-The (Linux-specific) filesystem user ID is used
-for permissions checking when accessing filesystem objects,
-while the effective user ID is used for various other kinds
-of permissions checks (see
-.BR credentials (7)).
-.P
-Normally, the value of the process's filesystem user ID
-is the same as the value of its effective user ID.
-This is so, because whenever a process's effective user ID is changed,
-the kernel also changes the filesystem user ID to be the same as
-the new value of the effective user ID.
-A process can cause the value of its filesystem user ID to diverge
-from its effective user ID by using
-.BR setfsuid ()
-to change its filesystem user ID to the value given in
-.IR fsuid .
-.P
-Explicit calls to
-.BR setfsuid ()
-and
-.BR setfsgid (2)
-are (were) usually used only by programs such as the Linux NFS server that
-need to change what user and group ID is used for file access without a
-corresponding change in the real and effective user and group IDs.
-A change in the normal user IDs for a program such as the NFS server
-is (was) a security hole that can expose it to unwanted signals.
-(However, this issue is historical; see below.)
-.P
-.BR setfsuid ()
-will succeed only if the caller is the superuser or if
-.I fsuid
-matches either the caller's real user ID, effective user ID,
-saved set-user-ID, or current filesystem user ID.
-.SH RETURN VALUE
-On both success and failure,
-this call returns the previous filesystem user ID of the caller.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 1.2.
-.\" Linux 1.1.44
-.\" and in libc since libc 4.7.6.
-.P
-At the time when this system call was introduced, one process
-could send a signal to another process with the same effective user ID.
-This meant that if a privileged process changed its effective user ID
-for the purpose of file permission checking,
-then it could become vulnerable to receiving signals
-sent by another (unprivileged) process with the same user ID.
-The filesystem user ID attribute was thus added to allow a process to
-change its user ID for the purposes of file permission checking without
-at the same time becoming vulnerable to receiving unwanted signals.
-Since Linux 2.0, signal permission handling is different (see
-.BR kill (2)),
-with the result that a process can change its effective user ID
-without being vulnerable to receiving signals from unwanted processes.
-Thus,
-.BR setfsuid ()
-is nowadays unneeded and should be avoided in new applications
-(likewise for
-.BR setfsgid (2)).
-.P
-The original Linux
-.BR setfsuid ()
-system call supported only 16-bit user IDs.
-Subsequently, Linux 2.4 added
-.BR setfsuid32 ()
-supporting 32-bit IDs.
-The glibc
-.BR setfsuid ()
-wrapper function transparently deals with the variation across kernel versions.
-.SS C library/kernel differences
-In glibc 2.15 and earlier,
-when the wrapper for this system call determines that the argument can't be
-passed to the kernel without integer truncation (because the kernel
-is old and does not support 32-bit user IDs),
-it will return \-1 and set \fIerrno\fP to
-.B EINVAL
-without attempting
-the system call.
-.SH BUGS
-No error indications of any kind are returned to the caller,
-and the fact that both successful and unsuccessful calls return
-the same value makes it impossible to directly determine
-whether the call succeeded or failed.
-Instead, the caller must resort to looking at the return value
-from a further call such as
-.I setfsuid(\-1)
-(which will always fail), in order to determine if a preceding call to
-.BR setfsuid ()
-changed the filesystem user ID.
-At the very
-least,
-.B EPERM
-should be returned when the call fails (because the caller lacks the
-.B CAP_SETUID
-capability).
-.SH SEE ALSO
-.BR kill (2),
-.BR setfsgid (2),
-.BR capabilities (7),
-.BR credentials (7)
diff --git a/man2/setfsuid32.2 b/man2/setfsuid32.2
deleted file mode 100644
index 1ea58fd02..000000000
--- a/man2/setfsuid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setfsuid.2
diff --git a/man2/setgid.2 b/man2/setgid.2
deleted file mode 100644
index 4ca3f1765..000000000
--- a/man2/setgid.2
+++ /dev/null
@@ -1,92 +0,0 @@
-.\" Copyright (C), 1994, Graeme W. Wilford. (Wilf.)
-.\" and Copyright (C) 2010, 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Fri Jul 29th 12:56:44 BST 1994 Wilf. <G.Wilford@ee.surrey.ac.uk>
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2002-03-09 by aeb
-.\"
-.TH setgid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setgid \- set group identity
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int setgid(gid_t " gid );
-.fi
-.SH DESCRIPTION
-.BR setgid ()
-sets the effective group ID of the calling process.
-If the calling process is privileged (more precisely: has the
-.B CAP_SETGID
-capability in its user namespace),
-the real GID and saved set-group-ID are also set.
-.P
-Under Linux,
-.BR setgid ()
-is implemented like the POSIX version with the
-.B _POSIX_SAVED_IDS
-feature.
-This allows a set-group-ID program that is not set-user-ID-root
-to drop all of its group
-privileges, do some unprivileged work, and then reengage the original
-effective group ID in a secure manner.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-The group ID specified in
-.I gid
-is not valid in this user namespace.
-.TP
-.B EPERM
-The calling process is not privileged (does not have the
-\fBCAP_SETGID\fP capability in its user namespace), and
-.I gid
-does not match the real group ID or saved set-group-ID of
-the calling process.
-.SH VERSIONS
-.SS C library/kernel differences
-At the kernel level, user IDs and group IDs are a per-thread attribute.
-However, POSIX requires that all threads in a process
-share the same credentials.
-The NPTL threading implementation handles the POSIX requirements by
-providing wrapper functions for
-the various system calls that change process UIDs and GIDs.
-These wrapper functions (including the one for
-.BR setgid ())
-employ a signal-based technique to ensure
-that when one thread changes credentials,
-all of the other threads in the process also change their credentials.
-For details, see
-.BR nptl (7).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.P
-The original Linux
-.BR setgid ()
-system call supported only 16-bit group IDs.
-Subsequently, Linux 2.4 added
-.BR setgid32 ()
-supporting 32-bit IDs.
-The glibc
-.BR setgid ()
-wrapper function transparently deals with the variation across kernel versions.
-.SH SEE ALSO
-.BR getgid (2),
-.BR setegid (2),
-.BR setregid (2),
-.BR capabilities (7),
-.BR credentials (7),
-.BR user_namespaces (7)
diff --git a/man2/setgid32.2 b/man2/setgid32.2
deleted file mode 100644
index bc8ef19ca..000000000
--- a/man2/setgid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setgid.2
diff --git a/man2/setgroups.2 b/man2/setgroups.2
deleted file mode 100644
index 0ae4cc0b1..000000000
--- a/man2/setgroups.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getgroups.2
diff --git a/man2/setgroups32.2 b/man2/setgroups32.2
deleted file mode 100644
index 478fb63ef..000000000
--- a/man2/setgroups32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setgroups.2
diff --git a/man2/sethostname.2 b/man2/sethostname.2
deleted file mode 100644
index e1fa2a617..000000000
--- a/man2/sethostname.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/gethostname.2
diff --git a/man2/setitimer.2 b/man2/setitimer.2
deleted file mode 100644
index 9518567e0..000000000
--- a/man2/setitimer.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getitimer.2
diff --git a/man2/setns.2 b/man2/setns.2
deleted file mode 100644
index 860ee7e10..000000000
--- a/man2/setns.2
+++ /dev/null
@@ -1,419 +0,0 @@
-.\" Copyright (C) 2011, Eric Biederman <ebiederm@xmission.com>
-.\" and Copyright (C) 2011, 2012, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-only
-.\"
-.TH setns 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setns \- reassociate thread with a namespace
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <sched.h>
-.P
-.BI "int setns(int " fd ", int " nstype );
-.fi
-.SH DESCRIPTION
-The
-.BR setns ()
-system call allows the calling thread to move into different namespaces.
-The
-.I fd
-argument is one of the following:
-.IP \[bu] 3
-a file descriptor referring to one of the magic links in a
-.IR /proc/ pid /ns/
-directory (or a bind mount to such a link);
-.IP \[bu]
-a PID file descriptor (see
-.BR pidfd_open (2)).
-.P
-The
-.I nstype
-argument is interpreted differently in each case.
-.\"
-.SS fd refers to a \fI/proc/\fPpid\fI/ns/\fP link
-If
-.I fd
-refers to a
-.IR /proc/ pid /ns/
-link, then
-.BR setns ()
-reassociates the calling thread with the namespace associated with that link,
-subject to any constraints imposed by the
-.I nstype
-argument.
-In this usage, each call to
-.BR setns ()
-changes just one of the caller's namespace memberships.
-.P
-The
-.I nstype
-argument specifies which type of namespace
-the calling thread may be reassociated with.
-This argument can have
-.I one
-of the following values:
-.TP
-.B 0
-Allow any type of namespace to be joined.
-.TP
-.BR CLONE_NEWCGROUP " (since Linux 4.6)"
-.I fd
-must refer to a cgroup namespace.
-.TP
-.BR CLONE_NEWIPC " (since Linux 3.0)"
-.I fd
-must refer to an IPC namespace.
-.TP
-.BR CLONE_NEWNET " (since Linux 3.0)"
-.I fd
-must refer to a network namespace.
-.TP
-.BR CLONE_NEWNS " (since Linux 3.8)"
-.I fd
-must refer to a mount namespace.
-.TP
-.BR CLONE_NEWPID " (since Linux 3.8)"
-.I fd
-must refer to a descendant PID namespace.
-.TP
-.BR CLONE_NEWTIME " (since Linux 5.8)"
-.\" commit 76c12881a38aaa83e1eb4ce2fada36c3a732bad4
-.I fd
-must refer to a time namespace.
-.TP
-.BR CLONE_NEWUSER " (since Linux 3.8)"
-.I fd
-must refer to a user namespace.
-.TP
-.BR CLONE_NEWUTS " (since Linux 3.0)"
-.I fd
-must refer to a UTS namespace.
-.P
-Specifying
-.I nstype
-as 0 suffices if the caller knows (or does not care)
-what type of namespace is referred to by
-.IR fd .
-Specifying a nonzero value for
-.I nstype
-is useful if the caller does not know what type of namespace is referred to by
-.I fd
-and wants to ensure that the namespace is of a particular type.
-(The caller might not know the type of the namespace referred to by
-.I fd
-if the file descriptor was opened by another process and, for example,
-passed to the caller via a UNIX domain socket.)
-.\"
-.SS fd is a PID file descriptor
-Since Linux 5.8,
-.I fd
-may refer to a PID file descriptor obtained from
-.BR pidfd_open (2)
-or
-.BR clone (2).
-In this usage,
-.BR setns ()
-atomically moves the calling thread into one or more of the same namespaces
-as the thread referred to by
-.IR fd .
-.P
-The
-.I nstype
-argument is a bit mask specified by ORing together
-.I "one or more"
-of the
-.B CLONE_NEW*
-namespace constants listed above.
-The caller is moved into each of the target thread's namespaces
-that is specified in
-.IR nstype ;
-the caller's memberships in the remaining namespaces are left unchanged.
-.P
-For example, the following code would move the caller into the
-same user, network, and UTS namespaces as PID 1234,
-but would leave the caller's other namespace memberships unchanged:
-.P
-.in +4n
-.EX
-int fd = pidfd_open(1234, 0);
-setns(fd, CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS);
-.EE
-.in
-.\"
-.SS Details for specific namespace types
-Note the following details and restrictions when reassociating with
-specific namespace types:
-.TP
-User namespaces
-A process reassociating itself with a user namespace must have the
-.B CAP_SYS_ADMIN
-.\" See kernel/user_namespace.c:userns_install() [3.8 source]
-capability in the target user namespace.
-(This necessarily implies that it is only possible to join
-a descendant user namespace.)
-Upon successfully joining a user namespace,
-a process is granted all capabilities in that namespace,
-regardless of its user and group IDs.
-.IP
-A multithreaded process may not change user namespace with
-.BR setns ().
-.IP
-It is not permitted to use
-.BR setns ()
-to reenter the caller's current user namespace.
-This prevents a caller that has dropped capabilities from regaining
-those capabilities via a call to
-.BR setns ().
-.IP
-For security reasons,
-.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
-.\" https://lwn.net/Articles/543273/
-a process can't join a new user namespace if it is sharing
-filesystem-related attributes
-(the attributes whose sharing is controlled by the
-.BR clone (2)
-.B CLONE_FS
-flag) with another process.
-.IP
-For further details on user namespaces, see
-.BR user_namespaces (7).
-.TP
-Mount namespaces
-Changing the mount namespace requires that the caller possess both
-.B CAP_SYS_CHROOT
-and
-.B CAP_SYS_ADMIN
-capabilities in its own user namespace and
-.B CAP_SYS_ADMIN
-in the user namespace that owns the target mount namespace.
-.IP
-A process can't join a new mount namespace if it is sharing
-filesystem-related attributes
-(the attributes whose sharing is controlled by the
-.BR clone (2)
-.B CLONE_FS
-flag) with another process.
-.\" Above check is in fs/namespace.c:mntns_install() [3.8 source]
-.IP
-See
-.BR user_namespaces (7)
-for details on the interaction of user namespaces and mount namespaces.
-.TP
-PID namespaces
-In order to reassociate itself with a new PID namespace,
-the caller must have the
-.B CAP_SYS_ADMIN
-capability both in its own user namespace and in the user namespace
-that owns the target PID namespace.
-.IP
-Reassociating the PID namespace has somewhat different semantics
-from other namespace types.
-Reassociating the calling thread with a PID namespace changes only
-the PID namespace that subsequently created child processes of
-the caller will be placed in;
-it does not change the PID namespace of the caller itself.
-.IP
-Reassociating with a PID namespace is allowed only if the target
-PID namespace is a descendant (child, grandchild, etc.)
-of, or is the same as, the current PID namespace of the caller.
-.IP
-For further details on PID namespaces, see
-.BR pid_namespaces (7).
-.TP
-Cgroup namespaces
-In order to reassociate itself with a new cgroup namespace,
-the caller must have the
-.B CAP_SYS_ADMIN
-capability both in its own user namespace and in the user namespace
-that owns the target cgroup namespace.
-.IP
-Using
-.BR setns ()
-to change the caller's cgroup namespace does not change
-the caller's cgroup memberships.
-.TP
-Network, IPC, time, and UTS namespaces
-In order to reassociate itself with a new network, IPC, time, or UTS namespace,
-the caller must have the
-.B CAP_SYS_ADMIN
-capability both in its own user namespace and in the user namespace
-that owns the target namespace.
-.SH RETURN VALUE
-On success,
-.BR setns ()
-returns 0.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.B EINVAL
-.I fd
-refers to a namespace whose type does not match that specified in
-.IR nstype .
-.TP
-.B EINVAL
-There is a problem with reassociating
-the thread with the specified namespace.
-.TP
-.\" See kernel/pid_namespace.c::pidns_install() [kernel 3.18 sources]
-.B EINVAL
-The caller tried to join an ancestor (parent, grandparent, and so on)
-PID namespace.
-.TP
-.B EINVAL
-The caller attempted to join the user namespace
-in which it is already a member.
-.TP
-.B EINVAL
-.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
-The caller shares filesystem
-.RB ( CLONE_FS )
-state (in particular, the root directory)
-with other processes and tried to join a new user namespace.
-.TP
-.B EINVAL
-.\" See kernel/user_namespace.c::userns_install() [kernel 3.15 sources]
-The caller is multithreaded and tried to join a new user namespace.
-.TP
-.B EINVAL
-.I fd
-is a PID file descriptor and
-.I nstype
-is invalid (e.g., it is 0).
-.TP
-.B ENOMEM
-Cannot allocate sufficient memory to change the specified namespace.
-.TP
-.B EPERM
-The calling thread did not have the required capability
-for this operation.
-.TP
-.B ESRCH
-.I fd
-is a PID file descriptor but the process it refers to no longer exists
-(i.e., it has terminated and been waited on).
-.SH STANDARDS
-Linux.
-.SH VERSIONS
-Linux 3.0,
-glibc 2.14.
-.SH NOTES
-For further information on the
-.IR /proc/ pid /ns/
-magic links, see
-.BR namespaces (7).
-.P
-Not all of the attributes that can be shared when
-a new thread is created using
-.BR clone (2)
-can be changed using
-.BR setns ().
-.SH EXAMPLES
-The program below takes two or more arguments.
-The first argument specifies the pathname of a namespace file in an existing
-.IR /proc/ pid /ns/
-directory.
-The remaining arguments specify a command and its arguments.
-The program opens the namespace file, joins that namespace using
-.BR setns (),
-and executes the specified command inside that namespace.
-.P
-The following shell session demonstrates the use of this program
-(compiled as a binary named
-.IR ns_exec )
-in conjunction with the
-.B CLONE_NEWUTS
-example program in the
-.BR clone (2)
-man page (compiled as a binary named
-.IR newuts ).
-.P
-We begin by executing the example program in
-.BR clone (2)
-in the background.
-That program creates a child in a separate UTS namespace.
-The child changes the hostname in its namespace,
-and then both processes display the hostnames in their UTS namespaces,
-so that we can see that they are different.
-.P
-.in +4n
-.EX
-$ \fBsu\fP # Need privilege for namespace operations
-Password:
-# \fB./newuts bizarro &\fP
-[1] 3549
-clone() returned 3550
-uts.nodename in child: bizarro
-uts.nodename in parent: antero
-# \fBuname \-n\fP # Verify hostname in the shell
-antero
-.EE
-.in
-.P
-We then run the program shown below,
-using it to execute a shell.
-Inside that shell, we verify that the hostname is the one
-set by the child created by the first program:
-.P
-.in +4n
-.EX
-# \fB./ns_exec /proc/3550/ns/uts /bin/bash\fP
-# \fBuname \-n\fP # Executed in shell started by ns_exec
-bizarro
-.EE
-.in
-.SS Program source
-.\" SRC BEGIN (setns.c)
-.EX
-#define _GNU_SOURCE
-#include <err.h>
-#include <fcntl.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
-\&
- if (argc < 3) {
- fprintf(stderr, "%s /proc/PID/ns/FILE cmd args...\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- /* Get file descriptor for namespace; the file descriptor is opened
- with O_CLOEXEC so as to ensure that it is not inherited by the
- program that is later executed. */
-\&
- fd = open(argv[1], O_RDONLY | O_CLOEXEC);
- if (fd == \-1)
- err(EXIT_FAILURE, "open");
-\&
- if (setns(fd, 0) == \-1) /* Join that namespace */
- err(EXIT_FAILURE, "setns");
-\&
- execvp(argv[2], &argv[2]); /* Execute a command in namespace */
- err(EXIT_FAILURE, "execvp");
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR nsenter (1),
-.BR clone (2),
-.BR fork (2),
-.BR unshare (2),
-.BR vfork (2),
-.BR namespaces (7),
-.BR unix (7)
diff --git a/man2/setpgid.2 b/man2/setpgid.2
deleted file mode 100644
index d5f9bbd6b..000000000
--- a/man2/setpgid.2
+++ /dev/null
@@ -1,329 +0,0 @@
-.\" Copyright (c) 1983, 1991 Regents of the University of California.
-.\" and Copyright (C) 2007, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)getpgrp.2 6.4 (Berkeley) 3/10/91
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-04-15 by Michael Chastain <mec@shell.portal.com>:
-.\" Added 'getpgid'.
-.\" Modified 1996-07-21 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 1996-11-06 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1999-09-02 by Michael Haardt <michael@moria.de>
-.\" Modified 2002-01-18 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2003-01-20 by Andries Brouwer <aeb@cwi.nl>
-.\" 2007-07-25, mtk, fairly substantial rewrites and rearrangements
-.\" of text.
-.\"
-.TH setpgid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setpgid, getpgid, setpgrp, getpgrp \- set/get process group
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int setpgid(pid_t " pid ", pid_t " pgid );
-.BI "pid_t getpgid(pid_t " pid );
-.P
-.BR "pid_t getpgrp(void);" " /* POSIX.1 version */"
-.BI "[[deprecated]] pid_t getpgrp(pid_t " pid ");\fR /* BSD version */"
-.P
-.BR "int setpgrp(void);" " /* System V version */"
-.BI "[[deprecated]] int setpgrp(pid_t " pid ", pid_t " pgid ");\fR /* BSD version */"
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR getpgid ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.12: */ _POSIX_C_SOURCE >= 200809L
-.fi
-.P
-.BR setpgrp "() (POSIX.1):"
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.19: */ _DEFAULT_SOURCE
- || /* glibc <= 2.19: */ _SVID_SOURCE
-.fi
-.P
-.BR setpgrp "() (BSD),"
-.BR getpgrp "() (BSD):"
-.nf
- [These are available only before glibc 2.19]
- _BSD_SOURCE &&
- ! (_POSIX_SOURCE || _POSIX_C_SOURCE || _XOPEN_SOURCE
- || _GNU_SOURCE || _SVID_SOURCE)
-.fi
-.SH DESCRIPTION
-All of these interfaces are available on Linux,
-and are used for getting and setting the
-process group ID (PGID) of a process.
-The preferred, POSIX.1-specified ways of doing this are:
-.BR getpgrp (void),
-for retrieving the calling process's PGID; and
-.BR setpgid (),
-for setting a process's PGID.
-.P
-.BR setpgid ()
-sets the PGID of the process specified by
-.I pid
-to
-.IR pgid .
-If
-.I pid
-is zero, then the process ID of the calling process is used.
-If
-.I pgid
-is zero, then the PGID of the process specified by
-.I pid
-is made the same as its process ID.
-If
-.BR setpgid ()
-is used to move a process from one process
-group to another (as is done by some shells when creating pipelines),
-both process groups must be part of the same session (see
-.BR setsid (2)
-and
-.BR credentials (7)).
-In this case,
-the \fIpgid\fP specifies an existing process group to be joined and the
-session ID of that group must match the session ID of the joining process.
-.P
-The POSIX.1 version of
-.BR getpgrp (),
-which takes no arguments,
-returns the PGID of the calling process.
-.P
-.BR getpgid ()
-returns the PGID of the process specified by
-.IR pid .
-If
-.I pid
-is zero, the process ID of the calling process is used.
-(Retrieving the PGID of a process other than the caller is rarely
-necessary, and the POSIX.1
-.BR getpgrp ()
-is preferred for retrieving the caller's PGID.)
-.P
-The System\ V-style
-.BR setpgrp (),
-which takes no arguments, is equivalent to
-.IR "setpgid(0,\ 0)" .
-.P
-The BSD-specific
-.BR setpgrp ()
-call, which takes arguments
-.I pid
-and
-.IR pgid ,
-is a wrapper function that calls
-.P
-.in +4n
-.EX
-setpgid(pid, pgid)
-.EE
-.in
-.P
-.\" The true BSD setpgrp() system call differs in allowing the PGID
-.\" to be set to arbitrary values, rather than being restricted to
-.\" PGIDs in the same session.
-Since glibc 2.19, the BSD-specific
-.BR setpgrp ()
-function is no longer exposed by
-.IR <unistd.h> ;
-calls should be replaced with the
-.BR setpgid ()
-call shown above.
-.P
-The BSD-specific
-.BR getpgrp ()
-call, which takes a single
-.I pid
-argument, is a wrapper function that calls
-.P
-.in +4n
-.EX
-getpgid(pid)
-.EE
-.in
-.P
-Since glibc 2.19, the BSD-specific
-.BR getpgrp ()
-function is no longer exposed by
-.IR <unistd.h> ;
-calls should be replaced with calls to the POSIX.1
-.BR getpgrp ()
-which takes no arguments (if the intent is to obtain the caller's PGID),
-or with the
-.BR getpgid ()
-call shown above.
-.SH RETURN VALUE
-On success,
-.BR setpgid ()
-and
-.BR setpgrp ()
-return zero.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-The POSIX.1
-.BR getpgrp ()
-always returns the PGID of the caller.
-.P
-.BR getpgid (),
-and the BSD-specific
-.BR getpgrp ()
-return a process group on success.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-An attempt was made to change the process group ID
-of one of the children of the calling process and the child had
-already performed an
-.BR execve (2)
-.RB ( setpgid (),
-.BR setpgrp ()).
-.TP
-.B EINVAL
-.I pgid
-is less than 0
-.RB ( setpgid (),
-.BR setpgrp ()).
-.TP
-.B EPERM
-An attempt was made to move a process into a process group in a
-different session, or to change the process
-group ID of one of the children of the calling process and the
-child was in a different session, or to change the process group ID of
-a session leader
-.RB ( setpgid (),
-.BR setpgrp ()).
-.TP
-.B EPERM
-The target process group does not exist
-.RB ( setpgid (),
-.BR setpgrp ()).
-.TP
-.B ESRCH
-For
-.BR getpgid ():
-.I pid
-does not match any process.
-For
-.BR setpgid ():
-.I pid
-is not the calling process and not a child of the calling process.
-.SH STANDARDS
-.TP
-.BR getpgid ()
-.TQ
-.BR setpgid ()
-.TQ
-.BR getpgrp "() (no args)"
-.TQ
-.BR setpgrp "() (no args)"
-POSIX.1-2008 (but see HISTORY).
-.TP
-.BR setpgrp "() (2 args)"
-.TQ
-.BR getpgrp "() (1 arg)"
-None.
-.SH HISTORY
-.TP
-.BR getpgid ()
-.TQ
-.BR setpgid ()
-.TQ
-.BR getpgrp "() (no args)"
-POSIX.1-2001.
-.TP
-.BR setpgrp "() (no args)"
-POSIX.1-2001.
-POSIX.1-2008 marks it as obsolete.
-.TP
-.BR setpgrp "() (2 args)"
-.TQ
-.BR getpgrp "() (1 arg)"
-4.2BSD.
-.SH NOTES
-A child created via
-.BR fork (2)
-inherits its parent's process group ID.
-The PGID is preserved across an
-.BR execve (2).
-.P
-Each process group is a member of a session and each process is a
-member of the session of which its process group is a member.
-(See
-.BR credentials (7).)
-.P
-A session can have a controlling terminal.
-At any time, one (and only one) of the process groups
-in the session can be the foreground process group
-for the terminal;
-the remaining process groups are in the background.
-If a signal is generated from the terminal (e.g., typing the
-interrupt key to generate
-.BR SIGINT ),
-that signal is sent to the foreground process group.
-(See
-.BR termios (3)
-for a description of the characters that generate signals.)
-Only the foreground process group may
-.BR read (2)
-from the terminal;
-if a background process group tries to
-.BR read (2)
-from the terminal, then the group is sent a
-.B SIGTTIN
-signal, which suspends it.
-The
-.BR tcgetpgrp (3)
-and
-.BR tcsetpgrp (3)
-functions are used to get/set the foreground
-process group of the controlling terminal.
-.P
-The
-.BR setpgid ()
-and
-.BR getpgrp ()
-calls are used by programs such as
-.BR bash (1)
-to create process groups in order to implement shell job control.
-.P
-If the termination of a process causes a process group to become orphaned,
-and if any member of the newly orphaned process group is stopped, then a
-.B SIGHUP
-signal followed by a
-.B SIGCONT
-signal will be sent to each process
-in the newly orphaned process group.
-.\" exit.3 refers to the following text:
-An orphaned process group is one in which the parent of
-every member of the process group is either itself also a member
-of the process group or is a member of a process group
-in a different session (see also
-.BR credentials (7)).
-.SH SEE ALSO
-.BR getuid (2),
-.BR setsid (2),
-.BR tcgetpgrp (3),
-.BR tcsetpgrp (3),
-.BR termios (3),
-.BR credentials (7)
diff --git a/man2/setpgrp.2 b/man2/setpgrp.2
deleted file mode 100644
index d6b107a1c..000000000
--- a/man2/setpgrp.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setpgid.2
diff --git a/man2/setpriority.2 b/man2/setpriority.2
deleted file mode 100644
index b1dcfd91d..000000000
--- a/man2/setpriority.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getpriority.2
diff --git a/man2/setregid.2 b/man2/setregid.2
deleted file mode 100644
index ec3ff6416..000000000
--- a/man2/setregid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setreuid.2
diff --git a/man2/setregid32.2 b/man2/setregid32.2
deleted file mode 100644
index 035df1724..000000000
--- a/man2/setregid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setregid.2
diff --git a/man2/setresgid.2 b/man2/setresgid.2
deleted file mode 100644
index d6866a1de..000000000
--- a/man2/setresgid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setresuid.2
diff --git a/man2/setresgid32.2 b/man2/setresgid32.2
deleted file mode 100644
index dec1b9531..000000000
--- a/man2/setresgid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setresgid.2
diff --git a/man2/setresuid.2 b/man2/setresuid.2
deleted file mode 100644
index 5ee42b32d..000000000
--- a/man2/setresuid.2
+++ /dev/null
@@ -1,147 +0,0 @@
-.\" Copyright (C) 1997 Andries Brouwer (aeb@cwi.nl)
-.\" and Copyright (C) 2005, 2010, 2014, 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified, 2003-05-26, Michael Kerrisk, <mtk.manpages@gmail.com>
-.TH setresuid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setresuid, setresgid \- set real, effective, and saved user or group ID
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <unistd.h>
-.P
-.BI "int setresuid(uid_t " ruid ", uid_t " euid ", uid_t " suid );
-.BI "int setresgid(gid_t " rgid ", gid_t " egid ", gid_t " sgid );
-.fi
-.SH DESCRIPTION
-.BR setresuid ()
-sets the real user ID, the effective user ID, and the
-saved set-user-ID of the calling process.
-.P
-An unprivileged process may change its real UID,
-effective UID, and saved set-user-ID, each to one of:
-the current real UID, the current effective UID, or the
-current saved set-user-ID.
-.P
-A privileged process (on Linux, one having the \fBCAP_SETUID\fP capability)
-may set its real UID, effective UID, and
-saved set-user-ID to arbitrary values.
-.P
-If one of the arguments equals \-1, the corresponding value is not changed.
-.P
-Regardless of what changes are made to the real UID, effective UID,
-and saved set-user-ID, the filesystem UID is always set to the same
-value as the (possibly new) effective UID.
-.P
-Completely analogously,
-.BR setresgid ()
-sets the real GID, effective GID, and saved set-group-ID
-of the calling process (and always modifies the filesystem GID
-to be the same as the effective GID),
-with the same restrictions for unprivileged processes.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-.IR Note :
-there are cases where
-.BR setresuid ()
-can fail even when the caller is UID 0;
-it is a grave security error to omit checking for a failure return from
-.BR setresuid ().
-.SH ERRORS
-.TP
-.B EAGAIN
-The call would change the caller's real UID (i.e.,
-.I ruid
-does not match the caller's real UID),
-but there was a temporary failure allocating the
-necessary kernel data structures.
-.TP
-.B EAGAIN
-.I ruid
-does not match the caller's real UID and this call would
-bring the number of processes belonging to the real user ID
-.I ruid
-over the caller's
-.B RLIMIT_NPROC
-resource limit.
-Since Linux 3.1, this error case no longer occurs
-(but robust applications should check for this error);
-see the description of
-.B EAGAIN
-in
-.BR execve (2).
-.TP
-.B EINVAL
-One or more of the target user or group IDs
-is not valid in this user namespace.
-.TP
-.B EPERM
-The calling process is not privileged (did not have the necessary
-capability in its user namespace)
-and tried to change the IDs to values that are not permitted.
-For
-.BR setresuid (),
-the necessary capability is
-.BR CAP_SETUID ;
-for
-.BR setresgid (),
-it is
-.BR CAP_SETGID .
-.SH VERSIONS
-.SS C library/kernel differences
-At the kernel level, user IDs and group IDs are a per-thread attribute.
-However, POSIX requires that all threads in a process
-share the same credentials.
-The NPTL threading implementation handles the POSIX requirements by
-providing wrapper functions for
-the various system calls that change process UIDs and GIDs.
-These wrapper functions (including those for
-.BR setresuid ()
-and
-.BR setresgid ())
-employ a signal-based technique to ensure
-that when one thread changes credentials,
-all of the other threads in the process also change their credentials.
-For details, see
-.BR nptl (7).
-.SH STANDARDS
-None.
-.SH HISTORY
-Linux 2.1.44,
-glibc 2.3.2.
-HP-UX, FreeBSD.
-.P
-The original Linux
-.BR setresuid ()
-and
-.BR setresgid ()
-system calls supported only 16-bit user and group IDs.
-Subsequently, Linux 2.4 added
-.BR setresuid32 ()
-and
-.BR setresgid32 (),
-supporting 32-bit IDs.
-The glibc
-.BR setresuid ()
-and
-.BR setresgid ()
-wrapper functions transparently deal with the variations across kernel versions.
-.SH SEE ALSO
-.BR getresuid (2),
-.BR getuid (2),
-.BR setfsgid (2),
-.BR setfsuid (2),
-.BR setreuid (2),
-.BR setuid (2),
-.BR capabilities (7),
-.BR credentials (7),
-.BR user_namespaces (7)
diff --git a/man2/setresuid32.2 b/man2/setresuid32.2
deleted file mode 100644
index d6866a1de..000000000
--- a/man2/setresuid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setresuid.2
diff --git a/man2/setreuid.2 b/man2/setreuid.2
deleted file mode 100644
index 1ff0229a9..000000000
--- a/man2/setreuid.2
+++ /dev/null
@@ -1,193 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" and Copyright (C) 2009, 2010, 2014, 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)setregid.2 6.4 (Berkeley) 3/10/91
-.\"
-.\" Modified Sat Jul 24 09:08:49 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Portions extracted from linux/kernel/sys.c:
-.\" Copyright (C) 1991, 1992 Linus Torvalds
-.\" May be distributed under the GNU General Public License
-.\" Changes: 1994-07-29 by Wilf <G.Wilford@ee.surrey.ac.uk>
-.\" 1994-08-02 by Wilf due to change in kernel.
-.\" 2004-07-04 by aeb
-.\" 2004-05-27 by Michael Kerrisk
-.\"
-.TH setreuid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setreuid, setregid \- set real and/or effective user or group ID
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int setreuid(uid_t " ruid ", uid_t " euid );
-.BI "int setregid(gid_t " rgid ", gid_t " egid );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR setreuid (),
-.BR setregid ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.19: */ _DEFAULT_SOURCE
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-.BR setreuid ()
-sets real and effective user IDs of the calling process.
-.P
-Supplying a value of \-1 for either the real or effective user ID forces
-the system to leave that ID unchanged.
-.P
-Unprivileged processes may only set the effective user ID to the real user ID,
-the effective user ID, or the saved set-user-ID.
-.P
-Unprivileged users may only set the real user ID to
-the real user ID or the effective user ID.
-.P
-If the real user ID is set (i.e.,
-.I ruid
-is not \-1) or the effective user ID is set to a value
-not equal to the previous real user ID,
-the saved set-user-ID will be set to the new effective user ID.
-.P
-Completely analogously,
-.BR setregid ()
-sets real and effective group IDs of the calling process,
-and all of the above holds with "group" instead of "user".
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-.IR Note :
-there are cases where
-.BR setreuid ()
-can fail even when the caller is UID 0;
-it is a grave security error to omit checking for a failure return from
-.BR setreuid ().
-.SH ERRORS
-.TP
-.B EAGAIN
-The call would change the caller's real UID (i.e.,
-.I ruid
-does not match the caller's real UID),
-but there was a temporary failure allocating the
-necessary kernel data structures.
-.TP
-.B EAGAIN
-.I ruid
-does not match the caller's real UID and this call would
-bring the number of processes belonging to the real user ID
-.I ruid
-over the caller's
-.B RLIMIT_NPROC
-resource limit.
-Since Linux 3.1, this error case no longer occurs
-(but robust applications should check for this error);
-see the description of
-.B EAGAIN
-in
-.BR execve (2).
-.TP
-.B EINVAL
-One or more of the target user or group IDs
-is not valid in this user namespace.
-.TP
-.B EPERM
-The calling process is not privileged
-(on Linux, does not have the necessary capability in its user namespace:
-.B CAP_SETUID
-in the case of
-.BR setreuid (),
-or
-.B CAP_SETGID
-in the case of
-.BR setregid ())
-and a change other than (i)
-swapping the effective user (group) ID with the real user (group) ID,
-or (ii) setting one to the value of the other or (iii) setting the
-effective user (group) ID to the value of the
-saved set-user-ID (saved set-group-ID) was specified.
-.SH VERSIONS
-POSIX.1 does not specify all of the UID changes that Linux permits
-for an unprivileged process.
-For
-.BR setreuid (),
-the effective user ID can be made the same as the
-real user ID or the saved set-user-ID,
-and it is unspecified whether unprivileged processes may set the
-real user ID to the real user ID, the effective user ID, or the
-saved set-user-ID.
-For
-.BR setregid (),
-the real group ID can be changed to the value of the saved set-group-ID,
-and the effective group ID can be changed to the value of
-the real group ID or the saved set-group-ID.
-The precise details of what ID changes are permitted vary
-across implementations.
-.P
-POSIX.1 makes no specification about the effect of these calls
-on the saved set-user-ID and saved set-group-ID.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.3BSD (first appeared in 4.2BSD).
-.P
-Setting the effective user (group) ID to the
-saved set-user-ID (saved set-group-ID) is
-possible since Linux 1.1.37 (1.1.38).
-.P
-The original Linux
-.BR setreuid ()
-and
-.BR setregid ()
-system calls supported only 16-bit user and group IDs.
-Subsequently, Linux 2.4 added
-.BR setreuid32 ()
-and
-.BR setregid32 (),
-supporting 32-bit IDs.
-The glibc
-.BR setreuid ()
-and
-.BR setregid ()
-wrapper functions transparently deal with the variations across kernel versions.
-.\"
-.SS C library/kernel differences
-At the kernel level, user IDs and group IDs are a per-thread attribute.
-However, POSIX requires that all threads in a process
-share the same credentials.
-The NPTL threading implementation handles the POSIX requirements by
-providing wrapper functions for
-the various system calls that change process UIDs and GIDs.
-These wrapper functions (including those for
-.BR setreuid ()
-and
-.BR setregid ())
-employ a signal-based technique to ensure
-that when one thread changes credentials,
-all of the other threads in the process also change their credentials.
-For details, see
-.BR nptl (7).
-.SH SEE ALSO
-.BR getgid (2),
-.BR getuid (2),
-.BR seteuid (2),
-.BR setgid (2),
-.BR setresuid (2),
-.BR setuid (2),
-.BR capabilities (7),
-.BR credentials (7),
-.BR user_namespaces (7)
diff --git a/man2/setreuid32.2 b/man2/setreuid32.2
deleted file mode 100644
index ec3ff6416..000000000
--- a/man2/setreuid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setreuid.2
diff --git a/man2/setrlimit.2 b/man2/setrlimit.2
deleted file mode 100644
index df6d7362a..000000000
--- a/man2/setrlimit.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getrlimit.2
diff --git a/man2/setsid.2 b/man2/setsid.2
deleted file mode 100644
index 3ee4e2a66..000000000
--- a/man2/setsid.2
+++ /dev/null
@@ -1,100 +0,0 @@
-.\" Copyright Michael Haardt (michael@cantor.informatik.rwth-aachen.de)
-.\" Sat Aug 27 20:43:50 MET DST 1994
-.\" and Copyright (C) 2014, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified Sun Sep 11 19:19:05 1994 <faith@cs.unc.edu>
-.\" Modified Mon Mar 25 10:19:00 1996 <aeb@cwi.nl> (merged a few
-.\" tiny changes from a man page by Charles Livingston).
-.\" Modified Sun Jul 21 14:45:46 1996 <aeb@cwi.nl>
-.\"
-.TH setsid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setsid \- creates a session and sets the process group ID
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B pid_t setsid(void);
-.fi
-.SH DESCRIPTION
-.BR setsid ()
-creates a new session if the calling process is not a
-process group leader.
-The calling process is the leader of the new session
-(i.e., its session ID is made the same as its process ID).
-The calling process also becomes
-the process group leader of a new process group in the session
-(i.e., its process group ID is made the same as its process ID).
-.P
-The calling process will be the only process in
-the new process group and in the new session.
-.P
-Initially, the new session has no controlling terminal.
-For details of how a session acquires a controlling terminal, see
-.BR credentials (7).
-.SH RETURN VALUE
-On success, the (new) session ID of the calling process is returned.
-On error,
-.I "(pid_t)\ \-1"
-is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EPERM
-The process group ID of any process equals the PID of the calling process.
-Thus, in particular,
-.BR setsid ()
-fails if the calling process is already a process group leader.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.SH NOTES
-A child created via
-.BR fork (2)
-inherits its parent's session ID.
-The session ID is preserved across an
-.BR execve (2).
-.P
-A process group leader is a process whose process group ID equals its PID.
-Disallowing a process group leader from calling
-.BR setsid ()
-prevents the possibility that a process group leader places itself
-in a new session while other processes in the process group remain
-in the original session;
-such a scenario would break the strict
-two-level hierarchy of sessions and process groups.
-In order to be sure that
-.BR setsid ()
-will succeed, call
-.BR fork (2)
-and have the parent
-.BR _exit (2),
-while the child (which by definition can't be a process group leader) calls
-.BR setsid ().
-.P
-If a session has a controlling terminal, and the
-.B CLOCAL
-flag for that terminal is not set,
-and a terminal hangup occurs, then the session leader is sent a
-.B SIGHUP
-signal.
-.P
-If a process that is a session leader terminates, then a
-.B SIGHUP
-signal is sent to each process in the foreground
-process group of the controlling terminal.
-.SH SEE ALSO
-.BR setsid (1),
-.BR getsid (2),
-.BR setpgid (2),
-.BR setpgrp (2),
-.BR tcgetsid (3),
-.BR credentials (7),
-.BR sched (7)
diff --git a/man2/setsockopt.2 b/man2/setsockopt.2
deleted file mode 100644
index d98c7769b..000000000
--- a/man2/setsockopt.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getsockopt.2
diff --git a/man2/settimeofday.2 b/man2/settimeofday.2
deleted file mode 100644
index 2b6eff428..000000000
--- a/man2/settimeofday.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/gettimeofday.2
diff --git a/man2/setuid.2 b/man2/setuid.2
deleted file mode 100644
index 2c987c437..000000000
--- a/man2/setuid.2
+++ /dev/null
@@ -1,156 +0,0 @@
-.\" Copyright (C), 1994, Graeme W. Wilford (Wilf).
-.\" and Copyright (C) 2010, 2014, 2015, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Fri Jul 29th 12:56:44 BST 1994 Wilf. <G.Wilford@ee.surrey.ac.uk>
-.\" Changes inspired by patch from Richard Kettlewell
-.\" <richard@greenend.org.uk>, aeb 970616.
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.TH setuid 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setuid \- set user identity
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int setuid(uid_t " uid );
-.fi
-.SH DESCRIPTION
-.BR setuid ()
-sets the effective user ID of the calling process.
-If the calling process is privileged
-(more precisely: if the process has the
-.B CAP_SETUID
-capability in its user namespace),
-the real UID and saved set-user-ID are also set.
-.P
-Under Linux,
-.BR setuid ()
-is implemented like the POSIX version with the
-.B _POSIX_SAVED_IDS
-feature.
-This allows a set-user-ID (other than root) program to drop all of its user
-privileges, do some unprivileged work, and then reengage the original
-effective user ID in a secure manner.
-.P
-If the user is root or the program is set-user-ID-root, special care must be
-taken:
-.BR setuid ()
-checks the effective user ID of the caller and if it is
-the superuser, all process-related user IDs are set to
-.IR uid .
-After this has occurred, it is impossible for the program to regain root
-privileges.
-.P
-Thus, a set-user-ID-root program wishing to temporarily drop root
-privileges, assume the identity of an unprivileged user, and then regain
-root privileges afterward cannot use
-.BR setuid ().
-You can accomplish this with
-.BR seteuid (2).
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.P
-.IR Note :
-there are cases where
-.BR setuid ()
-can fail even when the caller is UID 0;
-it is a grave security error to omit checking for a failure return from
-.BR setuid ().
-.SH ERRORS
-.TP
-.B EAGAIN
-The call would change the caller's real UID (i.e.,
-.I uid
-does not match the caller's real UID),
-but there was a temporary failure allocating the
-necessary kernel data structures.
-.TP
-.B EAGAIN
-.I uid
-does not match the real user ID of the caller and this call would
-bring the number of processes belonging to the real user ID
-.I uid
-over the caller's
-.B RLIMIT_NPROC
-resource limit.
-Since Linux 3.1, this error case no longer occurs
-(but robust applications should check for this error);
-see the description of
-.B EAGAIN
-in
-.BR execve (2).
-.TP
-.B EINVAL
-The user ID specified in
-.I uid
-is not valid in this user namespace.
-.TP
-.B EPERM
-The user is not privileged (Linux: does not have the
-.B CAP_SETUID
-capability in its user namespace) and
-.I uid
-does not match the real UID or saved set-user-ID of the calling process.
-.SH VERSIONS
-.SS C library/kernel differences
-At the kernel level, user IDs and group IDs are a per-thread attribute.
-However, POSIX requires that all threads in a process
-share the same credentials.
-The NPTL threading implementation handles the POSIX requirements by
-providing wrapper functions for
-the various system calls that change process UIDs and GIDs.
-These wrapper functions (including the one for
-.BR setuid ())
-employ a signal-based technique to ensure
-that when one thread changes credentials,
-all of the other threads in the process also change their credentials.
-For details, see
-.BR nptl (7).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.P
-Not quite compatible with the 4.4BSD call, which
-sets all of the real, saved, and effective user IDs.
-.\" SVr4 documents an additional EINVAL error condition.
-.P
-The original Linux
-.BR setuid ()
-system call supported only 16-bit user IDs.
-Subsequently, Linux 2.4 added
-.BR setuid32 ()
-supporting 32-bit IDs.
-The glibc
-.BR setuid ()
-wrapper function transparently deals with the variation across kernel versions.
-.SH NOTES
-Linux has the concept of the filesystem user ID, normally equal to the
-effective user ID.
-The
-.BR setuid ()
-call also sets the filesystem user ID of the calling process.
-See
-.BR setfsuid (2).
-.P
-If
-.I uid
-is different from the old effective UID, the process will
-be forbidden from leaving core dumps.
-.SH SEE ALSO
-.BR getuid (2),
-.BR seteuid (2),
-.BR setfsuid (2),
-.BR setreuid (2),
-.BR capabilities (7),
-.BR credentials (7),
-.BR user_namespaces (7)
diff --git a/man2/setuid32.2 b/man2/setuid32.2
deleted file mode 100644
index 24656c29f..000000000
--- a/man2/setuid32.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/setuid.2
diff --git a/man2/setup.2 b/man2/setup.2
deleted file mode 100644
index 9fb5e8654..000000000
--- a/man2/setup.2
+++ /dev/null
@@ -1,55 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified Sun Jul 25 10:14:13 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 15 April 1995 by Michael Chastain <mec@shell.portal.com>
-.\" Update calling parameters to Linux 1.2.4 values.
-.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 3 May 1996 by Martin Schulze <joey@infodrom.north.de>
-.\" Modified Wed Nov 6 04:05:28 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Sat Jan 29 01:08:23 2000 by aeb
-.\"
-.TH setup 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setup \- set up devices and filesystems, mount root filesystem
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B [[deprecated]] int setup(void);
-.fi
-.SH DESCRIPTION
-.BR setup ()
-is called once from within
-.IR linux/init/main.c .
-It calls initialization functions for devices and filesystems
-configured into the kernel and then mounts the root filesystem.
-.P
-No user process may call
-.BR setup ().
-Any user process, even a process with superuser permission,
-will receive
-.BR EPERM .
-.SH RETURN VALUE
-.BR setup ()
-always returns \-1 for a user process.
-.SH ERRORS
-.TP
-.B EPERM
-Always, for a user process.
-.SH STANDARDS
-Linux.
-.SH VERSIONS
-Removed in Linux 2.1.121.
-.P
-The calling sequence varied: at some times
-.BR setup ()
-has had a single argument
-.I "void\ *BIOS"
-and at other times a single argument
-.IR "int magic" .
diff --git a/man2/setxattr.2 b/man2/setxattr.2
deleted file mode 100644
index ebb4229f0..000000000
--- a/man2/setxattr.2
+++ /dev/null
@@ -1,159 +0,0 @@
-.\" Copyright (C) Andreas Gruenbacher, February 2001
-.\" Copyright (C) Silicon Graphics Inc, September 2001
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH setxattr 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-setxattr, lsetxattr, fsetxattr \- set an extended attribute value
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/xattr.h>
-.P
-.BI "int setxattr(const char *" path ", const char *" name ,
-.BI " const void " value [. size "], size_t " size ", int " flags );
-.BI "int lsetxattr(const char *" path ", const char *" name ,
-.BI " const void " value [. size "], size_t " size ", int " flags );
-.BI "int fsetxattr(int " fd ", const char *" name ,
-.BI " const void " value [. size "], size_t " size ", int " flags );
-.fi
-.SH DESCRIPTION
-Extended attributes are
-.IR name : value
-pairs associated with inodes (files, directories, symbolic links, etc.).
-They are extensions to the normal attributes which are associated
-with all inodes in the system (i.e., the
-.BR stat (2)
-data).
-A complete overview of extended attributes concepts can be found in
-.BR xattr (7).
-.P
-.BR setxattr ()
-sets the
-.I value
-of the extended attribute identified by
-.I name
-and associated with the given
-.I path
-in the filesystem.
-The
-.I size
-argument specifies the size (in bytes) of
-.IR value ;
-a zero-length value is permitted.
-.P
-.BR lsetxattr ()
-is identical to
-.BR setxattr (),
-except in the case of a symbolic link, where the extended attribute is
-set on the link itself, not the file that it refers to.
-.P
-.BR fsetxattr ()
-is identical to
-.BR setxattr (),
-only the extended attribute is set on the open file referred to by
-.I fd
-(as returned by
-.BR open (2))
-in place of
-.IR path .
-.P
-An extended attribute name is a null-terminated string.
-The
-.I name
-includes a namespace prefix; there may be several, disjoint
-namespaces associated with an individual inode.
-The
-.I value
-of an extended attribute is a chunk of arbitrary textual or
-binary data of specified length.
-.P
-By default
-(i.e.,
-.I flags
-is zero),
-the extended attribute will be created if it does not exist,
-or the value will be replaced if the attribute already exists.
-To modify these semantics, one of the following values can be specified in
-.IR flags :
-.TP
-.B XATTR_CREATE
-Perform a pure create, which fails if the named attribute exists already.
-.TP
-.B XATTR_REPLACE
-Perform a pure replace operation,
-which fails if the named attribute does not already exist.
-.SH RETURN VALUE
-On success, zero is returned.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EDQUOT
-Disk quota limits mean that
-there is insufficient space remaining to store the extended attribute.
-.TP
-.B EEXIST
-.B XATTR_CREATE
-was specified, and the attribute exists already.
-.TP
-.B ENODATA
-.B XATTR_REPLACE
-was specified, and the attribute does not exist.
-.\" .RB ( ENOATTR
-.\" is defined to be a synonym for
-.\" .BR ENODATA
-.\" in
-.\" .IR <attr/attributes.h> .)
-.TP
-.B ENOSPC
-There is insufficient space remaining to store the extended attribute.
-.TP
-.B ENOTSUP
-The namespace prefix of
-.I name
-is not valid.
-.TP
-.B ENOTSUP
-Extended attributes are not supported by the filesystem, or are disabled.
-.TP
-.B EPERM
-The file is marked immutable or append-only.
-(See
-.BR ioctl_iflags (2).)
-.P
-In addition, the errors documented in
-.BR stat (2)
-can also occur.
-.TP
-.B ERANGE
-The size of
-.I name
-or
-.I value
-exceeds a filesystem-specific limit.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.4,
-glibc 2.3.
-.\" .SH AUTHORS
-.\" Andreas Gruenbacher,
-.\" .RI < a.gruenbacher@computer.org >
-.\" and the SGI XFS development team,
-.\" .RI < linux-xfs@oss.sgi.com >.
-.\" Please send any bug reports or comments to these addresses.
-.SH SEE ALSO
-.BR getfattr (1),
-.BR setfattr (1),
-.BR getxattr (2),
-.BR listxattr (2),
-.BR open (2),
-.BR removexattr (2),
-.BR stat (2),
-.BR symlink (7),
-.BR xattr (7)
diff --git a/man2/sgetmask.2 b/man2/sgetmask.2
deleted file mode 100644
index 9cf17b8d4..000000000
--- a/man2/sgetmask.2
+++ /dev/null
@@ -1,70 +0,0 @@
-.\" Copyright (c) 2007 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH sgetmask 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sgetmask, ssetmask \- manipulation of signal mask (obsolete)
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.B [[deprecated]] long syscall(SYS_sgetmask, void);
-.BI "[[deprecated]] long syscall(SYS_ssetmask, long " newmask );
-.fi
-.SH DESCRIPTION
-These system calls are obsolete.
-.IR "Do not use them" ;
-use
-.BR sigprocmask (2)
-instead.
-.P
-.BR sgetmask ()
-returns the signal mask of the calling process.
-.P
-.BR ssetmask ()
-sets the signal mask of the calling process to the value given in
-.IR newmask .
-The previous signal mask is returned.
-.P
-The signal masks dealt with by these two system calls
-are plain bit masks (unlike the
-.I sigset_t
-used by
-.BR sigprocmask (2));
-use
-.BR sigmask (3)
-to create and inspect these masks.
-.SH RETURN VALUE
-.BR sgetmask ()
-always successfully returns the signal mask.
-.BR ssetmask ()
-always succeeds, and returns the previous signal mask.
-.SH ERRORS
-These system calls always succeed.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Since Linux 3.16,
-.\" f6187769dae48234f3877df3c4d99294cc2254fa
-support for these system calls is optional,
-depending on whether the kernel was built with the
-.B CONFIG_SGETMASK_SYSCALL
-option.
-.SH NOTES
-These system calls are unaware of signal numbers greater than 31
-(i.e., real-time signals).
-.P
-These system calls do not exist on x86-64.
-.P
-It is not possible to block
-.B SIGSTOP
-or
-.BR SIGKILL .
-.SH SEE ALSO
-.BR sigprocmask (2),
-.BR signal (7)
diff --git a/man2/shmat.2 b/man2/shmat.2
deleted file mode 100644
index 3f3e5a4bd..000000000
--- a/man2/shmat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/shmop.2
diff --git a/man2/shmctl.2 b/man2/shmctl.2
deleted file mode 100644
index 986268957..000000000
--- a/man2/shmctl.2
+++ /dev/null
@@ -1,494 +0,0 @@
-'\" t
-.\" Copyright (c) 1993 Luigi P. Bai (lpb@softint.com) July 28, 1993
-.\" and Copyright 1993 Giorgio Ciucci <giorgio@crcc.it>
-.\" and Copyright 2004, 2005 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-28, Rik Faith <faith@cs.unc.edu>
-.\" Modified 1993-11-28, Giorgio Ciucci <giorgio@crcc.it>
-.\" Modified 1997-01-31, Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-02-18, Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2002-01-05, 2004-05-27, 2004-06-17,
-.\" Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2004-10-11, aeb
-.\" Modified, Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Updated shmid_ds structure definitions
-.\" Added information on SHM_DEST and SHM_LOCKED flags
-.\" Noted that CAP_IPC_LOCK is not required for SHM_UNLOCK
-.\" since Linux 2.6.9
-.\" Modified, 2004-11-25, mtk, notes on 2.6.9 RLIMIT_MEMLOCK changes
-.\" 2005-04-25, mtk -- noted aberrant Linux behavior w.r.t. new
-.\" attaches to a segment that has already been marked for deletion.
-.\" 2005-08-02, mtk: Added IPC_INFO, SHM_INFO, SHM_STAT descriptions.
-.\" 2018-03-20, dbueso: Added SHM_STAT_ANY description.
-.\"
-.TH shmctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-shmctl \- System V shared memory control
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/shm.h>
-.P
-.BI "int shmctl(int " shmid ", int " op ", struct shmid_ds *" buf );
-.fi
-.SH DESCRIPTION
-.BR shmctl ()
-performs the control operation specified by
-.I op
-on the System\ V shared memory segment whose identifier is given in
-.IR shmid .
-.P
-The
-.I buf
-argument is a pointer to a \fIshmid_ds\fP structure,
-defined in \fI<sys/shm.h>\fP as follows:
-.P
-.in +4n
-.EX
-struct shmid_ds {
- struct ipc_perm shm_perm; /* Ownership and permissions */
- size_t shm_segsz; /* Size of segment (bytes) */
- time_t shm_atime; /* Last attach time */
- time_t shm_dtime; /* Last detach time */
- time_t shm_ctime; /* Creation time/time of last
- modification via shmctl() */
- pid_t shm_cpid; /* PID of creator */
- pid_t shm_lpid; /* PID of last shmat(2)/shmdt(2) */
- shmatt_t shm_nattch; /* No. of current attaches */
- ...
-};
-.EE
-.in
-.P
-The fields of the
-.I shmid_ds
-structure are as follows:
-.TP 12
-.I shm_perm
-This is an
-.I ipc_perm
-structure (see below) that specifies the access permissions
-on the shared memory segment.
-.TP
-.I shm_segsz
-Size in bytes of the shared memory segment.
-.TP
-.I shm_atime
-Time of the last
-.BR shmat (2)
-system call that attached this segment.
-.TP
-.I shm_dtime
-Time of the last
-.BR shmdt (2)
-system call that detached this segment.
-.TP
-.I shm_ctime
-Time of creation of segment or time of the last
-.BR shmctl ()
-.B IPC_SET
-operation.
-.TP
-.I shm_cpid
-ID of the process that created the shared memory segment.
-.TP
-.I shm_lpid
-ID of the last process that executed a
-.BR shmat (2)
-or
-.BR shmdt (2)
-system call on this segment.
-.TP
-.I shm_nattch
-Number of processes that have this segment attached.
-.P
-The
-.I ipc_perm
-structure is defined as follows
-(the highlighted fields are settable using
-.BR IPC_SET ):
-.P
-.in +4n
-.EX
-struct ipc_perm {
- key_t __key; /* Key supplied to shmget(2) */
- uid_t \fBuid\fP; /* Effective UID of owner */
- gid_t \fBgid\fP; /* Effective GID of owner */
- uid_t cuid; /* Effective UID of creator */
- gid_t cgid; /* Effective GID of creator */
- unsigned short \fBmode\fP; /* \fBPermissions\fP + SHM_DEST and
- SHM_LOCKED flags */
- unsigned short __seq; /* Sequence number */
-};
-.EE
-.in
-.P
-The least significant 9 bits of the
-.I mode
-field of the
-.I ipc_perm
-structure define the access permissions for the shared memory segment.
-The permission bits are as follows:
-.TS
-l l.
-0400 Read by user
-0200 Write by user
-0040 Read by group
-0020 Write by group
-0004 Read by others
-0002 Write by others
-.TE
-.P
-Bits 0100, 0010, and 0001 (the execute bits) are unused by the system.
-(It is not necessary to have execute permission on a segment
-in order to perform a
-.BR shmat (2)
-call with the
-.B SHM_EXEC
-flag.)
-.P
-Valid values for
-.I op
-are:
-.TP
-.B IPC_STAT
-Copy information from the kernel data structure associated with
-.I shmid
-into the
-.I shmid_ds
-structure pointed to by \fIbuf\fP.
-The caller must have read permission on the
-shared memory segment.
-.TP
-.B IPC_SET
-Write the values of some members of the
-.I shmid_ds
-structure pointed to by
-.I buf
-to the kernel data structure associated with this shared memory segment,
-updating also its
-.I shm_ctime
-member.
-.IP
-The following fields are updated:
-\fIshm_perm.uid\fP, \fIshm_perm.gid\fP,
-and (the least significant 9 bits of) \fIshm_perm.mode\fP.
-.IP
-The effective UID of the calling process must match the owner
-.RI ( shm_perm.uid )
-or creator
-.RI ( shm_perm.cuid )
-of the shared memory segment, or the caller must be privileged.
-.TP
-.B IPC_RMID
-Mark the segment to be destroyed.
-The segment will actually be destroyed
-only after the last process detaches it (i.e., when the
-.I shm_nattch
-member of the associated structure
-.I shmid_ds
-is zero).
-The caller must be the owner or creator of the segment, or be privileged.
-The
-.I buf
-argument is ignored.
-.IP
-If a segment has been marked for destruction, then the (nonstandard)
-.B SHM_DEST
-flag of the
-.I shm_perm.mode
-field in the associated data structure retrieved by
-.B IPC_STAT
-will be set.
-.IP
-The caller \fImust\fP ensure that a segment is eventually destroyed;
-otherwise its pages that were faulted in will remain in memory or swap.
-.IP
-See also the description of
-.I /proc/sys/kernel/shm_rmid_forced
-in
-.BR proc (5).
-.TP
-.BR IPC_INFO " (Linux-specific)"
-Return information about system-wide shared memory limits and
-parameters in the structure pointed to by
-.IR buf .
-This structure is of type
-.I shminfo
-(thus, a cast is required),
-defined in
-.I <sys/shm.h>
-if the
-.B _GNU_SOURCE
-feature test macro is defined:
-.IP
-.in +4n
-.EX
-struct shminfo {
- unsigned long shmmax; /* Maximum segment size */
- unsigned long shmmin; /* Minimum segment size;
- always 1 */
- unsigned long shmmni; /* Maximum number of segments */
- unsigned long shmseg; /* Maximum number of segments
- that a process can attach;
- unused within kernel */
- unsigned long shmall; /* Maximum number of pages of
- shared memory, system\-wide */
-};
-.EE
-.in
-.IP
-The
-.IR shmmni ,
-.IR shmmax ,
-and
-.I shmall
-settings can be changed via
-.I /proc
-files of the same name; see
-.BR proc (5)
-for details.
-.TP
-.BR SHM_INFO " (Linux-specific)"
-Return a
-.I shm_info
-structure whose fields contain information
-about system resources consumed by shared memory.
-This structure is defined in
-.I <sys/shm.h>
-if the
-.B _GNU_SOURCE
-feature test macro is defined:
-.IP
-.in +4n
-.EX
-struct shm_info {
- int used_ids; /* # of currently existing
- segments */
- unsigned long shm_tot; /* Total number of shared
- memory pages */
- unsigned long shm_rss; /* # of resident shared
- memory pages */
- unsigned long shm_swp; /* # of swapped shared
- memory pages */
- unsigned long swap_attempts;
- /* Unused since Linux 2.4 */
- unsigned long swap_successes;
- /* Unused since Linux 2.4 */
-};
-.EE
-.in
-.TP
-.BR SHM_STAT " (Linux-specific)"
-Return a
-.I shmid_ds
-structure as for
-.BR IPC_STAT .
-However, the
-.I shmid
-argument is not a segment identifier, but instead an index into
-the kernel's internal array that maintains information about
-all shared memory segments on the system.
-.TP
-.BR SHM_STAT_ANY " (Linux-specific, since Linux 4.17)"
-Return a
-.I shmid_ds
-structure as for
-.BR SHM_STAT .
-However,
-.I shm_perm.mode
-is not checked for read access for
-.IR shmid ,
-meaning that any user can employ this operation (just as any user may read
-.I /proc/sysvipc/shm
-to obtain the same information).
-.P
-The caller can prevent or allow swapping of a shared
-memory segment with the following
-.I op
-values:
-.TP
-.BR SHM_LOCK " (Linux-specific)"
-Prevent swapping of the shared memory segment.
-The caller must fault in
-any pages that are required to be present after locking is enabled.
-If a segment has been locked, then the (nonstandard)
-.B SHM_LOCKED
-flag of the
-.I shm_perm.mode
-field in the associated data structure retrieved by
-.B IPC_STAT
-will be set.
-.TP
-.BR SHM_UNLOCK " (Linux-specific)"
-Unlock the segment, allowing it to be swapped out.
-.P
-Before Linux 2.6.10, only a privileged process
-could employ
-.B SHM_LOCK
-and
-.BR SHM_UNLOCK .
-Since Linux 2.6.10, an unprivileged process can employ these operations
-if its effective UID matches the owner or creator UID of the segment, and
-(for
-.BR SHM_LOCK )
-the amount of memory to be locked falls within the
-.B RLIMIT_MEMLOCK
-resource limit (see
-.BR setrlimit (2)).
-.\" There was some weirdness in Linux 2.6.9: SHM_LOCK and SHM_UNLOCK could
-.\" be applied to a segment, regardless of ownership of the segment.
-.\" This was a botch-up in the move to RLIMIT_MEMLOCK, and was fixed
-.\" in Linux 2.6.10. MTK, May 2005
-.SH RETURN VALUE
-A successful
-.B IPC_INFO
-or
-.B SHM_INFO
-operation returns the index of the highest used entry in the
-kernel's internal array recording information about all
-shared memory segments.
-(This information can be used with repeated
-.B SHM_STAT
-or
-.B SHM_STAT_ANY
-operations to obtain information about all shared memory segments
-on the system.)
-A successful
-.B SHM_STAT
-operation returns the identifier of the shared memory segment
-whose index was given in
-.IR shmid .
-Other operations return 0 on success.
-.P
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-\fBIPC_STAT\fP or \fBSHM_STAT\fP is requested and
-\fIshm_perm.mode\fP does not allow read access for
-.IR shmid ,
-and the calling process does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EFAULT
-The argument
-.I op
-has value
-.B IPC_SET
-or
-.B IPC_STAT
-but the address pointed to by
-.I buf
-isn't accessible.
-.TP
-.B EIDRM
-\fIshmid\fP points to a removed identifier.
-.TP
-.B EINVAL
-.I shmid
-is not a valid identifier, or
-.I op
-is not a valid operation.
-Or: for a
-.B SHM_STAT
-or
-.B SHM_STAT_ANY
-operation, the index value specified in
-.I shmid
-referred to an array slot that is currently unused.
-.TP
-.B ENOMEM
-(Since Linux 2.6.9),
-.B SHM_LOCK
-was specified and the size of the to-be-locked segment would mean
-that the total bytes in locked shared memory segments would exceed
-the limit for the real user ID of the calling process.
-This limit is defined by the
-.B RLIMIT_MEMLOCK
-soft resource limit (see
-.BR setrlimit (2)).
-.TP
-.B EOVERFLOW
-\fBIPC_STAT\fP is attempted, and the GID or UID value
-is too large to be stored in the structure pointed to by
-.IR buf .
-.TP
-.B EPERM
-\fBIPC_SET\fP or \fBIPC_RMID\fP is attempted, and the
-effective user ID of the calling process is not that of the creator
-(found in
-.IR shm_perm.cuid ),
-or the owner
-(found in
-.IR shm_perm.uid ),
-and the process was not privileged (Linux: did not have the
-.B CAP_SYS_ADMIN
-capability).
-.IP
-Or (before Linux 2.6.9),
-.B SHM_LOCK
-or
-.B SHM_UNLOCK
-was specified, but the process was not privileged
-(Linux: did not have the
-.B CAP_IPC_LOCK
-capability).
-(Since Linux 2.6.9, this error can also occur if the
-.B RLIMIT_MEMLOCK
-is 0 and the caller is not privileged.)
-.SH VERSIONS
-Linux permits a process to attach
-.RB ( shmat (2))
-a shared memory segment that has already been marked for deletion
-using
-.IR shmctl(IPC_RMID) .
-This feature is not available on other UNIX implementations;
-portable applications should avoid relying on it.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.\" SVr4 documents additional error conditions EINVAL,
-.\" ENOENT, ENOSPC, ENOMEM, EEXIST. Neither SVr4 nor SVID documents
-.\" an EIDRM error condition.
-.P
-Various fields in a \fIstruct shmid_ds\fP were typed as
-.I short
-under Linux 2.2
-and have become
-.I long
-under Linux 2.4.
-To take advantage of this,
-a recompilation under glibc-2.1.91 or later should suffice.
-(The kernel distinguishes old and new calls by an
-.B IPC_64
-flag in
-.IR op .)
-.SH NOTES
-The
-.BR IPC_INFO ,
-.BR SHM_STAT ,
-and
-.B SHM_INFO
-operations are used by the
-.BR ipcs (1)
-program to provide information on allocated resources.
-In the future, these may be modified or moved to a
-.I /proc
-filesystem interface.
-.SH SEE ALSO
-.BR mlock (2),
-.BR setrlimit (2),
-.BR shmget (2),
-.BR shmop (2),
-.BR capabilities (7),
-.BR sysvipc (7)
diff --git a/man2/shmdt.2 b/man2/shmdt.2
deleted file mode 100644
index 3f3e5a4bd..000000000
--- a/man2/shmdt.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/shmop.2
diff --git a/man2/shmget.2 b/man2/shmget.2
deleted file mode 100644
index 96c2e6108..000000000
--- a/man2/shmget.2
+++ /dev/null
@@ -1,412 +0,0 @@
-.\" Copyright (c) 1993 Luigi P. Bai (lpb@softint.com) July 28, 1993
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Wed Jul 28 10:57:35 1993, Rik Faith <faith@cs.unc.edu>
-.\" Modified Sun Nov 28 16:43:30 1993, Rik Faith <faith@cs.unc.edu>
-.\" with material from Giorgio Ciucci <giorgio@crcc.it>
-.\" Portions Copyright 1993 Giorgio Ciucci <giorgio@crcc.it>
-.\" Modified Tue Oct 22 22:03:17 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified, 8 Jan 2003, Michael Kerrisk, <mtk.manpages@gmail.com>
-.\" Removed EIDRM from errors - that can't happen...
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\" Modified, 11 Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Added notes on /proc files
-.\"
-.TH shmget 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-shmget \- allocates a System V shared memory segment
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/shm.h>
-.P
-.BI "int shmget(key_t " key ", size_t " size ", int " shmflg );
-.fi
-.SH DESCRIPTION
-.BR shmget ()
-returns the identifier of the System\ V shared memory segment
-associated with the value of the argument
-.IR key .
-It may be used either to obtain the identifier of a previously created
-shared memory segment (when
-.I shmflg
-is zero and
-.I key
-does not have the value
-.BR IPC_PRIVATE ),
-or to create a new segment.
-.P
-A new shared memory segment, with size equal to the value of
-.I size
-rounded up to a multiple of
-.BR PAGE_SIZE ,
-is created if
-.I key
-has the value
-.B IPC_PRIVATE
-or
-.I key
-isn't
-.BR IPC_PRIVATE ,
-no shared memory segment corresponding to
-.I key
-exists, and
-.B IPC_CREAT
-is specified in
-.IR shmflg .
-.P
-If
-.I shmflg
-specifies both
-.B IPC_CREAT
-and
-.B IPC_EXCL
-and a shared memory segment already exists for
-.IR key ,
-then
-.BR shmget ()
-fails with
-.I errno
-set to
-.BR EEXIST .
-(This is analogous to the effect of the combination
-.B O_CREAT | O_EXCL
-for
-.BR open (2).)
-.P
-The value
-.I shmflg
-is composed of:
-.TP
-.B IPC_CREAT
-Create a new segment.
-If this flag is not used, then
-.BR shmget ()
-will find the segment associated with \fIkey\fP and check to see if
-the user has permission to access the segment.
-.TP
-.B IPC_EXCL
-This flag is used with
-.B IPC_CREAT
-to ensure that this call creates the segment.
-If the segment already exists, the call fails.
-.TP
-.BR SHM_HUGETLB " (since Linux 2.6)"
-Allocate the segment using "huge" pages.
-See the Linux kernel source file
-.I Documentation/admin\-guide/mm/hugetlbpage.rst
-for further information.
-.TP
-.B SHM_HUGE_2MB
-.TQ
-.BR SHM_HUGE_1GB " (since Linux 3.8)"
-.\" See https://lwn.net/Articles/533499/
-Used in conjunction with
-.B SHM_HUGETLB
-to select alternative hugetlb page sizes (respectively, 2\ MB and 1\ GB)
-on systems that support multiple hugetlb page sizes.
-.IP
-More generally, the desired huge page size can be configured by encoding
-the base-2 logarithm of the desired page size in the six bits at the offset
-.BR SHM_HUGE_SHIFT .
-Thus, the above two constants are defined as:
-.IP
-.in +4n
-.EX
-#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
-#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
-.EE
-.in
-.IP
-For some additional details,
-see the discussion of the similarly named constants in
-.BR mmap (2).
-.TP
-.BR SHM_NORESERVE " (since Linux 2.6.15)"
-This flag serves the same purpose as the
-.BR mmap (2)
-.B MAP_NORESERVE
-flag.
-Do not reserve swap space for this segment.
-When swap space is reserved, one has the guarantee
-that it is possible to modify the segment.
-When swap space is not reserved one might get
-.B SIGSEGV
-upon a write
-if no physical memory is available.
-See also the discussion of the file
-.I /proc/sys/vm/overcommit_memory
-in
-.BR proc (5).
-.\" As at 2.6.17-rc2, this flag has no effect if SHM_HUGETLB was also
-.\" specified.
-.P
-In addition to the above flags, the least significant 9 bits of
-.I shmflg
-specify the permissions granted to the owner, group, and others.
-These bits have the same format, and the same
-meaning, as the
-.I mode
-argument of
-.BR open (2).
-Presently, execute permissions are not used by the system.
-.P
-When a new shared memory segment is created,
-its contents are initialized to zero values, and
-its associated data structure,
-.I shmid_ds
-(see
-.BR shmctl (2)),
-is initialized as follows:
-.IP \[bu] 3
-.I shm_perm.cuid
-and
-.I shm_perm.uid
-are set to the effective user ID of the calling process.
-.IP \[bu]
-.I shm_perm.cgid
-and
-.I shm_perm.gid
-are set to the effective group ID of the calling process.
-.IP \[bu]
-The least significant 9 bits of
-.I shm_perm.mode
-are set to the least significant 9 bits of
-.IR shmflg .
-.IP \[bu]
-.I shm_segsz
-is set to the value of
-.IR size .
-.IP \[bu]
-.IR shm_lpid ,
-.IR shm_nattch ,
-.IR shm_atime ,
-and
-.I shm_dtime
-are set to 0.
-.IP \[bu]
-.I shm_ctime
-is set to the current time.
-.P
-If the shared memory segment already exists, the permissions are
-verified, and a check is made to see if it is marked for destruction.
-.SH RETURN VALUE
-On success, a valid shared memory identifier is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The user does not have permission to access the
-shared memory segment, and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EEXIST
-.B IPC_CREAT
-and
-.B IPC_EXCL
-were specified in
-.IR shmflg ,
-but a shared memory segment already exists for
-.IR key .
-.TP
-.B EINVAL
-A new segment was to be created and
-.I size
-is less than
-.B SHMMIN
-or greater than
-.BR SHMMAX .
-.TP
-.B EINVAL
-A segment for the given
-.I key
-exists, but \fIsize\fP is greater than the size
-of that segment.
-.TP
-.B ENFILE
-.\" [2.6.7] shmem_zero_setup()-->shmem_file_setup()-->get_empty_filp()
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOENT
-No segment exists for the given \fIkey\fP, and
-.B IPC_CREAT
-was not specified.
-.TP
-.B ENOMEM
-No memory could be allocated for segment overhead.
-.TP
-.B ENOSPC
-All possible shared memory IDs have been taken
-.RB ( SHMMNI ),
-or allocating a segment of the requested
-.I size
-would cause the system to exceed the system-wide limit on shared memory
-.RB ( SHMALL ).
-.TP
-.B EPERM
-The
-.B SHM_HUGETLB
-flag was specified, but the caller was not privileged (did not have the
-.B CAP_IPC_LOCK
-capability)
-and is not a member of the
-.I sysctl_hugetlb_shm_group
-group; see the description of
-.I /proc/sys/vm/sysctl_hugetlb_shm_group
-in
-.BR proc (5).
-.SH STANDARDS
-POSIX.1-2008.
-.P
-.B SHM_HUGETLB
-and
-.B SHM_NORESERVE
-are Linux extensions.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.\" SVr4 documents an additional error condition EEXIST.
-.SH NOTES
-.B IPC_PRIVATE
-isn't a flag field but a
-.I key_t
-type.
-If this special value is used for
-.IR key ,
-the system call ignores all but the least significant 9 bits of
-.I shmflg
-and creates a new shared memory segment.
-.\"
-.SS Shared memory limits
-The following limits on shared memory segment resources affect the
-.BR shmget ()
-call:
-.TP
-.B SHMALL
-System-wide limit on the total amount of shared memory,
-measured in units of the system page size.
-.IP
-On Linux, this limit can be read and modified via
-.IR /proc/sys/kernel/shmall .
-Since Linux 3.16,
-.\" commit 060028bac94bf60a65415d1d55a359c3a17d5c31
-the default value for this limit is:
-.IP
-.in +4n
-.EX
-ULONG_MAX - 2\[ha]24
-.EE
-.in
-.IP
-The effect of this value
-(which is suitable for both 32-bit and 64-bit systems)
-is to impose no limitation on allocations.
-This value, rather than
-.BR ULONG_MAX ,
-was chosen as the default to prevent some cases where historical
-applications simply raised the existing limit without first checking
-its current value.
-Such applications would cause the value to overflow if the limit was set at
-.BR ULONG_MAX .
-.IP
-From Linux 2.4 up to Linux 3.15,
-the default value for this limit was:
-.IP
-.in +4n
-.EX
-SHMMAX / PAGE_SIZE * (SHMMNI / 16)
-.EE
-.in
-.IP
-If
-.B SHMMAX
-and
-.B SHMMNI
-were not modified, then multiplying the result of this formula
-by the page size (to get a value in bytes) yielded a value of 8\ GB
-as the limit on the total memory used by all shared memory segments.
-.TP
-.B SHMMAX
-Maximum size in bytes for a shared memory segment.
-.IP
-On Linux, this limit can be read and modified via
-.IR /proc/sys/kernel/shmmax .
-Since Linux 3.16,
-.\" commit 060028bac94bf60a65415d1d55a359c3a17d5c31
-the default value for this limit is:
-.IP
-.in +4n
-.EX
-ULONG_MAX - 2\[ha]24
-.EE
-.in
-.IP
-The effect of this value
-(which is suitable for both 32-bit and 64-bit systems)
-is to impose no limitation on allocations.
-See the description of
-.B SHMALL
-for a discussion of why this default value (rather than
-.BR ULONG_MAX )
-is used.
-.IP
-From Linux 2.2 up to Linux 3.15, the default value of
-this limit was 0x2000000 (32\ MiB).
-.IP
-Because it is not possible to map just part of a shared memory segment,
-the amount of virtual memory places another limit on the maximum size of a
-usable segment:
-for example, on i386 the largest segments that can be mapped have a
-size of around 2.8\ GB, and on x86-64 the limit is around 127\ TB.
-.TP
-.B SHMMIN
-Minimum size in bytes for a shared memory segment: implementation
-dependent (currently 1 byte, though
-.B PAGE_SIZE
-is the effective minimum size).
-.TP
-.B SHMMNI
-System-wide limit on the number of shared memory segments.
-In Linux 2.2, the default value for this limit was 128;
-since Linux 2.4, the default value is 4096.
-.IP
-On Linux, this limit can be read and modified via
-.IR /proc/sys/kernel/shmmni .
-.\" Kernels between Linux 2.4.x and Linux 2.6.8 had an off-by-one error
-.\" that meant that we could create one more segment than SHMMNI -- MTK
-.\" This /proc file is not available in Linux 2.2 and earlier -- MTK
-.P
-The implementation has no specific limits for the per-process maximum
-number of shared memory segments
-.RB ( SHMSEG ).
-.SS Linux notes
-Until Linux 2.3.30, Linux would return
-.B EIDRM
-for a
-.BR shmget ()
-on a shared memory segment scheduled for deletion.
-.SH BUGS
-The name choice
-.B IPC_PRIVATE
-was perhaps unfortunate;
-.B IPC_NEW
-would more clearly show its function.
-.SH EXAMPLES
-See
-.BR shmop (2).
-.SH SEE ALSO
-.BR memfd_create (2),
-.BR shmat (2),
-.BR shmctl (2),
-.BR shmdt (2),
-.BR ftok (3),
-.BR capabilities (7),
-.BR shm_overview (7),
-.BR sysvipc (7)
diff --git a/man2/shmop.2 b/man2/shmop.2
deleted file mode 100644
index 9d0acf7f5..000000000
--- a/man2/shmop.2
+++ /dev/null
@@ -1,508 +0,0 @@
-.\" Copyright 1993 Giorgio Ciucci (giorgio@crcc.it)
-.\" and Copyright 2020 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Sun Nov 28 17:06:19 1993, Rik Faith (faith@cs.unc.edu)
-.\" with material from Luigi P. Bai (lpb@softint.com)
-.\" Portions Copyright 1993 Luigi P. Bai
-.\" Modified Tue Oct 22 22:04:23 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified, 5 Jan 2002, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified, 19 Sep 2002, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added SHM_REMAP flag description
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\" Modified, 11 Nov 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Language and formatting clean-ups
-.\" Changed wording and placement of sentence regarding attachment
-.\" of segments marked for destruction
-.\"
-.TH SHMOP 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-shmat, shmdt \- System V shared memory operations
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/shm.h>
-.P
-.BI "void *shmat(int " shmid ", const void *_Nullable " shmaddr ", \
-int " shmflg );
-.BI "int shmdt(const void *" shmaddr );
-.fi
-.SH DESCRIPTION
-.SS shmat()
-.BR shmat ()
-attaches the System\ V shared memory segment identified by
-.I shmid
-to the address space of the calling process.
-The attaching address is specified by
-.I shmaddr
-with one of the following criteria:
-.IP \[bu] 3
-If
-.I shmaddr
-is NULL,
-the system chooses a suitable (unused) page-aligned address to attach
-the segment.
-.IP \[bu]
-If
-.I shmaddr
-isn't NULL
-and
-.B SHM_RND
-is specified in
-.IR shmflg ,
-the attach occurs at the address equal to
-.I shmaddr
-rounded down to the nearest multiple of
-.BR SHMLBA .
-.IP \[bu]
-Otherwise,
-.I shmaddr
-must be a page-aligned address at which the attach occurs.
-.P
-In addition to
-.BR SHM_RND ,
-the following flags may be specified in the
-.I shmflg
-bit-mask argument:
-.TP
-.BR SHM_EXEC " (Linux-specific; since Linux 2.6.9)"
-Allow the contents of the segment to be executed.
-The caller must have execute permission on the segment.
-.TP
-.B SHM_RDONLY
-Attach the segment for read-only access.
-The process must have read permission for the segment.
-If this flag is not specified,
-the segment is attached for read and write access,
-and the process must have read and write permission for the segment.
-There is no notion of a write-only shared memory segment.
-.TP
-.BR SHM_REMAP " (Linux-specific)"
-This flag specifies
-that the mapping of the segment should replace
-any existing mapping in the range starting at
-.I shmaddr
-and continuing for the size of the segment.
-(Normally, an
-.B EINVAL
-error would result if a mapping already exists in this address range.)
-In this case,
-.I shmaddr
-must not be NULL.
-.P
-The
-.BR brk (2)
-value of the calling process is not altered by the attach.
-The segment will automatically be detached at process exit.
-The same segment may be attached as a read-only and as a read-write
-one, and more than once, in the process's address space.
-.P
-A successful
-.BR shmat ()
-call updates the members of the
-.I shmid_ds
-structure (see
-.BR shmctl (2))
-associated with the shared memory segment as follows:
-.IP \[bu] 3
-.I shm_atime
-is set to the current time.
-.IP \[bu]
-.I shm_lpid
-is set to the process-ID of the calling process.
-.IP \[bu]
-.I shm_nattch
-is incremented by one.
-.\"
-.SS shmdt()
-.BR shmdt ()
-detaches the shared memory segment located at the address specified by
-.I shmaddr
-from the address space of the calling process.
-The to-be-detached segment must be currently
-attached with
-.I shmaddr
-equal to the value returned by the attaching
-.BR shmat ()
-call.
-.P
-On a successful
-.BR shmdt ()
-call, the system updates the members of the
-.I shmid_ds
-structure associated with the shared memory segment as follows:
-.IP \[bu] 3
-.I shm_dtime
-is set to the current time.
-.IP \[bu]
-.I shm_lpid
-is set to the process-ID of the calling process.
-.IP \[bu]
-.I shm_nattch
-is decremented by one.
-If it becomes 0 and the segment is marked for deletion,
-the segment is deleted.
-.SH RETURN VALUE
-On success,
-.BR shmat ()
-returns the address of the attached shared memory segment; on error,
-.I (void\ *)\ \-1
-is returned, and
-.I errno
-is set to indicate the error.
-.P
-On success,
-.BR shmdt ()
-returns 0; on error \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.BR shmat ()
-can fail with one of the following errors:
-.TP
-.B EACCES
-The calling process does not have the required permissions for
-the requested attach type, and does not have the
-.B CAP_IPC_OWNER
-capability in the user namespace that governs its IPC namespace.
-.TP
-.B EIDRM
-\fIshmid\fP points to a removed identifier.
-.TP
-.B EINVAL
-Invalid
-.I shmid
-value, unaligned (i.e., not page-aligned and \fBSHM_RND\fP was not
-specified) or invalid
-.I shmaddr
-value, or can't attach segment at
-.IR shmaddr ,
-or
-.B SHM_REMAP
-was specified and
-.I shmaddr
-was NULL.
-.TP
-.B ENOMEM
-Could not allocate memory for the descriptor or for the page tables.
-.P
-.BR shmdt ()
-can fail with one of the following errors:
-.TP
-.B EINVAL
-There is no shared memory segment attached at
-.IR shmaddr ;
-or,
-.\" The following since Linux 2.6.17-rc1:
-.I shmaddr
-is not aligned on a page boundary.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.\" SVr4 documents an additional error condition EMFILE.
-.P
-In SVID 3 (or perhaps earlier),
-the type of the \fIshmaddr\fP argument was changed from
-.I "char\ *"
-into
-.IR "const void\ *" ,
-and the returned type of
-.BR shmat ()
-from
-.I "char\ *"
-into
-.IR "void\ *" .
-.SH NOTES
-After a
-.BR fork (2),
-the child inherits the attached shared memory segments.
-.P
-After an
-.BR execve (2),
-all attached shared memory segments are detached from the process.
-.P
-Upon
-.BR _exit (2),
-all attached shared memory segments are detached from the process.
-.P
-Using
-.BR shmat ()
-with
-.I shmaddr
-equal to NULL
-is the preferred, portable way of attaching a shared memory segment.
-Be aware that the shared memory segment attached in this way
-may be attached at different addresses in different processes.
-Therefore, any pointers maintained within the shared memory must be
-made relative (typically to the starting address of the segment),
-rather than absolute.
-.P
-On Linux, it is possible to attach a shared memory segment even if it
-is already marked to be deleted.
-However, POSIX.1 does not specify this behavior and
-many other implementations do not support it.
-.P
-The following system parameter affects
-.BR shmat ():
-.TP
-.B SHMLBA
-Segment low boundary address multiple.
-When explicitly specifying an attach address in a call to
-.BR shmat (),
-the caller should ensure that the address is a multiple of this value.
-This is necessary on some architectures,
-in order either to ensure good CPU cache performance or to ensure that
-different attaches of the same segment have consistent views
-within the CPU cache.
-.B SHMLBA
-is normally some multiple of the system page size.
-(On many Linux architectures,
-.B SHMLBA
-is the same as the system page size.)
-.P
-The implementation places no intrinsic per-process limit on the
-number of shared memory segments
-.RB ( SHMSEG ).
-.SH EXAMPLES
-The two programs shown below exchange a string using a shared memory segment.
-Further details about the programs are given below.
-First, we show a shell session demonstrating their use.
-.P
-In one terminal window, we run the "reader" program,
-which creates a System V shared memory segment and a System V semaphore set.
-The program prints out the IDs of the created objects,
-and then waits for the semaphore to change value.
-.P
-.in +4n
-.EX
-$ \fB./svshm_string_read\fP
-shmid = 1114194; semid = 15
-.EE
-.in
-.P
-In another terminal window, we run the "writer" program.
-The "writer" program takes three command-line arguments:
-the IDs of the shared memory segment and semaphore set created
-by the "reader", and a string.
-It attaches the existing shared memory segment,
-copies the string to the shared memory, and modifies the semaphore value.
-.P
-.in +4n
-.EX
-$ \fB./svshm_string_write 1114194 15 \[aq]Hello, world\[aq]\fP
-.EE
-.in
-.P
-Returning to the terminal where the "reader" is running,
-we see that the program has ceased waiting on the semaphore
-and has printed the string that was copied into the
-shared memory segment by the writer:
-.P
-.in +4n
-.EX
-Hello, world
-.EE
-.in
-.\"
-.SS Program source: svshm_string.h
-The following header file is included by the "reader" and "writer" programs:
-.P
-.in +4n
-.\" SRC BEGIN (svshm_string.h)
-.EX
-/* svshm_string.h
-\&
- Licensed under GNU General Public License v2 or later.
-*/
-#ifndef SVSHM_STRING_H
-#define SVSHM_STRING_H
-\&
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/sem.h>
-\&
-#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
- } while (0)
-\&
-union semun { /* Used in calls to semctl() */
- int val;
- struct semid_ds *buf;
- unsigned short *array;
-#if defined(__linux__)
- struct seminfo *__buf;
-#endif
-};
-\&
-#define MEM_SIZE 4096
-\&
-#endif // include guard
-.EE
-.\" SRC END
-.in
-.\"
-.SS Program source: svshm_string_read.c
-The "reader" program creates a shared memory segment and a semaphore set
-containing one semaphore.
-It then attaches the shared memory object into its address space
-and initializes the semaphore value to 1.
-Finally, the program waits for the semaphore value to become 0,
-and afterwards prints the string that has been copied into the
-shared memory segment by the "writer".
-.P
-.in +4n
-.\" SRC BEGIN (svshm_string_read.c)
-.EX
-/* svshm_string_read.c
-\&
- Licensed under GNU General Public License v2 or later.
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ipc.h>
-#include <sys/sem.h>
-#include <sys/shm.h>
-\&
-#include "svshm_string.h"
-\&
-int
-main(void)
-{
- int semid, shmid;
- char *addr;
- union semun arg, dummy;
- struct sembuf sop;
-\&
- /* Create shared memory and semaphore set containing one
- semaphore. */
-\&
- shmid = shmget(IPC_PRIVATE, MEM_SIZE, IPC_CREAT | 0600);
- if (shmid == \-1)
- errExit("shmget");
-\&
- semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
- if (semid == \-1)
- errExit("semget");
-\&
- /* Attach shared memory into our address space. */
-\&
- addr = shmat(shmid, NULL, SHM_RDONLY);
- if (addr == (void *) \-1)
- errExit("shmat");
-\&
- /* Initialize semaphore 0 in set with value 1. */
-\&
- arg.val = 1;
- if (semctl(semid, 0, SETVAL, arg) == \-1)
- errExit("semctl");
-\&
- printf("shmid = %d; semid = %d\en", shmid, semid);
-\&
- /* Wait for semaphore value to become 0. */
-\&
- sop.sem_num = 0;
- sop.sem_op = 0;
- sop.sem_flg = 0;
-\&
- if (semop(semid, &sop, 1) == \-1)
- errExit("semop");
-\&
- /* Print the string from shared memory. */
-\&
- printf("%s\en", addr);
-\&
- /* Remove shared memory and semaphore set. */
-\&
- if (shmctl(shmid, IPC_RMID, NULL) == \-1)
- errExit("shmctl");
- if (semctl(semid, 0, IPC_RMID, dummy) == \-1)
- errExit("semctl");
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.in
-.\"
-.SS Program source: svshm_string_write.c
-The "writer" program takes three command-line arguments:
-the IDs of the shared memory segment and semaphore set
-that have already been created by the "reader", and a string.
-It attaches the shared memory segment into its address space,
-copies the string into it, and then decrements the semaphore to 0 to inform the
-"reader" that it can now examine the contents of the shared memory.
-.P
-.in +4n
-.\" SRC BEGIN (svshm_string_write.c)
-.EX
-/* svshm_string_write.c
-\&
- Licensed under GNU General Public License v2 or later.
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/sem.h>
-#include <sys/shm.h>
-\&
-#include "svshm_string.h"
-\&
-int
-main(int argc, char *argv[])
-{
- int semid, shmid;
- char *addr;
- size_t len;
- struct sembuf sop;
-\&
- if (argc != 4) {
- fprintf(stderr, "Usage: %s shmid semid string\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- len = strlen(argv[3]) + 1; /* +1 to include trailing \[aq]\e0\[aq] */
- if (len > MEM_SIZE) {
- fprintf(stderr, "String is too big!\en");
- exit(EXIT_FAILURE);
- }
-\&
- /* Get object IDs from command\-line. */
-\&
- shmid = atoi(argv[1]);
- semid = atoi(argv[2]);
-\&
- /* Attach shared memory into our address space and copy string
- (including trailing null byte) into memory. */
-\&
- addr = shmat(shmid, NULL, 0);
- if (addr == (void *) \-1)
- errExit("shmat");
-\&
- memcpy(addr, argv[3], len);
-\&
- /* Decrement semaphore to 0. */
-\&
- sop.sem_num = 0;
- sop.sem_op = \-1;
- sop.sem_flg = 0;
-\&
- if (semop(semid, &sop, 1) == \-1)
- errExit("semop");
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.in
-.SH SEE ALSO
-.BR brk (2),
-.BR mmap (2),
-.BR shmctl (2),
-.BR shmget (2),
-.BR capabilities (7),
-.BR shm_overview (7),
-.BR sysvipc (7)
diff --git a/man2/shutdown.2 b/man2/shutdown.2
deleted file mode 100644
index d15566fb5..000000000
--- a/man2/shutdown.2
+++ /dev/null
@@ -1,98 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" $Id: shutdown.2,v 1.1.1.1 1999/03/21 22:52:23 freitag Exp $
-.\"
-.\" Modified Sat Jul 24 09:57:55 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Tue Oct 22 22:04:51 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998 by Andi Kleen
-.\"
-.TH shutdown 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-shutdown \- shut down part of a full-duplex connection
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int shutdown(int " sockfd ", int " how );
-.fi
-.SH DESCRIPTION
-The
-.BR shutdown ()
-call causes all or part of a full-duplex connection on the socket
-associated with
-.I sockfd
-to be shut down.
-If
-.I how
-is
-.BR SHUT_RD ,
-further receptions will be disallowed.
-If
-.I how
-is
-.BR SHUT_WR ,
-further transmissions will be disallowed.
-If
-.I how
-is
-.BR SHUT_RDWR ,
-further receptions and transmissions will be disallowed.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I sockfd
-is not a valid file descriptor.
-.TP
-.B EINVAL
-An invalid value was specified in
-.I how
-(but see BUGS).
-.TP
-.B ENOTCONN
-The specified socket is not connected.
-.TP
-.B ENOTSOCK
-The file descriptor
-.I sockfd
-does not refer to a socket.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.4BSD
-(first appeared in 4.2BSD).
-.SH NOTES
-The constants
-.BR SHUT_RD ,
-.BR SHUT_WR ,
-.B SHUT_RDWR
-have the values 0, 1, and 2,
-respectively, and are defined in
-.I <sys/socket.h>
-since glibc-2.1.91.
-.SH BUGS
-Checks for the validity of
-.I how
-are done in domain-specific code,
-and before Linux 3.7 not all domains performed these checks.
-.\" https://bugzilla.kernel.org/show_bug.cgi?id=47111
-Most notably, UNIX domain sockets simply ignored invalid values.
-This problem was fixed for UNIX domain sockets
-.\" commit fc61b928dc4d72176cf4bd4d30bf1d22e599aefc
-.\" and for DECnet sockets in commit 46b66d7077b89fb4917ceef19b3f7dd86055c94a
-in Linux 3.7.
-.SH SEE ALSO
-.BR close (2),
-.BR connect (2),
-.BR socket (2),
-.BR socket (7)
diff --git a/man2/sigaction.2 b/man2/sigaction.2
deleted file mode 100644
index daca34a27..000000000
--- a/man2/sigaction.2
+++ /dev/null
@@ -1,1210 +0,0 @@
-.\" Copyright (c) 1994,1995 Mike Battersby <mib@deakin.edu.au>
-.\" and Copyright 2004, 2005 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" based on work by faith@cs.unc.edu
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified, aeb, 960424
-.\" Modified Fri Jan 31 17:31:20 1997 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Thu Nov 26 02:12:45 1998 by aeb - add SIGCHLD stuff.
-.\" Modified Sat May 8 17:40:19 1999 by Matthew Wilcox
-.\" add POSIX.1b signals
-.\" Modified Sat Dec 29 01:44:52 2001 by Evan Jones <ejones@uwaterloo.ca>
-.\" SA_ONSTACK
-.\" Modified 2004-11-11 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added mention of SIGCONT under SA_NOCLDSTOP
-.\" Added SA_NOCLDWAIT
-.\" Modified 2004-11-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Updated discussion for POSIX.1-2001 and SIGCHLD and sa_flags.
-.\" Formatting fixes
-.\" 2004-12-09, mtk, added SI_TKILL + other minor changes
-.\" 2005-09-15, mtk, split sigpending(), sigprocmask(), sigsuspend()
-.\" out of this page into separate pages.
-.\" 2010-06-11 Andi Kleen, add hwpoison signal extensions
-.\" 2010-06-11 mtk, improvements to discussion of various siginfo_t fields.
-.\" 2015-01-17, Kees Cook <keescook@chromium.org>
-.\" Added notes on ptrace SIGTRAP and SYS_SECCOMP.
-.\"
-.TH sigaction 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sigaction, rt_sigaction \- examine and change a signal action
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <signal.h>
-.P
-.BI "int sigaction(int " signum ,
-.BI " const struct sigaction *_Nullable restrict " act ,
-.BI " struct sigaction *_Nullable restrict " oldact );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR sigaction ():
-.nf
- _POSIX_C_SOURCE
-.fi
-.P
-.IR siginfo_t :
-.nf
- _POSIX_C_SOURCE >= 199309L
-.fi
-.SH DESCRIPTION
-The
-.BR sigaction ()
-system call is used to change the action taken by a process on
-receipt of a specific signal.
-(See
-.BR signal (7)
-for an overview of signals.)
-.P
-.I signum
-specifies the signal and can be any valid signal except
-.B SIGKILL
-and
-.BR SIGSTOP .
-.P
-If
-.I act
-is non-NULL, the new action for signal
-.I signum
-is installed from
-.IR act .
-If
-.I oldact
-is non-NULL, the previous action is saved in
-.IR oldact .
-.P
-The
-.I sigaction
-structure is defined as something like:
-.P
-.in +4n
-.EX
-struct sigaction {
- void (*sa_handler)(int);
- void (*sa_sigaction)(int, siginfo_t *, void *);
- sigset_t sa_mask;
- int sa_flags;
- void (*sa_restorer)(void);
-};
-.EE
-.in
-.P
-On some architectures a union is involved: do not assign to both
-.I sa_handler
-and
-.IR sa_sigaction .
-.P
-The
-.I sa_restorer
-field is not intended for application use.
-(POSIX does not specify a
-.I sa_restorer
-field.)
-Some further details of the purpose of this field can be found in
-.BR sigreturn (2).
-.P
-.I sa_handler
-specifies the action to be associated with
-.I signum
-and can be one of the following:
-.IP \[bu] 3
-.B SIG_DFL
-for the default action.
-.IP \[bu]
-.B SIG_IGN
-to ignore this signal.
-.IP \[bu]
-A pointer to a signal handling function.
-This function receives the signal number as its only argument.
-.P
-If
-.B SA_SIGINFO
-is specified in
-.IR sa_flags ,
-then
-.I sa_sigaction
-(instead of
-.IR sa_handler )
-specifies the signal-handling function for
-.IR signum .
-This function receives three arguments, as described below.
-.P
-.I sa_mask
-specifies a mask of signals which should be blocked
-(i.e., added to the signal mask of the thread in which
-the signal handler is invoked)
-during execution of the signal handler.
-In addition, the signal which triggered the handler
-will be blocked, unless the
-.B SA_NODEFER
-flag is used.
-.P
-.I sa_flags
-specifies a set of flags which modify the behavior of the signal.
-It is formed by the bitwise OR of zero or more of the following:
-.TP
-.B SA_NOCLDSTOP
-If
-.I signum
-is
-.BR SIGCHLD ,
-do not receive notification when child processes stop (i.e., when they
-receive one of
-.BR SIGSTOP ", " SIGTSTP ", " SIGTTIN ,
-or
-.BR SIGTTOU )
-or resume (i.e., they receive
-.BR SIGCONT )
-(see
-.BR wait (2)).
-This flag is meaningful only when establishing a handler for
-.BR SIGCHLD .
-.TP
-.BR SA_NOCLDWAIT " (since Linux 2.6)"
-.\" To be precise: Linux 2.5.60 -- MTK
-If
-.I signum
-is
-.BR SIGCHLD ,
-do not transform children into zombies when they terminate.
-See also
-.BR waitpid (2).
-This flag is meaningful only when establishing a handler for
-.BR SIGCHLD ,
-or when setting that signal's disposition to
-.BR SIG_DFL .
-.IP
-If the
-.B SA_NOCLDWAIT
-flag is set when establishing a handler for
-.BR SIGCHLD ,
-POSIX.1 leaves it unspecified whether a
-.B SIGCHLD
-signal is generated when a child process terminates.
-On Linux, a
-.B SIGCHLD
-signal is generated in this case;
-on some other implementations, it is not.
-.TP
-.B SA_NODEFER
-Do not add the signal to the thread's signal mask while the
-handler is executing, unless the signal is specified in
-.IR act.sa_mask .
-Consequently, a further instance of the signal may be delivered
-to the thread while it is executing the handler.
-This flag is meaningful only when establishing a signal handler.
-.IP
-.B SA_NOMASK
-is an obsolete, nonstandard synonym for this flag.
-.TP
-.B SA_ONSTACK
-Call the signal handler on an alternate signal stack provided by
-.BR sigaltstack (2).
-If an alternate stack is not available, the default stack will be used.
-This flag is meaningful only when establishing a signal handler.
-.TP
-.B SA_RESETHAND
-Restore the signal action to the default upon entry to the signal handler.
-This flag is meaningful only when establishing a signal handler.
-.IP
-.B SA_ONESHOT
-is an obsolete, nonstandard synonym for this flag.
-.TP
-.B SA_RESTART
-Provide behavior compatible with BSD signal semantics by making certain
-system calls restartable across signals.
-This flag is meaningful only when establishing a signal handler.
-See
-.BR signal (7)
-for a discussion of system call restarting.
-.TP
-.B SA_RESTORER
-.IR "Not intended for application use" .
-This flag is used by C libraries to indicate that the
-.I sa_restorer
-field contains the address of a "signal trampoline".
-See
-.BR sigreturn (2)
-for more details.
-.TP
-.BR SA_SIGINFO " (since Linux 2.2)"
-The signal handler takes three arguments, not one.
-In this case,
-.I sa_sigaction
-should be set instead of
-.IR sa_handler .
-This flag is meaningful only when establishing a signal handler.
-.\" (The
-.\" .I sa_sigaction
-.\" field was added in Linux 2.1.86.)
-.\"
-.TP
-.BR SA_UNSUPPORTED " (since Linux 5.11)"
-Used to dynamically probe for flag bit support.
-.IP
-If an attempt to register a handler succeeds with this flag set in
-.I act\->sa_flags
-alongside other flags that are potentially unsupported by the kernel,
-and an immediately subsequent
-.BR sigaction ()
-call specifying the same signal number and with a non-NULL
-.I oldact
-argument yields
-.B SA_UNSUPPORTED
-.I clear
-in
-.IR oldact\->sa_flags ,
-then
-.I oldact\->sa_flags
-may be used as a bitmask
-describing which of the potentially unsupported flags are,
-in fact, supported.
-See the section "Dynamically probing for flag bit support"
-below for more details.
-.TP
-.BR SA_EXPOSE_TAGBITS " (since Linux 5.11)"
-Normally, when delivering a signal,
-an architecture-specific set of tag bits is cleared from the
-.I si_addr
-field of
-.IR siginfo_t .
-If this flag is set,
-an architecture-specific subset of the tag bits will be preserved in
-.IR si_addr .
-.IP
-Programs that need to be compatible with Linux versions older than 5.11
-must use
-.B SA_UNSUPPORTED
-to probe for support.
-.SS The siginfo_t argument to a SA_SIGINFO handler
-When the
-.B SA_SIGINFO
-flag is specified in
-.IR act.sa_flags ,
-the signal handler address is passed via the
-.I act.sa_sigaction
-field.
-This handler takes three arguments, as follows:
-.P
-.in +4n
-.EX
-void
-handler(int sig, siginfo_t *info, void *ucontext)
-{
- ...
-}
-.EE
-.in
-.P
-These three arguments are as follows:
-.TP
-.I sig
-The number of the signal that caused invocation of the handler.
-.TP
-.I info
-A pointer to a
-.IR siginfo_t ,
-which is a structure containing further information about the signal,
-as described below.
-.TP
-.I ucontext
-This is a pointer to a
-.I ucontext_t
-structure, cast to \fIvoid\ *\fP.
-The structure pointed to by this field contains
-signal context information that was saved
-on the user-space stack by the kernel; for details, see
-.BR sigreturn (2).
-Further information about the
-.I ucontext_t
-structure can be found in
-.BR getcontext (3)
-and
-.BR signal (7).
-Commonly, the handler function doesn't make any use of the third argument.
-.P
-The
-.I siginfo_t
-data type is a structure with the following fields:
-.P
-.in +4n
-.EX
-siginfo_t {
- int si_signo; /* Signal number */
- int si_errno; /* An errno value */
- int si_code; /* Signal code */
- int si_trapno; /* Trap number that caused
- hardware\-generated signal
- (unused on most architectures) */
-.\" FIXME
-.\" The siginfo_t 'si_trapno' field seems to be used
-.\" only on SPARC and Alpha; this page could use
-.\" a little more detail on its purpose there.
- pid_t si_pid; /* Sending process ID */
- uid_t si_uid; /* Real user ID of sending process */
- int si_status; /* Exit value or signal */
- clock_t si_utime; /* User time consumed */
- clock_t si_stime; /* System time consumed */
- union sigval si_value; /* Signal value */
- int si_int; /* POSIX.1b signal */
- void *si_ptr; /* POSIX.1b signal */
- int si_overrun; /* Timer overrun count;
- POSIX.1b timers */
- int si_timerid; /* Timer ID; POSIX.1b timers */
-.\" In the kernel: si_tid
- void *si_addr; /* Memory location which caused fault */
- long si_band; /* Band event (was \fIint\fP in
- glibc 2.3.2 and earlier) */
- int si_fd; /* File descriptor */
- short si_addr_lsb; /* Least significant bit of address
- (since Linux 2.6.32) */
- void *si_lower; /* Lower bound when address violation
- occurred (since Linux 3.19) */
- void *si_upper; /* Upper bound when address violation
- occurred (since Linux 3.19) */
- int si_pkey; /* Protection key on PTE that caused
- fault (since Linux 4.6) */
- void *si_call_addr; /* Address of system call instruction
- (since Linux 3.5) */
- int si_syscall; /* Number of attempted system call
- (since Linux 3.5) */
- unsigned int si_arch; /* Architecture of attempted system call
- (since Linux 3.5) */
-}
-.EE
-.in
-.P
-.IR si_signo ", " si_errno " and " si_code
-are defined for all signals.
-.RI ( si_errno
-is generally unused on Linux.)
-The rest of the struct may be a union, so that one should
-read only the fields that are meaningful for the given signal:
-.IP \[bu] 3
-Signals sent with
-.BR kill (2)
-and
-.BR sigqueue (3)
-fill in
-.IR si_pid " and " si_uid .
-In addition, signals sent with
-.BR sigqueue (3)
-fill in
-.IR si_int " and " si_ptr
-with the values specified by the sender of the signal;
-see
-.BR sigqueue (3)
-for more details.
-.IP \[bu]
-Signals sent by POSIX.1b timers (since Linux 2.6) fill in
-.I si_overrun
-and
-.IR si_timerid .
-The
-.I si_timerid
-field is an internal ID used by the kernel to identify
-the timer; it is not the same as the timer ID returned by
-.BR timer_create (2).
-The
-.I si_overrun
-field is the timer overrun count;
-this is the same information as is obtained by a call to
-.BR timer_getoverrun (2).
-These fields are nonstandard Linux extensions.
-.IP \[bu]
-Signals sent for message queue notification (see the description of
-.B SIGEV_SIGNAL
-in
-.BR mq_notify (3))
-fill in
-.IR si_int / si_ptr ,
-with the
-.I sigev_value
-supplied to
-.BR mq_notify (3);
-.IR si_pid ,
-with the process ID of the message sender; and
-.IR si_uid ,
-with the real user ID of the message sender.
-.IP \[bu]
-.B SIGCHLD
-fills in
-.IR si_pid ", " si_uid ", " si_status ", " si_utime ", and " si_stime ,
-providing information about the child.
-The
-.I si_pid
-field is the process ID of the child;
-.I si_uid
-is the child's real user ID.
-The
-.I si_status
-field contains the exit status of the child (if
-.I si_code
-is
-.BR CLD_EXITED ),
-or the signal number that caused the process to change state.
-The
-.I si_utime
-and
-.I si_stime
-contain the user and system CPU time used by the child process;
-these fields do not include the times used by waited-for children (unlike
-.BR getrusage (2)
-and
-.BR times (2)).
-Up to Linux 2.6, and since Linux 2.6.27, these fields report
-CPU time in units of
-.IR sysconf(_SC_CLK_TCK) .
-In Linux 2.6 kernels before Linux 2.6.27,
-a bug meant that these fields reported time in units
-of the (configurable) system jiffy (see
-.BR time (7)).
-.\" FIXME .
-.\" When si_utime and si_stime were originally implemented, the
-.\" measurement unit was HZ, which was the same as clock ticks
-.\" (sysconf(_SC_CLK_TCK)). In Linux 2.6, HZ became configurable, and
-.\" was *still* used as the unit to return the info in these fields,
-.\" with the result that the field values depended on the
-.\" configured HZ. Of course, they should have been measured in
-.\" USER_HZ instead, so that sysconf(_SC_CLK_TCK) could be used to
-.\" convert to seconds. I have a queued patch to fix this:
-.\" http://thread.gmane.org/gmane.linux.kernel/698061/ .
-.\" This patch made it into Linux 2.6.27.
-.\" But note that these fields still don't return the times of
-.\" waited-for children (as is done by getrusage() and times()
-.\" and wait4()). Solaris 8 does include child times.
-.IP \[bu]
-.BR SIGILL ,
-.BR SIGFPE ,
-.BR SIGSEGV ,
-.BR SIGBUS ,
-and
-.B SIGTRAP
-fill in
-.I si_addr
-with the address of the fault.
-On some architectures,
-these signals also fill in the
-.I si_trapno
-field.
-.IP
-Some suberrors of
-.BR SIGBUS ,
-in particular
-.B BUS_MCEERR_AO
-and
-.BR BUS_MCEERR_AR ,
-also fill in
-.IR si_addr_lsb .
-This field indicates the least significant bit of the reported address
-and therefore the extent of the corruption.
-For example, if a full page was corrupted,
-.I si_addr_lsb
-contains
-.IR log2(sysconf(_SC_PAGESIZE)) .
-When
-.B SIGTRAP
-is delivered in response to a
-.BR ptrace (2)
-event (PTRACE_EVENT_foo),
-.I si_addr
-is not populated, but
-.I si_pid
-and
-.I si_uid
-are populated with the respective process ID and user ID responsible for
-delivering the trap.
-In the case of
-.BR seccomp (2),
-the tracee will be shown as delivering the event.
-.B BUS_MCEERR_*
-and
-.I si_addr_lsb
-are Linux-specific extensions.
-.IP
-The
-.B SEGV_BNDERR
-suberror of
-.B SIGSEGV
-populates
-.I si_lower
-and
-.IR si_upper .
-.IP
-The
-.B SEGV_PKUERR
-suberror of
-.B SIGSEGV
-populates
-.IR si_pkey .
-.IP \[bu]
-.BR SIGIO / SIGPOLL
-(the two names are synonyms on Linux)
-fills in
-.I si_band
-and
-.IR si_fd .
-The
-.I si_band
-event is a bit mask containing the same values as are filled in the
-.I revents
-field by
-.BR poll (2).
-The
-.I si_fd
-field indicates the file descriptor for which the I/O event occurred;
-for further details, see the description of
-.B F_SETSIG
-in
-.BR fcntl (2).
-.IP \[bu]
-.BR SIGSYS ,
-generated (since Linux 3.5)
-.\" commit a0727e8ce513fe6890416da960181ceb10fbfae6
-when a seccomp filter returns
-.BR SECCOMP_RET_TRAP ,
-fills in
-.IR si_call_addr ,
-.IR si_syscall ,
-.IR si_arch ,
-.IR si_errno ,
-and other fields as described in
-.BR seccomp (2).
-.\"
-.SS
-The si_code field
-The
-.I si_code
-field inside the
-.I siginfo_t
-argument that is passed to a
-.B SA_SIGINFO
-signal handler is a value (not a bit mask)
-indicating why this signal was sent.
-For a
-.BR ptrace (2)
-event,
-.I si_code
-will contain
-.B SIGTRAP
-and have the ptrace event in the high byte:
-.P
-.in +4n
-.EX
-(SIGTRAP | PTRACE_EVENT_foo << 8).
-.EE
-.in
-.P
-For a
-.RB non- ptrace (2)
-event, the values that can appear in
-.I si_code
-are described in the remainder of this section.
-Since glibc 2.20,
-the definitions of most of these symbols are obtained from
-.I <signal.h>
-by defining feature test macros (before including
-.I any
-header file) as follows:
-.IP \[bu] 3
-.B _XOPEN_SOURCE
-with the value 500 or greater;
-.IP \[bu]
-.B _XOPEN_SOURCE
-and
-.BR _XOPEN_SOURCE_EXTENDED ;
-or
-.IP \[bu]
-.B _POSIX_C_SOURCE
-with the value 200809L or greater.
-.P
-For the
-.B TRAP_*
-constants, the symbol definitions are provided only in the first two cases.
-Before glibc 2.20, no feature test macros were required to obtain these symbols.
-.P
-For a regular signal, the following list shows the values which can be
-placed in
-.I si_code
-for any signal, along with the reason that the signal was generated.
-.RS 4
-.TP
-.B SI_USER
-.BR kill (2).
-.TP
-.B SI_KERNEL
-Sent by the kernel.
-.TP
-.B SI_QUEUE
-.BR sigqueue (3).
-.TP
-.B SI_TIMER
-POSIX timer expired.
-.TP
-.BR SI_MESGQ " (since Linux 2.6.6)"
-POSIX message queue state changed; see
-.BR mq_notify (3).
-.TP
-.B SI_ASYNCIO
-AIO completed.
-.TP
-.B SI_SIGIO
-Queued
-.B SIGIO
-(only up to Linux 2.2; from Linux 2.4 onward
-.BR SIGIO / SIGPOLL
-fills in
-.I si_code
-as described below).
-.TP
-.BR SI_TKILL " (since Linux 2.4.19)"
-.BR tkill (2)
-or
-.BR tgkill (2).
-.\" SI_DETHREAD is defined in Linux 2.6.9 sources, but isn't implemented
-.\" It appears to have been an idea that was tried during 2.5.6
-.\" through to Linux 2.5.24 and then was backed out.
-.RE
-.P
-The following values can be placed in
-.I si_code
-for a
-.B SIGILL
-signal:
-.RS 4
-.TP
-.B ILL_ILLOPC
-Illegal opcode.
-.TP
-.B ILL_ILLOPN
-Illegal operand.
-.TP
-.B ILL_ILLADR
-Illegal addressing mode.
-.TP
-.B ILL_ILLTRP
-Illegal trap.
-.TP
-.B ILL_PRVOPC
-Privileged opcode.
-.TP
-.B ILL_PRVREG
-Privileged register.
-.TP
-.B ILL_COPROC
-Coprocessor error.
-.TP
-.B ILL_BADSTK
-Internal stack error.
-.RE
-.P
-The following values can be placed in
-.I si_code
-for a
-.B SIGFPE
-signal:
-.RS 4
-.TP
-.B FPE_INTDIV
-Integer divide by zero.
-.TP
-.B FPE_INTOVF
-Integer overflow.
-.TP
-.B FPE_FLTDIV
-Floating-point divide by zero.
-.TP
-.B FPE_FLTOVF
-Floating-point overflow.
-.TP
-.B FPE_FLTUND
-Floating-point underflow.
-.TP
-.B FPE_FLTRES
-Floating-point inexact result.
-.TP
-.B FPE_FLTINV
-Floating-point invalid operation.
-.TP
-.B FPE_FLTSUB
-Subscript out of range.
-.RE
-.P
-The following values can be placed in
-.I si_code
-for a
-.B SIGSEGV
-signal:
-.RS 4
-.TP
-.B SEGV_MAPERR
-Address not mapped to object.
-.TP
-.B SEGV_ACCERR
-Invalid permissions for mapped object.
-.TP
-.BR SEGV_BNDERR " (since Linux 3.19)"
-.\" commit ee1b58d36aa1b5a79eaba11f5c3633c88231da83
-Failed address bound checks.
-.TP
-.BR SEGV_PKUERR " (since Linux 4.6)"
-.\" commit cd0ea35ff5511cde299a61c21a95889b4a71464e
-Access was denied by memory protection keys.
-See
-.BR pkeys (7).
-The protection key which applied to this access is available via
-.IR si_pkey .
-.RE
-.P
-The following values can be placed in
-.I si_code
-for a
-.B SIGBUS
-signal:
-.RS 4
-.TP
-.B BUS_ADRALN
-Invalid address alignment.
-.TP
-.B BUS_ADRERR
-Nonexistent physical address.
-.TP
-.B BUS_OBJERR
-Object-specific hardware error.
-.TP
-.BR BUS_MCEERR_AR " (since Linux 2.6.32)"
-Hardware memory error consumed on a machine check; action required.
-.TP
-.BR BUS_MCEERR_AO " (since Linux 2.6.32)"
-Hardware memory error detected in process but not consumed; action optional.
-.RE
-.P
-The following values can be placed in
-.I si_code
-for a
-.B SIGTRAP
-signal:
-.RS 4
-.TP
-.B TRAP_BRKPT
-Process breakpoint.
-.TP
-.B TRAP_TRACE
-Process trace trap.
-.TP
-.BR TRAP_BRANCH " (since Linux 2.4, IA64 only)"
-Process taken branch trap.
-.TP
-.BR TRAP_HWBKPT " (since Linux 2.4, IA64 only)"
-Hardware breakpoint/watchpoint.
-.RE
-.P
-The following values can be placed in
-.I si_code
-for a
-.B SIGCHLD
-signal:
-.RS 4
-.TP
-.B CLD_EXITED
-Child has exited.
-.TP
-.B CLD_KILLED
-Child was killed.
-.TP
-.B CLD_DUMPED
-Child terminated abnormally.
-.TP
-.B CLD_TRAPPED
-Traced child has trapped.
-.TP
-.B CLD_STOPPED
-Child has stopped.
-.TP
-.BR CLD_CONTINUED " (since Linux 2.6.9)"
-Stopped child has continued.
-.RE
-.P
-The following values can be placed in
-.I si_code
-for a
-.BR SIGIO / SIGPOLL
-signal:
-.RS 4
-.TP
-.B POLL_IN
-Data input available.
-.TP
-.B POLL_OUT
-Output buffers available.
-.TP
-.B POLL_MSG
-Input message available.
-.TP
-.B POLL_ERR
-I/O error.
-.TP
-.B POLL_PRI
-High priority input available.
-.TP
-.B POLL_HUP
-Device disconnected.
-.RE
-.P
-The following value can be placed in
-.I si_code
-for a
-.B SIGSYS
-signal:
-.RS 4
-.TP
-.BR SYS_SECCOMP " (since Linux 3.5)"
-Triggered by a
-.BR seccomp (2)
-filter rule.
-.RE
-.SS Dynamically probing for flag bit support
-The
-.BR sigaction ()
-call on Linux accepts unknown bits set in
-.I act\->sa_flags
-without error.
-The behavior of the kernel starting with Linux 5.11 is that a second
-.BR sigaction ()
-will clear unknown bits from
-.IR oldact\->sa_flags .
-However, historically, a second
-.BR sigaction ()
-call would typically leave those bits set in
-.IR oldact\->sa_flags .
-.P
-This means that support for new flags cannot be detected
-simply by testing for a flag in
-.IR sa_flags ,
-and a program must test that
-.B SA_UNSUPPORTED
-has been cleared before relying on the contents of
-.IR sa_flags .
-.P
-Since the behavior of the signal handler cannot be guaranteed
-unless the check passes,
-it is wise to either block the affected signal
-while registering the handler and performing the check in this case,
-or where this is not possible,
-for example if the signal is synchronous, to issue the second
-.BR sigaction ()
-in the signal handler itself.
-.P
-In kernels that do not support a specific flag,
-the kernel's behavior is as if the flag was not set,
-even if the flag was set in
-.IR act\->sa_flags .
-.P
-The flags
-.BR SA_NOCLDSTOP ,
-.BR SA_NOCLDWAIT ,
-.BR SA_SIGINFO ,
-.BR SA_ONSTACK ,
-.BR SA_RESTART ,
-.BR SA_NODEFER ,
-.BR SA_RESETHAND ,
-and, if defined by the architecture,
-.B SA_RESTORER
-may not be reliably probed for using this mechanism,
-because they were introduced before Linux 5.11.
-However, in general, programs may assume that these flags are supported,
-since they have all been supported since Linux 2.6,
-which was released in the year 2003.
-.P
-See EXAMPLES below for a demonstration of the use of
-.BR SA_UNSUPPORTED .
-.SH RETURN VALUE
-.BR sigaction ()
-returns 0 on success; on error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.IR act " or " oldact
-points to memory which is not a valid part of the process address space.
-.TP
-.B EINVAL
-An invalid signal was specified.
-This will also be generated if an attempt
-is made to change the action for
-.BR SIGKILL " or " SIGSTOP ,
-which cannot be caught or ignored.
-.SH VERSIONS
-.SS C library/kernel differences
-The glibc wrapper function for
-.BR sigaction ()
-gives an error
-.RB ( EINVAL )
-on attempts to change the disposition of the two real-time signals
-used internally by the NPTL threading implementation.
-See
-.BR nptl (7)
-for details.
-.P
-On architectures where the signal trampoline resides in the C library,
-the glibc wrapper function for
-.BR sigaction ()
-places the address of the trampoline code in the
-.I act.sa_restorer
-field and sets the
-.B SA_RESTORER
-flag in the
-.I act.sa_flags
-field.
-See
-.BR sigreturn (2).
-.P
-The original Linux system call was named
-.BR sigaction ().
-However, with the addition of real-time signals in Linux 2.2,
-the fixed-size, 32-bit
-.I sigset_t
-type supported by that system call was no longer fit for purpose.
-Consequently, a new system call,
-.BR rt_sigaction (),
-was added to support an enlarged
-.I sigset_t
-type.
-The new system call takes a fourth argument,
-.IR "size_t sigsetsize" ,
-which specifies the size in bytes of the signal sets in
-.I act.sa_mask
-and
-.IR oldact.sa_mask .
-This argument is currently required to have the value
-.I sizeof(sigset_t)
-(or the error
-.B EINVAL
-results).
-The glibc
-.BR sigaction ()
-wrapper function hides these details from us, transparently calling
-.BR rt_sigaction ()
-when the kernel provides it.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4.
-.\" SVr4 does not document the EINTR condition.
-.P
-POSIX.1-1990 disallowed setting the action for
-.B SIGCHLD
-to
-.BR SIG_IGN .
-POSIX.1-2001 and later allow this possibility, so that ignoring
-.B SIGCHLD
-can be used to prevent the creation of zombies (see
-.BR wait (2)).
-Nevertheless, the historical BSD and System\ V behaviors for ignoring
-.B SIGCHLD
-differ, so that the only completely portable method of ensuring that
-terminated children do not become zombies is to catch the
-.B SIGCHLD
-signal and perform a
-.BR wait (2)
-or similar.
-.P
-POSIX.1-1990 specified only
-.BR SA_NOCLDSTOP .
-POSIX.1-2001 added
-.BR SA_NOCLDWAIT ,
-.BR SA_NODEFER ,
-.BR SA_ONSTACK ,
-.BR SA_RESETHAND ,
-.BR SA_RESTART ,
-and
-.B SA_SIGINFO
-as XSI extensions.
-POSIX.1-2008 moved
-.BR SA_NODEFER ,
-.BR SA_RESETHAND ,
-.BR SA_RESTART ,
-and
-.B SA_SIGINFO
-to the base specifications.
-Use of these latter values in
-.I sa_flags
-may be less portable in applications intended for older
-UNIX implementations.
-.P
-The
-.B SA_RESETHAND
-flag is compatible with the SVr4 flag of the same name.
-.P
-The
-.B SA_NODEFER
-flag is compatible with the SVr4 flag of the same name under kernels
-1.3.9 and later.
-On older kernels the Linux implementation
-allowed the receipt of any signal, not just the one we are installing
-(effectively overriding any
-.I sa_mask
-settings).
-.SH NOTES
-A child created via
-.BR fork (2)
-inherits a copy of its parent's signal dispositions.
-During an
-.BR execve (2),
-the dispositions of handled signals are reset to the default;
-the dispositions of ignored signals are left unchanged.
-.P
-According to POSIX, the behavior of a process is undefined after it
-ignores a
-.BR SIGFPE ,
-.BR SIGILL ,
-or
-.B SIGSEGV
-signal that was not generated by
-.BR kill (2)
-or
-.BR raise (3).
-Integer division by zero has undefined result.
-On some architectures it will generate a
-.B SIGFPE
-signal.
-(Also dividing the most negative integer by \-1 may generate
-.BR SIGFPE .)
-Ignoring this signal might lead to an endless loop.
-.P
-.BR sigaction ()
-can be called with a NULL second argument to query the current signal
-handler.
-It can also be used to check whether a given signal is valid for
-the current machine by calling it with NULL second and third arguments.
-.P
-It is not possible to block
-.BR SIGKILL " or " SIGSTOP
-(by specifying them in
-.IR sa_mask ).
-Attempts to do so are silently ignored.
-.P
-See
-.BR sigsetops (3)
-for details on manipulating signal sets.
-.P
-See
-.BR signal\-safety (7)
-for a list of the async-signal-safe functions that can be
-safely called from inside a signal handler.
-.\"
-.SS Undocumented
-Before the introduction of
-.BR SA_SIGINFO ,
-it was also possible to get some additional information about the signal.
-This was done by providing an
-.I sa_handler
-signal handler with a second argument of type
-.IR "struct sigcontext" ,
-which is the same structure as the one that is passed in the
-.I uc_mcontext
-field of the
-.I ucontext
-structure that is passed (via a pointer) in the third argument of the
-.I sa_sigaction
-handler.
-See the relevant Linux kernel sources for details.
-This use is obsolete now.
-.SH BUGS
-When delivering a signal with a
-.B SA_SIGINFO
-handler,
-the kernel does not always provide meaningful values
-for all of the fields of the
-.I siginfo_t
-that are relevant for that signal.
-.P
-Up to and including Linux 2.6.13, specifying
-.B SA_NODEFER
-in
-.I sa_flags
-prevents not only the delivered signal from being masked during
-execution of the handler, but also the signals specified in
-.IR sa_mask .
-This bug was fixed in Linux 2.6.14.
-.\" commit 69be8f189653cd81aae5a74e26615b12871bb72e
-.SH EXAMPLES
-See
-.BR mprotect (2).
-.SS Probing for flag support
-The following example program exits with status
-.B EXIT_SUCCESS
-if
-.B SA_EXPOSE_TAGBITS
-is determined to be supported, and
-.B EXIT_FAILURE
-otherwise.
-.P
-.\" SRC BEGIN (sigaction.c)
-.EX
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-void
-handler(int signo, siginfo_t *info, void *context)
-{
- struct sigaction oldact;
-\&
- if (sigaction(SIGSEGV, NULL, &oldact) == \-1
- || (oldact.sa_flags & SA_UNSUPPORTED)
- || !(oldact.sa_flags & SA_EXPOSE_TAGBITS))
- {
- _exit(EXIT_FAILURE);
- }
- _exit(EXIT_SUCCESS);
-}
-\&
-int
-main(void)
-{
- struct sigaction act = { 0 };
-\&
- act.sa_flags = SA_SIGINFO | SA_UNSUPPORTED | SA_EXPOSE_TAGBITS;
- act.sa_sigaction = &handler;
- if (sigaction(SIGSEGV, &act, NULL) == \-1) {
- perror("sigaction");
- exit(EXIT_FAILURE);
- }
-\&
- raise(SIGSEGV);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR kill (1),
-.BR kill (2),
-.BR pause (2),
-.BR pidfd_send_signal (2),
-.BR restart_syscall (2),
-.BR seccomp (2),
-.BR sigaltstack (2),
-.BR signal (2),
-.BR signalfd (2),
-.BR sigpending (2),
-.BR sigprocmask (2),
-.BR sigreturn (2),
-.BR sigsuspend (2),
-.BR wait (2),
-.BR killpg (3),
-.BR raise (3),
-.BR siginterrupt (3),
-.BR sigqueue (3),
-.BR sigsetops (3),
-.BR sigvec (3),
-.BR core (5),
-.BR signal (7)
diff --git a/man2/sigaltstack.2 b/man2/sigaltstack.2
deleted file mode 100644
index bd5e6ab86..000000000
--- a/man2/sigaltstack.2
+++ /dev/null
@@ -1,362 +0,0 @@
-'\" t
-.\" Copyright (c) 2001, 2017 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" aeb, various minor fixes
-.TH sigaltstack 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sigaltstack \- set and/or get signal stack context
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <signal.h>
-.P
-.BI "int sigaltstack(const stack_t *_Nullable restrict " ss ,
-.BI " stack_t *_Nullable restrict " old_ss );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR sigaltstack ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.12: */ _POSIX_C_SOURCE >= 200809L
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-.BR sigaltstack ()
-allows a thread to define a new alternate
-signal stack and/or retrieve the state of an existing
-alternate signal stack.
-An alternate signal stack is used during the
-execution of a signal handler if the establishment of that handler (see
-.BR sigaction (2))
-requested it.
-.P
-The normal sequence of events for using an alternate signal stack
-is the following:
-.TP 3
-1.
-Allocate an area of memory to be used for the alternate
-signal stack.
-.TP
-2.
-Use
-.BR sigaltstack ()
-to inform the system of the existence and
-location of the alternate signal stack.
-.TP
-3.
-When establishing a signal handler using
-.BR sigaction (2),
-inform the system that the signal handler should be executed
-on the alternate signal stack by
-specifying the \fBSA_ONSTACK\fP flag.
-.P
-The \fIss\fP argument is used to specify a new
-alternate signal stack, while the \fIold_ss\fP argument
-is used to retrieve information about the currently
-established signal stack.
-If we are interested in performing just one
-of these tasks, then the other argument can be specified as NULL.
-.P
-The
-.I stack_t
-type used to type the arguments of this function is defined as follows:
-.P
-.in +4n
-.EX
-typedef struct {
- void *ss_sp; /* Base address of stack */
- int ss_flags; /* Flags */
- size_t ss_size; /* Number of bytes in stack */
-} stack_t;
-.EE
-.in
-.P
-To establish a new alternate signal stack,
-the fields of this structure are set as follows:
-.TP
-.I ss.ss_flags
-This field contains either 0, or the following flag:
-.RS
-.TP
-.BR SS_AUTODISARM " (since Linux 4.7)"
-.\" commit 2a74213838104a41588d86fd5e8d344972891ace
-.\" See tools/testing/selftests/sigaltstack/sas.c in kernel sources
-Clear the alternate signal stack settings on entry to the signal handler.
-When the signal handler returns,
-the previous alternate signal stack settings are restored.
-.IP
-This flag was added in order to make it safe
-to switch away from the signal handler with
-.BR swapcontext (3).
-Without this flag, a subsequently handled signal will corrupt
-the state of the switched-away signal handler.
-On kernels where this flag is not supported,
-.BR sigaltstack ()
-fails with the error
-.B EINVAL
-when this flag is supplied.
-.RE
-.TP
-.I ss.ss_sp
-This field specifies the starting address of the stack.
-When a signal handler is invoked on the alternate stack,
-the kernel automatically aligns the address given in \fIss.ss_sp\fP
-to a suitable address boundary for the underlying hardware architecture.
-.TP
-.I ss.ss_size
-This field specifies the size of the stack.
-The constant \fBSIGSTKSZ\fP is defined to be large enough
-to cover the usual size requirements for an alternate signal stack,
-and the constant \fBMINSIGSTKSZ\fP defines the minimum
-size required to execute a signal handler.
-.P
-To disable an existing stack, specify \fIss.ss_flags\fP
-as \fBSS_DISABLE\fP.
-In this case, the kernel ignores any other flags in
-.I ss.ss_flags
-and the remaining fields
-in \fIss\fP.
-.P
-If \fIold_ss\fP is not NULL, then it is used to return information about
-the alternate signal stack which was in effect prior to the
-call to
-.BR sigaltstack ().
-The \fIold_ss.ss_sp\fP and \fIold_ss.ss_size\fP fields return the starting
-address and size of that stack.
-The \fIold_ss.ss_flags\fP may return any of the following values:
-.TP
-.B SS_ONSTACK
-The thread is currently executing on the alternate signal stack.
-(Note that it is not possible
-to change the alternate signal stack if the thread is
-currently executing on it.)
-.TP
-.B SS_DISABLE
-The alternate signal stack is currently disabled.
-.IP
-Alternatively, this value is returned if the thread is currently
-executing on an alternate signal stack that was established using the
-.B SS_AUTODISARM
-flag.
-In this case, it is safe to switch away from the signal handler with
-.BR swapcontext (3).
-It is also possible to set up a different alternative signal stack
-using a further call to
-.BR sigaltstack ().
-.\" FIXME Was it intended that one can set up a different alternative
-.\" signal stack in this scenario? (In passing, if one does this, the
-.\" sigaltstack(NULL, &old_ss) now returns old_ss.ss_flags==SS_AUTODISARM
-.\" rather than old_ss.ss_flags==SS_DISABLE. The API design here seems
-.\" confusing...
-.TP
-.B SS_AUTODISARM
-The alternate signal stack has been marked to be autodisarmed
-as described above.
-.P
-By specifying
-.I ss
-as NULL, and
-.I old_ss
-as a non-NULL value, one can obtain the current settings for
-the alternate signal stack without changing them.
-.SH RETURN VALUE
-.BR sigaltstack ()
-returns 0 on success, or \-1 on failure with
-\fIerrno\fP set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Either \fIss\fP or \fIold_ss\fP is not NULL and points to an area
-outside of the process's address space.
-.TP
-.B EINVAL
-\fIss\fP is not NULL and the \fIss_flags\fP field contains
-an invalid flag.
-.TP
-.B ENOMEM
-The specified size of the new alternate signal stack
-.I ss.ss_size
-was less than
-.BR MINSIGSTKSZ .
-.TP
-.B EPERM
-An attempt was made to change the alternate signal stack while
-it was active (i.e., the thread was already executing
-on the current alternate signal stack).
-.SH ATTRIBUTES
-For an explanation of the terms used in this section, see
-.BR attributes (7).
-.TS
-allbox;
-lbx lb lb
-l l l.
-Interface Attribute Value
-T{
-.na
-.nh
-.BR sigaltstack ()
-T} Thread safety MT-Safe
-.TE
-.SH STANDARDS
-POSIX.1-2008.
-.P
-.B SS_AUTODISARM
-is a Linux extension.
-.SH HISTORY
-POSIX.1-2001, SUSv2, SVr4.
-.SH NOTES
-The most common usage of an alternate signal stack is to handle the
-.B SIGSEGV
-signal that is generated if the space available for the
-standard stack is exhausted: in this case, a signal handler for
-.B SIGSEGV
-cannot be invoked on the standard stack; if we wish to handle it,
-we must use an alternate signal stack.
-.P
-Establishing an alternate signal stack is useful if a thread
-expects that it may exhaust its standard stack.
-This may occur, for example, because the stack grows so large
-that it encounters the upwardly growing heap, or it reaches a
-limit established by a call to \fB\%setrlimit(RLIMIT_STACK, &rlim)\fP.
-If the standard stack is exhausted, the kernel sends
-the thread a \fBSIGSEGV\fP signal.
-In these circumstances the only way to catch this signal is
-on an alternate signal stack.
-.P
-On most hardware architectures supported by Linux, stacks grow
-downward.
-.BR sigaltstack ()
-automatically takes account
-of the direction of stack growth.
-.P
-Functions called from a signal handler executing on an alternate
-signal stack will also use the alternate signal stack.
-(This also applies to any handlers invoked for other signals while
-the thread is executing on the alternate signal stack.)
-Unlike the standard stack, the system does not
-automatically extend the alternate signal stack.
-Exceeding the allocated size of the alternate signal stack will
-lead to unpredictable results.
-.P
-A successful call to
-.BR execve (2)
-removes any existing alternate
-signal stack.
-A child process created via
-.BR fork (2)
-inherits a copy of its parent's alternate signal stack settings.
-The same is also true for a child process created using
-.BR clone (2),
-unless the clone flags include
-.B CLONE_VM
-and do not include
-.BR CLONE_VFORK ,
-in which case any alternate signal stack that was established in the parent
-is disabled in the child process.
-.P
-.BR sigaltstack ()
-supersedes the older
-.BR sigstack ()
-call.
-For backward compatibility, glibc also provides
-.BR sigstack ().
-All new applications should be written using
-.BR sigaltstack ().
-.SS History
-4.2BSD had a
-.BR sigstack ()
-system call.
-It used a slightly
-different struct, and had the major disadvantage that the caller
-had to know the direction of stack growth.
-.SH BUGS
-In Linux 2.2 and earlier, the only flag that could be specified
-in
-.I ss.ss_flags
-was
-.BR SS_DISABLE .
-In the lead up to the release of the Linux 2.4 kernel,
-.\" Linux 2.3.40
-.\" After quite a bit of web and mail archive searching,
-.\" I could not find the patch on any mailing list, and I
-.\" could find no place where the rationale for this change
-.\" explained -- mtk
-a change was made to allow
-.BR sigaltstack ()
-to accept
-.I ss.ss_flags==SS_ONSTACK
-with the same meaning as
-.I ss.ss_flags==0
-(i.e., the inclusion of
-.B SS_ONSTACK
-in
-.I ss.ss_flags
-is a no-op).
-On other implementations, and according to POSIX.1,
-.B SS_ONSTACK
-appears only as a reported flag in
-.IR old_ss.ss_flags .
-On Linux, there is no need ever to specify
-.B SS_ONSTACK
-in
-.IR ss.ss_flags ,
-and indeed doing so should be avoided on portability grounds:
-various other systems
-.\" See the source code of Illumos and FreeBSD, for example.
-give an error if
-.B SS_ONSTACK
-is specified in
-.IR ss.ss_flags .
-.SH EXAMPLES
-The following code segment demonstrates the use of
-.BR sigaltstack ()
-(and
-.BR sigaction (2))
-to install an alternate signal stack that is employed by a handler
-for the
-.B SIGSEGV
-signal:
-.P
-.in +4n
-.EX
-stack_t ss;
-\&
-ss.ss_sp = malloc(SIGSTKSZ);
-if (ss.ss_sp == NULL) {
- perror("malloc");
- exit(EXIT_FAILURE);
-}
-\&
-ss.ss_size = SIGSTKSZ;
-ss.ss_flags = 0;
-if (sigaltstack(&ss, NULL) == \-1) {
- perror("sigaltstack");
- exit(EXIT_FAILURE);
-}
-\&
-sa.sa_flags = SA_ONSTACK;
-sa.sa_handler = handler; /* Address of a signal handler */
-sigemptyset(&sa.sa_mask);
-if (sigaction(SIGSEGV, &sa, NULL) == \-1) {
- perror("sigaction");
- exit(EXIT_FAILURE);
-}
-.EE
-.in
-.SH SEE ALSO
-.BR execve (2),
-.BR setrlimit (2),
-.BR sigaction (2),
-.BR siglongjmp (3),
-.BR sigsetjmp (3),
-.BR signal (7)
diff --git a/man2/signal.2 b/man2/signal.2
deleted file mode 100644
index 5e84f40c3..000000000
--- a/man2/signal.2
+++ /dev/null
@@ -1,280 +0,0 @@
-.\" Copyright (c) 2000 Andries Brouwer <aeb@cwi.nl>
-.\" and Copyright (c) 2007 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright (c) 2008, Linux Foundation, written by Michael Kerrisk
-.\" <mtk.manpages@gmail.com>
-.\" based on work by Rik Faith <faith@cs.unc.edu>
-.\" and Mike Battersby <mike@starbug.apana.org.au>.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 2004-11-19, mtk:
-.\" added pointer to sigaction.2 for details of ignoring SIGCHLD
-.\" 2007-06-03, mtk: strengthened portability warning, and rewrote
-.\" various sections.
-.\" 2008-07-11, mtk: rewrote and expanded portability discussion.
-.\"
-.TH signal 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-signal \- ANSI C signal handling
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <signal.h>
-.P
-.B typedef void (*sighandler_t)(int);
-.P
-.BI "sighandler_t signal(int " signum ", sighandler_t " handler );
-.fi
-.SH DESCRIPTION
-.BR WARNING :
-the behavior of
-.BR signal ()
-varies across UNIX versions,
-and has also varied historically across different versions of Linux.
-\fBAvoid its use\fP: use
-.BR sigaction (2)
-instead.
-See \fIPortability\fP below.
-.P
-.BR signal ()
-sets the disposition of the signal
-.I signum
-to
-.IR handler ,
-which is either
-.BR SIG_IGN ,
-.BR SIG_DFL ,
-or the address of a programmer-defined function (a "signal handler").
-.P
-If the signal
-.I signum
-is delivered to the process, then one of the following happens:
-.TP 3
-*
-If the disposition is set to
-.BR SIG_IGN ,
-then the signal is ignored.
-.TP
-*
-If the disposition is set to
-.BR SIG_DFL ,
-then the default action associated with the signal (see
-.BR signal (7))
-occurs.
-.TP
-*
-If the disposition is set to a function,
-then first either the disposition is reset to
-.BR SIG_DFL ,
-or the signal is blocked (see \fIPortability\fP below), and then
-.I handler
-is called with argument
-.IR signum .
-If invocation of the handler caused the signal to be blocked,
-then the signal is unblocked upon return from the handler.
-.P
-The signals
-.B SIGKILL
-and
-.B SIGSTOP
-cannot be caught or ignored.
-.SH RETURN VALUE
-.BR signal ()
-returns the previous value of the signal handler.
-On failure, it returns
-.BR SIG_ERR ,
-and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I signum
-is invalid.
-.SH VERSIONS
-The use of
-.I sighandler_t
-is a GNU extension, exposed if
-.B _GNU_SOURCE
-is defined;
-.\" libc4 and libc5 define
-.\" .IR SignalHandler ;
-glibc also defines (the BSD-derived)
-.I sig_t
-if
-.B _BSD_SOURCE
-(glibc 2.19 and earlier)
-or
-.B _DEFAULT_SOURCE
-(glibc 2.19 and later)
-is defined.
-Without use of such a type, the declaration of
-.BR signal ()
-is somewhat harder to read:
-.P
-.in +4n
-.EX
-.BI "void ( *" signal "(int " signum ", void (*" handler ")(int)) ) (int);"
-.EE
-.in
-.SS Portability
-The only portable use of
-.BR signal ()
-is to set a signal's disposition to
-.B SIG_DFL
-or
-.BR SIG_IGN .
-The semantics when using
-.BR signal ()
-to establish a signal handler vary across systems
-(and POSIX.1 explicitly permits this variation);
-.B do not use it for this purpose.
-.P
-POSIX.1 solved the portability mess by specifying
-.BR sigaction (2),
-which provides explicit control of the semantics when a
-signal handler is invoked; use that interface instead of
-.BR signal ().
-.SH STANDARDS
-C11, POSIX.1-2008.
-.SH HISTORY
-C89, POSIX.1-2001.
-.P
-In the original UNIX systems, when a handler that was established using
-.BR signal ()
-was invoked by the delivery of a signal,
-the disposition of the signal would be reset to
-.BR SIG_DFL ,
-and the system did not block delivery of further instances of the signal.
-This is equivalent to calling
-.BR sigaction (2)
-with the following flags:
-.P
-.in +4n
-.EX
-sa.sa_flags = SA_RESETHAND | SA_NODEFER;
-.EE
-.in
-.P
-System\ V also provides these semantics for
-.BR signal ().
-This was bad because the signal might be delivered again
-before the handler had a chance to reestablish itself.
-Furthermore, rapid deliveries of the same signal could
-result in recursive invocations of the handler.
-.P
-BSD improved on this situation, but unfortunately also
-changed the semantics of the existing
-.BR signal ()
-interface while doing so.
-On BSD, when a signal handler is invoked,
-the signal disposition is not reset,
-and further instances of the signal are blocked from
-being delivered while the handler is executing.
-Furthermore, certain blocking system calls are automatically
-restarted if interrupted by a signal handler (see
-.BR signal (7)).
-The BSD semantics are equivalent to calling
-.BR sigaction (2)
-with the following flags:
-.P
-.in +4n
-.EX
-sa.sa_flags = SA_RESTART;
-.EE
-.in
-.P
-The situation on Linux is as follows:
-.IP \[bu] 3
-The kernel's
-.BR signal ()
-system call provides System\ V semantics.
-.IP \[bu]
-By default, in glibc 2 and later, the
-.BR signal ()
-wrapper function does not invoke the kernel system call.
-Instead, it calls
-.BR sigaction (2)
-using flags that supply BSD semantics.
-This default behavior is provided as long as a suitable
-feature test macro is defined:
-.B _BSD_SOURCE
-on glibc 2.19 and earlier or
-.B _DEFAULT_SOURCE
-in glibc 2.19 and later.
-(By default, these macros are defined; see
-.BR feature_test_macros (7)
-for details.)
-If such a feature test macro is not defined, then
-.BR signal ()
-provides System\ V semantics.
-.\"
-.\" System V semantics are also provided if one uses the separate
-.\" .BR sysv_signal (3)
-.\" function.
-.\" .IP \[bu]
-.\" The
-.\" .BR signal ()
-.\" function in Linux libc4 and libc5 provide System\ V semantics.
-.\" If one on a libc5 system includes
-.\" .I <bsd/signal.h>
-.\" instead of
-.\" .IR <signal.h> ,
-.\" then
-.\" .BR signal ()
-.\" provides BSD semantics.
-.SH NOTES
-The effects of
-.BR signal ()
-in a multithreaded process are unspecified.
-.P
-According to POSIX, the behavior of a process is undefined after it
-ignores a
-.BR SIGFPE ,
-.BR SIGILL ,
-or
-.B SIGSEGV
-signal that was not generated by
-.BR kill (2)
-or
-.BR raise (3).
-Integer division by zero has undefined result.
-On some architectures it will generate a
-.B SIGFPE
-signal.
-(Also dividing the most negative integer by \-1 may generate
-.BR SIGFPE .)
-Ignoring this signal might lead to an endless loop.
-.P
-See
-.BR sigaction (2)
-for details on what happens when the disposition
-.B SIGCHLD
-is set to
-.BR SIG_IGN .
-.P
-See
-.BR signal\-safety (7)
-for a list of the async-signal-safe functions that can be
-safely called from inside a signal handler.
-.SH SEE ALSO
-.BR kill (1),
-.BR alarm (2),
-.BR kill (2),
-.BR pause (2),
-.BR sigaction (2),
-.BR signalfd (2),
-.BR sigpending (2),
-.BR sigprocmask (2),
-.BR sigsuspend (2),
-.BR bsd_signal (3),
-.BR killpg (3),
-.BR raise (3),
-.BR siginterrupt (3),
-.BR sigqueue (3),
-.BR sigsetops (3),
-.BR sigvec (3),
-.BR sysv_signal (3),
-.BR signal (7)
diff --git a/man2/signalfd.2 b/man2/signalfd.2
deleted file mode 100644
index 831fa944c..000000000
--- a/man2/signalfd.2
+++ /dev/null
@@ -1,525 +0,0 @@
-.\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" starting from a version by Davide Libenzi <davidel@xmailserver.org>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH signalfd 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-signalfd \- create a file descriptor for accepting signals
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/signalfd.h>
-.P
-.BI "int signalfd(int " fd ", const sigset_t *" mask ", int " flags );
-.fi
-.SH DESCRIPTION
-.BR signalfd ()
-creates a file descriptor that can be used to accept signals
-targeted at the caller.
-This provides an alternative to the use of a signal handler or
-.BR sigwaitinfo (2),
-and has the advantage that the file descriptor may be monitored by
-.BR select (2),
-.BR poll (2),
-and
-.BR epoll (7).
-.P
-The
-.I mask
-argument specifies the set of signals that the caller
-wishes to accept via the file descriptor.
-This argument is a signal set whose contents can be initialized
-using the macros described in
-.BR sigsetops (3).
-Normally, the set of signals to be received via the
-file descriptor should be blocked using
-.BR sigprocmask (2),
-to prevent the signals being handled according to their default
-dispositions.
-It is not possible to receive
-.B SIGKILL
-or
-.B SIGSTOP
-signals via a signalfd file descriptor;
-these signals are silently ignored if specified in
-.IR mask .
-.P
-If the
-.I fd
-argument is \-1,
-then the call creates a new file descriptor and associates the
-signal set specified in
-.I mask
-with that file descriptor.
-If
-.I fd
-is not \-1,
-then it must specify a valid existing signalfd file descriptor, and
-.I mask
-is used to replace the signal set associated with that file descriptor.
-.P
-Starting with Linux 2.6.27, the following values may be bitwise ORed in
-.I flags
-to change the behavior of
-.BR signalfd ():
-.TP 14
-.B SFD_NONBLOCK
-Set the
-.B O_NONBLOCK
-file status flag on the open file description (see
-.BR open (2))
-referred to by the new file descriptor.
-Using this flag saves extra calls to
-.BR fcntl (2)
-to achieve the same result.
-.TP
-.B SFD_CLOEXEC
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.P
-Up to Linux 2.6.26, the
-.I flags
-argument is unused, and must be specified as zero.
-.P
-.BR signalfd ()
-returns a file descriptor that supports the following operations:
-.TP
-.BR read (2)
-If one or more of the signals specified in
-.I mask
-is pending for the process, then the buffer supplied to
-.BR read (2)
-is used to return one or more
-.I signalfd_siginfo
-structures (see below) that describe the signals.
-The
-.BR read (2)
-returns information for as many signals as are pending and will
-fit in the supplied buffer.
-The buffer must be at least
-.I "sizeof(struct signalfd_siginfo)"
-bytes.
-The return value of the
-.BR read (2)
-is the total number of bytes read.
-.IP
-As a consequence of the
-.BR read (2),
-the signals are consumed,
-so that they are no longer pending for the process
-(i.e., will not be caught by signal handlers,
-and cannot be accepted using
-.BR sigwaitinfo (2)).
-.IP
-If none of the signals in
-.I mask
-is pending for the process, then the
-.BR read (2)
-either blocks until one of the signals in
-.I mask
-is generated for the process,
-or fails with the error
-.B EAGAIN
-if the file descriptor has been made nonblocking.
-.TP
-.BR poll (2)
-.TQ
-.BR select (2)
-.TQ
-(and similar)
-The file descriptor is readable
-(the
-.BR select (2)
-.I readfds
-argument; the
-.BR poll (2)
-.B POLLIN
-flag)
-if one or more of the signals in
-.I mask
-is pending for the process.
-.IP
-The signalfd file descriptor also supports the other file-descriptor
-multiplexing APIs:
-.BR pselect (2),
-.BR ppoll (2),
-and
-.BR epoll (7).
-.TP
-.BR close (2)
-When the file descriptor is no longer required it should be closed.
-When all file descriptors associated with the same signalfd object
-have been closed, the resources for the object are freed by the kernel.
-.SS The signalfd_siginfo structure
-The format of the
-.I signalfd_siginfo
-structure(s) returned by
-.BR read (2)s
-from a signalfd file descriptor is as follows:
-.P
-.in +4n
-.EX
-struct signalfd_siginfo {
- uint32_t ssi_signo; /* Signal number */
- int32_t ssi_errno; /* Error number (unused) */
- int32_t ssi_code; /* Signal code */
- uint32_t ssi_pid; /* PID of sender */
- uint32_t ssi_uid; /* Real UID of sender */
- int32_t ssi_fd; /* File descriptor (SIGIO) */
- uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */
- uint32_t ssi_band; /* Band event (SIGIO) */
- uint32_t ssi_overrun; /* POSIX timer overrun count */
- uint32_t ssi_trapno; /* Trap number that caused signal */
-.\" ssi_trapno is unused on most arches
- int32_t ssi_status; /* Exit status or signal (SIGCHLD) */
- int32_t ssi_int; /* Integer sent by sigqueue(3) */
- uint64_t ssi_ptr; /* Pointer sent by sigqueue(3) */
- uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */
- uint64_t ssi_stime; /* System CPU time consumed
- (SIGCHLD) */
- uint64_t ssi_addr; /* Address that generated signal
- (for hardware\-generated signals) */
- uint16_t ssi_addr_lsb; /* Least significant bit of address
- (SIGBUS; since Linux 2.6.37) */
-.\" ssi_addr_lsb: commit b8aeec34175fc8fe8b0d40efea4846dfc1ba663e
- uint8_t pad[\fIX\fP]; /* Pad size to 128 bytes (allow for
- additional fields in the future) */
-};
-.EE
-.in
-.P
-Each of the fields in this structure
-is analogous to the similarly named field in the
-.I siginfo_t
-structure.
-The
-.I siginfo_t
-structure is described in
-.BR sigaction (2).
-Not all fields in the returned
-.I signalfd_siginfo
-structure will be valid for a specific signal;
-the set of valid fields can be determined from the value returned in the
-.I ssi_code
-field.
-This field is the analog of the
-.I siginfo_t
-.I si_code
-field; see
-.BR sigaction (2)
-for details.
-.SS fork(2) semantics
-After a
-.BR fork (2),
-the child inherits a copy of the signalfd file descriptor.
-A
-.BR read (2)
-from the file descriptor in the child will return information
-about signals queued to the child.
-.SS Semantics of file descriptor passing
-As with other file descriptors,
-signalfd file descriptors can be passed to another process
-via a UNIX domain socket (see
-.BR unix (7)).
-In the receiving process, a
-.BR read (2)
-from the received file descriptor will return information
-about signals queued to that process.
-.SS execve(2) semantics
-Just like any other file descriptor,
-a signalfd file descriptor remains open across an
-.BR execve (2),
-unless it has been marked for close-on-exec (see
-.BR fcntl (2)).
-Any signals that were available for reading before the
-.BR execve (2)
-remain available to the newly loaded program.
-(This is analogous to traditional signal semantics,
-where a blocked signal that is pending remains pending across an
-.BR execve (2).)
-.SS Thread semantics
-The semantics of signalfd file descriptors in a multithreaded program
-mirror the standard semantics for signals.
-In other words,
-when a thread reads from a signalfd file descriptor,
-it will read the signals that are directed to the thread
-itself and the signals that are directed to the process
-(i.e., the entire thread group).
-(A thread will not be able to read signals that are directed
-to other threads in the process.)
-.\"
-.SS epoll(7) semantics
-If a process adds (via
-.BR epoll_ctl (2))
-a signalfd file descriptor to an
-.BR epoll (7)
-instance, then
-.BR epoll_wait (2)
-returns events only for signals sent to that process.
-In particular, if the process then uses
-.BR fork (2)
-to create a child process, then the child will be able to
-.BR read (2)
-signals that are sent to it using the signalfd file descriptor, but
-.BR epoll_wait (2)
-will
-.B not
-indicate that the signalfd file descriptor is ready.
-In this scenario, a possible workaround is that after the
-.BR fork (2),
-the child process can close the signalfd file descriptor that it inherited
-from the parent process and then create another signalfd file descriptor
-and add it to the epoll instance.
-Alternatively, the parent and the child could delay creating their
-(separate) signalfd file descriptors and adding them to the
-epoll instance until after the call to
-.BR fork (2).
-.SH RETURN VALUE
-On success,
-.BR signalfd ()
-returns a signalfd file descriptor;
-this is either a new file descriptor (if
-.I fd
-was \-1), or
-.I fd
-if
-.I fd
-was a valid signalfd file descriptor.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-The
-.I fd
-file descriptor is not a valid file descriptor.
-.TP
-.B EINVAL
-.I fd
-is not a valid signalfd file descriptor.
-.\" or, the
-.\" .I sizemask
-.\" argument is not equal to
-.\" .IR sizeof(sigset_t) ;
-.TP
-.B EINVAL
-.I flags
-is invalid;
-or, in Linux 2.6.26 or earlier,
-.I flags
-is nonzero.
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been
-reached.
-.TP
-.B ENODEV
-Could not mount (internal) anonymous inode device.
-.TP
-.B ENOMEM
-There was insufficient memory to create a new signalfd file descriptor.
-.SH VERSIONS
-.SS C library/kernel differences
-The underlying Linux system call requires an additional argument,
-.IR "size_t sizemask" ,
-which specifies the size of the
-.I mask
-argument.
-The glibc
-.BR signalfd ()
-wrapper function does not include this argument,
-since it provides the required value for the underlying system call.
-.P
-There are two underlying Linux system calls:
-.BR signalfd ()
-and the more recent
-.BR signalfd4 ().
-The former system call does not implement a
-.I flags
-argument.
-The latter system call implements the
-.I flags
-values described above.
-Starting with glibc 2.9, the
-.BR signalfd ()
-wrapper function will use
-.BR signalfd4 ()
-where it is available.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR signalfd ()
-Linux 2.6.22,
-glibc 2.8.
-.\" signalfd() is in glibc 2.7, but reportedly does not build
-.TP
-.BR signalfd4 ()
-Linux 2.6.27.
-.SH NOTES
-A process can create multiple signalfd file descriptors.
-This makes it possible to accept different signals
-on different file descriptors.
-(This may be useful if monitoring the file descriptors using
-.BR select (2),
-.BR poll (2),
-or
-.BR epoll (7):
-the arrival of different signals will make different file descriptors ready.)
-If a signal appears in the
-.I mask
-of more than one of the file descriptors, then occurrences
-of that signal can be read (once) from any one of the file descriptors.
-.P
-Attempts to include
-.B SIGKILL
-and
-.B SIGSTOP
-in
-.I mask
-are silently ignored.
-.P
-The signal mask employed by a signalfd file descriptor can be viewed
-via the entry for the corresponding file descriptor in the process's
-.IR /proc/ pid /fdinfo
-directory.
-See
-.BR proc (5)
-for further details.
-.\"
-.SS Limitations
-The signalfd mechanism can't be used to receive signals that
-are synchronously generated, such as the
-.B SIGSEGV
-signal that results from accessing an invalid memory address
-or the
-.B SIGFPE
-signal that results from an arithmetic error.
-Such signals can be caught only via a signal handler.
-.P
-As described above,
-in normal usage one blocks the signals that will be accepted via
-.BR signalfd ().
-If spawning a child process to execute a helper program
-(that does not need the signalfd file descriptor),
-then, after the call to
-.BR fork (2),
-you will normally want to unblock those signals before calling
-.BR execve (2),
-so that the helper program can see any signals that it expects to see.
-Be aware, however,
-that this won't be possible in the case of a helper program spawned
-behind the scenes by any library function that the program may call.
-In such cases, one must fall back to using a traditional signal
-handler that writes to a file descriptor monitored by
-.BR select (2),
-.BR poll (2),
-or
-.BR epoll (7).
-.SH BUGS
-Before Linux 2.6.25, the
-.I ssi_ptr
-and
-.I ssi_int
-fields are not filled in with the data accompanying a signal sent by
-.BR sigqueue (3).
-.\" The fix also was put into Linux 2.6.24.5
-.SH EXAMPLES
-The program below accepts the signals
-.B SIGINT
-and
-.B SIGQUIT
-via a signalfd file descriptor.
-The program terminates after accepting a
-.B SIGQUIT
-signal.
-The following shell session demonstrates the use of the program:
-.P
-.in +4n
-.EX
-.RB "$" " ./signalfd_demo"
-.BR "\[ha]C" " # Control\-C generates SIGINT"
-Got SIGINT
-.B \[ha]C
-Got SIGINT
-\fB\[ha]\e\fP # Control\-\e generates SIGQUIT
-Got SIGQUIT
-$
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (signalfd.c)
-.EX
-#include <err.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/signalfd.h>
-#include <unistd.h>
-\&
-int
-main(void)
-{
- int sfd;
- ssize_t s;
- sigset_t mask;
- struct signalfd_siginfo fdsi;
-\&
- sigemptyset(&mask);
- sigaddset(&mask, SIGINT);
- sigaddset(&mask, SIGQUIT);
-\&
- /* Block signals so that they aren\[aq]t handled
- according to their default dispositions. */
-\&
- if (sigprocmask(SIG_BLOCK, &mask, NULL) == \-1)
- err(EXIT_FAILURE, "sigprocmask");
-\&
- sfd = signalfd(\-1, &mask, 0);
- if (sfd == \-1)
- err(EXIT_FAILURE, "signalfd");
-\&
- for (;;) {
- s = read(sfd, &fdsi, sizeof(fdsi));
- if (s != sizeof(fdsi))
- err(EXIT_FAILURE, "read");
-\&
- if (fdsi.ssi_signo == SIGINT) {
- printf("Got SIGINT\en");
- } else if (fdsi.ssi_signo == SIGQUIT) {
- printf("Got SIGQUIT\en");
- exit(EXIT_SUCCESS);
- } else {
- printf("Read unexpected signal\en");
- }
- }
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR eventfd (2),
-.BR poll (2),
-.BR read (2),
-.BR select (2),
-.BR sigaction (2),
-.BR sigprocmask (2),
-.BR sigwaitinfo (2),
-.BR timerfd_create (2),
-.BR sigsetops (3),
-.BR sigwait (3),
-.BR epoll (7),
-.BR signal (7)
diff --git a/man2/signalfd4.2 b/man2/signalfd4.2
deleted file mode 100644
index 8dbea5cb6..000000000
--- a/man2/signalfd4.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/signalfd.2
diff --git a/man2/sigpending.2 b/man2/sigpending.2
deleted file mode 100644
index 0562440dc..000000000
--- a/man2/sigpending.2
+++ /dev/null
@@ -1,110 +0,0 @@
-.\" Copyright (c) 2005 Michael Kerrisk
-.\" based on earlier work by faith@cs.unc.edu and
-.\" Mike Battersby <mib@deakin.edu.au>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2005-09-15, mtk, Created new page by splitting off from sigaction.2
-.\"
-.TH sigpending 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sigpending, rt_sigpending \- examine pending signals
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <signal.h>
-.P
-.BI "int sigpending(sigset_t *" set );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR sigpending ():
-.nf
- _POSIX_C_SOURCE
-.fi
-.SH DESCRIPTION
-.BR sigpending ()
-returns the set of signals that are pending for delivery to the calling
-thread (i.e., the signals which have been raised while blocked).
-The mask of pending signals is returned in
-.IR set .
-.SH RETURN VALUE
-.BR sigpending ()
-returns 0 on success.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I set
-points to memory which is not a valid part of the process address space.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.SS C library/kernel differences
-The original Linux system call was named
-.BR sigpending ().
-However, with the addition of real-time signals in Linux 2.2,
-the fixed-size, 32-bit
-.I sigset_t
-argument supported by that system call was no longer fit for purpose.
-Consequently, a new system call,
-.BR rt_sigpending (),
-was added to support an enlarged
-.I sigset_t
-type.
-The new system call takes a second argument,
-.IR "size_t sigsetsize" ,
-which specifies the size in bytes of the signal set in
-.IR set .
-.\" This argument is currently required to be less than or equal to
-.\" .IR sizeof(sigset_t)
-.\" (or the error
-.\" .B EINVAL
-.\" results).
-The glibc
-.BR sigpending ()
-wrapper function hides these details from us, transparently calling
-.BR rt_sigpending ()
-when the kernel provides it.
-.SH NOTES
-See
-.BR sigsetops (3)
-for details on manipulating signal sets.
-.P
-If a signal is both blocked and has a disposition of "ignored", it is
-.I not
-added to the mask of pending signals when generated.
-.P
-The set of signals that is pending for a thread
-is the union of the set of signals that is pending for that thread
-and the set of signals that is pending for the process as a whole; see
-.BR signal (7).
-.P
-A child created via
-.BR fork (2)
-initially has an empty pending signal set;
-the pending signal set is preserved across an
-.BR execve (2).
-.SH BUGS
-Up to and including glibc 2.2.1,
-there is a bug in the wrapper function for
-.BR sigpending ()
-which means that information about pending real-time signals
-is not correctly returned.
-.SH SEE ALSO
-.BR kill (2),
-.BR sigaction (2),
-.BR signal (2),
-.BR sigprocmask (2),
-.BR sigsuspend (2),
-.BR sigsetops (3),
-.BR signal (7)
diff --git a/man2/sigprocmask.2 b/man2/sigprocmask.2
deleted file mode 100644
index 89838a51d..000000000
--- a/man2/sigprocmask.2
+++ /dev/null
@@ -1,224 +0,0 @@
-.\" Copyright (c) 2005 Michael Kerrisk
-.\" based on earlier work by faith@cs.unc.edu and
-.\" Mike Battersby <mib@deakin.edu.au>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2005-09-15, mtk, Created new page by splitting off from sigaction.2
-.\"
-.TH sigprocmask 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sigprocmask, rt_sigprocmask \- examine and change blocked signals
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.B #include <signal.h>
-.P
-.nf
-/* Prototype for the glibc wrapper function */
-.BI "int sigprocmask(int " how ", const sigset_t *_Nullable restrict " set ,
-.BI " sigset_t *_Nullable restrict " oldset );
-.P
-.BR "#include <signal.h>" " /* Definition of " SIG_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-/* Prototype for the underlying system call */
-.BI "int syscall(SYS_rt_sigprocmask, int " how ,
-.BI " const kernel_sigset_t *_Nullable " set ,
-.BI " kernel_sigset_t *_Nullable " oldset ,
-.BI " size_t " sigsetsize );
-.P
-/* Prototype for the legacy system call */
-.BI "[[deprecated]] int syscall(SYS_sigprocmask, int " how ,
-.BI " const old_kernel_sigset_t *_Nullable " set ,
-.BI " old_kernel_sigset_t *_Nullable " oldset );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR sigprocmask ():
-.nf
- _POSIX_C_SOURCE
-.fi
-.SH DESCRIPTION
-.BR sigprocmask ()
-is used to fetch and/or change the signal mask of the calling thread.
-The signal mask is the set of signals whose delivery is currently
-blocked for the caller
-(see also
-.BR signal (7)
-for more details).
-.P
-The behavior of the call is dependent on the value of
-.IR how ,
-as follows.
-.TP
-.B SIG_BLOCK
-The set of blocked signals is the union of the current set and the
-.I set
-argument.
-.TP
-.B SIG_UNBLOCK
-The signals in
-.I set
-are removed from the current set of blocked signals.
-It is permissible to attempt to unblock a signal which is not blocked.
-.TP
-.B SIG_SETMASK
-The set of blocked signals is set to the argument
-.IR set .
-.P
-If
-.I oldset
-is non-NULL, the previous value of the signal mask is stored in
-.IR oldset .
-.P
-If
-.I set
-is NULL, then the signal mask is unchanged (i.e.,
-.I how
-is ignored),
-but the current value of the signal mask is nevertheless returned in
-.I oldset
-(if it is not NULL).
-.P
-A set of functions for modifying and inspecting variables of type
-.I sigset_t
-("signal sets") is described in
-.BR sigsetops (3).
-.P
-The use of
-.BR sigprocmask ()
-is unspecified in a multithreaded process; see
-.BR pthread_sigmask (3).
-.SH RETURN VALUE
-.BR sigprocmask ()
-returns 0 on success.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-The
-.I set
-or
-.I oldset
-argument points outside the process's allocated address space.
-.TP
-.B EINVAL
-Either the value specified in
-.I how
-was invalid or the kernel does not support the size passed in
-.IR sigsetsize .
-.SH VERSIONS
-.SS C library/kernel differences
-The kernel's definition of
-.I sigset_t
-differs in size from that used
-by the C library.
-In this manual page, the former is referred to as
-.I kernel_sigset_t
-(it is nevertheless named
-.I sigset_t
-in the kernel sources).
-.P
-The glibc wrapper function for
-.BR sigprocmask ()
-silently ignores attempts to block the two real-time signals that
-are used internally by the NPTL threading implementation.
-See
-.BR nptl (7)
-for details.
-.P
-The original Linux system call was named
-.BR sigprocmask ().
-However, with the addition of real-time signals in Linux 2.2,
-the fixed-size, 32-bit
-.I sigset_t
-(referred to as
-.I old_kernel_sigset_t
-in this manual page)
-type supported by that system call was no longer fit for purpose.
-Consequently, a new system call,
-.BR rt_sigprocmask (),
-was added to support an enlarged
-.I sigset_t
-type
-(referred to as
-.I kernel_sigset_t
-in this manual page).
-The new system call takes a fourth argument,
-.IR "size_t sigsetsize" ,
-which specifies the size in bytes of the signal sets in
-.I set
-and
-.IR oldset .
-This argument is currently required to have a fixed architecture-specific value
-(equal to
-.IR sizeof(kernel_sigset_t) ).
-.\" sizeof(kernel_sigset_t) == _NSIG / 8,
-.\" which equals to 8 on most architectures, but e.g. on MIPS it's 16.
-.P
-The glibc
-.BR sigprocmask ()
-wrapper function hides these details from us, transparently calling
-.BR rt_sigprocmask ()
-when the kernel provides it.
-.\"
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.SH NOTES
-It is not possible to block
-.BR SIGKILL " or " SIGSTOP .
-Attempts to do so are silently ignored.
-.P
-Each of the threads in a process has its own signal mask.
-.P
-A child created via
-.BR fork (2)
-inherits a copy of its parent's signal mask;
-the signal mask is preserved across
-.BR execve (2).
-.P
-If
-.BR SIGBUS ,
-.BR SIGFPE ,
-.BR SIGILL ,
-or
-.B SIGSEGV
-are generated
-while they are blocked, the result is undefined,
-unless the signal was generated by
-.BR kill (2),
-.BR sigqueue (3),
-or
-.BR raise (3).
-.P
-See
-.BR sigsetops (3)
-for details on manipulating signal sets.
-.P
-Note that it is permissible (although not very useful) to specify both
-.I set
-and
-.I oldset
-as NULL.
-.SH SEE ALSO
-.BR kill (2),
-.BR pause (2),
-.BR sigaction (2),
-.BR signal (2),
-.BR sigpending (2),
-.BR sigsuspend (2),
-.BR pthread_sigmask (3),
-.BR sigqueue (3),
-.BR sigsetops (3),
-.BR signal (7)
diff --git a/man2/sigreturn.2 b/man2/sigreturn.2
deleted file mode 100644
index 1a34ef54c..000000000
--- a/man2/sigreturn.2
+++ /dev/null
@@ -1,151 +0,0 @@
-.\" Copyright (C) 2008, 2014, Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Created Sat Aug 21 1995 Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\" Modified Tue Oct 22 22:09:03 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" 2008-06-26, mtk, added some more detail on the work done by sigreturn()
-.\" 2014-12-05, mtk, rewrote all of the rest of the original page
-.\"
-.TH sigreturn 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sigreturn, rt_sigreturn \- return from signal handler and cleanup stack frame
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B int sigreturn(...);
-.fi
-.SH DESCRIPTION
-If the Linux kernel determines that an unblocked
-signal is pending for a process, then,
-at the next transition back to user mode in that process
-(e.g., upon return from a system call or
-when the process is rescheduled onto the CPU),
-it creates a new frame on the user-space stack where it
-saves various pieces of process context
-(processor status word, registers, signal mask, and signal stack settings).
-.\" See arch/x86/kernel/signal.c::__setup_frame() [in Linux 3.17 source code]
-.P
-The kernel also arranges that, during the transition back to user mode,
-the signal handler is called, and that, upon return from the handler,
-control passes to a piece of user-space code commonly called
-the "signal trampoline".
-The signal trampoline code in turn calls
-.BR sigreturn ().
-.P
-This
-.BR sigreturn ()
-call undoes everything that was
-done\[em]changing the process's signal mask, switching signal stacks (see
-.BR sigaltstack "(2))\[em]in"
-order to invoke the signal handler.
-Using the information that was earlier saved on the user-space stack,
-.BR sigreturn ()
-restores the process's signal mask, switches stacks,
-and restores the process's context
-(processor flags and registers,
-including the stack pointer and instruction pointer),
-so that the process resumes execution
-at the point where it was interrupted by the signal.
-.SH RETURN VALUE
-.BR sigreturn ()
-never returns.
-.SH VERSIONS
-Many UNIX-type systems have a
-.BR sigreturn ()
-system call or near equivalent.
-However, this call is not specified in POSIX,
-and details of its behavior vary across systems.
-.SH STANDARDS
-None.
-.SH NOTES
-.BR sigreturn ()
-exists only to allow the implementation of signal handlers.
-It should
-.B never
-be called directly.
-(Indeed, a simple
-.BR sigreturn ()
-.\" See sysdeps/unix/sysv/linux/sigreturn.c and
-.\" signal/sigreturn.c in the glibc source
-wrapper in the GNU C library simply returns \-1, with
-.I errno
-set to
-.BR ENOSYS .)
-Details of the arguments (if any) passed to
-.BR sigreturn ()
-vary depending on the architecture.
-(On some architectures, such as x86-64,
-.BR sigreturn ()
-takes no arguments, since all of the information that it requires
-is available in the stack frame that was previously created by the
-kernel on the user-space stack.)
-.P
-Once upon a time, UNIX systems placed the signal trampoline code
-onto the user stack.
-Nowadays, pages of the user stack are protected so as to
-disallow code execution.
-Thus, on contemporary Linux systems, depending on the architecture,
-the signal trampoline code lives either in the
-.BR vdso (7)
-or in the C library.
-In the latter case,
-.\" See, for example, sysdeps/unix/sysv/linux/i386/sigaction.c and
-.\" sysdeps/unix/sysv/linux/x86_64/sigaction.c in the glibc (2.20) source.
-the C library's
-.BR sigaction (2)
-wrapper function informs the kernel of the location of the trampoline code
-by placing its address in the
-.I sa_restorer
-field of the
-.I sigaction
-structure,
-and sets the
-.B SA_RESTORER
-flag in the
-.I sa_flags
-field.
-.P
-The saved process context information is placed in a
-.I ucontext_t
-structure (see
-.IR <sys/ucontext.h> ).
-That structure is visible within the signal handler
-as the third argument of a handler established via
-.BR sigaction (2)
-with the
-.B SA_SIGINFO
-flag.
-.P
-On some other UNIX systems,
-the operation of the signal trampoline differs a little.
-In particular, on some systems, upon transitioning back to user mode,
-the kernel passes control to the trampoline (rather than the signal handler),
-and the trampoline code calls the signal handler (and then calls
-.BR sigreturn ()
-once the handler returns).
-.\"
-.SS C library/kernel differences
-The original Linux system call was named
-.BR sigreturn ().
-However, with the addition of real-time signals in Linux 2.2,
-a new system call,
-.BR rt_sigreturn (),
-was added to support an enlarged
-.I sigset_t
-type.
-The GNU C library
-hides these details from us, transparently employing
-.BR rt_sigreturn ()
-when the kernel provides it.
-.\"
-.SH SEE ALSO
-.BR kill (2),
-.BR restart_syscall (2),
-.BR sigaltstack (2),
-.BR signal (2),
-.BR getcontext (3),
-.BR signal (7),
-.BR vdso (7)
diff --git a/man2/sigsuspend.2 b/man2/sigsuspend.2
deleted file mode 100644
index 01f9c9dcc..000000000
--- a/man2/sigsuspend.2
+++ /dev/null
@@ -1,131 +0,0 @@
-.\" Copyright (c) 2005 Michael Kerrisk
-.\" based on earlier work by faith@cs.unc.edu and
-.\" Mike Battersby <mib@deakin.edu.au>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2005-09-15, mtk, Created new page by splitting off from sigaction.2
-.\"
-.TH sigsuspend 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sigsuspend, rt_sigsuspend \- wait for a signal
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <signal.h>
-.P
-.BI "int sigsuspend(const sigset_t *" mask );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR sigsuspend ():
-.nf
- _POSIX_C_SOURCE
-.fi
-.SH DESCRIPTION
-.BR sigsuspend ()
-temporarily replaces the signal mask of the calling thread with the
-mask given by
-.I mask
-and then suspends the thread until delivery of a signal whose
-action is to invoke a signal handler or to terminate a process.
-.P
-If the signal terminates the process, then
-.BR sigsuspend ()
-does not return.
-If the signal is caught, then
-.BR sigsuspend ()
-returns after the signal handler returns,
-and the signal mask is restored to the state before the call to
-.BR sigsuspend ().
-.P
-It is not possible to block
-.B SIGKILL
-or
-.BR SIGSTOP ;
-specifying these signals in
-.I mask
-has no effect on the thread's signal mask.
-.SH RETURN VALUE
-.BR sigsuspend ()
-always returns \-1, with
-.I errno
-set to indicate the error (normally,
-.BR EINTR ).
-.SH ERRORS
-.TP
-.B EFAULT
-.I mask
-points to memory which is not a valid part of the process address space.
-.TP
-.B EINTR
-The call was interrupted by a signal; see
-.BR signal (7).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.SS C library/kernel differences
-The original Linux system call was named
-.BR sigsuspend ().
-However, with the addition of real-time signals in Linux 2.2,
-the fixed-size, 32-bit
-.I sigset_t
-type supported by that system call was no longer fit for purpose.
-Consequently, a new system call,
-.BR rt_sigsuspend (),
-was added to support an enlarged
-.I sigset_t
-type.
-The new system call takes a second argument,
-.IR "size_t sigsetsize" ,
-which specifies the size in bytes of the signal set in
-.IR mask .
-This argument is currently required to have the value
-.I sizeof(sigset_t)
-(or the error
-.B EINVAL
-results).
-The glibc
-.BR sigsuspend ()
-wrapper function hides these details from us, transparently calling
-.BR rt_sigsuspend ()
-when the kernel provides it.
-.\"
-.SH NOTES
-Normally,
-.BR sigsuspend ()
-is used in conjunction with
-.BR sigprocmask (2)
-in order to prevent delivery of a signal during the execution of a
-critical code section.
-The caller first blocks the signals with
-.BR sigprocmask (2).
-When the critical code has completed, the caller then waits for the
-signals by calling
-.BR sigsuspend ()
-with the signal mask that was returned by
-.BR sigprocmask (2)
-(in the
-.I oldset
-argument).
-.P
-See
-.BR sigsetops (3)
-for details on manipulating signal sets.
-.SH SEE ALSO
-.BR kill (2),
-.BR pause (2),
-.BR sigaction (2),
-.BR signal (2),
-.BR sigprocmask (2),
-.BR sigwaitinfo (2),
-.BR sigsetops (3),
-.BR sigwait (3),
-.BR signal (7)
diff --git a/man2/sigtimedwait.2 b/man2/sigtimedwait.2
deleted file mode 100644
index 1b13df15d..000000000
--- a/man2/sigtimedwait.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sigwaitinfo.2
diff --git a/man2/sigwaitinfo.2 b/man2/sigwaitinfo.2
deleted file mode 100644
index 5d302dfb5..000000000
--- a/man2/sigwaitinfo.2
+++ /dev/null
@@ -1,231 +0,0 @@
-.\" Copyright (c) 2002 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH sigwaitinfo 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sigwaitinfo, sigtimedwait, rt_sigtimedwait \- synchronously wait
-for queued signals
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <signal.h>
-.P
-.BI "int sigwaitinfo(const sigset_t *restrict " set ,
-.BI " siginfo_t *_Nullable restrict " info );
-.BI "int sigtimedwait(const sigset_t *restrict " set ,
-.BI " siginfo_t *_Nullable restrict " info ,
-.BI " const struct timespec *restrict " timeout );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR sigwaitinfo (),
-.BR sigtimedwait ():
-.nf
- _POSIX_C_SOURCE >= 199309L
-.fi
-.SH DESCRIPTION
-.BR sigwaitinfo ()
-suspends execution of the calling thread until one of the signals in
-.I set
-is pending.
-(If one of the signals in
-.I set
-is already pending for the calling thread,
-.BR sigwaitinfo ()
-will return immediately.)
-.P
-.BR sigwaitinfo ()
-removes the signal from the set of pending
-signals and returns the signal number as its function result.
-If the
-.I info
-argument is not NULL,
-then the buffer that it points to is used to return a structure of type
-.I siginfo_t
-(see
-.BR sigaction (2))
-containing information about the signal.
-.P
-If multiple signals in
-.I set
-are pending for the caller, the signal that is retrieved by
-.BR sigwaitinfo ()
-is determined according to the usual ordering rules; see
-.BR signal (7)
-for further details.
-.P
-.BR sigtimedwait ()
-operates in exactly the same way as
-.BR sigwaitinfo ()
-except that it has an additional argument,
-.IR timeout ,
-which specifies the interval for which
-the thread is suspended waiting for a signal.
-(This interval will be rounded up to the system clock granularity,
-and kernel scheduling delays mean that the interval
-may overrun by a small amount.)
-This argument is a
-.BR timespec (3)
-structure.
-.P
-If both fields of this structure are specified as 0, a poll is performed:
-.BR sigtimedwait ()
-returns immediately, either with information about a signal that
-was pending for the caller, or with an error
-if none of the signals in
-.I set
-was pending.
-.SH RETURN VALUE
-On success, both
-.BR sigwaitinfo ()
-and
-.BR sigtimedwait ()
-return a signal number (i.e., a value greater than zero).
-On failure, both calls return \-1, with
-.I errno
-set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-No signal in
-.I set
-became pending within the
-.I timeout
-period specified to
-.BR sigtimedwait ().
-.TP
-.B EINTR
-The wait was interrupted by a signal handler; see
-.BR signal (7).
-(This handler was for a signal other than one of those in
-.IR set .)
-.TP
-.B EINVAL
-.I timeout
-was invalid.
-.SH VERSIONS
-.SS C library/kernel differences
-On Linux,
-.BR sigwaitinfo ()
-is a library function implemented on top of
-.BR sigtimedwait ().
-.P
-The glibc wrapper functions for
-.BR sigwaitinfo ()
-and
-.BR sigtimedwait ()
-silently ignore attempts to wait for the two real-time signals that
-are used internally by the NPTL threading implementation.
-See
-.BR nptl (7)
-for details.
-.P
-The original Linux system call was named
-.BR sigtimedwait ().
-However, with the addition of real-time signals in Linux 2.2,
-the fixed-size, 32-bit
-.I sigset_t
-type supported by that system call was no longer fit for purpose.
-Consequently, a new system call,
-.BR rt_sigtimedwait (),
-was added to support an enlarged
-.I sigset_t
-type.
-The new system call takes a fourth argument,
-.IR "size_t sigsetsize" ,
-which specifies the size in bytes of the signal set in
-.IR set .
-This argument is currently required to have the value
-.I sizeof(sigset_t)
-(or the error
-.B EINVAL
-results).
-The glibc
-.BR sigtimedwait ()
-wrapper function hides these details from us, transparently calling
-.BR rt_sigtimedwait ()
-when the kernel provides it.
-.\"
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001.
-.SH NOTES
-In normal usage, the calling program blocks the signals in
-.I set
-via a prior call to
-.BR sigprocmask (2)
-(so that the default disposition for these signals does not occur if they
-become pending between successive calls to
-.BR sigwaitinfo ()
-or
-.BR sigtimedwait ())
-and does not establish handlers for these signals.
-In a multithreaded program,
-the signal should be blocked in all threads, in order to prevent
-the signal being treated according to its default disposition in
-a thread other than the one calling
-.BR sigwaitinfo ()
-or
-.BR sigtimedwait ().
-.P
-The set of signals that is pending for a given thread is the
-union of the set of signals that is pending specifically for that thread
-and the set of signals that is pending for the process as a whole (see
-.BR signal (7)).
-.P
-Attempts to wait for
-.B SIGKILL
-and
-.B SIGSTOP
-are silently ignored.
-.P
-If multiple threads of a process are blocked
-waiting for the same signal(s) in
-.BR sigwaitinfo ()
-or
-.BR sigtimedwait (),
-then exactly one of the threads will actually receive the
-signal if it becomes pending for the process as a whole;
-which of the threads receives the signal is indeterminate.
-.P
-.BR sigwaitinfo ()
-or
-.BR sigtimedwait ()
-can't be used to receive signals that
-are synchronously generated, such as the
-.B SIGSEGV
-signal that results from accessing an invalid memory address
-or the
-.B SIGFPE
-signal that results from an arithmetic error.
-Such signals can be caught only via a signal handler.
-.P
-POSIX leaves the meaning of a NULL value for the
-.I timeout
-argument of
-.BR sigtimedwait ()
-unspecified, permitting the possibility that this has the same meaning
-as a call to
-.BR sigwaitinfo (),
-and indeed this is what is done on Linux.
-.SH SEE ALSO
-.BR kill (2),
-.BR sigaction (2),
-.BR signal (2),
-.BR signalfd (2),
-.BR sigpending (2),
-.BR sigprocmask (2),
-.BR sigqueue (3),
-.BR sigsetops (3),
-.BR sigwait (3),
-.BR timespec (3),
-.BR signal (7),
-.BR time (7)
diff --git a/man2/socket.2 b/man2/socket.2
deleted file mode 100644
index 4490f6c1a..000000000
--- a/man2/socket.2
+++ /dev/null
@@ -1,493 +0,0 @@
-'\" t
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" $Id: socket.2,v 1.4 1999/05/13 11:33:42 freitag Exp $
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998, 1999 by Andi Kleen <ak@muc.de>
-.\" Modified 2002-07-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH socket 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-socket \- create an endpoint for communication
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int socket(int " domain ", int " type ", int " protocol );
-.fi
-.SH DESCRIPTION
-.BR socket ()
-creates an endpoint for communication and returns a file descriptor
-that refers to that endpoint.
-The file descriptor returned by a successful call will be
-the lowest-numbered file descriptor not currently open for the process.
-.P
-The
-.I domain
-argument specifies a communication domain; this selects the protocol
-family which will be used for communication.
-These families are defined in
-.IR <sys/socket.h> .
-The formats currently understood by the Linux kernel include:
-.TS
-tab(:);
-l1 lw40 l.
-Name:Purpose:Man page
-T{
-.B AF_UNIX
-T}:T{
-Local communication
-T}:T{
-.BR unix (7)
-T}
-T{
-.B AF_LOCAL
-T}:T{
-Synonym for
-.B AF_UNIX
-T}:T{
-T}
-T{
-.B AF_INET
-T}:IPv4 Internet protocols:T{
-.BR ip (7)
-T}
-T{
-.B AF_AX25
-T}:T{
-Amateur radio AX.25 protocol
-T}:T{
-.\" Part of ax25-tools
-.BR ax25 (4)
-T}
-T{
-.B AF_IPX
-T}:IPX \- Novell protocols:
-T{
-.B AF_APPLETALK
-T}:AppleTalk:T{
-.BR ddp (7)
-T}
-T{
-.B AF_X25
-T}:ITU-T X.25 / ISO/IEC\~8208 protocol:T{
-.BR x25 (7)
-T}
-T{
-.B AF_INET6
-T}:IPv6 Internet protocols:T{
-.BR ipv6 (7)
-T}
-T{
-.B AF_DECnet
-T}:T{
-DECnet protocol sockets
-T}
-T{
-.B AF_KEY
-T}:T{
-Key management protocol, originally developed for usage with IPsec
-T}
-T{
-.B AF_NETLINK
-T}:T{
-Kernel user interface device
-T}:T{
-.BR netlink (7)
-T}
-T{
-.B AF_PACKET
-T}:T{
-Low-level packet interface
-T}:T{
-.BR packet (7)
-T}
-T{
-.B AF_RDS
-T}:T{
-.\" commit: 639b321b4d8f4e412bfbb2a4a19bfebc1e68ace4
-Reliable Datagram Sockets (RDS) protocol
-T}:T{
-.\" rds-tools: https://github.com/oracle/rds-tools/blob/master/rds.7
-.\" rds-tools: https://github.com/oracle/rds-tools/blob/master/rds-rdma.7
-.BR rds (7)
-.br
-.BR rds\-rdma (7)
-T}
-T{
-.B AF_PPPOX
-T}:T{
-Generic PPP transport layer, for setting up L2 tunnels
-(L2TP and PPPoE)
-T}
-T{
-.B AF_LLC
-T}:T{
-.\" linux-history commit: 34beb106cde7da233d4df35dd3d6cf4fee937caa
-Logical link control (IEEE 802.2 LLC) protocol
-T}
-T{
-.B AF_IB
-T}:T{
-.\" commits: 8d36eb01da5d371f..ce117ffac2e93334
-InfiniBand native addressing
-T}
-T{
-.B AF_MPLS
-T}:T{
-.\" commits: 0189197f441602acdca3f97750d392a895b778fd
-Multiprotocol Label Switching
-T}
-T{
-.B AF_CAN
-T}:T{
-.\" commits: 8dbde28d9711475a..5423dd67bd0108a1
-Controller Area Network automotive bus protocol
-T}
-T{
-.B AF_TIPC
-T}:T{
-.\" commits: b97bf3fd8f6a16966d4f18983b2c40993ff937d4
-TIPC, "cluster domain sockets" protocol
-T}
-T{
-.B AF_BLUETOOTH
-T}:T{
-.\" commits: 8d36eb01da5d371f..ce117ffac2e93334
-Bluetooth low-level socket protocol
-T}
-T{
-.B AF_ALG
-T}:T{
-.\" commit: 03c8efc1ffeb6b82a22c1af8dd908af349563314
-Interface to kernel crypto API
-T}
-T{
-.B AF_VSOCK
-T}:T{
-.\" commit: d021c344051af91f42c5ba9fdedc176740cbd238
-VSOCK (originally "VMWare VSockets") protocol
-for hypervisor-guest communication
-T}:T{
-.BR vsock (7)
-T}
-T{
-.B AF_KCM
-T}:T{
-.\" commit: 03c8efc1ffeb6b82a22c1af8dd908af349563314
-KCM (kernel connection multiplexer) interface
-T}
-T{
-.B AF_XDP
-T}:T{
-.\" commit: c0c77d8fb787cfe0c3fca689c2a30d1dad4eaba7
-XDP (express data path) interface
-T}
-.TE
-.P
-Further details of the above address families,
-as well as information on several other address families, can be found in
-.BR address_families (7).
-.P
-The socket has the indicated
-.IR type ,
-which specifies the communication semantics.
-Currently defined types
-are:
-.TP 16
-.B SOCK_STREAM
-Provides sequenced, reliable, two-way, connection-based byte streams.
-An out-of-band data transmission mechanism may be supported.
-.TP
-.B SOCK_DGRAM
-Supports datagrams (connectionless, unreliable messages of a fixed
-maximum length).
-.TP
-.B SOCK_SEQPACKET
-Provides a sequenced, reliable, two-way connection-based data
-transmission path for datagrams of fixed maximum length; a consumer is
-required to read an entire packet with each input system call.
-.TP
-.B SOCK_RAW
-Provides raw network protocol access.
-.TP
-.B SOCK_RDM
-Provides a reliable datagram layer that does not guarantee ordering.
-.TP
-.B SOCK_PACKET
-Obsolete and should not be used in new programs;
-see
-.BR packet (7).
-.P
-Some socket types may not be implemented by all protocol families.
-.P
-Since Linux 2.6.27, the
-.I type
-argument serves a second purpose:
-in addition to specifying a socket type,
-it may include the bitwise OR of any of the following values,
-to modify the behavior of
-.BR socket ():
-.TP 16
-.B SOCK_NONBLOCK
-Set the
-.B O_NONBLOCK
-file status flag on the open file description (see
-.BR open (2))
-referred to by the new file descriptor.
-Using this flag saves extra calls to
-.BR fcntl (2)
-to achieve the same result.
-.TP
-.B SOCK_CLOEXEC
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.P
-The
-.I protocol
-specifies a particular protocol to be used with the socket.
-Normally only a single protocol exists to support a particular
-socket type within a given protocol family, in which case
-.I protocol
-can be specified as 0.
-However, it is possible that many protocols may exist, in
-which case a particular protocol must be specified in this manner.
-The protocol number to use is specific to the \*(lqcommunication domain\*(rq
-in which communication is to take place; see
-.BR protocols (5).
-See
-.BR getprotoent (3)
-on how to map protocol name strings to protocol numbers.
-.P
-Sockets of type
-.B SOCK_STREAM
-are full-duplex byte streams.
-They do not preserve
-record boundaries.
-A stream socket must be in
-a
-.I connected
-state before any data may be sent or received on it.
-A connection to
-another socket is created with a
-.BR connect (2)
-call.
-Once connected, data may be transferred using
-.BR read (2)
-and
-.BR write (2)
-calls or some variant of the
-.BR send (2)
-and
-.BR recv (2)
-calls.
-When a session has been completed a
-.BR close (2)
-may be performed.
-Out-of-band data may also be transmitted as described in
-.BR send (2)
-and received as described in
-.BR recv (2).
-.P
-The communications protocols which implement a
-.B SOCK_STREAM
-ensure that data is not lost or duplicated.
-If a piece of data for which
-the peer protocol has buffer space cannot be successfully transmitted
-within a reasonable length of time, then the connection is considered
-to be dead.
-When
-.B SO_KEEPALIVE
-is enabled on the socket the protocol checks in a protocol-specific
-manner if the other end is still alive.
-A
-.B SIGPIPE
-signal is raised if a process sends or receives
-on a broken stream; this causes naive processes,
-which do not handle the signal, to exit.
-.B SOCK_SEQPACKET
-sockets employ the same system calls as
-.B SOCK_STREAM
-sockets.
-The only difference is that
-.BR read (2)
-calls will return only the amount of data requested,
-and any data remaining in the arriving packet will be discarded.
-Also all message boundaries in incoming datagrams are preserved.
-.P
-.B SOCK_DGRAM
-and
-.B SOCK_RAW
-sockets allow sending of datagrams to correspondents named in
-.BR sendto (2)
-calls.
-Datagrams are generally received with
-.BR recvfrom (2),
-which returns the next datagram along with the address of its sender.
-.P
-.B SOCK_PACKET
-is an obsolete socket type to receive raw packets directly from the
-device driver.
-Use
-.BR packet (7)
-instead.
-.P
-An
-.BR fcntl (2)
-.B F_SETOWN
-operation can be used to specify a process or process group to receive a
-.B SIGURG
-signal when the out-of-band data arrives or
-.B SIGPIPE
-signal when a
-.B SOCK_STREAM
-connection breaks unexpectedly.
-This operation may also be used to set the process or process group
-that receives the I/O and asynchronous notification of I/O events via
-.BR SIGIO .
-Using
-.B F_SETOWN
-is equivalent to an
-.BR ioctl (2)
-call with the
-.B FIOSETOWN
-or
-.B SIOCSPGRP
-argument.
-.P
-When the network signals an error condition to the protocol module (e.g.,
-using an ICMP message for IP) the pending error flag is set for the socket.
-The next operation on this socket will return the error code of the pending
-error.
-For some protocols it is possible to enable a per-socket error queue
-to retrieve detailed information about the error; see
-.B IP_RECVERR
-in
-.BR ip (7).
-.P
-The operation of sockets is controlled by socket level
-.IR options .
-These options are defined in
-.IR <sys/socket.h> .
-The functions
-.BR setsockopt (2)
-and
-.BR getsockopt (2)
-are used to set and get options.
-.SH RETURN VALUE
-On success, a file descriptor for the new socket is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Permission to create a socket of the specified type and/or protocol
-is denied.
-.TP
-.B EAFNOSUPPORT
-The implementation does not support the specified address family.
-.TP
-.B EINVAL
-Unknown protocol, or protocol family not available.
-.TP
-.B EINVAL
-.\" Since Linux 2.6.27
-Invalid flags in
-.IR type .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.BR ENOBUFS " or " ENOMEM
-Insufficient memory is available.
-The socket cannot be
-created until sufficient resources are freed.
-.TP
-.B EPROTONOSUPPORT
-The protocol type or the specified protocol is not
-supported within this domain.
-.P
-Other errors may be generated by the underlying protocol modules.
-.SH STANDARDS
-POSIX.1-2008.
-.P
-.B SOCK_NONBLOCK
-and
-.B SOCK_CLOEXEC
-are Linux-specific.
-.SH HISTORY
-POSIX.1-2001, 4.4BSD.
-.P
-.BR socket ()
-appeared in 4.2BSD.
-It is generally portable to/from
-non-BSD systems supporting clones of the BSD socket layer (including
-System\ V variants).
-.P
-The manifest constants used under 4.x BSD for protocol families
-are
-.BR PF_UNIX ,
-.BR PF_INET ,
-and so on, while
-.BR AF_UNIX ,
-.BR AF_INET ,
-and so on are used for address
-families.
-However, already the BSD man page promises: "The protocol
-family generally is the same as the address family", and subsequent
-standards use AF_* everywhere.
-.SH EXAMPLES
-An example of the use of
-.BR socket ()
-is shown in
-.BR getaddrinfo (3).
-.SH SEE ALSO
-.BR accept (2),
-.BR bind (2),
-.BR close (2),
-.BR connect (2),
-.BR fcntl (2),
-.BR getpeername (2),
-.BR getsockname (2),
-.BR getsockopt (2),
-.BR ioctl (2),
-.BR listen (2),
-.BR read (2),
-.BR recv (2),
-.BR select (2),
-.BR send (2),
-.BR shutdown (2),
-.BR socketpair (2),
-.BR write (2),
-.BR getprotoent (3),
-.BR address_families (7),
-.BR ip (7),
-.BR socket (7),
-.BR tcp (7),
-.BR udp (7),
-.BR unix (7)
-.P
-\[lq]An Introductory 4.3BSD Interprocess Communication Tutorial\[rq]
-and
-\[lq]BSD Interprocess Communication Tutorial\[rq],
-reprinted in
-.I UNIX Programmer's Supplementary Documents Volume 1.
diff --git a/man2/socketcall.2 b/man2/socketcall.2
deleted file mode 100644
index 086ed6bdb..000000000
--- a/man2/socketcall.2
+++ /dev/null
@@ -1,185 +0,0 @@
-'\" t
-.\" Copyright (c) 1995 Michael Chastain (mec@shell.portal.com), 15 April 1995.
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Modified Tue Oct 22 22:11:53 1996 by Eric S. Raymond <esr@thyrsus.com>
-.TH socketcall 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-socketcall \- socket system calls
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <linux/net.h>" " /* Definition of " SYS_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_socketcall " */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_socketcall, int " call ", unsigned long *" args );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR socketcall (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR socketcall ()
-is a common kernel entry point for the socket system calls.
-.I call
-determines which socket function to invoke.
-.I args
-points to a block containing the actual arguments,
-which are passed through to the appropriate call.
-.P
-User programs should call the appropriate functions by their usual names.
-Only standard library implementors and kernel hackers need to know about
-.BR socketcall ().
-.P
-.TS
-tab(:);
-l l.
-\fIcall\fR:Man page
-T{
-.B SYS_SOCKET
-T}:T{
-.BR socket (2)
-T}
-T{
-.B SYS_BIND
-T}:T{
-.BR bind (2)
-T}
-T{
-.B SYS_CONNECT
-T}:T{
-.BR connect (2)
-T}
-T{
-.B SYS_LISTEN
-T}:T{
-.BR listen (2)
-T}
-T{
-.B SYS_ACCEPT
-T}:T{
-.BR accept (2)
-T}
-T{
-.B SYS_GETSOCKNAME
-T}:T{
-.BR getsockname (2)
-T}
-T{
-.B SYS_GETPEERNAME
-T}:T{
-.BR getpeername (2)
-T}
-T{
-.B SYS_SOCKETPAIR
-T}:T{
-.BR socketpair (2)
-T}
-T{
-.B SYS_SEND
-T}:T{
-.BR send (2)
-T}
-T{
-.B SYS_RECV
-T}:T{
-.BR recv (2)
-T}
-T{
-.B SYS_SENDTO
-T}:T{
-.BR sendto (2)
-T}
-T{
-.B SYS_RECVFROM
-T}:T{
-.BR recvfrom (2)
-T}
-T{
-.B SYS_SHUTDOWN
-T}:T{
-.BR shutdown (2)
-T}
-T{
-.B SYS_SETSOCKOPT
-T}:T{
-.BR setsockopt (2)
-T}
-T{
-.B SYS_GETSOCKOPT
-T}:T{
-.BR getsockopt (2)
-T}
-T{
-.B SYS_SENDMSG
-T}:T{
-.BR sendmsg (2)
-T}
-T{
-.B SYS_RECVMSG
-T}:T{
-.BR recvmsg (2)
-T}
-T{
-.B SYS_ACCEPT4
-T}:T{
-.BR accept4 (2)
-T}
-T{
-.B SYS_RECVMMSG
-T}:T{
-.BR recvmmsg (2)
-T}
-T{
-.B SYS_SENDMMSG
-T}:T{
-.BR sendmmsg (2)
-T}
-.TE
-.SH VERSIONS
-On some architectures\[em]for example, x86-64 and ARM\[em]there is no
-.BR socketcall ()
-system call; instead
-.BR socket (2),
-.BR accept (2),
-.BR bind (2),
-and so on really are implemented as separate system calls.
-.SH STANDARDS
-Linux.
-.P
-On x86-32,
-.BR socketcall ()
-was historically the only entry point for the sockets API.
-However, starting in Linux 4.3,
-.\" commit 9dea5dc921b5f4045a18c63eb92e84dc274d17eb
-direct system calls are provided on x86-32 for the sockets API.
-This facilitates the creation of
-.BR seccomp (2)
-filters that filter sockets system calls
-(for new user-space binaries that are compiled
-to use the new entry points)
-and also provides a (very) small performance improvement.
-.SH SEE ALSO
-.BR accept (2),
-.BR bind (2),
-.BR connect (2),
-.BR getpeername (2),
-.BR getsockname (2),
-.BR getsockopt (2),
-.BR listen (2),
-.BR recv (2),
-.BR recvfrom (2),
-.BR recvmsg (2),
-.BR send (2),
-.BR sendmsg (2),
-.BR sendto (2),
-.BR setsockopt (2),
-.BR shutdown (2),
-.BR socket (2),
-.BR socketpair (2)
diff --git a/man2/socketpair.2 b/man2/socketpair.2
deleted file mode 100644
index 17d3bb35c..000000000
--- a/man2/socketpair.2
+++ /dev/null
@@ -1,116 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)socketpair.2 6.4 (Berkeley) 3/10/91
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2002-07-22 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2008-10-11, mtk: Add description of SOCK_NONBLOCK and SOCK_CLOEXEC
-.\"
-.TH socketpair 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-socketpair \- create a pair of connected sockets
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/socket.h>
-.P
-.BI "int socketpair(int " domain ", int " type ", int " protocol \
-", int " sv [2]);
-.fi
-.SH DESCRIPTION
-The
-.BR socketpair ()
-call creates an unnamed pair of connected sockets in the specified
-.IR domain ,
-of the specified
-.IR type ,
-and using the optionally specified
-.IR protocol .
-For further details of these arguments, see
-.BR socket (2).
-.P
-The file descriptors used in referencing the new sockets are returned in
-.I sv[0]
-and
-.IR sv[1] .
-The two sockets are indistinguishable.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned,
-.I errno
-is set to indicate the error, and
-.I sv
-is left unchanged.
-.P
-On Linux (and other systems),
-.BR socketpair ()
-does not modify
-.I sv
-on failure.
-A requirement standardizing this behavior was added in POSIX.1-2008 TC2.
-.\" http://austingroupbugs.net/view.php?id=483
-.SH ERRORS
-.TP
-.B EAFNOSUPPORT
-The specified address family is not supported on this machine.
-.TP
-.B EFAULT
-The address
-.I sv
-does not specify a valid part of the process address space.
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B EOPNOTSUPP
-The specified protocol does not support creation of socket pairs.
-.TP
-.B EPROTONOSUPPORT
-The specified protocol is not supported on this machine.
-.SH VERSIONS
-On Linux, the only supported domains for this call are
-.B AF_UNIX
-(or synonymously,
-.BR AF_LOCAL )
-and
-.B AF_TIPC
-.\" commit: 70b03759e9ecfae400605fa34f3d7154cccbbba3
-(since Linux 4.12).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, 4.4BSD.
-.P
-.BR socketpair ()
-first appeared in 4.2BSD.
-It is generally portable to/from
-non-BSD systems supporting clones of the BSD socket layer (including
-System\ V variants).
-.P
-Since Linux 2.6.27,
-.BR socketpair ()
-supports the
-.B SOCK_NONBLOCK
-and
-.B SOCK_CLOEXEC
-flags in the
-.I type
-argument, as described in
-.BR socket (2).
-.SH SEE ALSO
-.BR pipe (2),
-.BR read (2),
-.BR socket (2),
-.BR write (2),
-.BR socket (7),
-.BR unix (7)
diff --git a/man2/splice.2 b/man2/splice.2
deleted file mode 100644
index e5d05a05c..000000000
--- a/man2/splice.2
+++ /dev/null
@@ -1,266 +0,0 @@
-.\" This manpage is Copyright (C) 2006 Jens Axboe
-.\" and Copyright (C) 2006 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH splice 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-splice \- splice data to/from a pipe
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #define _FILE_OFFSET_BITS 64
-.B #include <fcntl.h>
-.P
-.BI "ssize_t splice(int " fd_in ", off_t *_Nullable " off_in ,
-.BI " int " fd_out ", off_t *_Nullable " off_out ,
-.BI " size_t " len ", unsigned int " flags );
-.\" Return type was long before glibc 2.7
-.fi
-.SH DESCRIPTION
-.BR splice ()
-moves data between two file descriptors
-without copying between kernel address space and user address space.
-It transfers up to
-.I len
-bytes of data from the file descriptor
-.I fd_in
-to the file descriptor
-.IR fd_out ,
-where one of the file descriptors must refer to a pipe.
-.P
-The following semantics apply for
-.I fd_in
-and
-.IR off_in :
-.IP \[bu] 3
-If
-.I fd_in
-refers to a pipe, then
-.I off_in
-must be NULL.
-.IP \[bu]
-If
-.I fd_in
-does not refer to a pipe and
-.I off_in
-is NULL, then bytes are read from
-.I fd_in
-starting from the file offset,
-and the file offset is adjusted appropriately.
-.IP \[bu]
-If
-.I fd_in
-does not refer to a pipe and
-.I off_in
-is not NULL, then
-.I off_in
-must point to a buffer which specifies the starting
-offset from which bytes will be read from
-.IR fd_in ;
-in this case, the file offset of
-.I fd_in
-is not changed.
-.P
-Analogous statements apply for
-.I fd_out
-and
-.IR off_out .
-.P
-The
-.I flags
-argument is a bit mask that is composed by ORing together
-zero or more of the following values:
-.TP
-.B SPLICE_F_MOVE
-Attempt to move pages instead of copying.
-This is only a hint to the kernel:
-pages may still be copied if the kernel cannot move the
-pages from the pipe, or if
-the pipe buffers don't refer to full pages.
-The initial implementation of this flag was buggy:
-therefore starting in Linux 2.6.21 it is a no-op
-(but is still permitted in a
-.BR splice ()
-call);
-in the future, a correct implementation may be restored.
-.TP
-.B SPLICE_F_NONBLOCK
-Do not block on I/O.
-This makes the splice pipe operations nonblocking, but
-.BR splice ()
-may nevertheless block because the file descriptors that
-are spliced to/from may block (unless they have the
-.B O_NONBLOCK
-flag set).
-.TP
-.B SPLICE_F_MORE
-More data will be coming in a subsequent splice.
-This is a helpful hint when
-the
-.I fd_out
-refers to a socket (see also the description of
-.B MSG_MORE
-in
-.BR send (2),
-and the description of
-.B TCP_CORK
-in
-.BR tcp (7)).
-.TP
-.B SPLICE_F_GIFT
-Unused for
-.BR splice ();
-see
-.BR vmsplice (2).
-.SH RETURN VALUE
-Upon successful completion,
-.BR splice ()
-returns the number of bytes
-spliced to or from the pipe.
-.P
-A return value of 0 means end of input.
-If
-.I fd_in
-refers to a pipe, then this means that there was no data to transfer,
-and it would not make sense to block because there are no writers
-connected to the write end of the pipe.
-.P
-On error,
-.BR splice ()
-returns \-1 and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-.B SPLICE_F_NONBLOCK
-was specified in
-.I flags
-or one of the file descriptors had been marked as nonblocking
-.RB ( O_NONBLOCK ),
-and the operation would block.
-.TP
-.B EBADF
-One or both file descriptors are not valid,
-or do not have proper read-write mode.
-.TP
-.B EINVAL
-The target filesystem doesn't support splicing.
-.TP
-.B EINVAL
-The target file is opened in append mode.
-.\" The append-mode error is given since Linux 2.6.27; in earlier kernels,
-.\" splice() in append mode was broken
-.TP
-.B EINVAL
-Neither of the file descriptors refers to a pipe.
-.TP
-.B EINVAL
-An offset was given for a nonseekable device (e.g., a pipe).
-.TP
-.B EINVAL
-.I fd_in
-and
-.I fd_out
-refer to the same pipe.
-.TP
-.B ENOMEM
-Out of memory.
-.TP
-.B ESPIPE
-Either
-.I off_in
-or
-.I off_out
-was not NULL, but the corresponding file descriptor refers to a pipe.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.17,
-glibc 2.5.
-.P
-In Linux 2.6.30 and earlier,
-exactly one of
-.I fd_in
-and
-.I fd_out
-was required to be a pipe.
-Since Linux 2.6.31,
-.\" commit 7c77f0b3f9208c339a4b40737bb2cb0f0319bb8d
-both arguments may refer to pipes.
-.SH NOTES
-The three system calls
-.BR splice (),
-.BR vmsplice (2),
-and
-.BR tee (2),
-provide user-space programs with full control over an arbitrary
-kernel buffer, implemented within the kernel using the same type
-of buffer that is used for a pipe.
-In overview, these system calls perform the following tasks:
-.TP
-.BR splice ()
-moves data from the buffer to an arbitrary file descriptor, or vice versa,
-or from one buffer to another.
-.TP
-.BR tee (2)
-"copies" the data from one buffer to another.
-.TP
-.BR vmsplice (2)
-"copies" data from user space into the buffer.
-.P
-Though we talk of copying, actual copies are generally avoided.
-The kernel does this by implementing a pipe buffer as a set
-of reference-counted pointers to pages of kernel memory.
-The kernel creates "copies" of pages in a buffer by creating new
-pointers (for the output buffer) referring to the pages,
-and increasing the reference counts for the pages:
-only pointers are copied, not the pages of the buffer.
-.\"
-.\" Linus: Now, imagine using the above in a media server, for example.
-.\" Let's say that a year or two has passed, so that the video drivers
-.\" have been updated to be able to do the splice thing, and what can
-.\" you do? You can:
-.\"
-.\" - splice from the (mpeg or whatever - let's just assume that the video
-.\" input is either digital or does the encoding on its own - like they
-.\" pretty much all do) video input into a pipe (remember: no copies - the
-.\" video input will just DMA directly into memory, and splice will just
-.\" set up the pages in the pipe buffer)
-.\" - tee that pipe to split it up
-.\" - splice one end to a file (ie "save the compressed stream to disk")
-.\" - splice the other end to a real-time video decoder window for your
-.\" real-time viewing pleasure.
-.\"
-.\" Linus: Now, the advantage of splice()/tee() is that you can
-.\" do zero-copy movement of data, and unlike sendfile() you can
-.\" do it on _arbitrary_ data (and, as shown by "tee()", it's more
-.\" than just sending the data to somebody else: you can duplicate
-.\" the data and choose to forward it to two or more different
-.\" users - for things like logging etc.).
-.\"
-.P
-.B _FILE_OFFSET_BITS
-should be defined to be 64 in code that uses non-null
-.I off_in
-or
-.I off_out
-or that takes the address of
-.BR splice ,
-if the code is intended to be portable
-to traditional 32-bit x86 and ARM platforms where
-.BR off_t 's
-width defaults to 32 bits.
-.SH EXAMPLES
-See
-.BR tee (2).
-.SH SEE ALSO
-.BR copy_file_range (2),
-.BR sendfile (2),
-.BR tee (2),
-.BR vmsplice (2),
-.BR pipe (7)
diff --git a/man2/spu_create.2 b/man2/spu_create.2
deleted file mode 100644
index 9f137a047..000000000
--- a/man2/spu_create.2
+++ /dev/null
@@ -1,276 +0,0 @@
-.\" Copyright (c) International Business Machines Corp., 2006
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" HISTORY:
-.\" 2005-09-28, created by Arnd Bergmann <arndb@de.ibm.com>
-.\" 2006-06-16, revised by Eduardo M. Fleury <efleury@br.ibm.com>
-.\" 2007-07-10, some polishing by mtk
-.\" 2007-09-28, updates for newer kernels by Jeremy Kerr <jk@ozlabs.org>
-.\"
-.TH spu_create 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-spu_create \- create a new spu context
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/spu.h>" " /* Definition of " SPU_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_spu_create, const char *" pathname \
-", unsigned int " flags ,
-.BI " mode_t " mode ", int " neighbor_fd );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR spu_create (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR spu_create ()
-system call is used on PowerPC machines that implement the
-Cell Broadband Engine Architecture in order to access Synergistic
-Processor Units (SPUs).
-It creates a new logical context for an SPU in
-.I pathname
-and returns a file descriptor associated with it.
-.I pathname
-must refer to a nonexistent directory in the mount point of
-the SPU filesystem
-.RB ( spufs ).
-If
-.BR spu_create ()
-is successful, a directory is created at
-.I pathname
-and it is populated with the files described in
-.BR spufs (7).
-.P
-When a context is created,
-the returned file descriptor can only be passed to
-.BR spu_run (2),
-used as the
-.I dirfd
-argument to the
-.B *at
-family of system calls (e.g.,
-.BR openat (2)),
-or closed;
-other operations are not defined.
-A logical SPU
-context is destroyed (along with all files created within the context's
-.I pathname
-directory) once the last reference to the context has gone;
-this usually occurs when the file descriptor returned by
-.BR spu_create ()
-is closed.
-.P
-The
-.I mode
-argument (minus any bits set in the process's
-.BR umask (2))
-specifies the permissions used for creating the new directory in
-.BR spufs .
-See
-.BR stat (2)
-for a full list of the possible
-.I mode
-values.
-.P
-The
-.I neighbor_fd
-is used only when the
-.B SPU_CREATE_AFFINITY_SPU
-flag is specified; see below.
-.P
-The
-.I flags
-argument can be zero or any bitwise OR-ed
-combination of the following constants:
-.TP
-.B SPU_CREATE_EVENTS_ENABLED
-Rather than using signals for reporting DMA errors, use the
-.I event
-argument to
-.BR spu_run (2).
-.TP
-.B SPU_CREATE_GANG
-Create an SPU gang instead of a context.
-(A gang is a group of SPU contexts that are
-functionally related to each other and which share common scheduling
-parameters\[em]priority and policy.
-In the future, gang scheduling may be implemented causing
-the group to be switched in and out as a single unit.)
-.IP
-A new directory will be created at the location specified by the
-.I pathname
-argument.
-This gang may be used to hold other SPU contexts, by providing
-a pathname that is within the gang directory to further calls to
-.BR spu_create ().
-.TP
-.B SPU_CREATE_NOSCHED
-Create a context that is not affected by the SPU scheduler.
-Once the context is run,
-it will not be scheduled out until it is destroyed by
-the creating process.
-.IP
-Because the context cannot be removed from the SPU, some functionality
-is disabled for
-.B SPU_CREATE_NOSCHED
-contexts.
-Only a subset of the files will be
-available in this context directory in
-.BR spufs .
-Additionally,
-.B SPU_CREATE_NOSCHED
-contexts cannot dump a core file when crashing.
-.IP
-Creating
-.B SPU_CREATE_NOSCHED
-contexts requires the
-.B CAP_SYS_NICE
-capability.
-.TP
-.B SPU_CREATE_ISOLATE
-Create an isolated SPU context.
-Isolated contexts are protected from some
-PPE (PowerPC Processing Element)
-operations,
-such as access to the SPU local store and the NPC register.
-.IP
-Creating
-.B SPU_CREATE_ISOLATE
-contexts also requires the
-.B SPU_CREATE_NOSCHED
-flag.
-.TP
-.BR SPU_CREATE_AFFINITY_SPU " (since Linux 2.6.23)"
-.\" commit 8e68e2f248332a9c3fd4f08258f488c209bd3e0c
-Create a context with affinity to another SPU context.
-This affinity information is used within the SPU scheduling algorithm.
-Using this flag requires that a file descriptor referring to
-the other SPU context be passed in the
-.I neighbor_fd
-argument.
-.TP
-.BR SPU_CREATE_AFFINITY_MEM " (since Linux 2.6.23)"
-.\" commit 8e68e2f248332a9c3fd4f08258f488c209bd3e0c
-Create a context with affinity to system memory.
-This affinity information
-is used within the SPU scheduling algorithm.
-.SH RETURN VALUE
-On success,
-.BR spu_create ()
-returns a new file descriptor.
-On failure, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-The current user does not have write access to the
-.BR spufs (7)
-mount point.
-.TP
-.B EEXIST
-An SPU context already exists at the given pathname.
-.TP
-.B EFAULT
-.I pathname
-is not a valid string pointer in the
-calling process's address space.
-.TP
-.B EINVAL
-.I pathname
-is not a directory in the
-.BR spufs (7)
-mount point, or invalid flags have been provided.
-.TP
-.B ELOOP
-Too many symbolic links were found while resolving
-.IR pathname .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENAMETOOLONG
-.I pathname
-is too long.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENODEV
-An isolated context was requested, but the hardware does not support
-SPU isolation.
-.TP
-.B ENOENT
-Part of
-.I pathname
-could not be resolved.
-.TP
-.B ENOMEM
-The kernel could not allocate all resources required.
-.TP
-.B ENOSPC
-There are not enough SPU resources available to create
-a new context or the user-specific limit for the number
-of SPU contexts has been reached.
-.TP
-.B ENOSYS
-The functionality is not provided by the current system, because
-either the hardware does not provide SPUs or the spufs module is not
-loaded.
-.TP
-.B ENOTDIR
-A part of
-.I pathname
-is not a directory.
-.TP
-.B EPERM
-The
-.B SPU_CREATE_NOSCHED
-flag has been given, but the user does not have the
-.B CAP_SYS_NICE
-capability.
-.SH FILES
-.I pathname
-must point to a location beneath the mount point of
-.BR spufs .
-By convention, it gets mounted in
-.IR /spu .
-.SH STANDARDS
-Linux on PowerPC.
-.SH HISTORY
-Linux 2.6.16.
-.P
-Prior to the addition of the
-.B SPU_CREATE_AFFINITY_SPU
-flag in Linux 2.6.23, the
-.BR spu_create ()
-system call took only three arguments (i.e., there was no
-.I neighbor_fd
-argument).
-.SH NOTES
-.BR spu_create ()
-is meant to be used from libraries that implement a more abstract
-interface to SPUs, not to be used from regular applications.
-See
-.UR http://www.bsc.es\:/projects\:/deepcomputing\:/linuxoncell/
-.UE
-for the recommended libraries.
-.SH EXAMPLES
-See
-.BR spu_run (2)
-for an example of the use of
-.BR spu_create ().
-.SH SEE ALSO
-.BR close (2),
-.BR spu_run (2),
-.BR capabilities (7),
-.BR spufs (7)
diff --git a/man2/spu_run.2 b/man2/spu_run.2
deleted file mode 100644
index d96592726..000000000
--- a/man2/spu_run.2
+++ /dev/null
@@ -1,260 +0,0 @@
-.\" Copyright (c) International Business Machines Corp., 2006
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" HISTORY:
-.\" 2005-09-28, created by Arnd Bergmann <arndb@de.ibm.com>
-.\" 2006-06-16, revised by Eduardo M. Fleury <efleury@br.ibm.com>
-.\" 2007-07-10, some polishing by mtk
-.\" 2007-09-28, updates for newer kernels, added example
-.\" by Jeremy Kerr <jk@ozlabs.org>
-.\"
-.TH spu_run 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-spu_run \- execute an SPU context
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/spu.h>" " /* Definition of " SPU_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_spu_run, int " fd ", uint32_t *" npc \
-", uint32_t *" event );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR spu_run (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR spu_run ()
-system call is used on PowerPC machines that implement the
-Cell Broadband Engine Architecture in order to access Synergistic
-Processor Units (SPUs).
-The
-.I fd
-argument is a file descriptor returned by
-.BR spu_create (2)
-that refers to a specific SPU context.
-When the context gets scheduled to a physical SPU,
-it starts execution at the instruction pointer passed in
-.IR npc .
-.P
-Execution of SPU code happens synchronously, meaning that
-.BR spu_run ()
-blocks while the SPU is still running.
-If there is a need
-to execute SPU code in parallel with other code on either the
-main CPU or other SPUs, a new thread of execution must be created
-first (e.g., using
-.BR pthread_create (3)).
-.P
-When
-.BR spu_run ()
-returns, the current value of the SPU program counter is written to
-.IR npc ,
-so successive calls to
-.BR spu_run ()
-can use the same
-.I npc
-pointer.
-.P
-The
-.I event
-argument provides a buffer for an extended status code.
-If the SPU
-context was created with the
-.B SPU_CREATE_EVENTS_ENABLED
-flag, then this buffer is populated by the Linux kernel before
-.BR spu_run ()
-returns.
-.P
-The status code may be one (or more) of the following constants:
-.TP
-.B SPE_EVENT_DMA_ALIGNMENT
-A DMA alignment error occurred.
-.TP
-.B SPE_EVENT_INVALID_DMA
-An invalid MFC DMA command was attempted.
-.\" SPE_EVENT_SPE_DATA_SEGMENT is defined, but does not seem to be generated
-.\" at any point (in Linux 5.9 sources).
-.TP
-.B SPE_EVENT_SPE_DATA_STORAGE
-A DMA storage error occurred.
-.TP
-.B SPE_EVENT_SPE_ERROR
-An illegal instruction was executed.
-.P
-NULL
-is a valid value for the
-.I event
-argument.
-In this case, the events will not be reported to the calling process.
-.SH RETURN VALUE
-On success,
-.BR spu_run ()
-returns the value of the
-.I spu_status
-register.
-On failure, it returns \-1, and
-.I errno
-is set to indicate the error.
-.P
-The
-.I spu_status
-register value is a bit mask of status codes and
-optionally a 14-bit code returned from the
-.B stop-and-signal
-instruction on the SPU.
-The bit masks for the status codes
-are:
-.TP
-.B 0x02
-SPU was stopped by a
-.B stop-and-signal
-instruction.
-.TP
-.B 0x04
-SPU was stopped by a
-.B halt
-instruction.
-.TP
-.B 0x08
-SPU is waiting for a channel.
-.TP
-.B 0x10
-SPU is in single-step mode.
-.TP
-.B 0x20
-SPU has tried to execute an invalid instruction.
-.TP
-.B 0x40
-SPU has tried to access an invalid channel.
-.TP
-.B 0x3fff0000
-The bits masked with this value contain the code returned from a
-.B stop-and-signal
-instruction.
-These bits are valid only if the 0x02 bit is set.
-.P
-If
-.BR spu_run ()
-has not returned an error, one or more bits among the lower eight
-ones are always set.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.B EFAULT
-.I npc
-is not a valid pointer, or
-.I event
-is non-NULL and an invalid pointer.
-.TP
-.B EINTR
-A signal occurred while
-.BR spu_run ()
-was in progress; see
-.BR signal (7).
-The
-.I npc
-value has been updated to the new program counter value if
-necessary.
-.TP
-.B EINVAL
-.I fd
-is not a valid file descriptor returned from
-.BR spu_create (2).
-.TP
-.B ENOMEM
-There was not enough memory available to handle a page fault
-resulting from a Memory Flow Controller (MFC) direct memory access.
-.TP
-.B ENOSYS
-The functionality is not provided by the current system, because
-either the hardware does not provide SPUs or the spufs module is not
-loaded.
-.SH STANDARDS
-Linux on PowerPC.
-.SH HISTORY
-Linux 2.6.16.
-.SH NOTES
-.BR spu_run ()
-is meant to be used from libraries that implement a more abstract
-interface to SPUs, not to be used from regular applications.
-See
-.UR http://www.bsc.es\:/projects\:/deepcomputing\:/linuxoncell/
-.UE
-for the recommended libraries.
-.SH EXAMPLES
-The following is an example of running a simple, one-instruction SPU
-program with the
-.BR spu_run ()
-system call.
-.P
-.\" SRC BEGIN (spu_run.c)
-.EX
-#include <err.h>
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <unistd.h>
-\&
-int main(void)
-{
- int context, fd, spu_status;
- uint32_t instruction, npc;
-\&
- context = syscall(SYS_spu_create, "/spu/example\-context", 0, 0755);
- if (context == \-1)
- err(EXIT_FAILURE, "spu_create");
-\&
- /*
- * Write a \[aq]stop 0x1234\[aq] instruction to the SPU\[aq]s
- * local store memory.
- */
- instruction = 0x00001234;
-\&
- fd = open("/spu/example\-context/mem", O_RDWR);
- if (fd == \-1)
- err(EXIT_FAILURE, "open");
- write(fd, &instruction, sizeof(instruction));
-\&
- /*
- * set npc to the starting instruction address of the
- * SPU program. Since we wrote the instruction at the
- * start of the mem file, the entry point will be 0x0.
- */
- npc = 0;
-\&
- spu_status = syscall(SYS_spu_run, context, &npc, NULL);
- if (spu_status == \-1)
- err(EXIT_FAILURE, "spu_run");
-\&
- /*
- * We should see a status code of 0x12340002:
- * 0x00000002 (spu was stopped due to stop\-and\-signal)
- * | 0x12340000 (the stop\-and\-signal code)
- */
- printf("SPU Status: %#08x\en", spu_status);
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.\" .SH AUTHORS
-.\" Arnd Bergmann <arndb@de.ibm.com>, Jeremy Kerr <jk@ozlabs.org>
-.SH SEE ALSO
-.BR close (2),
-.BR spu_create (2),
-.BR capabilities (7),
-.BR spufs (7)
diff --git a/man2/ssetmask.2 b/man2/ssetmask.2
deleted file mode 100644
index a7f99d206..000000000
--- a/man2/ssetmask.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sgetmask.2
diff --git a/man2/stat.2 b/man2/stat.2
deleted file mode 100644
index e15269a70..000000000
--- a/man2/stat.2
+++ /dev/null
@@ -1,539 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\" Parts Copyright (c) 1995 Nicolai Langfeldt (janl@ifi.uio.no), 1/1/95
-.\" and Copyright (c) 2006, 2007, 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-05-18 by Todd Larason <jtl@molehill.org>
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1995-01-09 by Richard Kettlewell <richard@greenend.org.uk>
-.\" Modified 1998-05-13 by Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
-.\" Modified 1999-07-06 by aeb & Albert Cahalan
-.\" Modified 2000-01-07 by aeb
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" 2007-06-08 mtk: Added example program
-.\" 2007-07-05 mtk: Added details on underlying system call interfaces
-.\"
-.TH stat 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-stat, fstat, lstat, fstatat \- get file status
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/stat.h>
-.P
-.BI "int stat(const char *restrict " pathname ,
-.BI " struct stat *restrict " statbuf );
-.BI "int fstat(int " fd ", struct stat *" statbuf );
-.BI "int lstat(const char *restrict " pathname ,
-.BI " struct stat *restrict " statbuf );
-.P
-.BR "#include <fcntl.h> " "/* Definition of " AT_* " constants */"
-.B #include <sys/stat.h>
-.P
-.BI "int fstatat(int " dirfd ", const char *restrict " pathname ,
-.BI " struct stat *restrict " statbuf ", int " flags );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR lstat ():
-.nf
- /* Since glibc 2.20 */ _DEFAULT_SOURCE
- || _XOPEN_SOURCE >= 500
-.\" _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.10: */ _POSIX_C_SOURCE >= 200112L
- || /* glibc 2.19 and earlier */ _BSD_SOURCE
-.fi
-.P
-.BR fstatat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-These functions return information about a file, in the buffer pointed to by
-.IR statbuf .
-No permissions are required on the file itself, but\[em]in the case of
-.BR stat (),
-.BR fstatat (),
-and
-.BR lstat ()\[em]execute
-(search) permission is required on all of the directories in
-.I pathname
-that lead to the file.
-.P
-.BR stat ()
-and
-.BR fstatat ()
-retrieve information about the file pointed to by
-.IR pathname ;
-the differences for
-.BR fstatat ()
-are described below.
-.P
-.BR lstat ()
-is identical to
-.BR stat (),
-except that if
-.I pathname
-is a symbolic link, then it returns information about the link itself,
-not the file that the link refers to.
-.P
-.BR fstat ()
-is identical to
-.BR stat (),
-except that the file about which information is to be retrieved
-is specified by the file descriptor
-.IR fd .
-.\"
-.SS The stat structure
-All of these system calls return a
-.I stat
-structure (see
-.BR stat (3type)).
-.P
-.\" Background: inode attributes are modified with i_mutex held, but
-.\" read by stat() without taking the mutex.
-.IR Note :
-for performance and simplicity reasons, different fields in the
-.I stat
-structure may contain state information from different moments
-during the execution of the system call.
-For example, if
-.I st_mode
-or
-.I st_uid
-is changed by another process by calling
-.BR chmod (2)
-or
-.BR chown (2),
-.BR stat ()
-might return the old
-.I st_mode
-together with the new
-.IR st_uid ,
-or the old
-.I st_uid
-together with the new
-.IR st_mode .
-.SS fstatat()
-The
-.BR fstatat ()
-system call is a more general interface for accessing file information
-which can still provide exactly the behavior of each of
-.BR stat (),
-.BR lstat (),
-and
-.BR fstat ().
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR stat ()
-and
-.BR lstat ()
-for a relative pathname).
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR stat ()
-and
-.BR lstat ()).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-.I flags
-can either be 0, or include one or more of the following flags ORed:
-.TP
-.BR AT_EMPTY_PATH " (since Linux 2.6.39)"
-.\" commit 65cfc6722361570bfe255698d9cd4dccaf47570d
-If
-.I pathname
-is an empty string, operate on the file referred to by
-.I dirfd
-(which may have been obtained using the
-.BR open (2)
-.B O_PATH
-flag).
-In this case,
-.I dirfd
-can refer to any type of file, not just a directory, and
-the behavior of
-.BR fstatat ()
-is similar to that of
-.BR fstat ().
-If
-.I dirfd
-is
-.BR AT_FDCWD ,
-the call operates on the current working directory.
-This flag is Linux-specific; define
-.B _GNU_SOURCE
-.\" Before glibc 2.16, defining _ATFILE_SOURCE sufficed
-to obtain its definition.
-.TP
-.BR AT_NO_AUTOMOUNT " (since Linux 2.6.38)"
-Don't automount the terminal ("basename") component of
-.IR pathname .
-Since Linux 3.1 this flag is ignored.
-Since Linux 4.11 this flag is implied.
-.TP
-.B AT_SYMLINK_NOFOLLOW
-If
-.I pathname
-is a symbolic link, do not dereference it:
-instead return information about the link itself, like
-.BR lstat ().
-(By default,
-.BR fstatat ()
-dereferences symbolic links, like
-.BR stat ().)
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR fstatat ().
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Search permission is denied for one of the directories
-in the path prefix of
-.IR pathname .
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.I fd
-is not a valid open file descriptor.
-.TP
-.B EBADF
-.RB ( fstatat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EFAULT
-Bad address.
-.TP
-.B EINVAL
-.RB ( fstatat ())
-Invalid flag specified in
-.IR flags .
-.TP
-.B ELOOP
-Too many symbolic links encountered while traversing the path.
-.TP
-.B ENAMETOOLONG
-.I pathname
-is too long.
-.TP
-.B ENOENT
-A component of
-.I pathname
-does not exist or is a dangling symbolic link.
-.TP
-.B ENOENT
-.I pathname
-is an empty string and
-.B AT_EMPTY_PATH
-was not specified in
-.IR flags .
-.TP
-.B ENOMEM
-Out of memory (i.e., kernel memory).
-.TP
-.B ENOTDIR
-A component of the path prefix of
-.I pathname
-is not a directory.
-.TP
-.B ENOTDIR
-.RB ( fstatat ())
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.TP
-.B EOVERFLOW
-.I pathname
-or
-.I fd
-refers to a file whose size, inode number,
-or number of blocks cannot be represented in, respectively, the types
-.IR off_t ,
-.IR ino_t ,
-or
-.IR blkcnt_t .
-This error can occur when, for example,
-an application compiled on a 32-bit platform without
-.I \-D_FILE_OFFSET_BITS=64
-calls
-.BR stat ()
-on a file whose size exceeds
-.I (1<<31)\-1
-bytes.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR stat ()
-.TQ
-.BR fstat ()
-.TQ
-.BR lstat ()
-SVr4, 4.3BSD, POSIX.1-2001.
-.\" SVr4 documents additional
-.\" .BR fstat ()
-.\" error conditions EINTR, ENOLINK, and EOVERFLOW. SVr4
-.\" documents additional
-.\" .BR stat ()
-.\" and
-.\" .BR lstat ()
-.\" error conditions EINTR, EMULTIHOP, ENOLINK, and EOVERFLOW.
-.TP
-.BR fstatat ()
-POSIX.1-2008.
-Linux 2.6.16,
-glibc 2.4.
-.P
-According to POSIX.1-2001,
-.BR lstat ()
-on a symbolic link need return valid information only in the
-.I st_size
-field and the file type of the
-.I st_mode
-field of the
-.I stat
-structure.
-POSIX.1-2008 tightens the specification, requiring
-.BR lstat ()
-to return valid information in all fields except the mode bits in
-.IR st_mode .
-.P
-Use of the
-.I st_blocks
-and
-.I st_blksize
-fields may be less portable.
-(They were introduced in BSD.
-The interpretation differs between systems,
-and possibly on a single system when NFS mounts are involved.)
-.SS C library/kernel differences
-Over time, increases in the size of the
-.I stat
-structure have led to three successive versions of
-.BR stat ():
-.IR sys_stat ()
-(slot
-.IR __NR_oldstat ),
-.IR sys_newstat ()
-(slot
-.IR __NR_stat ),
-and
-.IR sys_stat64 ()
-(slot
-.IR __NR_stat64 )
-on 32-bit platforms such as i386.
-The first two versions were already present in Linux 1.0
-(albeit with different names);
-.\" See include/asm-i386/stat.h in the Linux 2.4 source code for the
-.\" various versions of the structure definitions
-the last was added in Linux 2.4.
-Similar remarks apply for
-.BR fstat ()
-and
-.BR lstat ().
-.P
-The kernel-internal versions of the
-.I stat
-structure dealt with by the different versions are, respectively:
-.TP
-.I __old_kernel_stat
-The original structure, with rather narrow fields, and no padding.
-.TP
-.I stat
-Larger
-.I st_ino
-field and padding added to various parts of the structure to
-allow for future expansion.
-.TP
-.I stat64
-Even larger
-.I st_ino
-field,
-larger
-.I st_uid
-and
-.I st_gid
-fields to accommodate the Linux-2.4 expansion of UIDs and GIDs to 32 bits,
-and various other enlarged fields and further padding in the structure.
-(Various padding bytes were eventually consumed in Linux 2.6,
-with the advent of 32-bit device IDs and nanosecond components
-for the timestamp fields.)
-.P
-The glibc
-.BR stat ()
-wrapper function hides these details from applications,
-invoking the most recent version of the system call provided by the kernel,
-and repacking the returned information if required for old binaries.
-.\"
-.\" A note from Andries Brouwer, July 2007
-.\"
-.\" > Is the story not rather more complicated for some calls like
-.\" > stat(2)?
-.\"
-.\" Yes and no, mostly no. See /usr/include/sys/stat.h .
-.\"
-.\" The idea is here not so much that syscalls change, but that
-.\" the definitions of struct stat and of the types dev_t and mode_t change.
-.\" This means that libc (even if it does not call the kernel
-.\" but only calls some internal function) must know what the
-.\" format of dev_t or of struct stat is.
-.\" The communication between the application and libc goes via
-.\" the include file <sys/stat.h> that defines a _STAT_VER and
-.\" _MKNOD_VER describing the layout of the data that user space
-.\" uses. Each (almost each) occurrence of stat() is replaced by
-.\" an occurrence of xstat() where the first parameter of xstat()
-.\" is this version number _STAT_VER.
-.\"
-.\" Now, also the definitions used by the kernel change.
-.\" But glibc copes with this in the standard way, and the
-.\" struct stat as returned by the kernel is repacked into
-.\" the struct stat as expected by the application.
-.\" Thus, _STAT_VER and this setup cater for the application-libc
-.\" interface, rather than the libc-kernel interface.
-.\"
-.\" (Note that the details depend on gcc being used as c compiler.)
-.P
-On modern 64-bit systems, life is simpler: there is a single
-.BR stat ()
-system call and the kernel deals with a
-.I stat
-structure that contains fields of a sufficient size.
-.P
-The underlying system call employed by the glibc
-.BR fstatat ()
-wrapper function is actually called
-.BR fstatat64 ()
-or, on some architectures,
-.\" strace(1) shows the name "newfstatat" on x86-64
-.BR newfstatat ().
-.SH EXAMPLES
-The following program calls
-.BR lstat ()
-and displays selected fields in the returned
-.I stat
-structure.
-.P
-.\" SRC BEGIN (stat.c)
-.EX
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/stat.h>
-#include <sys/sysmacros.h>
-#include <time.h>
-\&
-int
-main(int argc, char *argv[])
-{
- struct stat sb;
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <pathname>\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- if (lstat(argv[1], &sb) == \-1) {
- perror("lstat");
- exit(EXIT_FAILURE);
- }
-\&
- printf("ID of containing device: [%x,%x]\en",
- major(sb.st_dev),
- minor(sb.st_dev));
-\&
- printf("File type: ");
-\&
- switch (sb.st_mode & S_IFMT) {
- case S_IFBLK: printf("block device\en"); break;
- case S_IFCHR: printf("character device\en"); break;
- case S_IFDIR: printf("directory\en"); break;
- case S_IFIFO: printf("FIFO/pipe\en"); break;
- case S_IFLNK: printf("symlink\en"); break;
- case S_IFREG: printf("regular file\en"); break;
- case S_IFSOCK: printf("socket\en"); break;
- default: printf("unknown?\en"); break;
- }
-\&
- printf("I\-node number: %ju\en", (uintmax_t) sb.st_ino);
-\&
- printf("Mode: %jo (octal)\en",
- (uintmax_t) sb.st_mode);
-\&
- printf("Link count: %ju\en", (uintmax_t) sb.st_nlink);
- printf("Ownership: UID=%ju GID=%ju\en",
- (uintmax_t) sb.st_uid, (uintmax_t) sb.st_gid);
-\&
- printf("Preferred I/O block size: %jd bytes\en",
- (intmax_t) sb.st_blksize);
- printf("File size: %jd bytes\en",
- (intmax_t) sb.st_size);
- printf("Blocks allocated: %jd\en",
- (intmax_t) sb.st_blocks);
-\&
- printf("Last status change: %s", ctime(&sb.st_ctime));
- printf("Last file access: %s", ctime(&sb.st_atime));
- printf("Last file modification: %s", ctime(&sb.st_mtime));
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR ls (1),
-.BR stat (1),
-.BR access (2),
-.BR chmod (2),
-.BR chown (2),
-.BR readlink (2),
-.BR statx (2),
-.BR utime (2),
-.BR stat (3type),
-.BR capabilities (7),
-.BR inode (7),
-.BR symlink (7)
diff --git a/man2/stat64.2 b/man2/stat64.2
deleted file mode 100644
index b1a86c195..000000000
--- a/man2/stat64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
diff --git a/man2/statfs.2 b/man2/statfs.2
deleted file mode 100644
index ffee7ee87..000000000
--- a/man2/statfs.2
+++ /dev/null
@@ -1,389 +0,0 @@
-.\" Copyright (C) 2003 Andries Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 2003-08-17 by Walter Harms
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH statfs 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-statfs, fstatfs \- get filesystem statistics
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/vfs.h> " "/* or <sys/statfs.h> */"
-.P
-.BI "int statfs(const char *" path ", struct statfs *" buf );
-.BI "int fstatfs(int " fd ", struct statfs *" buf );
-.fi
-.P
-Unless you need the
-.I f_type
-field, you should use the standard
-.BR statvfs (3)
-interface instead.
-.SH DESCRIPTION
-The
-.BR statfs ()
-system call returns information about a mounted filesystem.
-.I path
-is the pathname of any file within the mounted filesystem.
-.I buf
-is a pointer to a
-.I statfs
-structure defined approximately as follows:
-.P
-.in +4n
-.EX
-struct statfs {
- __fsword_t f_type; /* Type of filesystem (see below) */
- __fsword_t f_bsize; /* Optimal transfer block size */
- fsblkcnt_t f_blocks; /* Total data blocks in filesystem */
- fsblkcnt_t f_bfree; /* Free blocks in filesystem */
- fsblkcnt_t f_bavail; /* Free blocks available to
- unprivileged user */
- fsfilcnt_t f_files; /* Total inodes in filesystem */
- fsfilcnt_t f_ffree; /* Free inodes in filesystem */
- fsid_t f_fsid; /* Filesystem ID */
- __fsword_t f_namelen; /* Maximum length of filenames */
- __fsword_t f_frsize; /* Fragment size (since Linux 2.6) */
- __fsword_t f_flags; /* Mount flags of filesystem
- (since Linux 2.6.36) */
- __fsword_t f_spare[xxx];
- /* Padding bytes reserved for future use */
-};
-.EE
-.in
-.P
-The following filesystem types may appear in
-.IR f_type :
-.P
-.in +4n
-.EX
-ADFS_SUPER_MAGIC 0xadf5
-AFFS_SUPER_MAGIC 0xadff
-AFS_SUPER_MAGIC 0x5346414f
-ANON_INODE_FS_MAGIC 0x09041934 /* Anonymous inode FS (for
- pseudofiles that have no name;
- e.g., epoll, signalfd, bpf) */
-AUTOFS_SUPER_MAGIC 0x0187
-BDEVFS_MAGIC 0x62646576
-BEFS_SUPER_MAGIC 0x42465331
-BFS_MAGIC 0x1badface
-BINFMTFS_MAGIC 0x42494e4d
-BPF_FS_MAGIC 0xcafe4a11
-BTRFS_SUPER_MAGIC 0x9123683e
-BTRFS_TEST_MAGIC 0x73727279
-CGROUP_SUPER_MAGIC 0x27e0eb /* Cgroup pseudo FS */
-CGROUP2_SUPER_MAGIC 0x63677270 /* Cgroup v2 pseudo FS */
-CIFS_MAGIC_NUMBER 0xff534d42
-CODA_SUPER_MAGIC 0x73757245
-COH_SUPER_MAGIC 0x012ff7b7
-CRAMFS_MAGIC 0x28cd3d45
-DEBUGFS_MAGIC 0x64626720
-DEVFS_SUPER_MAGIC 0x1373 /* Linux 2.6.17 and earlier */
-DEVPTS_SUPER_MAGIC 0x1cd1
-ECRYPTFS_SUPER_MAGIC 0xf15f
-EFIVARFS_MAGIC 0xde5e81e4
-EFS_SUPER_MAGIC 0x00414a53
-EXT_SUPER_MAGIC 0x137d /* Linux 2.0 and earlier */
-EXT2_OLD_SUPER_MAGIC 0xef51
-EXT2_SUPER_MAGIC 0xef53
-EXT3_SUPER_MAGIC 0xef53
-EXT4_SUPER_MAGIC 0xef53
-F2FS_SUPER_MAGIC 0xf2f52010
-FUSE_SUPER_MAGIC 0x65735546
-FUTEXFS_SUPER_MAGIC 0xbad1dea /* Unused */
-HFS_SUPER_MAGIC 0x4244
-HOSTFS_SUPER_MAGIC 0x00c0ffee
-HPFS_SUPER_MAGIC 0xf995e849
-HUGETLBFS_MAGIC 0x958458f6
-ISOFS_SUPER_MAGIC 0x9660
-JFFS2_SUPER_MAGIC 0x72b6
-JFS_SUPER_MAGIC 0x3153464a
-MINIX_SUPER_MAGIC 0x137f /* original minix FS */
-MINIX_SUPER_MAGIC2 0x138f /* 30 char minix FS */
-MINIX2_SUPER_MAGIC 0x2468 /* minix V2 FS */
-MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 FS, 30 char names */
-MINIX3_SUPER_MAGIC 0x4d5a /* minix V3 FS, 60 char names */
-MQUEUE_MAGIC 0x19800202 /* POSIX message queue FS */
-MSDOS_SUPER_MAGIC 0x4d44
-MTD_INODE_FS_MAGIC 0x11307854
-NCP_SUPER_MAGIC 0x564c
-NFS_SUPER_MAGIC 0x6969
-NILFS_SUPER_MAGIC 0x3434
-NSFS_MAGIC 0x6e736673
-NTFS_SB_MAGIC 0x5346544e
-OCFS2_SUPER_MAGIC 0x7461636f
-OPENPROM_SUPER_MAGIC 0x9fa1
-OVERLAYFS_SUPER_MAGIC 0x794c7630
-PIPEFS_MAGIC 0x50495045
-PROC_SUPER_MAGIC 0x9fa0 /* /proc FS */
-PSTOREFS_MAGIC 0x6165676c
-QNX4_SUPER_MAGIC 0x002f
-QNX6_SUPER_MAGIC 0x68191122
-RAMFS_MAGIC 0x858458f6
-REISERFS_SUPER_MAGIC 0x52654973
-ROMFS_MAGIC 0x7275
-SECURITYFS_MAGIC 0x73636673
-SELINUX_MAGIC 0xf97cff8c
-SMACK_MAGIC 0x43415d53
-SMB_SUPER_MAGIC 0x517b
-SMB2_MAGIC_NUMBER 0xfe534d42
-SOCKFS_MAGIC 0x534f434b
-SQUASHFS_MAGIC 0x73717368
-SYSFS_MAGIC 0x62656572
-SYSV2_SUPER_MAGIC 0x012ff7b6
-SYSV4_SUPER_MAGIC 0x012ff7b5
-TMPFS_MAGIC 0x01021994
-TRACEFS_MAGIC 0x74726163
-UDF_SUPER_MAGIC 0x15013346
-UFS_MAGIC 0x00011954
-USBDEVICE_SUPER_MAGIC 0x9fa2
-V9FS_MAGIC 0x01021997
-VXFS_SUPER_MAGIC 0xa501fcf5
-XENFS_SUPER_MAGIC 0xabba1974
-XENIX_SUPER_MAGIC 0x012ff7b4
-XFS_SUPER_MAGIC 0x58465342
-_XIAFS_SUPER_MAGIC 0x012fd16d /* Linux 2.0 and earlier */
-.EE
-.in
-.P
-Most of these MAGIC constants are defined in
-.IR /usr/include/linux/magic.h ,
-and some are hardcoded in kernel sources.
-.P
-The
-.I f_flags
-field is a bit mask indicating mount options for the filesystem.
-It contains zero or more of the following bits:
-.\" XXX Keep this list in sync with statvfs(3)
-.TP
-.B ST_MANDLOCK
-Mandatory locking is permitted on the filesystem (see
-.BR fcntl (2)).
-.TP
-.B ST_NOATIME
-Do not update access times; see
-.BR mount (2).
-.TP
-.B ST_NODEV
-Disallow access to device special files on this filesystem.
-.TP
-.B ST_NODIRATIME
-Do not update directory access times; see
-.BR mount (2).
-.TP
-.B ST_NOEXEC
-Execution of programs is disallowed on this filesystem.
-.TP
-.B ST_NOSUID
-The set-user-ID and set-group-ID bits are ignored by
-.BR exec (3)
-for executable files on this filesystem.
-.TP
-.B ST_RDONLY
-This filesystem is mounted read-only.
-.TP
-.B ST_RELATIME
-Update atime relative to mtime/ctime; see
-.BR mount (2).
-.TP
-.B ST_SYNCHRONOUS
-Writes are synched to the filesystem immediately (see the description of
-.B O_SYNC
-in
-.BR open (2)).
-.TP
-.BR ST_NOSYMFOLLOW " (since Linux 5.10)"
-.\" dab741e0e02bd3c4f5e2e97be74b39df2523fc6e
-Symbolic links are not followed when resolving paths; see
-.BR mount (2).
-.P
-Nobody knows what
-.I f_fsid
-is supposed to contain (but see below).
-.P
-Fields that are undefined for a particular filesystem are set to 0.
-.P
-.BR fstatfs ()
-returns the same information about an open file referenced by descriptor
-.IR fd .
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-.RB ( statfs ())
-Search permission is denied for a component of the path prefix of
-.IR path .
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.RB ( fstatfs ())
-.I fd
-is not a valid open file descriptor.
-.TP
-.B EFAULT
-.I buf
-or
-.I path
-points to an invalid address.
-.TP
-.B EINTR
-The call was interrupted by a signal; see
-.BR signal (7).
-.TP
-.B EIO
-An I/O error occurred while reading from the filesystem.
-.TP
-.B ELOOP
-.RB ( statfs ())
-Too many symbolic links were encountered in translating
-.IR path .
-.TP
-.B ENAMETOOLONG
-.RB ( statfs ())
-.I path
-is too long.
-.TP
-.B ENOENT
-.RB ( statfs ())
-The file referred to by
-.I path
-does not exist.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOSYS
-The filesystem does not support this call.
-.TP
-.B ENOTDIR
-.RB ( statfs ())
-A component of the path prefix of
-.I path
-is not a directory.
-.TP
-.B EOVERFLOW
-Some values were too large to be represented in the returned struct.
-.SH VERSIONS
-.SS The f_fsid field
-Solaris, Irix, and POSIX have a system call
-.BR statvfs (2)
-that returns a
-.I "struct statvfs"
-(defined in
-.IR <sys/statvfs.h> )
-containing an
-.I "unsigned long"
-.IR f_fsid .
-Linux, SunOS, HP-UX, 4.4BSD have a system call
-.BR statfs ()
-that returns a
-.I "struct statfs"
-(defined in
-.IR <sys/vfs.h> )
-containing a
-.I fsid_t
-.IR f_fsid ,
-where
-.I fsid_t
-is defined as
-.IR "struct { int val[2]; }" .
-The same holds for FreeBSD, except that it uses the include file
-.IR <sys/mount.h> .
-.P
-The general idea is that
-.I f_fsid
-contains some random stuff such that the pair
-.RI ( f_fsid , ino )
-uniquely determines a file.
-Some operating systems use (a variation on) the device number,
-or the device number combined with the filesystem type.
-Several operating systems restrict giving out the
-.I f_fsid
-field to the superuser only (and zero it for unprivileged users),
-because this field is used in the filehandle of the filesystem
-when NFS-exported, and giving it out is a security concern.
-.P
-Under some operating systems, the
-.I fsid
-can be used as the second argument to the
-.BR sysfs (2)
-system call.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-The Linux
-.BR statfs ()
-was inspired by the 4.4BSD one
-(but they do not use the same structure).
-.P
-The original Linux
-.BR statfs ()
-and
-.BR fstatfs ()
-system calls were not designed with extremely large file sizes in mind.
-Subsequently, Linux 2.6
-added new
-.BR statfs64 ()
-and
-.BR fstatfs64 ()
-system calls that employ a new structure,
-.IR statfs64 .
-The new structure contains the same fields as the original
-.I statfs
-structure, but the sizes of various fields are increased,
-to accommodate large file sizes.
-The glibc
-.BR statfs ()
-and
-.BR fstatfs ()
-wrapper functions transparently deal with the kernel differences.
-.P
-LSB has deprecated the library calls
-.BR statfs ()
-and
-.BR fstatfs ()
-and tells us to use
-.BR statvfs (3)
-and
-.BR fstatvfs (3)
-instead.
-.SH NOTES
-The
-.I __fsword_t
-type used for various fields in the
-.I statfs
-structure definition is a glibc internal type,
-not intended for public use.
-This leaves the programmer in a bit of a conundrum when trying to copy
-or compare these fields to local variables in a program.
-Using
-.I "unsigned\ int"
-for such variables suffices on most systems.
-.P
-Some systems have only \fI<sys/vfs.h>\fP, other systems also have
-\fI<sys/statfs.h>\fP, where the former includes the latter.
-So it seems
-including the former is the best choice.
-.SH BUGS
-From Linux 2.6.38 up to and including Linux 3.1,
-.\" broken in commit ff0c7d15f9787b7e8c601533c015295cc68329f8
-.\" fixed in commit d70ef97baf048412c395bb5d65791d8fe133a52b
-.BR fstatfs ()
-failed with the error
-.B ENOSYS
-for file descriptors created by
-.BR pipe (2).
-.SH SEE ALSO
-.BR stat (2),
-.BR statvfs (3),
-.BR path_resolution (7)
diff --git a/man2/statfs64.2 b/man2/statfs64.2
deleted file mode 100644
index 923d3c0cc..000000000
--- a/man2/statfs64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/statfs.2
diff --git a/man2/statx.2 b/man2/statx.2
deleted file mode 100644
index 0dcf7e20b..000000000
--- a/man2/statx.2
+++ /dev/null
@@ -1,614 +0,0 @@
-'\" t
-.\" Copyright (c) 2017 David Howells <dhowells@redhat.com>
-.\"
-.\" Derived from the stat.2 manual page:
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\" Parts Copyright (c) 1995 Nicolai Langfeldt (janl@ifi.uio.no), 1/1/95
-.\" and Copyright (c) 2006, 2007, 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH statx 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-statx \- get file status (extended)
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE " "/* See feature_test_macros(7) */"
-.BR "#include <fcntl.h> " "/* Definition of " AT_* " constants */"
-.B #include <sys/stat.h>
-.P
-.BI "int statx(int " dirfd ", const char *restrict " pathname ", int " flags ,
-.BI " unsigned int " mask ", struct statx *restrict " statxbuf );
-.fi
-.SH DESCRIPTION
-This function returns information about a file, storing it in the buffer
-pointed to by
-.IR statxbuf .
-The returned buffer is a structure of the following type:
-.P
-.in +4n
-.EX
-struct statx {
- __u32 stx_mask; /* Mask of bits indicating
- filled fields */
- __u32 stx_blksize; /* Block size for filesystem I/O */
- __u64 stx_attributes; /* Extra file attribute indicators */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File type and mode */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* Total size in bytes */
- __u64 stx_blocks; /* Number of 512B blocks allocated */
- __u64 stx_attributes_mask;
- /* Mask to show what\[aq]s supported
- in stx_attributes */
-\&
- /* The following fields are file timestamps */
- struct statx_timestamp stx_atime; /* Last access */
- struct statx_timestamp stx_btime; /* Creation */
- struct statx_timestamp stx_ctime; /* Last status change */
- struct statx_timestamp stx_mtime; /* Last modification */
-\&
- /* If this file represents a device, then the next two
- fields contain the ID of the device */
- __u32 stx_rdev_major; /* Major ID */
- __u32 stx_rdev_minor; /* Minor ID */
-\&
- /* The next two fields contain the ID of the device
- containing the filesystem where the file resides */
- __u32 stx_dev_major; /* Major ID */
- __u32 stx_dev_minor; /* Minor ID */
-\&
- __u64 stx_mnt_id; /* Mount ID */
-\&
- /* Direct I/O alignment restrictions */
- __u32 stx_dio_mem_align;
- __u32 stx_dio_offset_align;
-};
-.EE
-.in
-.P
-The file timestamps are structures of the following type:
-.P
-.in +4n
-.EX
-struct statx_timestamp {
- __s64 tv_sec; /* Seconds since the Epoch (UNIX time) */
- __u32 tv_nsec; /* Nanoseconds since tv_sec */
-};
-.EE
-.in
-.P
-(Note that reserved space and padding are omitted.)
-.SS
-Invoking \fBstatx\fR():
-To access a file's status, no permissions are required on the file itself,
-but in the case of
-.BR statx ()
-with a pathname,
-execute (search) permission is required on all of the directories in
-.I pathname
-that lead to the file.
-.P
-.BR statx ()
-uses
-.IR pathname ,
-.IR dirfd ,
-and
-.I flags
-to identify the target file in one of the following ways:
-.TP
-An absolute pathname
-If
-.I pathname
-begins with a slash,
-then it is an absolute pathname that identifies the target file.
-In this case,
-.I dirfd
-is ignored.
-.TP
-A relative pathname
-If
-.I pathname
-is a string that begins with a character other than a slash and
-.I dirfd
-is
-.BR AT_FDCWD ,
-then
-.I pathname
-is a relative pathname that is interpreted relative to the process's
-current working directory.
-.TP
-A directory-relative pathname
-If
-.I pathname
-is a string that begins with a character other than a slash and
-.I dirfd
-is a file descriptor that refers to a directory, then
-.I pathname
-is a relative pathname that is interpreted relative to the directory
-referred to by
-.IR dirfd .
-(See
-.BR openat (2)
-for an explanation of why this is useful.)
-.TP
-By file descriptor
-If
-.I pathname
-is an empty string and the
-.B AT_EMPTY_PATH
-flag is specified in
-.I flags
-(see below),
-then the target file is the one referred to by the file descriptor
-.IR dirfd .
-.P
-.I flags
-can be used to influence a pathname-based lookup.
-A value for
-.I flags
-is constructed by ORing together zero or more of the following constants:
-.TP
-.B AT_EMPTY_PATH
-.\" commit 65cfc6722361570bfe255698d9cd4dccaf47570d
-If
-.I pathname
-is an empty string, operate on the file referred to by
-.I dirfd
-(which may have been obtained using the
-.BR open (2)
-.B O_PATH
-flag).
-In this case,
-.I dirfd
-can refer to any type of file, not just a directory.
-.IP
-If
-.I dirfd
-is
-.BR AT_FDCWD ,
-the call operates on the current working directory.
-.TP
-.B AT_NO_AUTOMOUNT
-Don't automount the terminal ("basename") component of
-.I pathname
-if it is a directory that is an automount point.
-This allows the caller to gather attributes of an automount point
-(rather than the location it would mount).
-This flag has no effect if the mount point has already been mounted over.
-.IP
-The
-.B AT_NO_AUTOMOUNT
-flag can be used in tools that scan directories
-to prevent mass-automounting of a directory of automount points.
-.IP
-All of
-.BR stat (2),
-.BR lstat (2),
-and
-.BR fstatat (2)
-act as though
-.B AT_NO_AUTOMOUNT
-was set.
-.TP
-.B AT_SYMLINK_NOFOLLOW
-If
-.I pathname
-is a symbolic link, do not dereference it:
-instead return information about the link itself, like
-.BR lstat (2).
-.P
-.I flags
-can also be used to control what sort of synchronization the kernel will do
-when querying a file on a remote filesystem.
-This is done by ORing in one of the following values:
-.TP
-.B AT_STATX_SYNC_AS_STAT
-Do whatever
-.BR stat (2)
-does.
-This is the default and is very much filesystem-specific.
-.TP
-.B AT_STATX_FORCE_SYNC
-Force the attributes to be synchronized with the server.
-This may require that
-a network filesystem perform a data writeback to get the timestamps correct.
-.TP
-.B AT_STATX_DONT_SYNC
-Don't synchronize anything, but rather just take whatever
-the system has cached if possible.
-This may mean that the information returned is approximate, but,
-on a network filesystem, it may not involve a round trip to the server - even
-if no lease is held.
-.P
-The
-.I mask
-argument to
-.BR statx ()
-is used to tell the kernel which fields the caller is interested in.
-.I mask
-is an ORed combination of the following constants:
-.P
-.in +4n
-.TS
-lB l.
-STATX_TYPE Want stx_mode & S_IFMT
-STATX_MODE Want stx_mode & \[ti]S_IFMT
-STATX_NLINK Want stx_nlink
-STATX_UID Want stx_uid
-STATX_GID Want stx_gid
-STATX_ATIME Want stx_atime
-STATX_MTIME Want stx_mtime
-STATX_CTIME Want stx_ctime
-STATX_INO Want stx_ino
-STATX_SIZE Want stx_size
-STATX_BLOCKS Want stx_blocks
-STATX_BASIC_STATS [All of the above]
-STATX_BTIME Want stx_btime
-STATX_ALL The same as STATX_BASIC_STATS | STATX_BTIME.
- It is deprecated and should not be used.
-STATX_MNT_ID Want stx_mnt_id (since Linux 5.8)
-STATX_DIOALIGN Want stx_dio_mem_align and stx_dio_offset_align
- (since Linux 6.1; support varies by filesystem)
-.TE
-.in
-.P
-Note that, in general, the kernel does
-.I not
-reject values in
-.I mask
-other than the above.
-(For an exception, see
-.B EINVAL
-in ERRORS.)
-Instead, it simply informs the caller which values are supported
-by this kernel and filesystem via the
-.I statx.stx_mask
-field.
-Therefore,
-.I "do not"
-simply set
-.I mask
-to
-.B UINT_MAX
-(all bits set),
-as one or more bits may, in the future, be used to specify an
-extension to the buffer.
-.SS
-The returned information
-The status information for the target file is returned in the
-.I statx
-structure pointed to by
-.IR statxbuf .
-Included in this is
-.I stx_mask
-which indicates what other information has been returned.
-.I stx_mask
-has the same format as the
-.I mask
-argument and bits are set in it to indicate
-which fields have been filled in.
-.P
-It should be noted that the kernel may return fields that weren't
-requested and may fail to return fields that were requested,
-depending on what the backing filesystem supports.
-(Fields that are given values despite being unrequested can just be ignored.)
-In either case,
-.I stx_mask
-will not be equal to
-.IR mask .
-.P
-If a filesystem does not support a field or if it has
-an unrepresentable value (for instance, a file with an exotic type),
-then the mask bit corresponding to that field will be cleared in
-.I stx_mask
-even if the user asked for it and a dummy value will be filled in for
-compatibility purposes if one is available (e.g., a dummy UID and GID may be
-specified to mount under some circumstances).
-.P
-A filesystem may also fill in fields that the caller didn't ask for if it has
-values for them available and the information is available at no extra cost.
-If this happens, the corresponding bits will be set in
-.IR stx_mask .
-.P
-.\" Background: inode attributes are modified with i_mutex held, but
-.\" read by stat() without taking the mutex.
-.IR Note :
-for performance and simplicity reasons, different fields in the
-.I statx
-structure may contain state information from different moments
-during the execution of the system call.
-For example, if
-.I stx_mode
-or
-.I stx_uid
-is changed by another process by calling
-.BR chmod (2)
-or
-.BR chown (2),
-.BR statx ()
-might return the old
-.I stx_mode
-together with the new
-.IR stx_uid ,
-or the old
-.I stx_uid
-together with the new
-.IR stx_mode .
-.P
-Apart from
-.I stx_mask
-(which is described above), the fields in the
-.I statx
-structure are:
-.TP
-.I stx_blksize
-The "preferred" block size for efficient filesystem I/O.
-(Writing to a file in
-smaller chunks may cause an inefficient read-modify-rewrite.)
-.TP
-.I stx_attributes
-Further status information about the file (see below for more information).
-.TP
-.I stx_nlink
-The number of hard links on a file.
-.TP
-.I stx_uid
-This field contains the user ID of the owner of the file.
-.TP
-.I stx_gid
-This field contains the ID of the group owner of the file.
-.TP
-.I stx_mode
-The file type and mode.
-See
-.BR inode (7)
-for details.
-.TP
-.I stx_ino
-The inode number of the file.
-.TP
-.I stx_size
-The size of the file (if it is a regular file or a symbolic link) in bytes.
-The size of a symbolic link is the length of the pathname it contains,
-without a terminating null byte.
-.TP
-.I stx_blocks
-The number of blocks allocated to the file on the medium, in 512-byte units.
-(This may be smaller than
-.IR stx_size /512
-when the file has holes.)
-.TP
-.I stx_attributes_mask
-A mask indicating which bits in
-.I stx_attributes
-are supported by the VFS and the filesystem.
-.TP
-.I stx_atime
-The file's last access timestamp.
-.TP
-.I stx_btime
-The file's creation timestamp.
-.TP
-.I stx_ctime
-The file's last status change timestamp.
-.TP
-.I stx_mtime
-The file's last modification timestamp.
-.TP
-.IR stx_dev_major " and " stx_dev_minor
-The device on which this file (inode) resides.
-.TP
-.IR stx_rdev_major " and " stx_rdev_minor
-The device that this file (inode) represents if the file is of block or
-character device type.
-.TP
-.I stx_mnt_id
-.\" commit fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60
-The mount ID of the mount containing the file.
-This is the same number reported by
-.BR name_to_handle_at (2)
-and corresponds to the number in the first field in one of the records in
-.IR /proc/self/mountinfo .
-.TP
-.I stx_dio_mem_align
-The alignment (in bytes) required for user memory buffers for direct I/O
-.RB ( O_DIRECT )
-on this file,
-or 0 if direct I/O is not supported on this file.
-.IP
-.B STATX_DIOALIGN
-.RI ( stx_dio_mem_align
-and
-.IR stx_dio_offset_align )
-is supported on block devices since Linux 6.1.
-The support on regular files varies by filesystem;
-it is supported by ext4, f2fs, and xfs since Linux 6.1.
-.TP
-.I stx_dio_offset_align
-The alignment (in bytes) required for file offsets and I/O segment lengths
-for direct I/O
-.RB ( O_DIRECT )
-on this file,
-or 0 if direct I/O is not supported on this file.
-This will only be nonzero if
-.I stx_dio_mem_align
-is nonzero, and vice versa.
-.P
-For further information on the above fields, see
-.BR inode (7).
-.\"
-.SS File attributes
-The
-.I stx_attributes
-field contains a set of ORed flags that indicate additional attributes
-of the file.
-Note that any attribute that is not indicated as supported by
-.I stx_attributes_mask
-has no usable value here.
-The bits in
-.I stx_attributes_mask
-correspond bit-by-bit to
-.IR stx_attributes .
-.P
-The flags are as follows:
-.TP
-.B STATX_ATTR_COMPRESSED
-The file is compressed by the filesystem and may take extra resources
-to access.
-.TP
-.B STATX_ATTR_IMMUTABLE
-The file cannot be modified: it cannot be deleted or renamed,
-no hard links can be created to this file and no data can be written to it.
-See
-.BR chattr (1).
-.TP
-.B STATX_ATTR_APPEND
-The file can only be opened in append mode for writing.
-Random access writing
-is not permitted.
-See
-.BR chattr (1).
-.TP
-.B STATX_ATTR_NODUMP
-File is not a candidate for backup when a backup program such as
-.BR dump (8)
-is run.
-See
-.BR chattr (1).
-.TP
-.B STATX_ATTR_ENCRYPTED
-A key is required to decrypt the file, which is encrypted by the filesystem.
-.TP
-.BR STATX_ATTR_VERITY " (since Linux 5.5)"
-.\" commit 3ad2522c64cff1f5aebb987b00683268f0cc7c29
-The file has fs-verity enabled.
-It cannot be written to, and all reads from it will be verified
-against a cryptographic hash that covers the
-entire file (e.g., via a Merkle tree).
-.TP
-.BR STATX_ATTR_DAX " (since Linux 5.8)"
-The file is in the DAX (CPU direct access) state.
-DAX state attempts to
-minimize software cache effects for both I/O and memory mappings of this file.
-It requires a filesystem which has been configured to support DAX.
-.IP
-DAX generally assumes all accesses are via CPU load / store instructions
-which can minimize overhead for small accesses,
-but may adversely affect CPU utilization for large transfers.
-.IP
-File I/O is done directly to/from user-space buffers and memory mapped I/O may
-be performed with direct memory mappings that bypass the kernel page cache.
-.IP
-While the DAX property tends to result in data being transferred synchronously,
-it does not give the same guarantees as the
-.B O_SYNC
-flag (see
-.BR open (2)),
-where data and the necessary metadata are transferred together.
-.IP
-A DAX file may support being mapped with the
-.B MAP_SYNC
-flag, which enables a
-program to use CPU cache flush instructions to persist CPU store operations
-without an explicit
-.BR fsync (2).
-See
-.BR mmap (2)
-for more information.
-.TP
-.BR STATX_ATTR_MOUNT_ROOT " (since Linux 5.8)"
-.\" commit 80340fe3605c0e78cfe496c3b3878be828cfdbfe
-The file is the root of a mount.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Search permission is denied for one of the directories
-in the path prefix of
-.IR pathname .
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EFAULT
-.I pathname
-or
-.I statxbuf
-is NULL or points to a location outside the process's
-accessible address space.
-.TP
-.B EINVAL
-Invalid flag specified in
-.IR flags .
-.TP
-.B EINVAL
-Reserved flag specified in
-.IR mask .
-(Currently, there is one such flag, designated by the constant
-.BR STATX__RESERVED ,
-with the value 0x80000000U.)
-.TP
-.B ELOOP
-Too many symbolic links encountered while traversing the pathname.
-.TP
-.B ENAMETOOLONG
-.I pathname
-is too long.
-.TP
-.B ENOENT
-A component of
-.I pathname
-does not exist, or
-.I pathname
-is an empty string and
-.B AT_EMPTY_PATH
-was not specified in
-.IR flags .
-.TP
-.B ENOMEM
-Out of memory (i.e., kernel memory).
-.TP
-.B ENOTDIR
-A component of the path prefix of
-.I pathname
-is not a directory or
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 4.11,
-glibc 2.28.
-.SH SEE ALSO
-.BR ls (1),
-.BR stat (1),
-.BR access (2),
-.BR chmod (2),
-.BR chown (2),
-.BR name_to_handle_at (2),
-.BR readlink (2),
-.BR stat (2),
-.BR utime (2),
-.BR proc (5),
-.BR capabilities (7),
-.BR inode (7),
-.BR symlink (7)
diff --git a/man2/stime.2 b/man2/stime.2
deleted file mode 100644
index 70ad2be83..000000000
--- a/man2/stime.2
+++ /dev/null
@@ -1,73 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 2001-03-16 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2004-05-27 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH stime 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-stime \- set time
-.SH SYNOPSIS
-.nf
-.B #include <time.h>
-.P
-.BI "[[deprecated]] int stime(const time_t *" t );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR stime ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- glibc 2.19 and earlier:
- _SVID_SOURCE
-.fi
-.SH DESCRIPTION
-.BR NOTE :
-This function is deprecated;
-use
-.BR clock_settime (2)
-instead.
-.P
-.BR stime ()
-sets the system's idea of the time and date.
-The time, pointed
-to by \fIt\fP, is measured in seconds since the
-Epoch, 1970-01-01 00:00:00 +0000 (UTC).
-.BR stime ()
-may be executed only by the superuser.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-Error in getting information from user space.
-.TP
-.B EPERM
-The calling process has insufficient privilege.
-Under Linux, the
-.B CAP_SYS_TIME
-privilege is required.
-.SH STANDARDS
-None.
-.SH HISTORY
-SVr4.
-.P
-Starting with glibc 2.31,
-this function is no longer available to newly linked applications
-and is no longer declared in
-.IR <time.h> .
-.SH SEE ALSO
-.BR date (1),
-.BR settimeofday (2),
-.BR capabilities (7)
diff --git a/man2/stty.2 b/man2/stty.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/stty.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/subpage_prot.2 b/man2/subpage_prot.2
deleted file mode 100644
index c006821c1..000000000
--- a/man2/subpage_prot.2
+++ /dev/null
@@ -1,118 +0,0 @@
-.\" Copyright (c) 2010 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" based on a proposal from Stephan Mueller <smueller@atsec.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Various pieces of text taken from the kernel source and the commentary
-.\" in Linux commit fa28237cfcc5827553044cbd6ee52e33692b0faa
-.\" both written by Paul Mackerras <paulus@samba.org>
-.\"
-.TH subpage_prot 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-subpage_prot \- define a subpage protection for an address range
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_subpage_prot, unsigned long " addr ", unsigned long " len ,
-.BI " uint32_t *" map );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR subpage_prot (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The PowerPC-specific
-.BR subpage_prot ()
-system call provides the facility to control the access
-permissions on individual 4\ kB subpages on systems configured with
-a page size of 64\ kB.
-.P
-The protection map is applied to the memory pages in the region starting at
-.I addr
-and continuing for
-.I len
-bytes.
-Both of these arguments must be aligned to a 64-kB boundary.
-.P
-The protection map is specified in the buffer pointed to by
-.IR map .
-The map has 2 bits per 4\ kB subpage;
-thus each 32-bit word specifies the protections of 16 4\ kB subpages
-inside a 64\ kB page
-(so, the number of 32-bit words pointed to by
-.I map
-should equate to the number of 64-kB pages specified by
-.IR len ).
-Each 2-bit field in the protection map is either 0 to allow any access,
-1 to prevent writes, or 2 or 3 to prevent all accesses.
-.SH RETURN VALUE
-On success,
-.BR subpage_prot ()
-returns 0.
-Otherwise, one of the error codes specified below is returned.
-.SH ERRORS
-.TP
-.B EFAULT
-The buffer referred to by
-.I map
-is not accessible.
-.TP
-.B EINVAL
-The
-.I addr
-or
-.I len
-arguments are incorrect.
-Both of these arguments must be aligned to a multiple of the system page size,
-and they must not refer to a region outside of the
-address space of the process or to a region that consists of huge pages.
-.TP
-.B ENOMEM
-Out of memory.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.25 (PowerPC).
-.P
-The system call is provided only if the kernel is configured with
-.BR CONFIG_PPC_64K_PAGES .
-.SH NOTES
-Normal page protections (at the 64-kB page level) also apply;
-the subpage protection mechanism is an additional constraint,
-so putting 0 in a 2-bit field won't allow writes to a page that is otherwise
-write-protected.
-.SS Rationale
-This system call is provided to assist writing emulators that
-operate using 64-kB pages on PowerPC systems.
-When emulating systems such as x86, which uses a smaller page size,
-the emulator can no longer use the memory-management unit (MMU)
-and normal system calls for controlling page protections.
-(The emulator could emulate the MMU by checking and possibly remapping
-the address for each memory access in software, but that is slow.)
-The idea is that the emulator supplies an array of protection masks
-to apply to a specified range of virtual addresses.
-These masks are applied at the level where hardware page-table entries (PTEs)
-are inserted into the hardware page table based on the Linux PTEs,
-so the Linux PTEs are not affected.
-Implicit in this is that the regions of the address space that are
-protected are switched to use 4-kB hardware pages rather than 64-kB
-hardware pages (on machines with hardware 64-kB page support).
-.\" In the initial implementation, it was the case that:
-.\" In fact the whole process is switched to use 4 kB hardware pages when the
-.\" subpage_prot system call is used, but this could be improved in future
-.\" to switch only the affected segments.
-.\" But Paul Mackerras says (Oct 2010): I'm pretty sure we now only switch
-.\" the affected segment, not the whole process.
-.SH SEE ALSO
-.BR mprotect (2),
-.BR syscall (2)
-.P
-.I Documentation/admin\-guide/mm/hugetlbpage.rst
-in the Linux kernel source tree
diff --git a/man2/swapoff.2 b/man2/swapoff.2
deleted file mode 100644
index 2bd424c14..000000000
--- a/man2/swapoff.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/swapon.2
diff --git a/man2/swapon.2 b/man2/swapon.2
deleted file mode 100644
index b2651fc39..000000000
--- a/man2/swapon.2
+++ /dev/null
@@ -1,202 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-07-22 by Michael Chastain <mec@duracef.shout.net>
-.\" Modified 1995-07-23 by aeb
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998-09-08 by aeb
-.\" Modified 2004-06-17 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2004-10-10 by aeb
-.\" 2004-12-14 mtk, Anand Kumria: added new errors
-.\" 2007-06-22 Ivana Varekova <varekova@redhat.com>, mtk
-.\" Update text describing limit on number of swap files.
-.\" 2021-01-17 Alex Baranowski <alex@euro-linux.com>
-.\" Update information about available swap files decreased by
-.\" CONFIG_DEVICE_PRIVATE option.
-.\"
-.\" FIXME Linux 3.11 added SWAP_FLAG_DISCARD_ONCE and SWAP_FLAG_DISCARD_PAGES
-.\" commit dcf6b7ddd7df8965727746f89c59229b23180e5a
-.\" Author: Rafael Aquini <aquini@redhat.com>
-.\" Date: Wed Jul 3 15:02:46 2013 -0700
-.\"
-.TH swapon 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-swapon, swapoff \- start/stop swapping to file/device
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/swap.h>
-.P
-.BI "int swapon(const char *" path ", int " swapflags );
-.BI "int swapoff(const char *" path );
-.fi
-.SH DESCRIPTION
-.BR swapon ()
-sets the swap area to the file or block device specified by
-.IR path .
-.BR swapoff ()
-stops swapping to the file or block device specified by
-.IR path .
-.P
-If the
-.B SWAP_FLAG_PREFER
-flag is specified in the
-.BR swapon ()
-.I swapflags
-argument, the new swap area will have a higher priority than default.
-The priority is encoded within
-.I swapflags
-as:
-.P
-.in +4n
-.EX
-.I "(prio << SWAP_FLAG_PRIO_SHIFT) & SWAP_FLAG_PRIO_MASK"
-.EE
-.in
-.P
-If the
-.B SWAP_FLAG_DISCARD
-flag is specified in the
-.BR swapon ()
-.I swapflags
-argument, freed swap pages will be discarded before they are reused,
-if the swap device supports the discard or trim operation.
-(This may improve performance on some Solid State Devices,
-but often it does not.)
-See also NOTES.
-.P
-These functions may be used only by a privileged process (one having the
-.B CAP_SYS_ADMIN
-capability).
-.SS Priority
-Each swap area has a priority, either high or low.
-The default priority is low.
-Within the low-priority areas,
-newer areas are even lower priority than older areas.
-.P
-All priorities set with
-.I swapflags
-are high-priority, higher than default.
-They may have any nonnegative value chosen by the caller.
-Higher numbers mean higher priority.
-.P
-Swap pages are allocated from areas in priority order,
-highest priority first.
-For areas with different priorities,
-a higher-priority area is exhausted before using a lower-priority area.
-If two or more areas have the same priority,
-and it is the highest priority available,
-pages are allocated on a round-robin basis between them.
-.P
-As of Linux 1.3.6, the kernel usually follows these rules,
-but there are exceptions.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBUSY
-(for
-.BR swapon ())
-The specified
-.I path
-is already being used as a swap area.
-.TP
-.B EINVAL
-The file
-.I path
-exists, but refers neither to a regular file nor to a block device.
-.TP
-.B EINVAL
-.RB ( swapon ())
-The indicated path does not contain a valid swap signature or
-resides on an in-memory filesystem such as
-.BR tmpfs (5).
-.TP
-.BR EINVAL " (since Linux 3.4)"
-.RB ( swapon ())
-An invalid flag value was specified in
-.IR swapflags .
-.TP
-.B EINVAL
-.RB ( swapoff ())
-.I path
-is not currently a swap area.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOENT
-The file
-.I path
-does not exist.
-.TP
-.B ENOMEM
-The system has insufficient memory to start swapping.
-.TP
-.B EPERM
-The caller does not have the
-.B CAP_SYS_ADMIN
-capability.
-Alternatively, the maximum number of swap files is already in use;
-see NOTES below.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-The
-.I swapflags
-argument was introduced in Linux 1.3.2.
-.SH NOTES
-The partition or path must be prepared with
-.BR mkswap (8).
-.P
-There is an upper limit on the number of swap files that may be used,
-defined by the kernel constant
-.BR MAX_SWAPFILES .
-Before Linux 2.4.10,
-.B MAX_SWAPFILES
-had the value 8;
-since Linux 2.4.10, it has the value 32.
-Since Linux 2.6.18, the limit is decreased by 2 (thus 30),
-and since Linux 5.19, the limit is decreased by 3 (thus 29)
-if the kernel is built with the
-.B CONFIG_MIGRATION
-option
-(which reserves two swap table entries for the page migration features of
-.BR mbind (2)
-and
-.BR migrate_pages (2)).
-Since Linux 2.6.32, the limit is further decreased by 1
-if the kernel is built with the
-.B CONFIG_MEMORY_FAILURE
-option.
-Since Linux 5.14, the limit is further decreased by 4
-if the kernel is built with the
-.B CONFIG_DEVICE_PRIVATE
-option.
-Since Linux 5.19, the limit is further decreased by 1
-if the kernel is built with the
-.B CONFIG_PTE_MARKER
-option.
-.P
-Discard of swap pages was introduced in Linux 2.6.29,
-then made conditional
-on the
-.B SWAP_FLAG_DISCARD
-flag in Linux 2.6.36,
-.\" To be precise: 2.6.35.5
-which still discards the
-entire swap area when
-.BR swapon ()
-is called, even if that flag bit is not set.
-.SH SEE ALSO
-.BR mkswap (8),
-.BR swapoff (8),
-.BR swapon (8)
diff --git a/man2/symlink.2 b/man2/symlink.2
deleted file mode 100644
index 21ab4fa45..000000000
--- a/man2/symlink.2
+++ /dev/null
@@ -1,265 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\" and Copyright (C) 2006, 2014 Michael Kerrisk
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-24 by Rik Faith
-.\" Modified 1996-04-26 by Nick Duffek <nsd@bbc.com>
-.\" Modified 1996-11-06 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH symlink 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-symlink, symlinkat \- make a new name for a file
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int symlink(const char *" target ", const char *" linkpath );
-.P
-.BR "#include <fcntl.h> " "/* Definition of " AT_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int symlinkat(const char *" target ", int " newdirfd \
-", const char *" linkpath );
-.P
-.fi
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR symlink ():
-.nf
- _XOPEN_SOURCE >= 500 || _POSIX_C_SOURCE >= 200112L
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.P
-.BR symlinkat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-.BR symlink ()
-creates a symbolic link named
-.I linkpath
-which contains the string
-.IR target .
-.P
-Symbolic links are interpreted at run time as if the contents of the
-link had been substituted into the path being followed to find a file or
-directory.
-.P
-Symbolic links may contain
-.I ..
-path components, which (if used at the start of the link) refer to the
-parent directories of that in which the link resides.
-.P
-A symbolic link (also known as a soft link) may point to an existing
-file or to a nonexistent one; the latter case is known as a dangling
-link.
-.P
-The permissions of a symbolic link are irrelevant; the ownership is
-ignored when following the link
-(except when the
-.I protected_symlinks
-feature is enabled, as explained in
-.BR proc (5)),
-but is checked when removal or
-renaming of the link is requested and the link is in a directory with
-the sticky bit
-.RB ( S_ISVTX )
-set.
-.P
-If
-.I linkpath
-exists, it will
-.I not
-be overwritten.
-.SS symlinkat()
-The
-.BR symlinkat ()
-system call operates in exactly the same way as
-.BR symlink (),
-except for the differences described here.
-.P
-If the pathname given in
-.I linkpath
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I newdirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR symlink ()
-for a relative pathname).
-.P
-If
-.I linkpath
-is relative and
-.I newdirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I linkpath
-is interpreted relative to the current working
-directory of the calling process (like
-.BR symlink ()).
-.P
-If
-.I linkpath
-is absolute, then
-.I newdirfd
-is ignored.
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR symlinkat ().
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Write access to the directory containing
-.I linkpath
-is denied, or one of the directories in the path prefix of
-.I linkpath
-did not allow search permission.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBADF
-.RB ( symlinkat ())
-.I linkpath
-is relative but
-.I newdirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EDQUOT
-The user's quota of resources on the filesystem has been exhausted.
-The resources could be inodes or disk blocks, depending on the filesystem
-implementation.
-.TP
-.B EEXIST
-.I linkpath
-already exists.
-.TP
-.B EFAULT
-.IR target " or " linkpath " points outside your accessible address space."
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in resolving
-.IR linkpath .
-.TP
-.B ENAMETOOLONG
-.IR target " or " linkpath " was too long."
-.TP
-.B ENOENT
-A directory component in
-.I linkpath
-does not exist or is a dangling symbolic link, or
-.I target
-or
-.I linkpath
-is an empty string.
-.TP
-.B ENOENT
-.RB ( symlinkat ())
-.I linkpath
-is a relative pathname and
-.I newdirfd
-refers to a directory that has been deleted.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOSPC
-The device containing the file has no room for the new directory
-entry.
-.TP
-.B ENOTDIR
-A component used as a directory in
-.I linkpath
-is not, in fact, a directory.
-.TP
-.B ENOTDIR
-.RB ( symlinkat ())
-.I linkpath
-is relative and
-.I newdirfd
-is a file descriptor referring to a file other than a directory.
-.TP
-.B EPERM
-The filesystem containing
-.I linkpath
-does not support the creation of symbolic links.
-.TP
-.B EROFS
-.I linkpath
-is on a read-only filesystem.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR symlink ()
-SVr4, 4.3BSD, POSIX.1-2001.
-.\" SVr4 documents additional error codes EDQUOT and ENOSYS.
-.\" See
-.\" .BR open (2)
-.\" re multiple files with the same name, and NFS.
-.TP
-.BR symlinkat ()
-POSIX.1-2008.
-Linux 2.6.16,
-glibc 2.4.
-.SS glibc notes
-On older kernels where
-.BR symlinkat ()
-is unavailable, the glibc wrapper function falls back to the use of
-.BR symlink ().
-When
-.I linkpath
-is a relative pathname,
-glibc constructs a pathname based on the symbolic link in
-.I /proc/self/fd
-that corresponds to the
-.I newdirfd
-argument.
-.SH NOTES
-No checking of
-.I target
-is done.
-.P
-Deleting the name referred to by a symbolic link will actually delete the
-file (unless it also has other hard links).
-If this behavior is not desired, use
-.BR link (2).
-.SH SEE ALSO
-.BR ln (1),
-.BR namei (1),
-.BR lchown (2),
-.BR link (2),
-.BR lstat (2),
-.BR open (2),
-.BR readlink (2),
-.BR rename (2),
-.BR unlink (2),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/symlinkat.2 b/man2/symlinkat.2
deleted file mode 100644
index 78568cd0a..000000000
--- a/man2/symlinkat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/symlink.2
diff --git a/man2/sync.2 b/man2/sync.2
deleted file mode 100644
index 1c0c7d7b4..000000000
--- a/man2/sync.2
+++ /dev/null
@@ -1,148 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\" and Copyright (c) 2011 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified Sat Jul 24 12:02:47 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 15 Apr 1995 by Michael Chastain <mec@shell.portal.com>:
-.\" Added reference to `bdflush(2)'.
-.\" Modified 960414 by Andries Brouwer <aeb@cwi.nl>:
-.\" Added the fact that since 1.3.20 sync actually waits.
-.\" Modified Tue Oct 22 22:27:07 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-10-10 by aeb, following Michael Kerrisk.
-.\" 2011-09-07, mtk, Added syncfs() documentation,
-.\"
-.TH sync 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sync, syncfs \- commit filesystem caches to disk
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B void sync(void);
-.P
-.BI "int syncfs(int " fd );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR sync ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.19: */ _DEFAULT_SOURCE
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.P
-.BR syncfs ():
-.nf
- _GNU_SOURCE
-.fi
-.SH DESCRIPTION
-.BR sync ()
-causes all pending modifications to filesystem metadata and cached file
-data to be written to the underlying filesystems.
-.P
-.BR syncfs ()
-is like
-.BR sync (),
-but synchronizes just the filesystem containing the file
-referred to by the open file descriptor
-.IR fd .
-.SH RETURN VALUE
-.BR syncfs ()
-returns 0 on success;
-on error, it returns \-1 and sets
-.I errno
-to indicate the error.
-.SH ERRORS
-.BR sync ()
-is always successful.
-.P
-.BR syncfs ()
-can fail for at least the following reasons:
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.B EIO
-An error occurred during synchronization.
-This error may relate to data written to any file on the filesystem, or to
-metadata related to the filesystem itself.
-.TP
-.B ENOSPC
-Disk space was exhausted while synchronizing.
-.TP
-.B ENOSPC
-.TQ
-.B EDQUOT
-Data was written to a file on NFS or another filesystem which does not
-allocate space at the time of a
-.BR write (2)
-system call, and some previous write failed due to insufficient
-storage space.
-.SH VERSIONS
-According to the standard specification (e.g., POSIX.1-2001),
-.BR sync ()
-schedules the writes, but may return before the actual
-writing is done.
-However Linux waits for I/O completions,
-and thus
-.BR sync ()
-or
-.BR syncfs ()
-provide the same guarantees as
-.BR fsync (2)
-called on every file in
-the system or filesystem respectively.
-.SH STANDARDS
-.TP
-.BR sync ()
-POSIX.1-2008.
-.TP
-.BR syncfs ()
-Linux.
-.SH HISTORY
-.TP
-.BR sync ()
-POSIX.1-2001, SVr4, 4.3BSD.
-.TP
-.BR syncfs ()
-Linux 2.6.39,
-glibc 2.14.
-.P
-Since glibc 2.2.2, the Linux prototype for
-.BR sync ()
-is as listed above,
-following the various standards.
-In glibc 2.2.1 and earlier,
-it was "int sync(void)", and
-.BR sync ()
-always returned 0.
-.P
-In mainline kernel versions prior to Linux 5.8,
-.BR syncfs ()
-will fail only when passed a bad file descriptor
-.RB ( EBADF ).
-Since Linux 5.8,
-.\" commit 735e4ae5ba28c886d249ad04d3c8cc097dad6336
-.BR syncfs ()
-will also report an error if one or more inodes failed
-to be written back since the last
-.BR syncfs ()
-call.
-.SH BUGS
-Before Linux 1.3.20, Linux did not wait for I/O to complete
-before returning.
-.SH SEE ALSO
-.BR sync (1),
-.BR fdatasync (2),
-.BR fsync (2)
diff --git a/man2/sync_file_range.2 b/man2/sync_file_range.2
deleted file mode 100644
index 2f6fadbe9..000000000
--- a/man2/sync_file_range.2
+++ /dev/null
@@ -1,213 +0,0 @@
-.\" Copyright (c) 2006 Andrew Morton <akpm@osdl.org>
-.\" and Copyright 2006 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2006-07-05 Initial creation, Michael Kerrisk based on
-.\" Andrew Morton's comments in fs/sync.c
-.\" 2010-10-09, mtk, Document sync_file_range2()
-.\"
-.TH sync_file_range 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sync_file_range \- sync a file segment with disk
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #define _FILE_OFFSET_BITS 64
-.B #include <fcntl.h>
-.P
-.BI "int sync_file_range(int " fd ", off_t " offset ", off_t " nbytes ,
-.BI " unsigned int " flags );
-.fi
-.SH DESCRIPTION
-.BR sync_file_range ()
-permits fine control when synchronizing the open file referred to by the
-file descriptor
-.I fd
-with disk.
-.P
-.I offset
-is the starting byte of the file range to be synchronized.
-.I nbytes
-specifies the length of the range to be synchronized, in bytes; if
-.I nbytes
-is zero, then all bytes from
-.I offset
-through to the end of file are synchronized.
-Synchronization is in units of the system page size:
-.I offset
-is rounded down to a page boundary;
-.I (offset+nbytes\-1)
-is rounded up to a page boundary.
-.P
-The
-.I flags
-bit-mask argument can include any of the following values:
-.TP
-.B SYNC_FILE_RANGE_WAIT_BEFORE
-Wait upon write-out of all pages in the specified range
-that have already been submitted to the device driver for write-out
-before performing any write.
-.TP
-.B SYNC_FILE_RANGE_WRITE
-Initiate write-out of all dirty pages in the specified
-range which are not presently under write-out.
-Note that even this may block if you attempt to
-write more than the request queue size.
-.TP
-.B SYNC_FILE_RANGE_WAIT_AFTER
-Wait upon write-out of all pages in the range
-after performing any write.
-.P
-Specifying
-.I flags
-as 0 is permitted, as a no-op.
-.SS Warning
-This system call is extremely dangerous and should not be used in portable
-programs.
-None of these operations writes out the file's metadata.
-Therefore, unless the application is strictly performing overwrites of
-already-instantiated disk blocks, there are no guarantees that the data will
-be available after a crash.
-There is no user interface to know if a write is purely an overwrite.
-On filesystems using copy-on-write semantics (e.g.,
-.IR btrfs )
-an overwrite of existing allocated blocks is impossible.
-When writing into preallocated space,
-many filesystems also require calls into the block
-allocator, which this system call does not sync out to disk.
-This system call does not flush disk write caches and thus does not provide
-any data integrity on systems with volatile disk write caches.
-.SS Some details
-.B SYNC_FILE_RANGE_WAIT_BEFORE
-and
-.B SYNC_FILE_RANGE_WAIT_AFTER
-will detect any
-I/O errors or
-.B ENOSPC
-conditions and will return these to the caller.
-.P
-Useful combinations of the
-.I flags
-bits are:
-.TP
-.B SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE
-Ensures that all pages
-in the specified range which were dirty when
-.BR sync_file_range ()
-was called are placed
-under write-out.
-This is a start-write-for-data-integrity operation.
-.TP
-.B SYNC_FILE_RANGE_WRITE
-Start write-out of all dirty pages in the specified range which
-are not presently under write-out.
-This is an asynchronous flush-to-disk
-operation.
-This is not suitable for data integrity operations.
-.TP
-.BR SYNC_FILE_RANGE_WAIT_BEFORE " (or " SYNC_FILE_RANGE_WAIT_AFTER )
-Wait for
-completion of write-out of all pages in the specified range.
-This can be used after an earlier
-.B SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE
-operation to wait for completion of that operation, and obtain its result.
-.TP
-.B SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | \
-SYNC_FILE_RANGE_WAIT_AFTER
-This is a write-for-data-integrity operation
-that will ensure that all pages in the specified range which were dirty when
-.BR sync_file_range ()
-was called are committed to disk.
-.SH RETURN VALUE
-On success,
-.BR sync_file_range ()
-returns 0; on failure \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.B EINVAL
-.I flags
-specifies an invalid bit; or
-.I offset
-or
-.I nbytes
-is invalid.
-.TP
-.B EIO
-I/O error.
-.TP
-.B ENOMEM
-Out of memory.
-.TP
-.B ENOSPC
-Out of disk space.
-.TP
-.B ESPIPE
-.I fd
-refers to something other than a regular file, a block device, or
-a directory.
-.SH VERSIONS
-.SS sync_file_range2()
-Some architectures (e.g., PowerPC, ARM)
-need 64-bit arguments to be aligned in a suitable pair of registers.
-.\" See kernel commit edd5cd4a9424f22b0fa08bef5e299d41befd5622
-On such architectures, the call signature of
-.BR sync_file_range ()
-shown in the SYNOPSIS would force
-a register to be wasted as padding between the
-.I fd
-and
-.I offset
-arguments.
-(See
-.BR syscall (2)
-for details.)
-Therefore, these architectures define a different
-system call that orders the arguments suitably:
-.P
-.in +4n
-.EX
-.BI "int sync_file_range2(int " fd ", unsigned int " flags ,
-.BI " off_t " offset ", off_t " nbytes );
-.EE
-.in
-.P
-The behavior of this system call is otherwise exactly the same as
-.BR sync_file_range ().
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.17.
-.SS sync_file_range2()
-A system call with this signature first appeared on the ARM architecture
-in Linux 2.6.20, with the name
-.BR arm_sync_file_range ().
-It was renamed in Linux 2.6.22,
-when the analogous system call was added for PowerPC.
-On architectures where glibc support is provided,
-glibc transparently wraps
-.BR sync_file_range2 ()
-under the name
-.BR sync_file_range ().
-.SH NOTES
-.B _FILE_OFFSET_BITS
-should be defined to be 64 in code that takes the address of
-.BR sync_file_range ,
-if the code is intended to be portable
-to traditional 32-bit x86 and ARM platforms where
-.BR off_t 's
-width defaults to 32 bits.
-.SH SEE ALSO
-.BR fdatasync (2),
-.BR fsync (2),
-.BR msync (2),
-.BR sync (2)
diff --git a/man2/sync_file_range2.2 b/man2/sync_file_range2.2
deleted file mode 100644
index ad7a1e6c7..000000000
--- a/man2/sync_file_range2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sync_file_range.2
diff --git a/man2/syncfs.2 b/man2/syncfs.2
deleted file mode 100644
index 555579827..000000000
--- a/man2/syncfs.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/sync.2
diff --git a/man2/syscall.2 b/man2/syscall.2
deleted file mode 100644
index 82f02ebe1..000000000
--- a/man2/syscall.2
+++ /dev/null
@@ -1,367 +0,0 @@
-'\" t
-.\" Copyright (c) 1980, 1991, 1993
-.\" The Regents of the University of California. All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)syscall.2 8.1 (Berkeley) 6/16/93
-.\"
-.\"
-.\" 2002-03-20 Christoph Hellwig <hch@infradead.org>
-.\" - adopted for Linux
-.\" 2015-01-17, Kees Cook <keescook@chromium.org>
-.\" Added mips and arm64.
-.\"
-.TH syscall 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-syscall \- indirect system call
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "long syscall(long " number ", ...);"
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR syscall ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- Before glibc 2.19:
- _BSD_SOURCE || _SVID_SOURCE
-.fi
-.SH DESCRIPTION
-.BR syscall ()
-is a small library function that invokes
-the system call whose assembly language
-interface has the specified
-.I number
-with the specified arguments.
-Employing
-.BR syscall ()
-is useful, for example,
-when invoking a system call that has no wrapper function in the C library.
-.P
-.BR syscall ()
-saves CPU registers before making the system call,
-restores the registers upon return from the system call,
-and stores any error returned by the system call in
-.BR errno (3).
-.P
-Symbolic constants for system call numbers can be found in the header file
-.IR <sys/syscall.h> .
-.SH RETURN VALUE
-The return value is defined by the system call being invoked.
-In general, a 0 return value indicates success.
-A \-1 return value indicates an error,
-and an error number is stored in
-.IR errno .
-.SH ERRORS
-.TP
-.B ENOSYS
-The requested system call number is not implemented.
-.P
-Other errors are specific to the invoked system call.
-.SH NOTES
-.BR syscall ()
-first appeared in
-4BSD.
-.SS Architecture-specific requirements
-Each architecture ABI has its own requirements on how
-system call arguments are passed to the kernel.
-For system calls that have a glibc wrapper (e.g., most system calls),
-glibc handles the details of copying arguments to the right registers
-in a manner suitable for the architecture.
-However, when using
-.BR syscall ()
-to make a system call,
-the caller might need to handle architecture-dependent details;
-this requirement is most commonly encountered on certain 32-bit architectures.
-.P
-For example, on the ARM architecture Embedded ABI (EABI), a
-64-bit value (e.g.,
-.IR "long long" )
-must be aligned to an even register pair.
-Thus, using
-.BR syscall ()
-instead of the wrapper provided by glibc,
-the
-.BR readahead (2)
-system call would be invoked as follows on the ARM architecture with the EABI
-in little endian mode:
-.P
-.in +4n
-.EX
-syscall(SYS_readahead, fd, 0,
- (unsigned int) (offset & 0xFFFFFFFF),
- (unsigned int) (offset >> 32),
- count);
-.EE
-.in
-.P
-Since the offset argument is 64 bits, and the first argument
-.RI ( fd )
-is passed in
-.IR r0 ,
-the caller must manually split and align the 64-bit value
-so that it is passed in the
-.IR r2 / r3
-register pair.
-That means inserting a dummy value into
-.I r1
-(the second argument of 0).
-Care also must be taken so that the split follows endian conventions
-(according to the C ABI for the platform).
-.P
-Similar issues can occur on MIPS with the O32 ABI,
-on PowerPC and parisc with the 32-bit ABI, and on Xtensa.
-.\" Mike Frysinger: this issue ends up forcing MIPS
-.\" O32 to take 7 arguments to syscall()
-.P
-.\" See arch/parisc/kernel/sys_parisc.c.
-Note that while the parisc C ABI also uses aligned register pairs,
-it uses a shim layer to hide the issue from user space.
-.P
-The affected system calls are
-.BR fadvise64_64 (2),
-.BR ftruncate64 (2),
-.BR posix_fadvise (2),
-.BR pread64 (2),
-.BR pwrite64 (2),
-.BR readahead (2),
-.BR sync_file_range (2),
-and
-.BR truncate64 (2).
-.P
-.\" You need to look up the syscalls directly in the kernel source to see if
-.\" they should be in this list. For example, look at fs/read_write.c and
-.\" the function signatures that do:
-.\" ..., unsigned long, pos_l, unsigned long, pos_h, ...
-.\" If they use off_t, then they most likely do not belong in this list.
-This does not affect syscalls that manually split and assemble 64-bit values
-such as
-.BR _llseek (2),
-.BR preadv (2),
-.BR preadv2 (2),
-.BR pwritev (2),
-and
-.BR pwritev2 (2).
-Welcome to the wonderful world of historical baggage.
-.SS Architecture calling conventions
-Every architecture has its own way of invoking and passing arguments to the
-kernel.
-The details for various architectures are listed in the two tables below.
-.P
-The first table lists the instruction used to transition to kernel mode
-(which might not be the fastest or best way to transition to the kernel,
-so you might have to refer to
-.BR vdso (7)),
-the register used to indicate the system call number,
-the register(s) used to return the system call result,
-and the register used to signal an error.
-.if t \{\
-.ft CW
-\}
-.TS
-l2 l2 l2 l2 l1 l2 l.
-Arch/ABI Instruction System Ret Ret Error Notes
- call # val val2
-_
-alpha callsys v0 v0 a4 a3 1, 6
-arc trap0 r8 r0 - -
-arm/OABI swi NR - r0 - - 2
-arm/EABI swi 0x0 r7 r0 r1 -
-arm64 svc #0 w8 x0 x1 -
-blackfin excpt 0x0 P0 R0 - -
-i386 int $0x80 eax eax edx -
-ia64 break 0x100000 r15 r8 r9 r10 1, 6
-loongarch syscall 0 a7 a0 - -
-m68k trap #0 d0 d0 - -
-microblaze brki r14,8 r12 r3 - -
-mips syscall v0 v0 v1 a3 1, 6
-nios2 trap r2 r2 - r7
-parisc ble 0x100(%sr2, %r0) r20 r28 - -
-powerpc sc r0 r3 - r0 1
-powerpc64 sc r0 r3 - cr0.SO 1
-riscv ecall a7 a0 a1 -
-s390 svc 0 r1 r2 r3 - 3
-s390x svc 0 r1 r2 r3 - 3
-superh trapa #31 r3 r0 r1 - 4, 6
-sparc/32 t 0x10 g1 o0 o1 psr/csr 1, 6
-sparc/64 t 0x6d g1 o0 o1 psr/csr 1, 6
-tile swint1 R10 R00 - R01 1
-x86-64 syscall rax rax rdx - 5
-x32 syscall rax rax rdx - 5
-xtensa syscall a2 a2 - -
-.TE
-.P
-Notes:
-.IP \[bu] 3
-On a few architectures,
-a register is used as a boolean
-(0 indicating no error, and \-1 indicating an error) to signal that the
-system call failed.
-The actual error value is still contained in the return register.
-On sparc, the carry bit
-.RI ( csr )
-in the processor status register
-.RI ( psr )
-is used instead of a full register.
-On powerpc64, the summary overflow bit
-.RI ( SO )
-in field 0 of the condition register
-.RI ( cr0 )
-is used.
-.IP \[bu]
-.I NR
-is the system call number.
-.IP \[bu]
-For s390 and s390x,
-.I NR
-(the system call number) may be passed directly with
-.I "svc\ NR"
-if it is less than 256.
-.IP \[bu]
-On SuperH additional trap numbers are supported for historic reasons, but
-.B "trapa #31"
-is the recommended "unified" ABI.
-.IP \[bu]
-The x32 ABI shares its system call table with the x86-64 ABI,
-but there are some nuances:
-.RS
-.IP \[bu] 3
-In order to indicate that a system call is called under the x32 ABI,
-an additional bit,
-.BR __X32_SYSCALL_BIT ,
-is bitwise ORed with the system call number.
-The ABI used by a process affects some process behaviors,
-including signal handling or system call restarting.
-.IP \[bu]
-Since x32 has different sizes for
-.I long
-and pointer types, layouts of some (but not all;
-.I struct timeval
-or
-.I struct rlimit
-are 64-bit, for example) structures are different.
-In order to handle this,
-additional system calls are added to the system call table,
-starting from number 512
-(without the
-.BR __X32_SYSCALL_BIT ).
-For example,
-.B __NR_readv
-is defined as 19 for the x86-64 ABI and as
-.IR __X32_SYSCALL_BIT " | " \fB515\fP
-for the x32 ABI.
-Most of these additional system calls are actually identical
-to the system calls used for providing i386 compat.
-There are some notable exceptions, however, such as
-.BR preadv2 (2),
-which uses
-.I struct iovec
-entities with 4-byte pointers and sizes ("compat_iovec" in kernel terms),
-but passes an 8-byte
-.I pos
-argument in a single register and not two, as is done in every other ABI.
-.RE
-.IP \[bu]
-Some architectures
-(namely, Alpha, IA-64, MIPS, SuperH, sparc/32, and sparc/64)
-use an additional register ("Ret val2" in the above table)
-to pass back a second return value from the
-.BR pipe (2)
-system call;
-Alpha uses this technique in the architecture-specific
-.BR getxpid (2),
-.BR getxuid (2),
-and
-.BR getxgid (2)
-system calls as well.
-Other architectures do not use the second return value register
-in the system call interface, even if it is defined in the System V ABI.
-.if t \{\
-.in
-.ft P
-\}
-.P
-The second table shows the registers used to pass the system call arguments.
-.if t \{\
-.ft CW
-\}
-.TS
-l l2 l2 l2 l2 l2 l2 l2 l.
-Arch/ABI arg1 arg2 arg3 arg4 arg5 arg6 arg7 Notes
-_
-alpha a0 a1 a2 a3 a4 a5 -
-arc r0 r1 r2 r3 r4 r5 -
-arm/OABI r0 r1 r2 r3 r4 r5 r6
-arm/EABI r0 r1 r2 r3 r4 r5 r6
-arm64 x0 x1 x2 x3 x4 x5 -
-blackfin R0 R1 R2 R3 R4 R5 -
-i386 ebx ecx edx esi edi ebp -
-ia64 out0 out1 out2 out3 out4 out5 -
-loongarch a0 a1 a2 a3 a4 a5 a6
-m68k d1 d2 d3 d4 d5 a0 -
-microblaze r5 r6 r7 r8 r9 r10 -
-mips/o32 a0 a1 a2 a3 - - - 1
-mips/n32,64 a0 a1 a2 a3 a4 a5 -
-nios2 r4 r5 r6 r7 r8 r9 -
-parisc r26 r25 r24 r23 r22 r21 -
-powerpc r3 r4 r5 r6 r7 r8 r9
-powerpc64 r3 r4 r5 r6 r7 r8 -
-riscv a0 a1 a2 a3 a4 a5 -
-s390 r2 r3 r4 r5 r6 r7 -
-s390x r2 r3 r4 r5 r6 r7 -
-superh r4 r5 r6 r7 r0 r1 r2
-sparc/32 o0 o1 o2 o3 o4 o5 -
-sparc/64 o0 o1 o2 o3 o4 o5 -
-tile R00 R01 R02 R03 R04 R05 -
-x86-64 rdi rsi rdx r10 r8 r9 -
-x32 rdi rsi rdx r10 r8 r9 -
-xtensa a6 a3 a4 a5 a8 a9 -
-.TE
-.P
-Notes:
-.IP \[bu] 3
-The mips/o32 system call convention passes
-arguments 5 through 8 on the user stack.
-.if t \{\
-.in
-.ft P
-\}
-.P
-Note that these tables don't cover the entire calling convention\[em]some
-architectures may indiscriminately clobber other registers not listed here.
-.SH EXAMPLES
-.\" SRC BEGIN (syscall.c)
-.EX
-#define _GNU_SOURCE
-#include <signal.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-int
-main(void)
-{
- pid_t tid;
-\&
- tid = syscall(SYS_gettid);
- syscall(SYS_tgkill, getpid(), tid, SIGHUP);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR _syscall (2),
-.BR intro (2),
-.BR syscalls (2),
-.BR errno (3),
-.BR vdso (7)
diff --git a/man2/syscalls.2 b/man2/syscalls.2
deleted file mode 100644
index 7a7d6d730..000000000
--- a/man2/syscalls.2
+++ /dev/null
@@ -1,1172 +0,0 @@
-'\" t
-.\" Copyright (C) 2007 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" with some input from Stepan Kasal <kasal@ucw.cz>
-.\"
-.\" Some content retained from an earlier version of this page:
-.\" Copyright (C) 1998 Andries Brouwer (aeb@cwi.nl)
-.\" Modifications for 2.2 and 2.4 Copyright (C) 2002 Ian Redfern
-.\" <redferni@logica.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH syscalls 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-syscalls \- Linux system calls
-.SH SYNOPSIS
-.nf
-Linux system calls.
-.fi
-.SH DESCRIPTION
-The system call is the fundamental interface between an application
-and the Linux kernel.
-.SS System calls and library wrapper functions
-System calls are generally not invoked directly,
-but rather via wrapper functions in glibc (or perhaps some other library).
-For details of direct invocation of a system call, see
-.BR intro (2).
-Often, but not always, the name of the wrapper function is the same
-as the name of the system call that it invokes.
-For example, glibc contains a function
-.BR chdir ()
-which invokes the underlying "chdir" system call.
-.P
-Often the glibc wrapper function is quite thin, doing little work
-other than copying arguments to the right registers
-before invoking the system call,
-and then setting
-.I errno
-appropriately after the system call has returned.
-(These are the same steps that are performed by
-.BR syscall (2),
-which can be used to invoke system calls
-for which no wrapper function is provided.)
-Note: system calls indicate a failure by returning a negative error
-number to the caller on architectures without a separate error register/flag,
-as noted in
-.BR syscall (2);
-when this happens,
-the wrapper function negates the returned error number
-(to make it positive), copies it to
-.IR errno ,
-and returns \-1 to the caller of the wrapper.
-.P
-Sometimes, however, the wrapper function does some extra work
-before invoking the system call.
-For example, nowadays there are (for reasons described below) two
-related system calls,
-.BR truncate (2)
-and
-.BR truncate64 (2),
-and the glibc
-.BR truncate ()
-wrapper function checks which of those system calls
-are provided by the kernel and determines which should be employed.
-.SS System call list
-Below is a list of the Linux system calls.
-In the list, the
-.I Kernel
-column indicates the kernel version
-for those system calls that were new in Linux 2.2,
-or have appeared since that kernel version.
-Note the following points:
-.IP \[bu] 3
-Where no kernel version is indicated,
-the system call appeared in Linux 1.0 or earlier.
-.IP \[bu]
-Where a system call is marked "1.2"
-this means the system call probably appeared in a Linux 1.1.x kernel version,
-and first appeared in a stable kernel with 1.2.
-(Development of the Linux 1.2 kernel was initiated from a branch of
-Linux 1.0.6 via the Linux 1.1.x unstable kernel series.)
-.IP \[bu]
-Where a system call is marked "2.0"
-this means the system call probably appeared in a Linux 1.3.x kernel version,
-and first appeared in a stable kernel with Linux 2.0.
-(Development of the Linux 2.0 kernel was initiated from a branch of
-Linux 1.2.x, somewhere around Linux 1.2.10,
-via the Linux 1.3.x unstable kernel series.)
-.\" Was Linux 2.0 started from a branch of Linux 1.2.10?
-.\" At least from the timestamps of the tarballs
-.\" of Linux 1.2.10 and Linux 1.3.0, that's how it looks, but in
-.\" fact the diff doesn't seem very clear, the
-.\" Linux 1.3.0 .tar.bz is much bigger (2.0 MB) than the
-.\" Linux 1.2.10 .tar.bz2 (1.8 MB), and AEB points out the
-.\" timestamps of some files in Linux 1.3.0 seem to be older
-.\" than those in Linux 1.2.10. All of this suggests
-.\" that there might not have been a clean branch point.
-.IP \[bu]
-Where a system call is marked "2.2"
-this means the system call probably appeared in a Linux 2.1.x kernel version,
-and first appeared in a stable kernel with Linux 2.2.0.
-(Development of the Linux 2.2 kernel was initiated from a branch of
-Linux 2.0.21 via the Linux 2.1.x unstable kernel series.)
-.IP \[bu]
-Where a system call is marked "2.4"
-this means the system call probably appeared in a Linux 2.3.x kernel version,
-and first appeared in a stable kernel with Linux 2.4.0.
-(Development of the Linux 2.4 kernel was initiated from a branch of
-Linux 2.2.8 via the Linux 2.3.x unstable kernel series.)
-.IP \[bu]
-Where a system call is marked "2.6"
-this means the system call probably appeared in a Linux 2.5.x kernel version,
-and first appeared in a stable kernel with Linux 2.6.0.
-(Development of Linux 2.6 was initiated from a branch
-of Linux 2.4.15 via the Linux 2.5.x unstable kernel series.)
-.IP \[bu]
-Starting with Linux 2.6.0, the development model changed,
-and new system calls may appear in each Linux 2.6.x release.
-In this case, the exact version number where the system call appeared
-is shown.
-This convention continues with the Linux 3.x kernel series,
-which followed on from Linux 2.6.39; and the Linux 4.x kernel series,
-which followed on from Linux 3.19; and the Linux 5.x kernel series,
-which followed on from Linux 4.20; and the Linux 6.x kernel series,
-which followed on from Linux 5.19.
-.IP \[bu]
-In some cases, a system call was added to a stable kernel
-series after it branched from the previous stable kernel
-series, and then backported into the earlier stable kernel series.
-For example, some system calls that appeared in Linux 2.6.x were also backported
-into a Linux 2.4.x release after Linux 2.4.15.
-When this is so, the version where the system call appeared
-in both of the major kernel series is listed.
-.P
-The list of system calls that are available as at Linux 5.14
-(or in a few cases only on older kernels) is as follows:
-.P
-.\"
-.\" Looking at scripts/checksyscalls.sh in the kernel source is
-.\" instructive about x86 specifics.
-.\"
-.TS
-Lb Lb Lb
-L2 L L.
-System call Kernel Notes
-_
-\fB_llseek\fP(2) 1.2
-\fB_newselect\fP(2) 2.0
-\fB_sysctl\fP(2) 2.0 Removed in 5.5
-\fBaccept\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBaccept4\fP(2) 2.6.28
-\fBaccess\fP(2) 1.0
-\fBacct\fP(2) 1.0
-\fBadd_key\fP(2) 2.6.10
-\fBadjtimex\fP(2) 1.0
-\fBalarm\fP(2) 1.0
-\fBalloc_hugepages\fP(2) 2.5.36 Removed in 2.5.44
-.\" 4adeefe161a74369e44cc8e663f240ece0470dc3
-\fBarc_gettls\fP(2) 3.9 ARC only
-\fBarc_settls\fP(2) 3.9 ARC only
-.\" 91e040a79df73d371f70792f30380d4e44805250
-\fBarc_usr_cmpxchg\fP(2) 4.9 ARC only
-.\" x86: 79170fda313ed5be2394f87aa2a00d597f8ed4a1
-\fBarch_prctl\fP(2) 2.6 T{
-x86_64, x86 since 4.12
-T}
-.\" 9674cdc74d63f346870943ef966a034f8c71ee57
-\fBatomic_barrier\fP(2) 2.6.34 m68k only
-\fBatomic_cmpxchg_32\fP(2) 2.6.34 m68k only
-\fBbdflush\fP(2) 1.2 T{
-Deprecated (does nothing)
-since 2.6
-T}
-\fBbind\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBbpf\fP(2) 3.18
-\fBbrk\fP(2) 1.0
-\fBbreakpoint\fP(2) 2.2 T{
-ARM OABI only, defined with
-\fB__ARM_NR\fP prefix
-T}
-\fBcacheflush\fP(2) 1.2 Not on x86
-\fBcapget\fP(2) 2.2
-\fBcapset\fP(2) 2.2
-\fBchdir\fP(2) 1.0
-\fBchmod\fP(2) 1.0
-\fBchown\fP(2) 2.2 T{
-See \fBchown\fP(2) for
-version details
-T}
-\fBchown32\fP(2) 2.4
-\fBchroot\fP(2) 1.0
-\fBclock_adjtime\fP(2) 2.6.39
-\fBclock_getres\fP(2) 2.6
-\fBclock_gettime\fP(2) 2.6
-\fBclock_nanosleep\fP(2) 2.6
-\fBclock_settime\fP(2) 2.6
-\fBclone2\fP(2) 2.4 IA-64 only
-\fBclone\fP(2) 1.0
-\fBclone3\fP(2) 5.3
-\fBclose\fP(2) 1.0
-\fBclose_range\fP(2) 5.9
-.\" .\" dcef1f634657dabe7905af3ccda12cf7f0b6fcc1
-.\" .\" cc20d42986d5807cbe4f5c7c8e3dab2e59ea0db3
-.\" .\" db695c0509d6ec9046ee5e4c520a19fa17d9fce2
-.\" \fBcmpxchg\fP(2) 2.6.12 T{
-.\" ARM, syscall constant never was
-.\" exposed to user space, in-kernel
-.\" definition had \fB__ARM_NR\fP prefix,
-.\" removed in 4.4
-.\" T}
-.\" 867e359b97c970a60626d5d76bbe2a8fadbf38fb
-.\" bb9d812643d8a121df7d614a2b9c60193a92deb0
-\fBconnect\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBcopy_file_range\fP(2) 4.5
-\fBcreat\fP(2) 1.0
-\fBcreate_module\fP(2) 1.0 Removed in 2.6
-\fBdelete_module\fP(2) 1.0
-.\" 1394f03221790a988afc3e4b3cb79f2e477246a9
-.\" 4ba66a9760722ccbb691b8f7116cad2f791cca7b
-\fBdup\fP(2) 1.0
-\fBdup2\fP(2) 1.0
-\fBdup3\fP(2) 2.6.27
-\fBepoll_create\fP(2) 2.6
-\fBepoll_create1\fP(2) 2.6.27
-\fBepoll_ctl\fP(2) 2.6
-\fBepoll_pwait\fP(2) 2.6.19
-\fBepoll_pwait2\fP(2) 5.11
-\fBepoll_wait\fP(2) 2.6
-\fBeventfd\fP(2) 2.6.22
-\fBeventfd2\fP(2) 2.6.27
-\fBexecv\fP(2) 2.0 T{
-SPARC/SPARC64 only, for
-compatibility with SunOS
-T}
-\fBexecve\fP(2) 1.0
-\fBexecveat\fP(2) 3.19
-\fBexit\fP(2) 1.0
-\fBexit_group\fP(2) 2.6
-\fBfaccessat\fP(2) 2.6.16
-\fBfaccessat2\fP(2) 5.8
-\fBfadvise64\fP(2) 2.6
-.\" Implements \fBposix_fadvise\fP(2)
-\fBfadvise64_64\fP(2) 2.6
-\fBfallocate\fP(2) 2.6.23
-\fBfanotify_init\fP(2) 2.6.37
-\fBfanotify_mark\fP(2) 2.6.37
-.\" The fanotify calls were added in Linux 2.6.36,
-.\" but disabled while the API was finalized.
-\fBfchdir\fP(2) 1.0
-\fBfchmod\fP(2) 1.0
-\fBfchmodat\fP(2) 2.6.16
-\fBfchown\fP(2) 1.0
-\fBfchown32\fP(2) 2.4
-\fBfchownat\fP(2) 2.6.16
-\fBfcntl\fP(2) 1.0
-\fBfcntl64\fP(2) 2.4
-\fBfdatasync\fP(2) 2.0
-\fBfgetxattr\fP(2) 2.6; 2.4.18
-\fBfinit_module\fP(2) 3.8
-\fBflistxattr\fP(2) 2.6; 2.4.18
-\fBflock\fP(2) 2.0
-\fBfork\fP(2) 1.0
-\fBfree_hugepages\fP(2) 2.5.36 Removed in 2.5.44
-\fBfremovexattr\fP(2) 2.6; 2.4.18
-\fBfsconfig\fP(2) 5.2
-\fBfsetxattr\fP(2) 2.6; 2.4.18
-\fBfsmount\fP(2) 5.2
-\fBfsopen\fP(2) 5.2
-\fBfspick\fP(2) 5.2
-\fBfstat\fP(2) 1.0
-\fBfstat64\fP(2) 2.4
-\fBfstatat64\fP(2) 2.6.16
-\fBfstatfs\fP(2) 1.0
-\fBfstatfs64\fP(2) 2.6
-\fBfsync\fP(2) 1.0
-\fBftruncate\fP(2) 1.0
-\fBftruncate64\fP(2) 2.4
-\fBfutex\fP(2) 2.6
-\fBfutimesat\fP(2) 2.6.16
-\fBget_kernel_syms\fP(2) 1.0 Removed in 2.6
-\fBget_mempolicy\fP(2) 2.6.6
-\fBget_robust_list\fP(2) 2.6.17
-\fBget_thread_area\fP(2) 2.6
-.\" 8fcd6c45f5a65621ec809b7866a3623e9a01d4ed
-\fBget_tls\fP(2) 4.15 T{
-ARM OABI only, has
-\fB__ARM_NR\fP prefix
-T}
-\fBgetcpu\fP(2) 2.6.19
-\fBgetcwd\fP(2) 2.2
-\fBgetdents\fP(2) 2.0
-\fBgetdents64\fP(2) 2.4
-.\" parisc: 863722e856e64dae0e252b6bb546737c6c5626ce
-\fBgetdomainname\fP(2) 2.2 T{
-SPARC, SPARC64; available
-as \fBosf_getdomainname\fP(2)
-on Alpha since Linux 2.0
-T}
-.\" ec98c6b9b47df6df1c1fa6cf3d427414f8c2cf16
-\fBgetdtablesize\fP(2) 2.0 T{
-SPARC (removed in 2.6.26),
-available on Alpha as
-\fBosf_getdtablesize\fP(2)
-T}
-\fBgetegid\fP(2) 1.0
-\fBgetegid32\fP(2) 2.4
-\fBgeteuid\fP(2) 1.0
-\fBgeteuid32\fP(2) 2.4
-\fBgetgid\fP(2) 1.0
-\fBgetgid32\fP(2) 2.4
-\fBgetgroups\fP(2) 1.0
-\fBgetgroups32\fP(2) 2.4
-.\" SPARC removal: ec98c6b9b47df6df1c1fa6cf3d427414f8c2cf16
-\fBgethostname\fP(2) 2.0 T{
-Alpha, was available on
-SPARC up to Linux 2.6.26
-T}
-\fBgetitimer\fP(2) 1.0
-\fBgetpeername\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBgetpagesize\fP(2) 2.0 T{
-Alpha, SPARC/SPARC64 only
-T}
-\fBgetpgid\fP(2) 1.0
-\fBgetpgrp\fP(2) 1.0
-\fBgetpid\fP(2) 1.0
-\fBgetppid\fP(2) 1.0
-\fBgetpriority\fP(2) 1.0
-\fBgetrandom\fP(2) 3.17
-\fBgetresgid\fP(2) 2.2
-\fBgetresgid32\fP(2) 2.4
-\fBgetresuid\fP(2) 2.2
-\fBgetresuid32\fP(2) 2.4
-\fBgetrlimit\fP(2) 1.0
-\fBgetrusage\fP(2) 1.0
-\fBgetsid\fP(2) 2.0
-\fBgetsockname\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBgetsockopt\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBgettid\fP(2) 2.4.11
-\fBgettimeofday\fP(2) 1.0
-\fBgetuid\fP(2) 1.0
-\fBgetuid32\fP(2) 2.4
-\fBgetunwind\fP(2) 2.4.8 T{
-IA-64 only; deprecated
-T}
-\fBgetxattr\fP(2) 2.6; 2.4.18
-\fBgetxgid\fP(2) 2.0 T{
-Alpha only; see NOTES
-T}
-\fBgetxpid\fP(2) 2.0 T{
-Alpha only; see NOTES
-T}
-\fBgetxuid\fP(2) 2.0 T{
-Alpha only; see NOTES
-T}
-\fBinit_module\fP(2) 1.0
-\fBinotify_add_watch\fP(2) 2.6.13
-\fBinotify_init\fP(2) 2.6.13
-\fBinotify_init1\fP(2) 2.6.27
-\fBinotify_rm_watch\fP(2) 2.6.13
-\fBio_cancel\fP(2) 2.6
-\fBio_destroy\fP(2) 2.6
-\fBio_getevents\fP(2) 2.6
-\fBio_pgetevents\fP(2) 4.18
-\fBio_setup\fP(2) 2.6
-\fBio_submit\fP(2) 2.6
-\fBio_uring_enter\fP(2) 5.1
-\fBio_uring_register\fP(2) 5.1
-\fBio_uring_setup\fP(2) 5.1
-\fBioctl\fP(2) 1.0
-\fBioperm\fP(2) 1.0
-\fBiopl\fP(2) 1.0
-\fBioprio_get\fP(2) 2.6.13
-\fBioprio_set\fP(2) 2.6.13
-\fBipc\fP(2) 1.0
-.\" Implements System V IPC calls
-\fBkcmp\fP(2) 3.5
-\fBkern_features\fP(2) 3.7 SPARC64 only
-.\" FIXME . document kern_features():
-.\" commit 517ffce4e1a03aea979fe3a18a3dd1761a24fafb
-\fBkexec_file_load\fP(2) 3.17
-\fBkexec_load\fP(2) 2.6.13
-.\" The entry in the syscall table was reserved starting in 2.6.7
-.\" Was named sys_kexec_load() from 2.6.7 to 2.6.16
-\fBkeyctl\fP(2) 2.6.10
-\fBkill\fP(2) 1.0
-\fBlandlock_add_rule\fP(2) 5.13
-\fBlandlock_create_ruleset\fP(2) 5.13
-\fBlandlock_restrict_self\fP(2) 5.13
-\fBlchown\fP(2) 1.0 T{
-See \fBchown\fP(2) for
-version details
-T}
-\fBlchown32\fP(2) 2.4
-\fBlgetxattr\fP(2) 2.6; 2.4.18
-\fBlink\fP(2) 1.0
-\fBlinkat\fP(2) 2.6.16
-\fBlisten\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBlistxattr\fP(2) 2.6; 2.4.18
-\fBllistxattr\fP(2) 2.6; 2.4.18
-\fBlookup_dcookie\fP(2) 2.6
-\fBlremovexattr\fP(2) 2.6; 2.4.18
-\fBlseek\fP(2) 1.0
-\fBlsetxattr\fP(2) 2.6; 2.4.18
-\fBlstat\fP(2) 1.0
-\fBlstat64\fP(2) 2.4
-\fBmadvise\fP(2) 2.4
-\fBmbind\fP(2) 2.6.6
-\fBmemory_ordering\fP(2) 2.2 SPARC64 only
-.\" 26025bbfbba33a9425be1b89eccb4664ea4c17b6
-.\" bb6fb6dfcc17cddac11ac295861f7608194447a7
-\fBmembarrier\fP(2) 3.17
-\fBmemfd_create\fP(2) 3.17
-\fBmemfd_secret\fP(2) 5.14
-\fBmigrate_pages\fP(2) 2.6.16
-\fBmincore\fP(2) 2.4
-\fBmkdir\fP(2) 1.0
-\fBmkdirat\fP(2) 2.6.16
-\fBmknod\fP(2) 1.0
-\fBmknodat\fP(2) 2.6.16
-\fBmlock\fP(2) 2.0
-\fBmlock2\fP(2) 4.4
-\fBmlockall\fP(2) 2.0
-\fBmmap\fP(2) 1.0
-\fBmmap2\fP(2) 2.4
-\fBmodify_ldt\fP(2) 1.0
-\fBmount\fP(2) 1.0
-\fBmove_mount\fP(2) 5.2
-\fBmove_pages\fP(2) 2.6.18
-\fBmprotect\fP(2) 1.0
-\fBmq_getsetattr\fP(2) 2.6.6
-.\" Implements \fBmq_getattr\fP(3) and \fBmq_setattr\fP(3)
-\fBmq_notify\fP(2) 2.6.6
-\fBmq_open\fP(2) 2.6.6
-\fBmq_timedreceive\fP(2) 2.6.6
-\fBmq_timedsend\fP(2) 2.6.6
-\fBmq_unlink\fP(2) 2.6.6
-\fBmremap\fP(2) 2.0
-\fBmsgctl\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBmsgget\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBmsgrcv\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBmsgsnd\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBmsync\fP(2) 2.0
-.\" \fBmultiplexer\fP(2) ?? __NR_multiplexer reserved on
-.\" PowerPC, but unimplemented?
-\fBmunlock\fP(2) 2.0
-\fBmunlockall\fP(2) 2.0
-\fBmunmap\fP(2) 1.0
-\fBname_to_handle_at\fP(2) 2.6.39
-\fBnanosleep\fP(2) 2.0
-.\" 5590ff0d5528b60153c0b4e7b771472b5a95e297
-\fBnewfstatat\fP(2) 2.6.16 T{
-See \fBstat\fP(2)
-T}
-\fBnfsservctl\fP(2) 2.2 Removed in 3.1
-\fBnice\fP(2) 1.0
-\fBold_adjtimex\fP(2) 2.0 T{
-Alpha only; see NOTES
-T}
-\fBold_getrlimit\fP(2) 2.4 T{
-Old variant of \fBgetrlimit\fP(2)
-that used a different value
-for \fBRLIM_INFINITY\fP
-T}
-\fBoldfstat\fP(2) 1.0
-\fBoldlstat\fP(2) 1.0
-\fBoldolduname\fP(2) 1.0
-\fBoldstat\fP(2) 1.0
-\fBoldumount\fP(2) 2.1.116 T{
-Name of the old \fBumount\fP(2)
-syscall on Alpha
-T}
-\fBolduname\fP(2) 1.0
-\fBopen\fP(2) 1.0
-\fBopen_by_handle_at\fP(2) 2.6.39
-\fBopen_tree\fP(2) 5.2
-\fBopenat\fP(2) 2.6.16
-\fBopenat2\fP(2) 5.6
-.\" 9d02a4283e9ce4e9ca11ff00615bdacdb0515a1a
-\fBor1k_atomic\fP(2) 3.1 T{
-OpenRISC 1000 only
-T}
-\fBpause\fP(2) 1.0
-\fBpciconfig_iobase\fP(2) 2.2.15; 2.4 Not on x86
-.\" Alpha, PowerPC, ARM; not x86
-\fBpciconfig_read\fP(2) 2.0.26; 2.2 Not on x86
-.\" , PowerPC, ARM; not x86
-\fBpciconfig_write\fP(2) 2.0.26; 2.2 Not on x86
-.\" , PowerPC, ARM; not x86
-\fBperf_event_open\fP(2) 2.6.31 T{
-Was perf_counter_open() in
-2.6.31; renamed in 2.6.32
-T}
-\fBpersonality\fP(2) 1.2
-\fBperfctr\fP(2) 2.2 T{
-SPARC only; removed in 2.6.34
-T}
-.\" commit c7d5a0050773e98d1094eaa9f2a1a793fafac300 removed perfctr()
-\fBperfmonctl\fP(2) 2.4 IA-64 only; removed in 5.10
-\fBpidfd_getfd\fP(2) 5.6
-\fBpidfd_send_signal\fP(2) 5.1
-\fBpidfd_open\fP(2) 5.3
-\fBpipe\fP(2) 1.0
-\fBpipe2\fP(2) 2.6.27
-\fBpivot_root\fP(2) 2.4
-\fBpkey_alloc\fP(2) 4.8
-\fBpkey_free\fP(2) 4.8
-\fBpkey_mprotect\fP(2) 4.8
-\fBpoll\fP(2) 2.0.36; 2.2
-\fBppoll\fP(2) 2.6.16
-\fBprctl\fP(2) 2.2
-\fBpread64\fP(2) T{
-Added as "pread" in 2.2;
-renamed "pread64" in 2.6
-T}
-\fBpreadv\fP(2) 2.6.30
-\fBpreadv2\fP(2) 4.6
-\fBprlimit64\fP(2) 2.6.36
-\fBprocess_madvise\fP(2) 5.10
-\fBprocess_vm_readv\fP(2) 3.2
-\fBprocess_vm_writev\fP(2) 3.2
-\fBpselect6\fP(2) 2.6.16
-.\" Implements \fBpselect\fP(2)
-\fBptrace\fP(2) 1.0
-\fBpwrite64\fP(2) T{
-Added as "pwrite" in 2.2;
-renamed "pwrite64" in 2.6
-T}
-\fBpwritev\fP(2) 2.6.30
-\fBpwritev2\fP(2) 4.6
-\fBquery_module\fP(2) 2.2 Removed in 2.6
-\fBquotactl\fP(2) 1.0
-\fBquotactl_fd\fP(2) 5.14
-\fBread\fP(2) 1.0
-\fBreadahead\fP(2) 2.4.13
-\fBreaddir\fP(2) 1.0
-.\" Supersedes \fBgetdents\fP(2)
-\fBreadlink\fP(2) 1.0
-\fBreadlinkat\fP(2) 2.6.16
-\fBreadv\fP(2) 2.0
-\fBreboot\fP(2) 1.0
-\fBrecv\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBrecvfrom\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBrecvmsg\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBrecvmmsg\fP(2) 2.6.33
-\fBremap_file_pages\fP(2) 2.6 T{
-Deprecated since 3.16
-T}
-\fBremovexattr\fP(2) 2.6; 2.4.18
-\fBrename\fP(2) 1.0
-\fBrenameat\fP(2) 2.6.16
-\fBrenameat2\fP(2) 3.15
-\fBrequest_key\fP(2) 2.6.10
-\fBrestart_syscall\fP(2) 2.6
-.\" 921ebd8f2c081b3cf6c3b29ef4103eef3ff26054
-\fBriscv_flush_icache\fP(2) 4.15 RISC-V only
-\fBrmdir\fP(2) 1.0
-\fBrseq\fP(2) 4.18
-\fBrt_sigaction\fP(2) 2.2
-\fBrt_sigpending\fP(2) 2.2
-\fBrt_sigprocmask\fP(2) 2.2
-\fBrt_sigqueueinfo\fP(2) 2.2
-\fBrt_sigreturn\fP(2) 2.2
-\fBrt_sigsuspend\fP(2) 2.2
-\fBrt_sigtimedwait\fP(2) 2.2
-\fBrt_tgsigqueueinfo\fP(2) 2.6.31
-\fBrtas\fP(2) 2.6.2 T{
-PowerPC/PowerPC64 only
-T}
-\fBs390_runtime_instr\fP(2) 3.7 s390 only
-\fBs390_pci_mmio_read\fP(2) 3.19 s390 only
-\fBs390_pci_mmio_write\fP(2) 3.19 s390 only
-\fBs390_sthyi\fP(2) 4.15 s390 only
-\fBs390_guarded_storage\fP(2) 4.12 s390 only
-\fBsched_get_affinity\fP(2) 2.6 T{
-Name of
-.BR \%sched_getaffinity (2)
-on SPARC and SPARC64
-T}
-\fBsched_get_priority_max\fP(2) 2.0
-\fBsched_get_priority_min\fP(2) 2.0
-\fBsched_getaffinity\fP(2) 2.6
-\fBsched_getattr\fP(2) 3.14
-\fBsched_getparam\fP(2) 2.0
-\fBsched_getscheduler\fP(2) 2.0
-\fBsched_rr_get_interval\fP(2) 2.0
-\fBsched_set_affinity\fP(2) 2.6 T{
-Name of
-.BR \%sched_setaffinity (2)
-on SPARC and SPARC64
-T}
-\fBsched_setaffinity\fP(2) 2.6
-\fBsched_setattr\fP(2) 3.14
-\fBsched_setparam\fP(2) 2.0
-\fBsched_setscheduler\fP(2) 2.0
-\fBsched_yield\fP(2) 2.0
-\fBseccomp\fP(2) 3.17
-\fBselect\fP(2) 1.0
-\fBsemctl\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBsemget\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBsemop\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBsemtimedop\fP(2) 2.6; 2.4.22
-\fBsend\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBsendfile\fP(2) 2.2
-\fBsendfile64\fP(2) 2.6; 2.4.19
-\fBsendmmsg\fP(2) 3.0
-\fBsendmsg\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBsendto\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBset_mempolicy\fP(2) 2.6.6
-\fBset_robust_list\fP(2) 2.6.17
-\fBset_thread_area\fP(2) 2.6
-\fBset_tid_address\fP(2) 2.6
-\fBset_tls\fP(2) 2.6.11 T{
-ARM OABI/EABI only (constant
-has \fB__ARM_NR\fP prefix)
-T}
-.\" \fBsetaltroot\fP(2) 2.6.10 T{
-.\" Removed in 2.6.11, exposed one
-.\" of implementation details of
-.\" \fBpersonality\fP(2) (creating an
-.\" alternative root, precursor of
-.\" mount namespaces) to user space.
-.\" T}
-.\" See http://lkml.org/lkml/2005/8/1/83
-.\" "[PATCH] remove sys_set_zone_reclaim()"
-\fBsetdomainname\fP(2) 1.0
-\fBsetfsgid\fP(2) 1.2
-\fBsetfsgid32\fP(2) 2.4
-\fBsetfsuid\fP(2) 1.2
-\fBsetfsuid32\fP(2) 2.4
-\fBsetgid\fP(2) 1.0
-\fBsetgid32\fP(2) 2.4
-\fBsetgroups\fP(2) 1.0
-\fBsetgroups32\fP(2) 2.4
-.\" arch/alpha/include/asm/core_lca.h
-\fBsethae\fP(2) 2.0 T{
-Alpha only; see NOTES
-T}
-\fBsethostname\fP(2) 1.0
-\fBsetitimer\fP(2) 1.0
-\fBsetns\fP(2) 3.0
-\fBsetpgid\fP(2) 1.0
-\fBsetpgrp\fP(2) 2.0 T{
-Alternative name for \fBsetpgid\fP(2) on Alpha
-T}
-\fBsetpriority\fP(2) 1.0
-\fBsetregid\fP(2) 1.0
-\fBsetregid32\fP(2) 2.4
-\fBsetresgid\fP(2) 2.2
-\fBsetresgid32\fP(2) 2.4
-\fBsetresuid\fP(2) 2.2
-\fBsetresuid32\fP(2) 2.4
-\fBsetreuid\fP(2) 1.0
-\fBsetreuid32\fP(2) 2.4
-\fBsetrlimit\fP(2) 1.0
-\fBsetsid\fP(2) 1.0
-\fBsetsockopt\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBsettimeofday\fP(2) 1.0
-\fBsetuid\fP(2) 1.0
-\fBsetuid32\fP(2) 2.4
-\fBsetup\fP(2) 1.0 Removed in 2.2
-\fBsetxattr\fP(2) 2.6; 2.4.18
-\fBsgetmask\fP(2) 1.0
-\fBshmat\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBshmctl\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBshmdt\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBshmget\fP(2) 2.0 T{
-See notes on \fBipc\fP(2)
-T}
-\fBshutdown\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBsigaction\fP(2) 1.0
-\fBsigaltstack\fP(2) 2.2
-\fBsignal\fP(2) 1.0
-\fBsignalfd\fP(2) 2.6.22
-\fBsignalfd4\fP(2) 2.6.27
-\fBsigpending\fP(2) 1.0
-\fBsigprocmask\fP(2) 1.0
-\fBsigreturn\fP(2) 1.0
-\fBsigsuspend\fP(2) 1.0
-\fBsocket\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-\fBsocketcall\fP(2) 1.0
-.\" Implements BSD socket calls
-\fBsocketpair\fP(2) 2.0 T{
-See notes on \fBsocketcall\fP(2)
-T}
-.\" 5a0015d62668e64c8b6e02e360fbbea121bfd5e6
-\fBspill\fP(2) 2.6.13 Xtensa only
-\fBsplice\fP(2) 2.6.17
-\fBspu_create\fP(2) 2.6.16 T{
-PowerPC/PowerPC64 only
-T}
-\fBspu_run\fP(2) 2.6.16 T{
-PowerPC/PowerPC64 only
-T}
-\fBssetmask\fP(2) 1.0
-\fBstat\fP(2) 1.0
-\fBstat64\fP(2) 2.4
-\fBstatfs\fP(2) 1.0
-\fBstatfs64\fP(2) 2.6
-\fBstatx\fP(2) 4.11
-\fBstime\fP(2) 1.0
-\fBsubpage_prot\fP(2) 2.6.25 T{
-PowerPC/PowerPC64 only
-T}
-\fBswapcontext\fP(2) 2.6.3 T{
-PowerPC/PowerPC64 only
-T}
-.\" 529d235a0e190ded1d21ccc80a73e625ebcad09b
-\fBswitch_endian\fP(2) 4.1 PowerPC64 only
-\fBswapoff\fP(2) 1.0
-\fBswapon\fP(2) 1.0
-\fBsymlink\fP(2) 1.0
-\fBsymlinkat\fP(2) 2.6.16
-\fBsync\fP(2) 1.0
-\fBsync_file_range\fP(2) 2.6.17
-\fBsync_file_range2\fP(2) 2.6.22
-.\" PowerPC, ARM, tile
-.\" First appeared on ARM, as arm_sync_file_range(), but later renamed
-.\" \fBsys_debug_setcontext\fP(2) ??? PowerPC if CONFIG_PPC32
-\fBsyncfs\fP(2) 2.6.39
-\fBsys_debug_setcontext\fP(2) 2.6.11 PowerPC only
-\fBsyscall\fP(2) 1.0 T{
-Still available on ARM OABI
-and MIPS O32 ABI
-T}
-\fBsysfs\fP(2) 1.2
-\fBsysinfo\fP(2) 1.0
-\fBsyslog\fP(2) 1.0
-.\" glibc interface is \fBklogctl\fP(3)
-\fBsysmips\fP(2) 2.6.0 MIPS only
-\fBtee\fP(2) 2.6.17
-\fBtgkill\fP(2) 2.6
-\fBtime\fP(2) 1.0
-\fBtimer_create\fP(2) 2.6
-\fBtimer_delete\fP(2) 2.6
-\fBtimer_getoverrun\fP(2) 2.6
-\fBtimer_gettime\fP(2) 2.6
-\fBtimer_settime\fP(2) 2.6
-.\" .\" b215e283992899650c4271e7385c79e26fb9a88e
-.\" .\" 4d672e7ac79b5ec5cdc90e450823441e20464691
-.\" \fBtimerfd\fP(2) 2.6.22 T{
-.\" Old timerfd interface,
-.\" removed in 2.6.25
-.\" T}
-\fBtimerfd_create\fP(2) 2.6.25
-\fBtimerfd_gettime\fP(2) 2.6.25
-\fBtimerfd_settime\fP(2) 2.6.25
-\fBtimes\fP(2) 1.0
-\fBtkill\fP(2) 2.6; 2.4.22
-\fBtruncate\fP(2) 1.0
-\fBtruncate64\fP(2) 2.4
-\fBugetrlimit\fP(2) 2.4
-\fBumask\fP(2) 1.0
-\fBumount\fP(2) 1.0
-.\" sys_oldumount() -- __NR_umount
-\fBumount2\fP(2) 2.2
-.\" sys_umount() -- __NR_umount2
-\fBuname\fP(2) 1.0
-\fBunlink\fP(2) 1.0
-\fBunlinkat\fP(2) 2.6.16
-\fBunshare\fP(2) 2.6.16
-\fBuselib\fP(2) 1.0
-\fBustat\fP(2) 1.0
-\fBuserfaultfd\fP(2) 4.3
-\fBusr26\fP(2) 2.4.8.1 ARM OABI only
-\fBusr32\fP(2) 2.4.8.1 ARM OABI only
-\fButime\fP(2) 1.0
-\fButimensat\fP(2) 2.6.22
-\fButimes\fP(2) 2.2
-\fButrap_install\fP(2) 2.2 SPARC64 only
-.\" FIXME . document utrap_install()
-.\" There's a man page for Solaris 5.11
-\fBvfork\fP(2) 2.2
-\fBvhangup\fP(2) 1.0
-\fBvm86old\fP(2) 1.0 T{
-Was "vm86"; renamed in
-2.0.28/2.2
-T}
-\fBvm86\fP(2) 2.0.28; 2.2
-\fBvmsplice\fP(2) 2.6.17
-\fBwait4\fP(2) 1.0
-\fBwaitid\fP(2) 2.6.10
-\fBwaitpid\fP(2) 1.0
-\fBwrite\fP(2) 1.0
-\fBwritev\fP(2) 2.0
-.\" 5a0015d62668e64c8b6e02e360fbbea121bfd5e6
-\fBxtensa\fP(2) 2.6.13 Xtensa only
-.TE
-.P
-On many platforms, including x86-32, socket calls are all multiplexed
-(via glibc wrapper functions) through
-.BR socketcall (2)
-and similarly System\ V IPC calls are multiplexed through
-.BR ipc (2).
-.P
-Although slots are reserved for them in the system call table,
-the following system calls are not implemented in the standard kernel:
-.BR afs_syscall (2), \" __NR_afs_syscall is 137 on Linux 2.6.22/i386
-.BR break (2), \" __NR_break is 17 on Linux 2.6.22/i386
-.BR ftime (2), \" __NR_ftime is 35 on Linux 2.6.22/i386
-.BR getpmsg (2), \" __NR_getpmsg is 188 on Linux 2.6.22/i386
-.BR gtty (2), \" __NR_gtty is 32 on Linux 2.6.22/i386
-.BR idle (2), \" __NR_idle is 112 on Linux 2.6.22/i386
-.BR lock (2), \" __NR_lock is 53 on Linux 2.6.22/i386
-.BR madvise1 (2), \" __NR_madvise1 is 219 on Linux 2.6.22/i386
-.BR mpx (2), \" __NR_mpx is 66 on Linux 2.6.22/i386
-.BR phys (2), \" Slot has been reused
-.BR prof (2), \" __NR_prof is 44 on Linux 2.6.22/i386
-.BR profil (2), \" __NR_profil is 98 on Linux 2.6.22/i386
-.BR putpmsg (2), \" __NR_putpmsg is 189 on Linux 2.6.22/i386
-.\" __NR_security is 223 on Linux 2.4/i386; absent on 2.6/i386, present
-.\" on a couple of 2.6 architectures
-.BR security (2), \" __NR_security is 223 on Linux 2.4/i386
-.\" The security call is for future use.
-.BR stty (2), \" __NR_stty is 31 on Linux 2.6.22/i386
-.BR tuxcall (2), \" __NR_tuxcall is 184 on x86_64, also on PPC and alpha
-.BR ulimit (2), \" __NR_ulimit is 58 on Linux 2.6.22/i386
-and
-.BR vserver (2) \" __NR_vserver is 273 on Linux 2.6.22/i386
-(see also
-.BR unimplemented (2)).
-However,
-.BR ftime (3),
-.BR profil (3),
-and
-.BR ulimit (3)
-exist as library routines.
-The slot for
-.BR phys (2)
-is in use since Linux 2.1.116 for
-.BR umount (2);
-.BR phys (2)
-will never be implemented.
-The
-.BR getpmsg (2)
-and
-.BR putpmsg (2)
-calls are for kernels patched to support STREAMS,
-and may never be in the standard kernel.
-.P
-There was briefly
-.BR set_zone_reclaim (2),
-added in Linux 2.6.13, and removed in Linux 2.6.16;
-this system call was never available to user space.
-.\"
-.SS System calls on removed ports
-Some system calls only ever existed on Linux architectures that have
-since been removed from the kernel:
-.TP
-AVR32 (port removed in Linux 4.12)
-.RS
-.PD 0
-.IP \[bu] 3
-.BR pread (2)
-.IP \[bu]
-.BR pwrite (2)
-.PD
-.RE
-.TP
-Blackfin (port removed in Linux 4.17)
-.RS
-.PD 0
-.IP \[bu] 3
-.BR bfin_spinlock (2)
-(added in Linux 2.6.22)
-.IP \[bu]
-.BR dma_memcpy (2)
-(added in Linux 2.6.22)
-.IP \[bu]
-.BR pread (2)
-(added in Linux 2.6.22)
-.IP \[bu]
-.BR pwrite (2)
-(added in Linux 2.6.22)
-.IP \[bu]
-.BR sram_alloc (2)
-(added in Linux 2.6.22)
-.IP \[bu]
-.BR sram_free (2)
-(added in Linux 2.6.22)
-.PD
-.RE
-.TP
-Metag (port removed in Linux 4.17)
-.RS
-.PD 0
-.IP \[bu] 3
-.BR metag_get_tls (2)
-(added in Linux 3.9)
-.IP \[bu]
-.BR metag_set_fpu_flags (2)
-(added in Linux 3.9)
-.IP \[bu]
-.BR metag_set_tls (2)
-(added in Linux 3.9)
-.IP \[bu]
-.BR metag_setglobalbit (2)
-(added in Linux 3.9)
-.PD
-.RE
-.TP
-Tile (port removed in Linux 4.17)
-.RS
-.PD 0
-.IP \[bu] 3
-.BR cmpxchg_badaddr (2)
-(added in Linux 2.6.36)
-.PD
-.RE
-.SH NOTES
-Roughly speaking, the code belonging to the system call
-with number __NR_xxx defined in
-.I /usr/include/asm/unistd.h
-can be found in the Linux kernel source in the routine
-.IR sys_xxx ().
-There are many exceptions, however, mostly because
-older system calls were superseded by newer ones,
-and this has been treated somewhat unsystematically.
-On platforms with
-proprietary operating-system emulation,
-such as sparc, sparc64, and alpha,
-there are many additional system calls; mips64 also contains a full
-set of 32-bit system calls.
-.P
-Over time, changes to the interfaces of some system calls have been
-necessary.
-One reason for such changes was the need to increase the size of
-structures or scalar values passed to the system call.
-Because of these changes, certain architectures
-(notably, longstanding 32-bit architectures such as i386)
-now have various groups of related system calls (e.g.,
-.BR truncate (2)
-and
-.BR truncate64 (2))
-which perform similar tasks, but which vary in
-details such as the size of their arguments.
-(As noted earlier, applications are generally unaware of this:
-the glibc wrapper functions do some work to ensure that the right
-system call is invoked, and that ABI compatibility is
-preserved for old binaries.)
-Examples of system calls that exist in multiple versions are
-the following:
-.IP \[bu] 3
-By now there are three different versions of
-.BR stat (2):
-.IR sys_stat ()
-(slot
-.IR __NR_oldstat ),
-.IR sys_newstat ()
-(slot
-.IR __NR_stat ),
-and
-.IR sys_stat64 ()
-(slot
-.IR __NR_stat64 ),
-with the last being the most current.
-.\" e.g., on 2.6.22/i386: __NR_oldstat 18, __NR_stat 106, __NR_stat64 195
-.\" The stat system calls deal with three different data structures,
-.\" defined in include/asm-i386/stat.h: __old_kernel_stat, stat, stat64
-A similar story applies for
-.BR lstat (2)
-and
-.BR fstat (2).
-.IP \[bu]
-Similarly, the defines
-.IR __NR_oldolduname ,
-.IR __NR_olduname ,
-and
-.I __NR_uname
-refer to the routines
-.IR sys_olduname (),
-.IR sys_uname (),
-and
-.IR sys_newuname ().
-.IP \[bu]
-In Linux 2.0, a new version of
-.BR vm86 (2)
-appeared, with the old and the new kernel routines being named
-.IR sys_vm86old ()
-and
-.IR sys_vm86 ().
-.IP \[bu]
-In Linux 2.4, a new version of
-.BR getrlimit (2)
-appeared, with the old and the new kernel routines being named
-.IR sys_old_getrlimit ()
-(slot
-.IR __NR_getrlimit )
-and
-.IR sys_getrlimit ()
-(slot
-.IR __NR_ugetrlimit ).
-.IP \[bu]
-Linux 2.4 increased the size of user and group IDs from 16 to 32 bits.
-.\" 64-bit off_t changes: ftruncate64, *stat64,
-.\" fcntl64 (because of the flock structure), getdents64, *statfs64
-To support this change, a range of system calls were added
-(e.g.,
-.BR chown32 (2),
-.BR getuid32 (2),
-.BR getgroups32 (2),
-.BR setresuid32 (2)),
-superseding earlier calls of the same name without the
-"32" suffix.
-.IP \[bu]
-Linux 2.4 added support for applications on 32-bit architectures
-to access large files (i.e., files for which the sizes and
-file offsets can't be represented in 32 bits).
-To support this change, replacements were required for system calls
-that deal with file offsets and sizes.
-Thus the following system calls were added:
-.BR fcntl64 (2),
-.BR getdents64 (2),
-.BR stat64 (2),
-.BR statfs64 (2),
-.BR truncate64 (2),
-and their analogs that work with file descriptors or
-symbolic links.
-These system calls supersede the older system calls
-which, except in the case of the "stat" calls,
-have the same name without the "64" suffix.
-.IP
-On newer platforms that only have 64-bit file access and 32-bit UIDs/GIDs
-(e.g., alpha, ia64, s390x, x86-64), there is just a single version of
-the UID/GID and file access system calls.
-On platforms (typically, 32-bit platforms) where the *64 and *32 calls exist,
-the other versions are obsolete.
-.IP \[bu]
-The
-.I rt_sig*
-calls were added in Linux 2.2 to support the addition
-of real-time signals (see
-.BR signal (7)).
-These system calls supersede the older system calls of the same
-name without the "rt_" prefix.
-.IP \[bu]
-The
-.BR select (2)
-and
-.BR mmap (2)
-system calls use five or more arguments,
-which caused problems in the way
-argument passing on the i386 used to be set up.
-Thus, while other architectures have
-.IR sys_select ()
-and
-.IR sys_mmap ()
-corresponding to
-.I __NR_select
-and
-.IR __NR_mmap ,
-on i386 one finds
-.IR old_select ()
-and
-.IR old_mmap ()
-(routines that use a pointer to an
-argument block) instead.
-These days passing five arguments
-is not a problem any more, and there is a
-.I __NR__newselect
-.\" (used by libc 6)
-that corresponds directly to
-.IR sys_select ()
-and similarly
-.IR __NR_mmap2 .
-s390x is the only 64-bit architecture that has
-.IR old_mmap ().
-.\" .P
-.\" Two system call numbers,
-.\" .IR __NR__llseek
-.\" and
-.\" .IR __NR__sysctl
-.\" have an additional underscore absent in
-.\" .IR sys_llseek ()
-.\" and
-.\" .IR sys_sysctl ().
-.\"
-.\" In Linux 2.1.81,
-.\" .BR lchown (2)
-.\" and
-.\" .BR chown (2)
-.\" were swapped; that is,
-.\" .BR lchown (2)
-.\" was added with the semantics that were then current for
-.\" .BR chown (2),
-.\" and the semantics of the latter call were changed to what
-.\" they are today.
-.\"
-.\"
-.SS "Architecture-specific details: Alpha"
-.TP
-.BR getxgid (2)
-returns a pair of GID and effective GID via registers
-\fBr0\fP and \fBr20\fP; it is provided
-instead of
-\fBgetgid\fP(2) and \fBgetegid\fP(2).
-.TP
-.BR getxpid (2)
-returns a pair of PID and parent PID via registers
-\fBr0\fP and \fBr20\fP; it is provided instead of
-\fBgetpid\fP(2) and \fBgetppid\fP(2).
-.TP
-.BR old_adjtimex (2)
-is a variant of \fBadjtimex\fP(2) that uses \fIstruct timeval32\fP,
-for compatibility with OSF/1.
-.TP
-.BR getxuid (2)
-returns a pair of UID and effective UID via registers
-\fBr0\fP and \fBr20\fP; it is provided instead of
-\fBgetuid\fP(2) and \fBgeteuid\fP(2).
-.TP
-.BR sethae (2)
-is used for configuring the Host Address Extension register on
-low-cost Alphas in order to access address space beyond the first 27 bits.
-.SH SEE ALSO
-.BR ausyscall (1),
-.BR intro (2),
-.BR syscall (2),
-.BR unimplemented (2),
-.BR errno (3),
-.BR libc (7),
-.BR vdso (7)
diff --git a/man2/sysctl.2 b/man2/sysctl.2
deleted file mode 100644
index 3d91dc632..000000000
--- a/man2/sysctl.2
+++ /dev/null
@@ -1,160 +0,0 @@
-.\" Copyright (C) 1996 Andries Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Written 11 April 1996 by Andries Brouwer <aeb@cwi.nl>
-.\" 960412: Added comments from Stephen Tweedie
-.\" Modified Tue Oct 22 22:28:41 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Mon Jan 5 20:31:04 1998 by aeb.
-.\"
-.TH sysctl 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sysctl \- read/write system parameters
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.B #include <linux/sysctl.h>
-.P
-.BI "[[deprecated]] int _sysctl(struct __sysctl_args *" args );
-.fi
-.SH DESCRIPTION
-.B This system call no longer exists on current kernels!
-See NOTES.
-.P
-The
-.BR _sysctl ()
-call reads and/or writes kernel parameters.
-For example, the hostname,
-or the maximum number of open files.
-The argument has the form
-.P
-.in +4n
-.EX
-struct __sysctl_args {
- int *name; /* integer vector describing variable */
- int nlen; /* length of this vector */
- void *oldval; /* 0 or address where to store old value */
- size_t *oldlenp; /* available room for old value,
- overwritten by actual size of old value */
- void *newval; /* 0 or address of new value */
- size_t newlen; /* size of new value */
-};
-.EE
-.in
-.P
-This call does a search in a tree structure, possibly resembling
-a directory tree under
-.IR /proc/sys ,
-and if the requested item is found calls some appropriate routine
-to read or modify the value.
-.SH RETURN VALUE
-Upon successful completion,
-.BR _sysctl ()
-returns 0.
-Otherwise, a value of \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-.TQ
-.B EPERM
-No search permission for one of the encountered "directories",
-or no read permission where
-.I oldval
-was nonzero, or no write permission where
-.I newval
-was nonzero.
-.TP
-.B EFAULT
-The invocation asked for the previous value by setting
-.I oldval
-non-NULL, but allowed zero room in
-.IR oldlenp .
-.TP
-.B ENOTDIR
-.I name
-was not found.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 1.3.57.
-Removed in Linux 5.5, glibc 2.32.
-.P
-It originated in
-4.4BSD.
-Only Linux has the
-.I /proc/sys
-mirror, and the object naming schemes differ between Linux and 4.4BSD,
-but the declaration of the
-.BR sysctl ()
-function is the same in both.
-.SH NOTES
-Use of this system call was long discouraged:
-since Linux 2.6.24,
-uses of this system call result in warnings in the kernel log,
-and in Linux 5.5, the system call was finally removed.
-Use the
-.I /proc/sys
-interface instead.
-.P
-Note that on older kernels where this system call still exists,
-it is available only if the kernel was configured with the
-.B CONFIG_SYSCTL_SYSCALL
-option.
-Furthermore, glibc does not provide a wrapper for this system call,
-necessitating the use of
-.BR syscall (2).
-.SH BUGS
-The object names vary between kernel versions,
-making this system call worthless for applications.
-.P
-Not all available objects are properly documented.
-.P
-It is not yet possible to change operating system by writing to
-.IR /proc/sys/kernel/ostype .
-.SH EXAMPLES
-.\" SRC BEGIN (sysctl.c)
-.EX
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-#include <linux/sysctl.h>
-\&
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-\&
-int _sysctl(struct __sysctl_args *args);
-\&
-#define OSNAMESZ 100
-\&
-int
-main(void)
-{
- int name[] = { CTL_KERN, KERN_OSTYPE };
- char osname[OSNAMESZ];
- size_t osnamelth;
- struct __sysctl_args args;
-\&
- memset(&args, 0, sizeof(args));
- args.name = name;
- args.nlen = ARRAY_SIZE(name);
- args.oldval = osname;
- args.oldlenp = &osnamelth;
-\&
- osnamelth = sizeof(osname);
-\&
- if (syscall(SYS__sysctl, &args) == \-1) {
- perror("_sysctl");
- exit(EXIT_FAILURE);
- }
- printf("This machine is running %*s\en", (int) osnamelth, osname);
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR proc (5)
diff --git a/man2/sysfs.2 b/man2/sysfs.2
deleted file mode 100644
index 6e75cf3ae..000000000
--- a/man2/sysfs.2
+++ /dev/null
@@ -1,97 +0,0 @@
-.\" Copyright (C) 1995, Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Created Wed Aug 9 1995 Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\"
-.TH sysfs 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sysfs \- get filesystem type information
-.SH SYNOPSIS
-.nf
-.BI "[[deprecated]] int sysfs(int " option ", const char *" fsname );
-.BI "[[deprecated]] int sysfs(int " option ", unsigned int " fs_index ", char *" buf );
-.BI "[[deprecated]] int sysfs(int " option );
-.fi
-.SH DESCRIPTION
-.BR "Note" :
-if you are looking for information about the
-.B sysfs
-filesystem that is normally mounted at
-.IR /sys ,
-see
-.BR sysfs (5).
-.P
-The (obsolete)
-.BR sysfs ()
-system call returns information about the filesystem types
-currently present in the kernel.
-The specific form of the
-.BR sysfs ()
-call and the information returned depends on the
-.I option
-in effect:
-.TP 3
-.B 1
-Translate the filesystem identifier string
-.I fsname
-into a filesystem type index.
-.TP
-.B 2
-Translate the filesystem type index
-.I fs_index
-into a null-terminated filesystem identifier string.
-This string will
-be written to the buffer pointed to by
-.IR buf .
-Make sure that
-.I buf
-has enough space to accept the string.
-.TP
-.B 3
-Return the total number of filesystem types currently present in the
-kernel.
-.P
-The numbering of the filesystem type indexes begins with zero.
-.SH RETURN VALUE
-On success,
-.BR sysfs ()
-returns the filesystem index for option
-.BR 1 ,
-zero for option
-.BR 2 ,
-and the number of currently configured filesystems for option
-.BR 3 .
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.RI "Either " fsname " or " buf
-is outside your accessible address space.
-.TP
-.B EINVAL
-.I fsname
-is not a valid filesystem type identifier;
-.I fs_index
-is out-of-bounds;
-.I option
-is invalid.
-.SH STANDARDS
-None.
-.SH HISTORY
-SVr4.
-.P
-This System-V derived system call is obsolete; don't use it.
-On systems with
-.IR /proc ,
-the same information can be obtained via
-.IR /proc ;
-use that interface instead.
-.SH BUGS
-There is no libc or glibc support.
-There is no way to guess how large \fIbuf\fP should be.
-.SH SEE ALSO
-.BR proc (5),
-.BR sysfs (5)
diff --git a/man2/sysinfo.2 b/man2/sysinfo.2
deleted file mode 100644
index 75c578404..000000000
--- a/man2/sysinfo.2
+++ /dev/null
@@ -1,106 +0,0 @@
-.\" Copyright (C) 2016, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Based on an earlier version of the page where a few pieces were
-.\" copyright (C) 1993 by Dan Miner (dminer@nyx.cs.du.edu) and subsequently
-.\" others (see old changelog below).
-.\" The structure definitions are taken more or less straight from the kernel
-.\" source files.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\"
-.\" Modified Sat Jul 24 12:35:12 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Tue Oct 22 22:29:51 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Mon Aug 25 16:06:11 1997 by Nicolás Lichtmaier <nick@debian.org>
-.\"
-.TH sysinfo 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-sysinfo \- return system information
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/sysinfo.h>
-.P
-.BI "int sysinfo(struct sysinfo *" info );
-.fi
-.SH DESCRIPTION
-.BR sysinfo ()
-returns certain statistics on memory and swap usage,
-as well as the load average.
-.P
-Until Linux 2.3.16,
-.BR sysinfo ()
-returned information in the following structure:
-.P
-.in +4n
-.EX
-struct sysinfo {
- long uptime; /* Seconds since boot */
- unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
- unsigned long totalram; /* Total usable main memory size */
- unsigned long freeram; /* Available memory size */
- unsigned long sharedram; /* Amount of shared memory */
- unsigned long bufferram; /* Memory used by buffers */
- unsigned long totalswap; /* Total swap space size */
- unsigned long freeswap; /* Swap space still available */
- unsigned short procs; /* Number of current processes */
- char _f[22]; /* Pads structure to 64 bytes */
-};
-.EE
-.in
-.P
-In the above structure, the sizes of the memory and swap fields
-are given in bytes.
-.P
-Since Linux 2.3.23 (i386) and Linux 2.3.48
-(all architectures) the structure is:
-.P
-.in +4n
-.EX
-struct sysinfo {
- long uptime; /* Seconds since boot */
- unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
- unsigned long totalram; /* Total usable main memory size */
- unsigned long freeram; /* Available memory size */
- unsigned long sharedram; /* Amount of shared memory */
- unsigned long bufferram; /* Memory used by buffers */
- unsigned long totalswap; /* Total swap space size */
- unsigned long freeswap; /* Swap space still available */
- unsigned short procs; /* Number of current processes */
- unsigned long totalhigh; /* Total high memory size */
- unsigned long freehigh; /* Available high memory size */
- unsigned int mem_unit; /* Memory unit size in bytes */
- char _f[20\-2*sizeof(long)\-sizeof(int)];
- /* Padding to 64 bytes */
-};
-.EE
-.in
-.P
-In the above structure,
-sizes of the memory and swap fields are given as multiples of
-.I mem_unit
-bytes.
-.SH RETURN VALUE
-On success,
-.BR sysinfo ()
-returns zero.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I info
-is not a valid address.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 0.98.pl6.
-.SH NOTES
-All of the information provided by this system call is also available via
-.I /proc/meminfo
-and
-.IR /proc/loadavg .
-.SH SEE ALSO
-.BR proc (5)
diff --git a/man2/syslog.2 b/man2/syslog.2
deleted file mode 100644
index 15426badf..000000000
--- a/man2/syslog.2
+++ /dev/null
@@ -1,378 +0,0 @@
-'\" t
-.\" Copyright (C) 1995 Andries Brouwer (aeb@cwi.nl)
-.\" and Copyright (C) 2012, 2014 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Written 11 June 1995 by Andries Brouwer <aeb@cwi.nl>
-.\" 2008-02-15, Jeremy Kerr <jk@ozlabs.org>
-.\" Add info on command type 10; add details on types 6, 7, 8, & 9.
-.\" 2008-02-15, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Update LOG_BUF_LEN details; update RETURN VALUE section.
-.\"
-.TH syslog 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-syslog, klogctl \- read and/or clear kernel message ring buffer;
-set console_loglevel
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <sys/klog.h>" " /* Definition of " SYSLOG_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_syslog, int " type ", char *" bufp ", int " len );
-.P
-/* The glibc interface */
-.B #include <sys/klog.h>
-.P
-.BI "int klogctl(int " type ", char *" bufp ", int " len );
-.fi
-.SH DESCRIPTION
-.IR Note :
-Probably, you are looking for the C library function
-.BR syslog (),
-which talks to
-.BR syslogd (8);
-see
-.BR syslog (3)
-for details.
-.P
-This page describes the kernel
-.BR syslog ()
-system call, which is used to control the kernel
-.IR printk ()
-buffer; the glibc wrapper function for the system call is called
-.BR klogctl ().
-.SS The kernel log buffer
-The kernel has a cyclic buffer of length
-.B LOG_BUF_LEN
-in which messages given as arguments to the kernel function
-.BR printk ()
-are stored (regardless of their log level).
-In early kernels,
-.B LOG_BUF_LEN
-had the value 4096;
-from Linux 1.3.54, it was 8192;
-from Linux 2.1.113, it was 16384;
-since Linux 2.4.23/2.6, the value is a kernel configuration option
-.RB ( CONFIG_LOG_BUF_SHIFT ,
-default value dependent on the architecture).
-.\" Under "General setup" ==> "Kernel log buffer size"
-.\" For Linux 2.6, precisely the option seems to have appeared in Linux 2.5.55.
-Since Linux 2.6.6, the size can be queried with command type 10 (see below).
-.SS Commands
-The \fItype\fP argument determines the action taken by this function.
-The list below specifies the values for
-.IR type .
-The symbolic names are defined in the kernel source,
-but are not exported to user space;
-you will either need to use the numbers, or define the names yourself.
-.TP
-.BR SYSLOG_ACTION_CLOSE " (0)"
-Close the log.
-Currently a NOP.
-.TP
-.BR SYSLOG_ACTION_OPEN " (1)"
-Open the log.
-Currently a NOP.
-.TP
-.BR SYSLOG_ACTION_READ " (2)"
-Read from the log.
-The call
-waits until the kernel log buffer is nonempty, and then reads
-at most \fIlen\fP bytes into the buffer pointed to by
-.IR bufp .
-The call returns the number of bytes read.
-Bytes read from the log disappear from the log buffer:
-the information can be read only once.
-This is the function executed by the kernel when a user program reads
-.IR /proc/kmsg .
-.TP
-.BR SYSLOG_ACTION_READ_ALL " (3)"
-Read all messages remaining in the ring buffer,
-placing them in the buffer pointed to by
-.IR bufp .
-The call reads the last \fIlen\fP
-bytes from the log buffer (nondestructively),
-but will not read more than was written into the buffer since the
-last "clear ring buffer" command (see command 5 below).
-The call returns the number of bytes read.
-.TP
-.BR SYSLOG_ACTION_READ_CLEAR " (4)"
-Read and clear all messages remaining in the ring buffer.
-The call does precisely the same as for a
-.I type
-of 3, but also executes the "clear ring buffer" command.
-.TP
-.BR SYSLOG_ACTION_CLEAR " (5)"
-The call executes just the "clear ring buffer" command.
-The
-.I bufp
-and
-.I len
-arguments are ignored.
-.IP
-This command does not really clear the ring buffer.
-Rather, it sets a kernel bookkeeping variable that
-determines the results returned by commands 3
-.RB ( SYSLOG_ACTION_READ_ALL )
-and 4
-.RB ( SYSLOG_ACTION_READ_CLEAR ).
-This command has no effect on commands 2
-.RB ( SYSLOG_ACTION_READ )
-and 9
-.RB ( SYSLOG_ACTION_SIZE_UNREAD ).
-.TP
-.BR SYSLOG_ACTION_CONSOLE_OFF " (6)"
-The command saves the current value of
-.I console_loglevel
-and then sets
-.I console_loglevel
-to
-.IR minimum_console_loglevel ,
-so that no messages are printed to the console.
-Before Linux 2.6.32,
-.\" commit 1aaad49e856ce41adc07d8ae0c8ef35fc4483245
-the command simply sets
-.I console_loglevel
-to
-.IR minimum_console_loglevel .
-See the discussion of
-.IR /proc/sys/kernel/printk ,
-below.
-.IP
-The
-.I bufp
-and
-.I len
-arguments are ignored.
-.TP
-.BR SYSLOG_ACTION_CONSOLE_ON " (7)"
-If a previous
-.B SYSLOG_ACTION_CONSOLE_OFF
-command has been performed,
-this command restores
-.I console_loglevel
-to the value that was saved by that command.
-Before Linux 2.6.32,
-.\" commit 1aaad49e856ce41adc07d8ae0c8ef35fc4483245
-this command simply sets
-.I console_loglevel
-to
-.IR default_console_loglevel .
-See the discussion of
-.IR /proc/sys/kernel/printk ,
-below.
-.IP
-The
-.I bufp
-and
-.I len
-arguments are ignored.
-.TP
-.BR SYSLOG_ACTION_CONSOLE_LEVEL " (8)"
-The call sets
-.I console_loglevel
-to the value given in
-.IR len ,
-which must be an integer between 1 and 8 (inclusive).
-The kernel silently enforces a minimum value of
-.I minimum_console_loglevel
-for
-.IR len .
-See the
-.I log level
-section for details.
-The
-.I bufp
-argument is ignored.
-.TP
-.BR SYSLOG_ACTION_SIZE_UNREAD " (9) (since Linux 2.4.10)"
-The call
-returns the number of bytes currently available to be read
-from the kernel log buffer via command 2
-.RB ( SYSLOG_ACTION_READ ).
-The
-.I bufp
-and
-.I len
-arguments are ignored.
-.TP
-.BR SYSLOG_ACTION_SIZE_BUFFER " (10) (since Linux 2.6.6)"
-This command returns the total size of the kernel log buffer.
-The
-.I bufp
-and
-.I len
-arguments are ignored.
-.P
-All commands except 3 and 10 require privilege.
-In Linux kernels before Linux 2.6.37,
-command types 3 and 10 are allowed to unprivileged processes;
-since Linux 2.6.37,
-these commands are allowed to unprivileged processes only if
-.I /proc/sys/kernel/dmesg_restrict
-has the value 0.
-Before Linux 2.6.37, "privileged" means that the caller has the
-.B CAP_SYS_ADMIN
-capability.
-Since Linux 2.6.37,
-"privileged" means that the caller has either the
-.B CAP_SYS_ADMIN
-capability (now deprecated for this purpose) or the (new)
-.B CAP_SYSLOG
-capability.
-.\"
-.\"
-.SS /proc/sys/kernel/printk
-.I /proc/sys/kernel/printk
-is a writable file containing four integer values that influence kernel
-.IR printk ()
-behavior when printing or logging error messages.
-The four values are:
-.TP
-.I console_loglevel
-Only messages with a log level lower than this value will
-be printed to the console.
-The default value for this field is
-.B DEFAULT_CONSOLE_LOGLEVEL
-(7), but it is set to
-4 if the kernel command line contains the word "quiet",\" since Linux 2.4
-10 if the kernel command line contains the word "debug",
-and to 15 in case
-of a kernel fault (the 10 and 15 are just silly, and equivalent to 8).
-The value of
-.I console_loglevel
-can be set (to a value in the range 1\[en]8) by a
-.BR syslog ()
-call with a
-.I type
-of 8.
-.TP
-.I default_message_loglevel
-This value will be used as the log level for
-.IR printk ()
-messages that do not have an explicit level.
-Up to and including Linux 2.6.38,
-the hard-coded default value for this field was 4
-.RB ( KERN_WARNING );
-since Linux 2.6.39,
-.\" commit 5af5bcb8d37f99ba415a1adc6da71051b84f93a5
-the default value is defined by the kernel configuration option
-.BR CONFIG_DEFAULT_MESSAGE_LOGLEVEL ,
-which defaults to 4.
-.TP
-.I minimum_console_loglevel
-The value in this field is the minimum value to which
-.I console_loglevel
-can be set.
-.TP
-.I default_console_loglevel
-This is the default value for
-.IR console_loglevel .
-.\"
-.\"
-.SS The log level
-Every
-.IR printk ()
-message has its own log level.
-If the log level is not explicitly specified as part of the message,
-it defaults to
-.IR default_message_loglevel .
-The conventional meaning of the log level is as follows:
-.TS
-lB lB lB
-lB c l.
-Kernel constant Level value Meaning
-KERN_EMERG 0 System is unusable
-KERN_ALERT 1 T{
-Action must be taken immediately
-T}
-KERN_CRIT 2 Critical conditions
-KERN_ERR 3 Error conditions
-KERN_WARNING 4 Warning conditions
-KERN_NOTICE 5 T{
-Normal but significant condition
-T}
-KERN_INFO 6 Informational
-KERN_DEBUG 7 Debug-level messages
-.TE
-.P
-The kernel
-.IR printk ()
-routine will print a message on the
-console only if it has a log level less than the value of
-.IR console_loglevel .
-.SH RETURN VALUE
-For \fItype\fP equal to 2, 3, or 4, a successful call to
-.BR syslog ()
-returns the number
-of bytes read.
-For \fItype\fP 9,
-.BR syslog ()
-returns the number of bytes currently
-available to be read on the kernel log buffer.
-For \fItype\fP 10,
-.BR syslog ()
-returns the total size of the kernel log buffer.
-For other values of \fItype\fP, 0 is returned on success.
-.P
-In case of error, \-1 is returned,
-and \fIerrno\fP is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-Bad arguments (e.g.,
-bad
-.IR type ;
-or for
-.I type
-2, 3, or 4,
-.I bufp
-is NULL,
-or
-.I len
-is less than zero; or for
-.I type
-8, the
-.I level
-is outside the range 1 to 8).
-.TP
-.B ENOSYS
-This
-.BR syslog ()
-system call is not available, because the kernel was compiled with the
-.B CONFIG_PRINTK
-kernel-configuration option disabled.
-.TP
-.B EPERM
-An attempt was made to change
-.I console_loglevel
-or clear the kernel
-message ring buffer by a process without sufficient privilege
-(more precisely: without the
-.B CAP_SYS_ADMIN
-or
-.B CAP_SYSLOG
-capability).
-.TP
-.B ERESTARTSYS
-System call was interrupted by a signal; nothing was read.
-(This can be seen only during a trace.)
-.SH STANDARDS
-Linux.
-.SH HISTORY
-From the very start, people noted that it is unfortunate that
-a system call and a library routine of the same name are entirely
-different animals.
-.\" In libc4 and libc5 the number of this call was defined by
-.\" .BR SYS_klog .
-.\" In glibc 2.0 the syscall is baptized
-.\" .BR klogctl ().
-.SH SEE ALSO
-.BR dmesg (1),
-.BR syslog (3),
-.BR capabilities (7)
diff --git a/man2/tee.2 b/man2/tee.2
deleted file mode 100644
index 8ed9a67a8..000000000
--- a/man2/tee.2
+++ /dev/null
@@ -1,199 +0,0 @@
-.\" This manpage is Copyright (C) 2006 Jens Axboe
-.\" and Copyright (C) 2006 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH tee 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-tee \- duplicating pipe content
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <fcntl.h>
-.P
-.BI "ssize_t tee(int " fd_in ", int " fd_out ", size_t " len \
-", unsigned int " flags );
-.fi
-.\" Return type was long before glibc 2.7
-.SH DESCRIPTION
-.\" Example programs http://brick.kernel.dk/snaps
-.\"
-.\"
-.\" add a "tee(in, out1, out2)" system call that duplicates the pages
-.\" (again, incrementing their reference count, not copying the data) from
-.\" one pipe to two other pipes.
-.BR tee ()
-duplicates up to
-.I len
-bytes of data from the pipe referred to by the file descriptor
-.I fd_in
-to the pipe referred to by the file descriptor
-.IR fd_out .
-It does not consume the data that is duplicated from
-.IR fd_in ;
-therefore, that data can be copied by a subsequent
-.BR splice (2).
-.P
-.I flags
-is a bit mask that is composed by ORing together
-zero or more of the following values:
-.TP 1.9i
-.B SPLICE_F_MOVE
-Currently has no effect for
-.BR tee ();
-see
-.BR splice (2).
-.TP
-.B SPLICE_F_NONBLOCK
-Do not block on I/O; see
-.BR splice (2)
-for further details.
-.TP
-.B SPLICE_F_MORE
-Currently has no effect for
-.BR tee (),
-but may be implemented in the future; see
-.BR splice (2).
-.TP
-.B SPLICE_F_GIFT
-Unused for
-.BR tee ();
-see
-.BR vmsplice (2).
-.SH RETURN VALUE
-Upon successful completion,
-.BR tee ()
-returns the number of bytes that were duplicated between the input
-and output.
-A return value of 0 means that there was no data to transfer,
-and it would not make sense to block, because there are no
-writers connected to the write end of the pipe referred to by
-.IR fd_in .
-.P
-On error,
-.BR tee ()
-returns \-1 and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-.B SPLICE_F_NONBLOCK
-was specified in
-.I flags
-or one of the file descriptors had been marked as nonblocking
-.RB ( O_NONBLOCK ),
-and the operation would block.
-.TP
-.B EINVAL
-.I fd_in
-or
-.I fd_out
-does not refer to a pipe; or
-.I fd_in
-and
-.I fd_out
-refer to the same pipe.
-.TP
-.B ENOMEM
-Out of memory.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.17,
-glibc 2.5.
-.SH NOTES
-Conceptually,
-.BR tee ()
-copies the data between the two pipes.
-In reality no real data copying takes place though:
-under the covers,
-.BR tee ()
-assigns data to the output by merely grabbing
-a reference to the input.
-.SH EXAMPLES
-The example below implements a basic
-.BR tee (1)
-program using the
-.BR tee ()
-system call.
-Here is an example of its use:
-.P
-.in +4n
-.EX
-$ \fBdate | ./a.out out.log | cat\fP
-Tue Oct 28 10:06:00 CET 2014
-$ \fBcat out.log\fP
-Tue Oct 28 10:06:00 CET 2014
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (tee.c)
-.EX
-#define _GNU_SOURCE
-#include <errno.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- ssize_t len, slen;
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <file>\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC, 0644);
- if (fd == \-1) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-\&
- for (;;) {
- /*
- * tee stdin to stdout.
- */
- len = tee(STDIN_FILENO, STDOUT_FILENO,
- INT_MAX, SPLICE_F_NONBLOCK);
- if (len < 0) {
- if (errno == EAGAIN)
- continue;
- perror("tee");
- exit(EXIT_FAILURE);
- }
- if (len == 0)
- break;
-\&
- /*
- * Consume stdin by splicing it to a file.
- */
- while (len > 0) {
- slen = splice(STDIN_FILENO, NULL, fd, NULL,
- len, SPLICE_F_MOVE);
- if (slen < 0) {
- perror("splice");
- exit(EXIT_FAILURE);
- }
- len \-= slen;
- }
- }
-\&
- close(fd);
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR splice (2),
-.BR vmsplice (2),
-.BR pipe (7)
diff --git a/man2/tgkill.2 b/man2/tgkill.2
deleted file mode 100644
index 82fc2d6af..000000000
--- a/man2/tgkill.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/tkill.2
diff --git a/man2/time.2 b/man2/time.2
deleted file mode 100644
index e85029db0..000000000
--- a/man2/time.2
+++ /dev/null
@@ -1,114 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified Sat Jul 24 14:13:40 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Additions by Joseph S. Myers <jsm28@cam.ac.uk>, 970909
-.\"
-.TH time 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-time \- get time in seconds
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <time.h>
-.P
-.BI "time_t time(time_t *_Nullable " tloc );
-.fi
-.SH DESCRIPTION
-.BR time ()
-returns the time as the number of seconds since the
-Epoch, 1970-01-01 00:00:00 +0000 (UTC).
-.P
-If
-.I tloc
-is non-NULL,
-the return value is also stored in the memory pointed to by
-.IR tloc .
-.SH RETURN VALUE
-On success, the value of time in seconds since the Epoch is returned.
-On error, \fI((time_t)\ \-1)\fP is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EOVERFLOW
-The time cannot be represented as a
-.I time_t
-value.
-This can happen if an executable with 32-bit
-.I time_t
-is run on a 64-bit kernel when the time is 2038-01-19 03:14:08 UTC or later.
-However, when the system time is out of
-.I time_t
-range in other situations, the behavior is undefined.
-.TP
-.B EFAULT
-.I tloc
-points outside your accessible address space (but see BUGS).
-.IP
-On systems where the C library
-.BR time ()
-wrapper function invokes an implementation provided by the
-.BR vdso (7)
-(so that there is no trap into the kernel),
-an invalid address may instead trigger a
-.B SIGSEGV
-signal.
-.SH VERSIONS
-POSIX.1 defines
-.I seconds since the Epoch
-using a formula that approximates the number of seconds between a
-specified time and the Epoch.
-This formula takes account of the facts that
-all years that are evenly divisible by 4 are leap years,
-but years that are evenly divisible by 100 are not leap years
-unless they are also evenly divisible by 400,
-in which case they are leap years.
-This value is not the same as the actual number of seconds between the time
-and the Epoch, because of leap seconds and because system clocks are not
-required to be synchronized to a standard reference.
-Linux systems normally follow the POSIX requirement
-that this value ignore leap seconds,
-so that conforming systems interpret it consistently;
-see POSIX.1-2018 Rationale A.4.16.
-.P
-Applications intended to run after 2038 should use ABIs with
-.I time_t
-wider than 32 bits; see
-.BR time_t (3type).
-.SS C library/kernel differences
-On some architectures, an implementation of
-.BR time ()
-is provided in the
-.BR vdso (7).
-.SH STANDARDS
-C11, POSIX.1-2008.
-.SH HISTORY
-SVr4, 4.3BSD, C89, POSIX.1-2001.
-.\" Under 4.3BSD, this call is obsoleted by
-.\" .BR gettimeofday (2).
-.SH BUGS
-Error returns from this system call are indistinguishable from
-successful reports that the time is a few seconds
-.I before
-the Epoch, so the C library wrapper function never sets
-.I errno
-as a result of this call.
-.P
-The
-.I tloc
-argument is obsolescent and should always be NULL in new code.
-When
-.I tloc
-is NULL, the call cannot fail.
-.SH SEE ALSO
-.BR date (1),
-.BR gettimeofday (2),
-.BR ctime (3),
-.BR ftime (3),
-.BR time (7),
-.BR vdso (7)
diff --git a/man2/timer_create.2 b/man2/timer_create.2
deleted file mode 100644
index 1109858b8..000000000
--- a/man2/timer_create.2
+++ /dev/null
@@ -1,487 +0,0 @@
-.\" Copyright (c) 2009 Linux Foundation, written by Michael Kerrisk
-.\" <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH timer_create 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-timer_create \- create a POSIX per-process timer
-.SH LIBRARY
-Real-time library
-.RI ( librt ", " \-lrt )
-.SH SYNOPSIS
-.nf
-.BR "#include <signal.h>" " /* Definition of " SIGEV_* " constants */"
-.B #include <time.h>
-.P
-.BI "int timer_create(clockid_t " clockid ,
-.BI " struct sigevent *_Nullable restrict " sevp ,
-.BI " timer_t *restrict " timerid );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR timer_create ():
-.nf
- _POSIX_C_SOURCE >= 199309L
-.fi
-.SH DESCRIPTION
-.BR timer_create ()
-creates a new per-process interval timer.
-The ID of the new timer is returned in the buffer pointed to by
-.IR timerid ,
-which must be a non-null pointer.
-This ID is unique within the process, until the timer is deleted.
-The new timer is initially disarmed.
-.P
-The
-.I clockid
-argument specifies the clock that the new timer uses to measure time.
-It can be specified as one of the following values:
-.TP
-.B CLOCK_REALTIME
-A settable system-wide real-time clock.
-.TP
-.B CLOCK_MONOTONIC
-A nonsettable monotonically increasing clock that measures time
-from some unspecified point in the past that does not change
-after system startup.
-.\" Note: the CLOCK_MONOTONIC_RAW clock added for clock_gettime()
-.\" in Linux 2.6.28 is not supported for POSIX timers -- mtk, Feb 2009
-.TP
-.BR CLOCK_PROCESS_CPUTIME_ID " (since Linux 2.6.12)"
-A clock that measures (user and system) CPU time consumed by
-(all of the threads in) the calling process.
-.TP
-.BR CLOCK_THREAD_CPUTIME_ID " (since Linux 2.6.12)"
-A clock that measures (user and system) CPU time consumed by
-the calling thread.
-.\" The CLOCK_MONOTONIC_RAW that was added in Linux 2.6.28 can't be used
-.\" to create a timer -- mtk, Feb 2009
-.TP
-.BR CLOCK_BOOTTIME " (since Linux 2.6.39)"
-.\" commit 70a08cca1227dc31c784ec930099a4417a06e7d0
-Like
-.BR CLOCK_MONOTONIC ,
-this is a monotonically increasing clock.
-However, whereas the
-.B CLOCK_MONOTONIC
-clock does not measure the time while a system is suspended, the
-.B CLOCK_BOOTTIME
-clock does include the time during which the system is suspended.
-This is useful for applications that need to be suspend-aware.
-.B CLOCK_REALTIME
-is not suitable for such applications, since that clock is affected
-by discontinuous changes to the system clock.
-.TP
-.BR CLOCK_REALTIME_ALARM " (since Linux 3.0)"
-.\" commit 9a7adcf5c6dea63d2e47e6f6d2f7a6c9f48b9337
-This clock is like
-.BR CLOCK_REALTIME ,
-but will wake the system if it is suspended.
-The caller must have the
-.B CAP_WAKE_ALARM
-capability in order to set a timer against this clock.
-.TP
-.BR CLOCK_BOOTTIME_ALARM " (since Linux 3.0)"
-.\" commit 9a7adcf5c6dea63d2e47e6f6d2f7a6c9f48b9337
-This clock is like
-.BR CLOCK_BOOTTIME ,
-but will wake the system if it is suspended.
-The caller must have the
-.B CAP_WAKE_ALARM
-capability in order to set a timer against this clock.
-.TP
-.BR CLOCK_TAI " (since Linux 3.10)"
-A system-wide clock derived from wall-clock time but counting leap seconds.
-.P
-See
-.BR clock_getres (2)
-for some further details on the above clocks.
-.P
-As well as the above values,
-.I clockid
-can be specified as the
-.I clockid
-returned by a call to
-.BR clock_getcpuclockid (3)
-or
-.BR pthread_getcpuclockid (3).
-.P
-The
-.I sevp
-argument points to a
-.I sigevent
-structure that specifies how the caller
-should be notified when the timer expires.
-For the definition and general details of this structure, see
-.BR sigevent (3type).
-.P
-The
-.I sevp\->sigev_notify
-field can have the following values:
-.TP
-.B SIGEV_NONE
-Don't asynchronously notify when the timer expires.
-Progress of the timer can be monitored using
-.BR timer_gettime (2).
-.TP
-.B SIGEV_SIGNAL
-Upon timer expiration, generate the signal
-.I sigev_signo
-for the process.
-See
-.BR sigevent (3type)
-for general details.
-The
-.I si_code
-field of the
-.I siginfo_t
-structure will be set to
-.BR SI_TIMER .
-At any point in time,
-at most one signal is queued to the process for a given timer; see
-.BR timer_getoverrun (2)
-for more details.
-.TP
-.B SIGEV_THREAD
-Upon timer expiration, invoke
-.I sigev_notify_function
-as if it were the start function of a new thread.
-See
-.BR sigevent (3type)
-for details.
-.TP
-.BR SIGEV_THREAD_ID " (Linux-specific)"
-As for
-.BR SIGEV_SIGNAL ,
-but the signal is targeted at the thread whose ID is given in
-.IR sigev_notify_thread_id ,
-which must be a thread in the same process as the caller.
-The
-.I sigev_notify_thread_id
-field specifies a kernel thread ID, that is, the value returned by
-.BR clone (2)
-or
-.BR gettid (2).
-This flag is intended only for use by threading libraries.
-.P
-Specifying
-.I sevp
-as NULL is equivalent to specifying a pointer to a
-.I sigevent
-structure in which
-.I sigev_notify
-is
-.BR SIGEV_SIGNAL ,
-.I sigev_signo
-is
-.BR SIGALRM ,
-and
-.I sigev_value.sival_int
-is the timer ID.
-.SH RETURN VALUE
-On success,
-.BR timer_create ()
-returns 0, and the ID of the new timer is placed in
-.IR *timerid .
-On failure, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-Temporary error during kernel allocation of timer structures.
-.TP
-.B EINVAL
-Clock ID,
-.IR sigev_notify ,
-.IR sigev_signo ,
-or
-.I sigev_notify_thread_id
-is invalid.
-.TP
-.B ENOMEM
-.\" glibc layer: malloc()
-Could not allocate memory.
-.TP
-.B ENOTSUP
-The kernel does not support creating a timer against this
-.IR clockid .
-.TP
-.B EPERM
-.I clockid
-was
-.B CLOCK_REALTIME_ALARM
-or
-.B CLOCK_BOOTTIME_ALARM
-but the caller did not have the
-.B CAP_WAKE_ALARM
-capability.
-.SH VERSIONS
-.SS C library/kernel differences
-Part of the implementation of the POSIX timers API is provided by glibc.
-.\" See nptl/sysdeps/unix/sysv/linux/timer_create.c
-In particular:
-.IP \[bu] 3
-Much of the functionality for
-.B SIGEV_THREAD
-is implemented within glibc, rather than the kernel.
-(This is necessarily so,
-since the thread involved in handling the notification is one
-that must be managed by the C library POSIX threads implementation.)
-Although the notification delivered to the process is via a thread,
-internally the NPTL implementation uses a
-.I sigev_notify
-value of
-.B SIGEV_THREAD_ID
-along with a real-time signal that is reserved by the implementation (see
-.BR nptl (7)).
-.IP \[bu]
-The implementation of the default case where
-.I sevp
-is NULL is handled inside glibc,
-which invokes the underlying system call with a suitably populated
-.I sigevent
-structure.
-.IP \[bu]
-The timer IDs presented at user level are maintained by glibc,
-which maps these IDs to the timer IDs employed by the kernel.
-.\" See the glibc source file kernel-posix-timers.h for the structure
-.\" that glibc uses to map user-space timer IDs to kernel timer IDs
-.\" The kernel-level timer ID is exposed via siginfo.si_tid.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-Linux 2.6.
-POSIX.1-2001.
-.P
-Prior to Linux 2.6,
-glibc provided an incomplete user-space implementation
-.RB ( CLOCK_REALTIME
-timers only) using POSIX threads,
-and before glibc 2.17,
-.\" glibc commit 93a78ac437ba44f493333d7e2a4b0249839ce460
-the implementation fell back to this technique on systems
-running kernels older than Linux 2.6.
-.SH NOTES
-A program may create multiple interval timers using
-.BR timer_create ().
-.P
-Timers are not inherited by the child of a
-.BR fork (2),
-and are disarmed and deleted during an
-.BR execve (2).
-.P
-The kernel preallocates a "queued real-time signal"
-for each timer created using
-.BR timer_create ().
-Consequently, the number of timers is limited by the
-.B RLIMIT_SIGPENDING
-resource limit (see
-.BR setrlimit (2)).
-.P
-The timers created by
-.BR timer_create ()
-are commonly known as "POSIX (interval) timers".
-The POSIX timers API consists of the following interfaces:
-.TP
-.BR timer_create ()
-Create a timer.
-.TP
-.BR timer_settime (2)
-Arm (start) or disarm (stop) a timer.
-.TP
-.BR timer_gettime (2)
-Fetch the time remaining until the next expiration of a timer,
-along with the interval setting of the timer.
-.TP
-.BR timer_getoverrun (2)
-Return the overrun count for the last timer expiration.
-.TP
-.BR timer_delete (2)
-Disarm and delete a timer.
-.P
-Since Linux 3.10, the
-.IR /proc/ pid /timers
-file can be used to list the POSIX timers for the process with PID
-.IR pid .
-See
-.BR proc (5)
-for further information.
-.P
-Since Linux 4.10,
-.\" baa73d9e478ff32d62f3f9422822b59dd9a95a21
-support for POSIX timers is a configurable option that is enabled by default.
-Kernel support can be disabled via the
-.B CONFIG_POSIX_TIMERS
-option.
-.SH EXAMPLES
-The program below takes two arguments: a sleep period in seconds,
-and a timer frequency in nanoseconds.
-The program establishes a handler for the signal it uses for the timer,
-blocks that signal,
-creates and arms a timer that expires with the given frequency,
-sleeps for the specified number of seconds,
-and then unblocks the timer signal.
-Assuming that the timer expired at least once while the program slept,
-the signal handler will be invoked,
-and the handler displays some information about the timer notification.
-The program terminates after one invocation of the signal handler.
-.P
-In the following example run, the program sleeps for 1 second,
-after creating a timer that has a frequency of 100 nanoseconds.
-By the time the signal is unblocked and delivered,
-there have been around ten million overruns.
-.P
-.in +4n
-.EX
-$ \fB./a.out 1 100\fP
-Establishing handler for signal 34
-Blocking signal 34
-timer ID is 0x804c008
-Sleeping for 1 seconds
-Unblocking signal 34
-Caught signal 34
- sival_ptr = 0xbfb174f4; *sival_ptr = 0x804c008
- overrun count = 10004886
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (timer_create.c)
-.EX
-#include <signal.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <unistd.h>
-\&
-#define CLOCKID CLOCK_REALTIME
-#define SIG SIGRTMIN
-\&
-#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
- } while (0)
-\&
-static void
-print_siginfo(siginfo_t *si)
-{
- int or;
- timer_t *tidp;
-\&
- tidp = si\->si_value.sival_ptr;
-\&
- printf(" sival_ptr = %p; ", si\->si_value.sival_ptr);
- printf(" *sival_ptr = %#jx\en", (uintmax_t) *tidp);
-\&
- or = timer_getoverrun(*tidp);
- if (or == \-1)
- errExit("timer_getoverrun");
- else
- printf(" overrun count = %d\en", or);
-}
-\&
-static void
-handler(int sig, siginfo_t *si, void *uc)
-{
- /* Note: calling printf() from a signal handler is not safe
- (and should not be done in production programs), since
- printf() is not async\-signal\-safe; see signal\-safety(7).
- Nevertheless, we use printf() here as a simple way of
- showing that the handler was called. */
-\&
- printf("Caught signal %d\en", sig);
- print_siginfo(si);
- signal(sig, SIG_IGN);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- timer_t timerid;
- sigset_t mask;
- long long freq_nanosecs;
- struct sigevent sev;
- struct sigaction sa;
- struct itimerspec its;
-\&
- if (argc != 3) {
- fprintf(stderr, "Usage: %s <sleep\-secs> <freq\-nanosecs>\en",
- argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- /* Establish handler for timer signal. */
-\&
- printf("Establishing handler for signal %d\en", SIG);
- sa.sa_flags = SA_SIGINFO;
- sa.sa_sigaction = handler;
- sigemptyset(&sa.sa_mask);
- if (sigaction(SIG, &sa, NULL) == \-1)
- errExit("sigaction");
-\&
- /* Block timer signal temporarily. */
-\&
- printf("Blocking signal %d\en", SIG);
- sigemptyset(&mask);
- sigaddset(&mask, SIG);
- if (sigprocmask(SIG_SETMASK, &mask, NULL) == \-1)
- errExit("sigprocmask");
-\&
- /* Create the timer. */
-\&
- sev.sigev_notify = SIGEV_SIGNAL;
- sev.sigev_signo = SIG;
- sev.sigev_value.sival_ptr = &timerid;
- if (timer_create(CLOCKID, &sev, &timerid) == \-1)
- errExit("timer_create");
-\&
- printf("timer ID is %#jx\en", (uintmax_t) timerid);
-\&
- /* Start the timer. */
-\&
- freq_nanosecs = atoll(argv[2]);
- its.it_value.tv_sec = freq_nanosecs / 1000000000;
- its.it_value.tv_nsec = freq_nanosecs % 1000000000;
- its.it_interval.tv_sec = its.it_value.tv_sec;
- its.it_interval.tv_nsec = its.it_value.tv_nsec;
-\&
- if (timer_settime(timerid, 0, &its, NULL) == \-1)
- errExit("timer_settime");
-\&
- /* Sleep for a while; meanwhile, the timer may expire
- multiple times. */
-\&
- printf("Sleeping for %d seconds\en", atoi(argv[1]));
- sleep(atoi(argv[1]));
-\&
- /* Unblock the timer signal, so that timer notification
- can be delivered. */
-\&
- printf("Unblocking signal %d\en", SIG);
- if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == \-1)
- errExit("sigprocmask");
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.ad l
-.nh
-.BR clock_gettime (2),
-.BR setitimer (2),
-.BR timer_delete (2),
-.BR timer_getoverrun (2),
-.BR timer_settime (2),
-.BR timerfd_create (2),
-.BR clock_getcpuclockid (3),
-.BR pthread_getcpuclockid (3),
-.BR pthreads (7),
-.BR sigevent (3type),
-.BR signal (7),
-.BR time (7)
diff --git a/man2/timer_delete.2 b/man2/timer_delete.2
deleted file mode 100644
index bdc2cfa12..000000000
--- a/man2/timer_delete.2
+++ /dev/null
@@ -1,58 +0,0 @@
-.\" Copyright (c) 2009 Linux Foundation, written by Michael Kerrisk
-.\" <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH timer_delete 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-timer_delete \- delete a POSIX per-process timer
-.SH LIBRARY
-Real-time library
-.RI ( librt ", " \-lrt )
-.SH SYNOPSIS
-.nf
-.B #include <time.h>
-.P
-.BI "int timer_delete(timer_t " timerid );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR timer_delete ():
-.nf
- _POSIX_C_SOURCE >= 199309L
-.fi
-.SH DESCRIPTION
-.BR timer_delete ()
-deletes the timer whose ID is given in
-.IR timerid .
-If the timer was armed at the time of this call,
-it is disarmed before being deleted.
-The treatment of any pending signal generated by the deleted timer
-is unspecified.
-.SH RETURN VALUE
-On success,
-.BR timer_delete ()
-returns 0.
-On failure, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I timerid
-is not a valid timer ID.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-Linux 2.6.
-POSIX.1-2001.
-.SH SEE ALSO
-.BR clock_gettime (2),
-.BR timer_create (2),
-.BR timer_getoverrun (2),
-.BR timer_settime (2),
-.BR time (7)
diff --git a/man2/timer_getoverrun.2 b/man2/timer_getoverrun.2
deleted file mode 100644
index edc1ff6ab..000000000
--- a/man2/timer_getoverrun.2
+++ /dev/null
@@ -1,134 +0,0 @@
-.\" Copyright (c) 2009 Linux Foundation, written by Michael Kerrisk
-.\" <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH timer_getoverrun 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-timer_getoverrun \- get overrun count for a POSIX per-process timer
-.SH LIBRARY
-Real-time library
-.RI ( librt ", " \-lrt )
-.SH SYNOPSIS
-.nf
-.B #include <time.h>
-.P
-.BI "int timer_getoverrun(timer_t " timerid );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR timer_getoverrun ():
-.nf
- _POSIX_C_SOURCE >= 199309L
-.fi
-.SH DESCRIPTION
-.BR timer_getoverrun ()
-returns the "overrun count" for the timer referred to by
-.IR timerid .
-An application can use the overrun count to accurately calculate the number
-of timer expirations that would have occurred over a given time interval.
-Timer overruns can occur both when receiving expiration notifications
-via signals
-.RB ( SIGEV_SIGNAL ),
-and via threads
-.RB ( SIGEV_THREAD ).
-.P
-When expiration notifications are delivered via a signal,
-overruns can occur as follows.
-Regardless of whether or not a real-time signal is used for
-timer notifications,
-the system queues at most one signal per timer.
-(This is the behavior specified by POSIX.1.
-The alternative, queuing one signal for each timer expiration,
-could easily result in overflowing the allowed limits for
-queued signals on the system.)
-Because of system scheduling delays,
-or because the signal may be temporarily blocked,
-there can be a delay between the time when the notification
-signal is generated and the time when it
-is delivered (e.g., caught by a signal handler) or accepted (e.g., using
-.BR sigwaitinfo (2)).
-In this interval, further timer expirations may occur.
-The timer overrun count is the number of additional
-timer expirations that occurred between the time when the signal
-was generated and when it was delivered or accepted.
-.P
-Timer overruns can also occur when expiration notifications
-are delivered via invocation of a thread,
-since there may be an arbitrary delay between an expiration of the timer
-and the invocation of the notification thread,
-and in that delay interval, additional timer expirations may occur.
-.SH RETURN VALUE
-On success,
-.BR timer_getoverrun ()
-returns the overrun count of the specified timer;
-this count may be 0 if no overruns have occurred.
-On failure, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-.I timerid
-is not a valid timer ID.
-.SH VERSIONS
-When timer notifications are delivered via signals
-.RB ( SIGEV_SIGNAL ),
-on Linux it is also possible to obtain the overrun count via the
-.I si_overrun
-field of the
-.I siginfo_t
-structure (see
-.BR sigaction (2)).
-This allows an application to avoid the overhead of making
-a system call to obtain the overrun count,
-but is a nonportable extension to POSIX.1.
-.P
-POSIX.1 discusses timer overruns only in the context of
-timer notifications using signals.
-.\" FIXME . Austin bug filed, 11 Feb 09
-.\" https://www.austingroupbugs.net/view.php?id=95
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-Linux 2.6.
-POSIX.1-2001.
-.SH BUGS
-POSIX.1 specifies that if the timer overrun count
-is equal to or greater than an implementation-defined maximum,
-.BR DELAYTIMER_MAX ,
-then
-.BR timer_getoverrun ()
-should return
-.BR DELAYTIMER_MAX .
-However, before Linux 4.19,
-.\" http://bugzilla.kernel.org/show_bug.cgi?id=12665
-if the timer overrun value exceeds the maximum representable integer,
-the counter cycles, starting once more from low values.
-Since Linux 4.19,
-.\" commit 78c9c4dfbf8c04883941445a195276bb4bb92c76
-.BR timer_getoverrun ()
-returns
-.B DELAYTIMER_MAX
-(defined as
-.B INT_MAX
-in
-.IR <limits.h> )
-in this case (and the overrun value is reset to 0).
-.SH EXAMPLES
-See
-.BR timer_create (2).
-.SH SEE ALSO
-.BR clock_gettime (2),
-.BR sigaction (2),
-.BR signalfd (2),
-.BR sigwaitinfo (2),
-.BR timer_create (2),
-.BR timer_delete (2),
-.BR timer_settime (2),
-.BR signal (7),
-.BR time (7)
diff --git a/man2/timer_gettime.2 b/man2/timer_gettime.2
deleted file mode 100644
index 42015ca6b..000000000
--- a/man2/timer_gettime.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/timer_settime.2
diff --git a/man2/timer_settime.2 b/man2/timer_settime.2
deleted file mode 100644
index 1f8589874..000000000
--- a/man2/timer_settime.2
+++ /dev/null
@@ -1,187 +0,0 @@
-.\" Copyright (c) 2009 Linux Foundation, written by Michael Kerrisk
-.\" <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH timer_settime 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-timer_settime, timer_gettime \- arm/disarm and fetch
-state of POSIX per-process timer
-.SH LIBRARY
-Real-time library
-.RI ( librt ", " \-lrt )
-.SH SYNOPSIS
-.nf
-.B #include <time.h>
-.P
-.BI "int timer_gettime(timer_t " timerid ", struct itimerspec *" curr_value );
-.BI "int timer_settime(timer_t " timerid ", int " flags ,
-.BI " const struct itimerspec *restrict " new_value ,
-.BI " struct itimerspec *_Nullable restrict " old_value );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR timer_settime (),
-.BR timer_gettime ():
-.nf
- _POSIX_C_SOURCE >= 199309L
-.fi
-.SH DESCRIPTION
-.BR timer_settime ()
-arms or disarms the timer identified by
-.IR timerid .
-The
-.I new_value
-argument is a pointer to an
-.I itimerspec
-structure that specifies the new initial value and
-the new interval for the timer.
-The
-.I itimerspec
-structure is described in
-.BR itimerspec (3type).
-.P
-Each of the substructures of the
-.I itimerspec
-structure is a
-.BR timespec (3)
-structure that allows a time value to be specified
-in seconds and nanoseconds.
-These time values are measured according to the clock
-that was specified when the timer was created by
-.BR timer_create (2).
-.P
-If
-.I new_value\->it_value
-specifies a nonzero value (i.e., either subfield is nonzero), then
-.BR timer_settime ()
-arms (starts) the timer,
-setting it to initially expire at the given time.
-(If the timer was already armed,
-then the previous settings are overwritten.)
-If
-.I new_value\->it_value
-specifies a zero value
-(i.e., both subfields are zero),
-then the timer is disarmed.
-.P
-The
-.I new_value\->it_interval
-field specifies the period of the timer, in seconds and nanoseconds.
-If this field is nonzero, then each time that an armed timer expires,
-the timer is reloaded from the value specified in
-.IR new_value\->it_interval .
-If
-.I new_value\->it_interval
-specifies a zero value,
-then the timer expires just once, at the time specified by
-.IR it_value .
-.P
-By default, the initial expiration time specified in
-.I new_value\->it_value
-is interpreted relative to the current time on the timer's
-clock at the time of the call.
-This can be modified by specifying
-.B TIMER_ABSTIME
-in
-.IR flags ,
-in which case
-.I new_value\->it_value
-is interpreted as an absolute value as measured on the timer's clock;
-that is, the timer will expire when the clock value reaches the
-value specified by
-.IR new_value\->it_value .
-If the specified absolute time has already passed,
-then the timer expires immediately,
-and the overrun count (see
-.BR timer_getoverrun (2))
-will be set correctly.
-.\" By experiment: the overrun count is set correctly, for CLOCK_REALTIME.
-.P
-If the value of the
-.B CLOCK_REALTIME
-clock is adjusted while an absolute timer based on that clock is armed,
-then the expiration of the timer will be appropriately adjusted.
-Adjustments to the
-.B CLOCK_REALTIME
-clock have no effect on relative timers based on that clock.
-.\" Similar remarks might apply with respect to process and thread CPU time
-.\" clocks, but these clocks are not currently (2.6.28) settable on Linux.
-.P
-If
-.I old_value
-is not NULL, then it points to a buffer
-that is used to return the previous interval of the timer (in
-.IR old_value\->it_interval )
-and the amount of time until the timer
-would previously have next expired (in
-.IR old_value\->it_value ).
-.P
-.BR timer_gettime ()
-returns the time until next expiration, and the interval,
-for the timer specified by
-.IR timerid ,
-in the buffer pointed to by
-.IR curr_value .
-The time remaining until the next timer expiration is returned in
-.IR curr_value\->it_value ;
-this is always a relative value, regardless of whether the
-.B TIMER_ABSTIME
-flag was used when arming the timer.
-If the value returned in
-.I curr_value\->it_value
-is zero, then the timer is currently disarmed.
-The timer interval is returned in
-.IR curr_value\->it_interval .
-If the value returned in
-.I curr_value\->it_interval
-is zero, then this is a "one-shot" timer.
-.SH RETURN VALUE
-On success,
-.BR timer_settime ()
-and
-.BR timer_gettime ()
-return 0.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-These functions may fail with the following errors:
-.TP
-.B EFAULT
-.IR new_value ,
-.IR old_value ,
-or
-.I curr_value
-is not a valid pointer.
-.TP
-.B EINVAL
-.I timerid
-is invalid.
-.\" FIXME . eventually: invalid value in flags
-.P
-.BR timer_settime ()
-may fail with the following errors:
-.TP
-.B EINVAL
-.I new_value\->it_value
-is negative; or
-.I new_value\->it_value.tv_nsec
-is negative or greater than 999,999,999.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-Linux 2.6.
-POSIX.1-2001.
-.SH EXAMPLES
-See
-.BR timer_create (2).
-.SH SEE ALSO
-.BR timer_create (2),
-.BR timer_getoverrun (2),
-.BR timespec (3),
-.BR time (7)
diff --git a/man2/timerfd_create.2 b/man2/timerfd_create.2
deleted file mode 100644
index 23c399c7d..000000000
--- a/man2/timerfd_create.2
+++ /dev/null
@@ -1,704 +0,0 @@
-.\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.TH timerfd_create 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-timerfd_create, timerfd_settime, timerfd_gettime \-
-timers that notify via file descriptors
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/timerfd.h>
-.P
-.BI "int timerfd_create(int " clockid ", int " flags );
-.P
-.BI "int timerfd_settime(int " fd ", int " flags ,
-.BI " const struct itimerspec *" new_value ,
-.BI " struct itimerspec *_Nullable " old_value );
-.BI "int timerfd_gettime(int " fd ", struct itimerspec *" curr_value );
-.fi
-.SH DESCRIPTION
-These system calls create and operate on a timer
-that delivers timer expiration notifications via a file descriptor.
-They provide an alternative to the use of
-.BR setitimer (2)
-or
-.BR timer_create (2),
-with the advantage that the file descriptor may be monitored by
-.BR select (2),
-.BR poll (2),
-and
-.BR epoll (7).
-.P
-The use of these three system calls is analogous to the use of
-.BR timer_create (2),
-.BR timer_settime (2),
-and
-.BR timer_gettime (2).
-(There is no analog of
-.BR timer_getoverrun (2),
-since that functionality is provided by
-.BR read (2),
-as described below.)
-.\"
-.SS timerfd_create()
-.BR timerfd_create ()
-creates a new timer object,
-and returns a file descriptor that refers to that timer.
-The
-.I clockid
-argument specifies the clock that is used to mark the progress
-of the timer, and must be one of the following:
-.TP
-.B CLOCK_REALTIME
-A settable system-wide real-time clock.
-.TP
-.B CLOCK_MONOTONIC
-A nonsettable monotonically increasing clock that measures time
-from some unspecified point in the past that does not change
-after system startup.
-.TP
-.BR CLOCK_BOOTTIME " (since Linux 3.15)"
-.\" commit 4a2378a943f09907fb1ae35c15de917f60289c14
-Like
-.BR CLOCK_MONOTONIC ,
-this is a monotonically increasing clock.
-However, whereas the
-.B CLOCK_MONOTONIC
-clock does not measure the time while a system is suspended, the
-.B CLOCK_BOOTTIME
-clock does include the time during which the system is suspended.
-This is useful for applications that need to be suspend-aware.
-.B CLOCK_REALTIME
-is not suitable for such applications, since that clock is affected
-by discontinuous changes to the system clock.
-.TP
-.BR CLOCK_REALTIME_ALARM " (since Linux 3.11)"
-.\" commit 11ffa9d6065f344a9bd769a2452f26f2f671e5f8
-This clock is like
-.BR CLOCK_REALTIME ,
-but will wake the system if it is suspended.
-The caller must have the
-.B CAP_WAKE_ALARM
-capability in order to set a timer against this clock.
-.TP
-.BR CLOCK_BOOTTIME_ALARM " (since Linux 3.11)"
-.\" commit 11ffa9d6065f344a9bd769a2452f26f2f671e5f8
-This clock is like
-.BR CLOCK_BOOTTIME ,
-but will wake the system if it is suspended.
-The caller must have the
-.B CAP_WAKE_ALARM
-capability in order to set a timer against this clock.
-.P
-See
-.BR clock_getres (2)
-for some further details on the above clocks.
-.P
-The current value of each of these clocks can be retrieved using
-.BR clock_gettime (2).
-.P
-Starting with Linux 2.6.27, the following values may be bitwise ORed in
-.I flags
-to change the behavior of
-.BR timerfd_create ():
-.TP 14
-.B TFD_NONBLOCK
-Set the
-.B O_NONBLOCK
-file status flag on the open file description (see
-.BR open (2))
-referred to by the new file descriptor.
-Using this flag saves extra calls to
-.BR fcntl (2)
-to achieve the same result.
-.TP
-.B TFD_CLOEXEC
-Set the close-on-exec
-.RB ( FD_CLOEXEC )
-flag on the new file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2)
-for reasons why this may be useful.
-.P
-In Linux versions up to and including 2.6.26,
-.I flags
-must be specified as zero.
-.SS timerfd_settime()
-.BR timerfd_settime ()
-arms (starts) or disarms (stops)
-the timer referred to by the file descriptor
-.IR fd .
-.P
-The
-.I new_value
-argument specifies the initial expiration and interval for the timer.
-The
-.I itimerspec
-structure used for this argument is described in
-.BR itimerspec (3type).
-.P
-.I new_value.it_value
-specifies the initial expiration of the timer,
-in seconds and nanoseconds.
-Setting either field of
-.I new_value.it_value
-to a nonzero value arms the timer.
-Setting both fields of
-.I new_value.it_value
-to zero disarms the timer.
-.P
-Setting one or both fields of
-.I new_value.it_interval
-to nonzero values specifies the period, in seconds and nanoseconds,
-for repeated timer expirations after the initial expiration.
-If both fields of
-.I new_value.it_interval
-are zero, the timer expires just once, at the time specified by
-.IR new_value.it_value .
-.P
-By default,
-the initial expiration time specified in
-.I new_value
-is interpreted relative to the current time
-on the timer's clock at the time of the call (i.e.,
-.I new_value.it_value
-specifies a time relative to the current value of the clock specified by
-.IR clockid ).
-An absolute timeout can be selected via the
-.I flags
-argument.
-.P
-The
-.I flags
-argument is a bit mask that can include the following values:
-.TP
-.B TFD_TIMER_ABSTIME
-Interpret
-.I new_value.it_value
-as an absolute value on the timer's clock.
-The timer will expire when the value of the timer's
-clock reaches the value specified in
-.IR new_value.it_value .
-.TP
-.B TFD_TIMER_CANCEL_ON_SET
-If this flag is specified along with
-.B TFD_TIMER_ABSTIME
-and the clock for this timer is
-.B CLOCK_REALTIME
-or
-.BR CLOCK_REALTIME_ALARM ,
-then mark this timer as cancelable if the real-time clock
-undergoes a discontinuous change
-.RB ( settimeofday (2),
-.BR clock_settime (2),
-or similar).
-When such changes occur, a current or future
-.BR read (2)
-from the file descriptor will fail with the error
-.BR ECANCELED .
-.P
-If the
-.I old_value
-argument is not NULL, then the
-.I itimerspec
-structure that it points to is used to return the setting of the timer
-that was current at the time of the call;
-see the description of
-.BR timerfd_gettime ()
-following.
-.\"
-.SS timerfd_gettime()
-.BR timerfd_gettime ()
-returns, in
-.IR curr_value ,
-an
-.I itimerspec
-structure that contains the current setting of the timer
-referred to by the file descriptor
-.IR fd .
-.P
-The
-.I it_value
-field returns the amount of time
-until the timer will next expire.
-If both fields of this structure are zero,
-then the timer is currently disarmed.
-This field always contains a relative value, regardless of whether the
-.B TFD_TIMER_ABSTIME
-flag was specified when setting the timer.
-.P
-The
-.I it_interval
-field returns the interval of the timer.
-If both fields of this structure are zero,
-then the timer is set to expire just once, at the time specified by
-.IR curr_value.it_value .
-.SS Operating on a timer file descriptor
-The file descriptor returned by
-.BR timerfd_create ()
-supports the following additional operations:
-.TP
-.BR read (2)
-If the timer has already expired one or more times since
-its settings were last modified using
-.BR timerfd_settime (),
-or since the last successful
-.BR read (2),
-then the buffer given to
-.BR read (2)
-returns an unsigned 8-byte integer
-.RI ( uint64_t )
-containing the number of expirations that have occurred.
-(The returned value is in host byte order\[em]that is,
-the native byte order for integers on the host machine.)
-.IP
-If no timer expirations have occurred at the time of the
-.BR read (2),
-then the call either blocks until the next timer expiration,
-or fails with the error
-.B EAGAIN
-if the file descriptor has been made nonblocking
-(via the use of the
-.BR fcntl (2)
-.B F_SETFL
-operation to set the
-.B O_NONBLOCK
-flag).
-.IP
-A
-.BR read (2)
-fails with the error
-.B EINVAL
-if the size of the supplied buffer is less than 8 bytes.
-.IP
-If the associated clock is either
-.B CLOCK_REALTIME
-or
-.BR CLOCK_REALTIME_ALARM ,
-the timer is absolute
-.RB ( TFD_TIMER_ABSTIME ),
-and the flag
-.B TFD_TIMER_CANCEL_ON_SET
-was specified when calling
-.BR timerfd_settime (),
-then
-.BR read (2)
-fails with the error
-.B ECANCELED
-if the real-time clock undergoes a discontinuous change.
-(This allows the reading application to discover
-such discontinuous changes to the clock.)
-.IP
-If the associated clock is either
-.B CLOCK_REALTIME
-or
-.BR CLOCK_REALTIME_ALARM ,
-the timer is absolute
-.RB ( TFD_TIMER_ABSTIME ),
-and the flag
-.B TFD_TIMER_CANCEL_ON_SET
-was
-.I not
-specified when calling
-.BR timerfd_settime (),
-then a discontinuous negative change to the clock (e.g.,
-.BR clock_settime (2))
-may cause
-.BR read (2)
-to unblock, but return a value of 0 (i.e., no bytes read),
-if the clock change occurs after the time expired,
-but before the
-.BR read (2)
-on the file descriptor.
-.TP
-.BR poll (2)
-.TQ
-.BR select (2)
-.TQ
-(and similar)
-The file descriptor is readable
-(the
-.BR select (2)
-.I readfds
-argument; the
-.BR poll (2)
-.B POLLIN
-flag)
-if one or more timer expirations have occurred.
-.IP
-The file descriptor also supports the other file-descriptor
-multiplexing APIs:
-.BR pselect (2),
-.BR ppoll (2),
-and
-.BR epoll (7).
-.TP
-.BR ioctl (2)
-The following timerfd-specific command is supported:
-.RS
-.TP
-.BR TFD_IOC_SET_TICKS " (since Linux 3.17)"
-.\" commit 5442e9fbd7c23172a1c9bc736629cd123a9923f0
-Adjust the number of timer expirations that have occurred.
-The argument is a pointer to a nonzero 8-byte integer
-.RI ( uint64_t *)
-containing the new number of expirations.
-Once the number is set, any waiter on the timer is woken up.
-The only purpose of this command is to restore the expirations
-for the purpose of checkpoint/restore.
-This operation is available only if the kernel was configured with the
-.B CONFIG_CHECKPOINT_RESTORE
-option.
-.RE
-.TP
-.BR close (2)
-When the file descriptor is no longer required it should be closed.
-When all file descriptors associated with the same timer object
-have been closed,
-the timer is disarmed and its resources are freed by the kernel.
-.\"
-.SS fork(2) semantics
-After a
-.BR fork (2),
-the child inherits a copy of the file descriptor created by
-.BR timerfd_create ().
-The file descriptor refers to the same underlying
-timer object as the corresponding file descriptor in the parent,
-and
-.BR read (2)s
-in the child will return information about
-expirations of the timer.
-.\"
-.SS execve(2) semantics
-A file descriptor created by
-.BR timerfd_create ()
-is preserved across
-.BR execve (2),
-and continues to generate timer expirations if the timer was armed.
-.SH RETURN VALUE
-On success,
-.BR timerfd_create ()
-returns a new file descriptor.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.P
-.BR timerfd_settime ()
-and
-.BR timerfd_gettime ()
-return 0 on success;
-on error they return \-1, and set
-.I errno
-to indicate the error.
-.SH ERRORS
-.BR timerfd_create ()
-can fail with the following errors:
-.TP
-.B EINVAL
-The
-.I clockid
-is not valid.
-.TP
-.B EINVAL
-.I flags
-is invalid;
-or, in Linux 2.6.26 or earlier,
-.I flags
-is nonzero.
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been
-reached.
-.TP
-.B ENODEV
-Could not mount (internal) anonymous inode device.
-.TP
-.B ENOMEM
-There was insufficient kernel memory to create the timer.
-.TP
-.B EPERM
-.I clockid
-was
-.B CLOCK_REALTIME_ALARM
-or
-.B CLOCK_BOOTTIME_ALARM
-but the caller did not have the
-.B CAP_WAKE_ALARM
-capability.
-.P
-.BR timerfd_settime ()
-and
-.BR timerfd_gettime ()
-can fail with the following errors:
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.B EFAULT
-.IR new_value ,
-.IR old_value ,
-or
-.I curr_value
-is not a valid pointer.
-.TP
-.B EINVAL
-.I fd
-is not a valid timerfd file descriptor.
-.P
-.BR timerfd_settime ()
-can also fail with the following errors:
-.TP
-.B ECANCELED
-See NOTES.
-.TP
-.B EINVAL
-.I new_value
-is not properly initialized (one of the
-.I tv_nsec
-fields falls outside the range zero to 999,999,999).
-.TP
-.B EINVAL
-.\" This case only checked since Linux 2.6.29, and Linux 2.6.2[78].some-stable-version.
-.\" In older kernel versions, no check was made for invalid flags.
-.I flags
-is invalid.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.25,
-glibc 2.8.
-.SH NOTES
-Suppose the following scenario for a
-.B CLOCK_REALTIME
-or
-.B CLOCK_REALTIME_ALARM
-timer that was created with
-.BR timerfd_create ():
-.IP (1) 5
-The timer has been started
-.RB ( timerfd_settime ())
-with the
-.B TFD_TIMER_ABSTIME
-and
-.B TFD_TIMER_CANCEL_ON_SET
-flags;
-.IP (2)
-A discontinuous change (e.g.,
-.BR settimeofday (2))
-is subsequently made to the
-.B CLOCK_REALTIME
-clock; and
-.IP (3)
-the caller once more calls
-.BR timerfd_settime ()
-to rearm the timer (without first doing a
-.BR read (2)
-on the file descriptor).
-.P
-In this case the following occurs:
-.IP \[bu] 3
-The
-.BR timerfd_settime ()
-returns \-1 with
-.I errno
-set to
-.BR ECANCELED .
-(This enables the caller to know that the previous timer was affected
-by a discontinuous change to the clock.)
-.IP \[bu]
-The timer
-.I "is successfully rearmed"
-with the settings provided in the second
-.BR timerfd_settime ()
-call.
-(This was probably an implementation accident, but won't be fixed now,
-in case there are applications that depend on this behaviour.)
-.SH BUGS
-Currently,
-.\" 2.6.29
-.BR timerfd_create ()
-supports fewer types of clock IDs than
-.BR timer_create (2).
-.SH EXAMPLES
-The following program creates a timer and then monitors its progress.
-The program accepts up to three command-line arguments.
-The first argument specifies the number of seconds for
-the initial expiration of the timer.
-The second argument specifies the interval for the timer, in seconds.
-The third argument specifies the number of times the program should
-allow the timer to expire before terminating.
-The second and third command-line arguments are optional.
-.P
-The following shell session demonstrates the use of the program:
-.P
-.in +4n
-.EX
-.RB "$" " a.out 3 1 100"
-0.000: timer started
-3.000: read: 1; total=1
-4.000: read: 1; total=2
-.BR "\[ha]Z " " # type control\-Z to suspend the program"
-[1]+ Stopped a.out 3 1 100
-.RB "$ " "fg" " # Resume execution after a few seconds"
-a.out 3 1 100
-9.660: read: 5; total=7
-10.000: read: 1; total=8
-11.000: read: 1; total=9
-.BR "\[ha]C " " # type control\-C to terminate the program"
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (timerfd_create.c)
-.EX
-.\" The commented out code here is what we currently need until
-.\" the required stuff is in glibc
-.\"
-.\"
-.\"/* Link with \-lrt */
-.\"#define _GNU_SOURCE
-.\"#include <sys/syscall.h>
-.\"#include <unistd.h>
-.\"#include <time.h>
-.\"#if defined(__i386__)
-.\"#define __NR_timerfd_create 322
-.\"#define __NR_timerfd_settime 325
-.\"#define __NR_timerfd_gettime 326
-.\"#endif
-.\"
-.\"static int
-.\"timerfd_create(int clockid, int flags)
-.\"{
-.\" return syscall(__NR_timerfd_create, clockid, flags);
-.\"}
-.\"
-.\"static int
-.\"timerfd_settime(int fd, int flags, struct itimerspec *new_value,
-.\" struct itimerspec *curr_value)
-.\"{
-.\" return syscall(__NR_timerfd_settime, fd, flags, new_value,
-.\" curr_value);
-.\"}
-.\"
-.\"static int
-.\"timerfd_gettime(int fd, struct itimerspec *curr_value)
-.\"{
-.\" return syscall(__NR_timerfd_gettime, fd, curr_value);
-.\"}
-.\"
-.\"#define TFD_TIMER_ABSTIME (1 << 0)
-.\"
-.\"////////////////////////////////////////////////////////////
-#include <err.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/timerfd.h>
-#include <time.h>
-#include <unistd.h>
-\&
-static void
-print_elapsed_time(void)
-{
- int secs, nsecs;
- static int first_call = 1;
- struct timespec curr;
- static struct timespec start;
-\&
- if (first_call) {
- first_call = 0;
- if (clock_gettime(CLOCK_MONOTONIC, &start) == \-1)
- err(EXIT_FAILURE, "clock_gettime");
- }
-\&
- if (clock_gettime(CLOCK_MONOTONIC, &curr) == \-1)
- err(EXIT_FAILURE, "clock_gettime");
-\&
- secs = curr.tv_sec \- start.tv_sec;
- nsecs = curr.tv_nsec \- start.tv_nsec;
- if (nsecs < 0) {
- secs\-\-;
- nsecs += 1000000000;
- }
- printf("%d.%03d: ", secs, (nsecs + 500000) / 1000000);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int fd;
- ssize_t s;
- uint64_t exp, tot_exp, max_exp;
- struct timespec now;
- struct itimerspec new_value;
-\&
- if (argc != 2 && argc != 4) {
- fprintf(stderr, "%s init\-secs [interval\-secs max\-exp]\en",
- argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- if (clock_gettime(CLOCK_REALTIME, &now) == \-1)
- err(EXIT_FAILURE, "clock_gettime");
-\&
- /* Create a CLOCK_REALTIME absolute timer with initial
- expiration and interval as specified in command line. */
-\&
- new_value.it_value.tv_sec = now.tv_sec + atoi(argv[1]);
- new_value.it_value.tv_nsec = now.tv_nsec;
- if (argc == 2) {
- new_value.it_interval.tv_sec = 0;
- max_exp = 1;
- } else {
- new_value.it_interval.tv_sec = atoi(argv[2]);
- max_exp = atoi(argv[3]);
- }
- new_value.it_interval.tv_nsec = 0;
-\&
- fd = timerfd_create(CLOCK_REALTIME, 0);
- if (fd == \-1)
- err(EXIT_FAILURE, "timerfd_create");
-\&
- if (timerfd_settime(fd, TFD_TIMER_ABSTIME, &new_value, NULL) == \-1)
- err(EXIT_FAILURE, "timerfd_settime");
-\&
- print_elapsed_time();
- printf("timer started\en");
-\&
- for (tot_exp = 0; tot_exp < max_exp;) {
- s = read(fd, &exp, sizeof(uint64_t));
- if (s != sizeof(uint64_t))
- err(EXIT_FAILURE, "read");
-\&
- tot_exp += exp;
- print_elapsed_time();
- printf("read: %" PRIu64 "; total=%" PRIu64 "\en", exp, tot_exp);
- }
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR eventfd (2),
-.BR poll (2),
-.BR read (2),
-.BR select (2),
-.BR setitimer (2),
-.BR signalfd (2),
-.BR timer_create (2),
-.BR timer_gettime (2),
-.BR timer_settime (2),
-.BR timespec (3),
-.BR epoll (7),
-.BR time (7)
diff --git a/man2/timerfd_gettime.2 b/man2/timerfd_gettime.2
deleted file mode 100644
index 6d1294048..000000000
--- a/man2/timerfd_gettime.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/timerfd_create.2
diff --git a/man2/timerfd_settime.2 b/man2/timerfd_settime.2
deleted file mode 100644
index 6d1294048..000000000
--- a/man2/timerfd_settime.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/timerfd_create.2
diff --git a/man2/times.2 b/man2/times.2
deleted file mode 100644
index 7e85beb6e..000000000
--- a/man2/times.2
+++ /dev/null
@@ -1,222 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt (michael@moria.de)
-.\" Modified Sat Jul 24 14:29:17 1993 by Rik Faith (faith@cs.unc.edu)
-.\" Modified 961203 and 001211 and 010326 by aeb@cwi.nl
-.\" Modified 001213 by Michael Haardt (michael@moria.de)
-.\" Modified 13 Jun 02, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added note on nonstandard behavior when SIGCHLD is ignored.
-.\" Modified 2004-11-16, mtk, Noted that the nonconformance when
-.\" SIGCHLD is being ignored is fixed in Linux 2.6.9; other minor changes
-.\" Modified 2004-12-08, mtk, in Linux 2.6 times() return value changed
-.\" 2005-04-13, mtk
-.\" Added notes on nonstandard behavior: Linux allows 'buf' to
-.\" be NULL, but POSIX.1 doesn't specify this and it's nonportable.
-.\"
-.TH times 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-times \- get process times
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/times.h>
-.P
-.BI "clock_t times(struct tms *" buf );
-.fi
-.SH DESCRIPTION
-.BR times ()
-stores the current process times in the
-.I "struct tms"
-that
-.I buf
-points to.
-The
-.I struct tms
-is as defined in
-.IR <sys/times.h> :
-.P
-.in +4n
-.EX
-struct tms {
- clock_t tms_utime; /* user time */
- clock_t tms_stime; /* system time */
- clock_t tms_cutime; /* user time of children */
- clock_t tms_cstime; /* system time of children */
-};
-.EE
-.in
-.P
-The
-.I tms_utime
-field contains the CPU time spent executing instructions
-of the calling process.
-The
-.I tms_stime
-field contains the CPU time spent executing inside the kernel
-while performing tasks on behalf of the calling process.
-.P
-The
-.I tms_cutime
-field contains the sum of the
-.I tms_utime
-and
-.I tms_cutime
-values for all waited-for terminated children.
-The
-.I tms_cstime
-field contains the sum of the
-.I tms_stime
-and
-.I tms_cstime
-values for all waited-for terminated children.
-.P
-Times for terminated children (and their descendants)
-are added in at the moment
-.BR wait (2)
-or
-.BR waitpid (2)
-returns their process ID.
-In particular,
-times of grandchildren
-that the children did not wait for are never seen.
-.P
-All times reported are in clock ticks.
-.SH RETURN VALUE
-.BR times ()
-returns the number of clock ticks that have elapsed since
-an arbitrary point in the past.
-The return value may overflow the possible range of type
-.IR clock_t .
-On error,
-\fI(clock_t)\ \-1\fP is returned,
-and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I buf
-points outside the process's address space.
-.SH VERSIONS
-On Linux,
-the
-.I buf
-argument can be specified as NULL,
-with the result that
-.BR times ()
-just returns a function result.
-However,
-POSIX does not specify this behavior,
-and most
-other UNIX implementations require a non-NULL value for
-.IR buf .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001,
-SVr4,
-4.3BSD.
-.P
-In POSIX.1-1996 the symbol \fBCLK_TCK\fP (defined in
-.IR <time.h> )
-is mentioned as obsolescent.
-It is obsolete now.
-.P
-Before Linux 2.6.9,
-if the disposition of
-.B SIGCHLD
-is set to
-.BR SIG_IGN ,
-then the times of terminated children
-are automatically included in the
-.I tms_cstime
-and
-.I tms_cutime
-fields,
-although POSIX.1-2001 says that this should happen
-only if the calling process
-.BR wait (2)s
-on its children.
-This nonconformance is rectified in Linux 2.6.9 and later.
-.\" See the description of times() in XSH, which says:
-.\" The times of a terminated child process are included... when wait()
-.\" or waitpid() returns the process ID of this terminated child.
-.P
-On Linux,
-the \[lq]arbitrary point in the past\[rq]
-from which the return value of
-.BR times ()
-is measured has varied across kernel versions.
-On Linux 2.4 and earlier,
-this point is the moment the system was booted.
-Since Linux 2.6,
-this point is \fI(2\[ha]32/HZ) \- 300\fP
-seconds before system boot time.
-This variability across kernel versions (and across UNIX implementations),
-combined with the fact that the returned value may overflow the range of
-.IR clock_t ,
-means that a portable application would be wise to avoid using this value.
-To measure changes in elapsed time,
-use
-.BR clock_gettime (2)
-instead.
-.\" .P
-.\" On older systems the number of clock ticks per second is given
-.\" by the variable HZ.
-.P
-SVr1-3 returns
-.I long
-and the struct members are of type
-.I time_t
-although they store clock ticks,
-not seconds since the Epoch.
-V7 used
-.I long
-for the struct members,
-because it had no type
-.I time_t
-yet.
-.SH NOTES
-The number of clock ticks per second can be obtained using:
-.P
-.in +4n
-.EX
-sysconf(_SC_CLK_TCK);
-.EE
-.in
-.P
-Note that
-.BR clock (3)
-also returns a value of type
-.IR clock_t ,
-but this value is measured in units of
-.BR CLOCKS_PER_SEC ,
-not the clock ticks used by
-.BR times ().
-.SH BUGS
-A limitation of the Linux system call conventions on some architectures
-(notably i386) means that on Linux 2.6 there is a small time window
-(41 seconds) soon after boot when
-.BR times ()
-can return \-1,
-falsely indicating that an error occurred.
-The same problem can occur when the return value wraps past
-the maximum value that can be stored in
-.IR clock_t .
-.\" The problem is that a syscall return of -4095 to -1
-.\" is interpreted by glibc as an error, and the wrapper converts
-.\" the return value to -1.
-.\" http://marc.info/?l=linux-kernel&m=119447727031225&w=2
-.\" "compat_sys_times() bogus until jiffies >= 0"
-.\" November 2007
-.SH SEE ALSO
-.BR time (1),
-.BR getrusage (2),
-.BR wait (2),
-.BR clock (3),
-.BR sysconf (3),
-.BR time (7)
diff --git a/man2/tkill.2 b/man2/tkill.2
deleted file mode 100644
index 9bbf722fe..000000000
--- a/man2/tkill.2
+++ /dev/null
@@ -1,130 +0,0 @@
-.\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
-.\" and Copyright 2003 Abhijit Menon-Sen <ams@wiw.org>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2004-05-31, added tgkill, ahu, aeb
-.\" 2008-01-15 mtk -- rewrote DESCRIPTION
-.\"
-.TH tkill 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-tkill, tgkill \- send a signal to a thread
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <signal.h>" " /* Definition of " SIG* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "[[deprecated]] int syscall(SYS_tkill, pid_t " tid ", int " sig );
-.P
-.B #include <signal.h>
-.P
-.BI "int tgkill(pid_t " tgid ", pid_t " tid ", int " sig );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR tkill (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR tgkill ()
-sends the signal
-.I sig
-to the thread with the thread ID
-.I tid
-in the thread group
-.IR tgid .
-(By contrast,
-.BR kill (2)
-can be used to send a signal only to a process (i.e., thread group)
-as a whole, and the signal will be delivered to an arbitrary
-thread within that process.)
-.P
-.BR tkill ()
-is an obsolete predecessor to
-.BR tgkill ().
-It allows only the target thread ID to be specified,
-which may result in the wrong thread being signaled if a thread
-terminates and its thread ID is recycled.
-Avoid using this system call.
-.\" FIXME Maybe say something about the following:
-.\" http://sourceware.org/bugzilla/show_bug.cgi?id=12889
-.\"
-.\" Quoting Rich Felker <bugdal@aerifal.cx>:
-.\"
-.\" There is a race condition in pthread_kill: it is possible that,
-.\" between the time pthread_kill reads the pid/tid from the target
-.\" thread descriptor and the time it makes the tgkill syscall,
-.\" the target thread terminates and the same tid gets assigned
-.\" to a new thread in the same process.
-.\"
-.\" (The tgkill syscall was designed to eliminate a similar race
-.\" condition in tkill, but it only succeeded in eliminating races
-.\" where the tid gets reused in a different process, and does not
-.\" help if the same tid gets assigned to a new thread in the
-.\" same process.)
-.\"
-.\" The only solution I can see is to introduce a mutex that ensures
-.\" that a thread cannot exit while pthread_kill is being called on it.
-.\"
-.\" Note that in most real-world situations, like almost all race
-.\" conditions, this one will be extremely rare. To make it
-.\" measurable, one could exhaust all but 1-2 available pid values,
-.\" possibly by lowering the max pid parameter in /proc, forcing
-.\" the same tid to be reused rapidly.
-.P
-These are the raw system call interfaces, meant for internal
-thread library use.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and \fIerrno\fP
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-The
-.B RLIMIT_SIGPENDING
-resource limit was reached and
-.I sig
-is a real-time signal.
-.TP
-.B EAGAIN
-Insufficient kernel memory was available and
-.I sig
-is a real-time signal.
-.TP
-.B EINVAL
-An invalid thread ID, thread group ID, or signal was specified.
-.TP
-.B EPERM
-Permission denied.
-For the required permissions, see
-.BR kill (2).
-.TP
-.B ESRCH
-No process with the specified thread ID (and thread group ID) exists.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.TP
-.BR tkill ()
-Linux 2.4.19 / 2.5.4.
-.TP
-.BR tgkill ()
-Linux 2.5.75,
-glibc 2.30.
-.SH NOTES
-See the description of
-.B CLONE_THREAD
-in
-.BR clone (2)
-for an explanation of thread groups.
-.SH SEE ALSO
-.BR clone (2),
-.BR gettid (2),
-.BR kill (2),
-.BR rt_sigqueueinfo (2)
diff --git a/man2/truncate.2 b/man2/truncate.2
deleted file mode 100644
index ac72f8820..000000000
--- a/man2/truncate.2
+++ /dev/null
@@ -1,251 +0,0 @@
-.\" Copyright (c) 1983, 1991 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" SPDX-License-Identifier: BSD-4-Clause-UC
-.\"
-.\" @(#)truncate.2 6.9 (Berkeley) 3/10/91
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 1998-12-21 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2002-01-07 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2002-04-06 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH truncate 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-truncate, ftruncate \- truncate a file to a specified length
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int truncate(const char *" path ", off_t " length );
-.BI "int ftruncate(int " fd ", off_t " length );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR truncate ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.12: */ _POSIX_C_SOURCE >= 200809L
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.P
-.BR ftruncate ():
-.nf
- _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
- || /* Since glibc 2.3.5: */ _POSIX_C_SOURCE >= 200112L
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-The
-.BR truncate ()
-and
-.BR ftruncate ()
-functions cause the regular file named by
-.I path
-or referenced by
-.I fd
-to be truncated to a size of precisely
-.I length
-bytes.
-.P
-If the file previously was larger than this size, the extra data is lost.
-If the file previously was shorter, it is extended, and
-the extended part reads as null bytes (\[aq]\e0\[aq]).
-.P
-The file offset is not changed.
-.P
-If the size changed, then the st_ctime and st_mtime fields
-(respectively, time of last status change and
-time of last modification; see
-.BR inode (7))
-for the file are updated,
-and the set-user-ID and set-group-ID mode bits may be cleared.
-.P
-With
-.BR ftruncate (),
-the file must be open for writing; with
-.BR truncate (),
-the file must be writable.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-For
-.BR truncate ():
-.TP
-.B EACCES
-Search permission is denied for a component of the path prefix,
-or the named file is not writable by the user.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EFAULT
-The argument
-.I path
-points outside the process's allocated address space.
-.TP
-.B EFBIG
-The argument
-.I length
-is larger than the maximum file size. (XSI)
-.TP
-.B EINTR
-While blocked waiting to complete,
-the call was interrupted by a signal handler; see
-.BR fcntl (2)
-and
-.BR signal (7).
-.TP
-.B EINVAL
-The argument
-.I length
-is negative or larger than the maximum file size.
-.TP
-.B EIO
-An I/O error occurred updating the inode.
-.TP
-.B EISDIR
-The named file is a directory.
-.TP
-.B ELOOP
-Too many symbolic links were encountered in translating the pathname.
-.TP
-.B ENAMETOOLONG
-A component of a pathname exceeded 255 characters,
-or an entire pathname exceeded 1023 characters.
-.TP
-.B ENOENT
-The named file does not exist.
-.TP
-.B ENOTDIR
-A component of the path prefix is not a directory.
-.TP
-.B EPERM
-.\" This happens for at least MSDOS and VFAT filesystems
-.\" on kernel 2.6.13
-The underlying filesystem does not support extending
-a file beyond its current size.
-.TP
-.B EPERM
-The operation was prevented by a file seal; see
-.BR fcntl (2).
-.TP
-.B EROFS
-The named file resides on a read-only filesystem.
-.TP
-.B ETXTBSY
-The file is an executable file that is being executed.
-.P
-For
-.BR ftruncate ()
-the same errors apply, but instead of things that can be wrong with
-.IR path ,
-we now have things that can be wrong with the file descriptor,
-.IR fd :
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor.
-.TP
-.BR EBADF " or " EINVAL
-.I fd
-is not open for writing.
-.TP
-.B EINVAL
-.I fd
-does not reference a regular file or a POSIX shared memory object.
-.TP
-.BR EINVAL " or " EBADF
-The file descriptor
-.I fd
-is not open for writing.
-POSIX permits, and portable applications should handle,
-either error for this case.
-(Linux produces
-.BR EINVAL .)
-.SH VERSIONS
-The details in DESCRIPTION are for XSI-compliant systems.
-For non-XSI-compliant systems, the POSIX standard allows
-two behaviors for
-.BR ftruncate ()
-when
-.I length
-exceeds the file length
-(note that
-.BR truncate ()
-is not specified at all in such an environment):
-either returning an error, or extending the file.
-Like most UNIX implementations, Linux follows the XSI requirement
-when dealing with native filesystems.
-However, some nonnative filesystems do not permit
-.BR truncate ()
-and
-.BR ftruncate ()
-to be used to extend a file beyond its current length:
-a notable example on Linux is VFAT.
-.\" At the very least: OSF/1, Solaris 7, and FreeBSD conform, mtk, Jan 2002
-.P
-On some 32-bit architectures,
-the calling signatures for these system calls differ,
-for the reasons described in
-.BR syscall (2).
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001,
-4.4BSD, SVr4 (first appeared in 4.2BSD).
-.\" POSIX.1-1996 has
-.\" .BR ftruncate ().
-.\" POSIX.1-2001 also has
-.\" .BR truncate (),
-.\" as an XSI extension.
-.\" .P
-.\" SVr4 documents additional
-.\" .BR truncate ()
-.\" error conditions EMFILE, EMULTIHP, ENFILE, ENOLINK. SVr4 documents for
-.\" .BR ftruncate ()
-.\" an additional EAGAIN error condition.
-.P
-The original Linux
-.BR truncate ()
-and
-.BR ftruncate ()
-system calls were not designed to handle large file offsets.
-Consequently, Linux 2.4 added
-.BR truncate64 ()
-and
-.BR ftruncate64 ()
-system calls that handle large files.
-However, these details can be ignored by applications using glibc, whose
-wrapper functions transparently employ the more recent system calls
-where they are available.
-.SH NOTES
-.BR ftruncate ()
-can also be used to set the size of a POSIX shared memory object; see
-.BR shm_open (3).
-.SH BUGS
-A header file bug in glibc 2.12 meant that the minimum value of
-.\" http://sourceware.org/bugzilla/show_bug.cgi?id=12037
-.B _POSIX_C_SOURCE
-required to expose the declaration of
-.BR ftruncate ()
-was 200809L instead of 200112L.
-This has been fixed in later glibc versions.
-.SH SEE ALSO
-.BR truncate (1),
-.BR open (2),
-.BR stat (2),
-.BR path_resolution (7)
diff --git a/man2/truncate64.2 b/man2/truncate64.2
deleted file mode 100644
index 2ed34f1ed..000000000
--- a/man2/truncate64.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/truncate.2
diff --git a/man2/tuxcall.2 b/man2/tuxcall.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/tuxcall.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/ugetrlimit.2 b/man2/ugetrlimit.2
deleted file mode 100644
index df6d7362a..000000000
--- a/man2/ugetrlimit.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/getrlimit.2
diff --git a/man2/umask.2 b/man2/umask.2
deleted file mode 100644
index 939f258e7..000000000
--- a/man2/umask.2
+++ /dev/null
@@ -1,149 +0,0 @@
-.\" Copyright (c) 2006, 2008, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" (A few fragments remain from an earlier (1992) version written in
-.\" 1992 by Drew Eckhardt <drew@cs.colorado.edu>.)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified Sat Jul 24 12:51:53 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Tue Oct 22 22:39:04 1996 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified Thu May 1 06:05:54 UTC 1997 by Nicolás Lichtmaier
-.\" <nick@debian.com> with Lars Wirzenius <liw@iki.fi> suggestion
-.\" 2006-05-13, mtk, substantial rewrite of description of 'mask'
-.\" 2008-01-09, mtk, a few rewrites and additions.
-.TH umask 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-umask \- set file mode creation mask
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/stat.h>
-.P
-.BI "mode_t umask(mode_t " mask );
-.fi
-.SH DESCRIPTION
-.BR umask ()
-sets the calling process's file mode creation mask (umask) to
-.I mask
-& 0777 (i.e., only the file permission bits of
-.I mask
-are used), and returns the previous value of the mask.
-.P
-The umask is used by
-.BR open (2),
-.BR mkdir (2),
-and other system calls that create files
-.\" e.g., mkfifo(), creat(), mknod(), sem_open(), mq_open(), shm_open()
-.\" but NOT the System V IPC *get() calls
-to modify the permissions placed on newly created files or directories.
-Specifically, permissions in the umask are turned off from
-the
-.I mode
-argument to
-.BR open (2)
-and
-.BR mkdir (2).
-.P
-Alternatively, if the parent directory has a default ACL (see
-.BR acl (5)),
-the umask is ignored, the default ACL is inherited,
-the permission bits are set based on the inherited ACL,
-and permission bits absent in the
-.I mode
-argument are turned off.
-For example, the following default ACL is equivalent to a umask of 022:
-.P
-.in +4n
-.EX
-u::rwx,g::r-x,o::r-x
-.EE
-.in
-.P
-Combining the effect of this default ACL with a
-.I mode
-argument of 0666 (rw-rw-rw-), the resulting file permissions would be 0644
-(rw-r--r--).
-.P
-The constants that should be used to specify
-.I mask
-are described in
-.BR inode (7).
-.P
-The typical default value for the process umask is
-.BR S_IWGRP " | " S_IWOTH
-(octal 022).
-In the usual case where the
-.I mode
-argument to
-.BR open (2)
-is specified as:
-.P
-.in +4n
-.EX
-.BR S_IRUSR " | " S_IWUSR " | " S_IRGRP " | " S_IWGRP " | " S_IROTH " | " S_IWOTH
-.EE
-.in
-.P
-(octal 0666) when creating a new file, the permissions on the
-resulting file will be:
-.P
-.in +4n
-.EX
-.BR S_IRUSR " | " S_IWUSR " | " S_IRGRP " | " S_IROTH
-.EE
-.in
-.P
-(because 0666 & \[ti]022 = 0644; i.e., rw\-r\-\-r\-\-).
-.SH RETURN VALUE
-This system call always succeeds and the previous value of the mask
-is returned.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.3BSD.
-.SH NOTES
-A child process created via
-.BR fork (2)
-inherits its parent's umask.
-The umask is left unchanged by
-.BR execve (2).
-.P
-It is impossible to use
-.BR umask ()
-to fetch a process's umask without at the same time changing it.
-A second call to
-.BR umask ()
-would then be needed to restore the umask.
-The nonatomicity of these two steps provides the potential
-for races in multithreaded programs.
-.P
-Since Linux 4.7, the umask of any process can be viewed via the
-.I Umask
-field of
-.IR /proc/ pid /status .
-Inspecting this field in
-.I /proc/self/status
-allows a process to retrieve its umask without at the same time changing it.
-.P
-The umask setting also affects the permissions assigned to POSIX IPC objects
-.RB ( mq_open (3),
-.BR sem_open (3),
-.BR shm_open (3)),
-FIFOs
-.RB ( mkfifo (3)),
-and UNIX domain sockets
-.RB ( unix (7))
-created by the process.
-The umask does not affect the permissions assigned
-to System\ V IPC objects created by the process (using
-.BR msgget (2),
-.BR semget (2),
-.BR shmget (2)).
-.SH SEE ALSO
-.BR chmod (2),
-.BR mkdir (2),
-.BR open (2),
-.BR stat (2),
-.BR acl (5)
diff --git a/man2/umount.2 b/man2/umount.2
deleted file mode 100644
index 6decda265..000000000
--- a/man2/umount.2
+++ /dev/null
@@ -1,214 +0,0 @@
-.\" Copyright (C) 1993 Rickard E. Faith <faith@cs.unc.edu>
-.\" and Copyright (C) 1994 Andries E. Brouwer <aeb@cwi.nl>
-.\" and Copyright (C) 2002, 2005 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2008-10-06, mtk: Created this as a new page by splitting
-.\" umount/umount2 material out of mount.2
-.\"
-.TH umount 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-umount, umount2 \- unmount filesystem
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B "#include <sys/mount.h>"
-.P
-.BI "int umount(const char *" target );
-.BI "int umount2(const char *" target ", int " flags );
-.fi
-.SH DESCRIPTION
-.BR umount ()
-and
-.BR umount2 ()
-remove the attachment of the (topmost) filesystem mounted on
-.IR target .
-.\" Note: the kernel naming differs from the glibc naming
-.\" umount2 is the glibc name for what the kernel now calls umount
-.\" and umount is the glibc name for oldumount
-.P
-Appropriate privilege (Linux: the
-.B CAP_SYS_ADMIN
-capability) is required to unmount filesystems.
-.P
-Linux 2.1.116 added the
-.BR umount2 ()
-system call, which, like
-.BR umount (),
-unmounts a target, but allows additional
-.I flags
-controlling the behavior of the operation:
-.TP
-.BR MNT_FORCE " (since Linux 2.1.116)"
-Ask the filesystem to abort pending requests before attempting the
-unmount.
-This may allow the unmount to complete without waiting
-for an inaccessible server, but could cause data loss.
-If, after aborting requests,
-some processes still have active references to the filesystem,
-the unmount will still fail.
-As at Linux 4.12,
-.B MNT_FORCE
-is supported only on the following filesystems:
-9p (since Linux 2.6.16),
-ceph (since Linux 2.6.34),
-cifs (since Linux 2.6.12),
-fuse (since Linux 2.6.16),
-lustre (since Linux 3.11),
-and NFS (since Linux 2.1.116).
-.TP
-.BR MNT_DETACH " (since Linux 2.4.11)"
-Perform a lazy unmount: make the mount unavailable for new
-accesses, immediately disconnect the filesystem and all filesystems
-mounted below it from each other and from the mount table, and
-actually perform the unmount when the mount ceases to be busy.
-.TP
-.BR MNT_EXPIRE " (since Linux 2.6.8)"
-Mark the mount as expired.
-If a mount is not currently in use, then an initial call to
-.BR umount2 ()
-with this flag fails with the error
-.BR EAGAIN ,
-but marks the mount as expired.
-The mount remains expired as long as it isn't accessed
-by any process.
-A second
-.BR umount2 ()
-call specifying
-.B MNT_EXPIRE
-unmounts an expired mount.
-This flag cannot be specified with either
-.B MNT_FORCE
-or
-.BR MNT_DETACH .
-.TP
-.BR UMOUNT_NOFOLLOW " (since Linux 2.6.34)"
-.\" Later added to Linux 2.6.33-stable
-Don't dereference
-.I target
-if it is a symbolic link.
-This flag allows security problems to be avoided in set-user-ID-\fIroot\fP
-programs that allow unprivileged users to unmount filesystems.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-The error values given below result from filesystem type independent
-errors.
-Each filesystem type may have its own special errors and its
-own special behavior.
-See the Linux kernel source code for details.
-.TP
-.B EAGAIN
-A call to
-.BR umount2 ()
-specifying
-.B MNT_EXPIRE
-successfully marked an unbusy filesystem as expired.
-.TP
-.B EBUSY
-.I target
-could not be unmounted because it is busy.
-.TP
-.B EFAULT
-.I target
-points outside the user address space.
-.TP
-.B EINVAL
-.I target
-is not a mount point.
-.TP
-.B EINVAL
-.I target
-is locked; see
-.BR mount_namespaces (7).
-.TP
-.B EINVAL
-.BR umount2 ()
-was called with
-.B MNT_EXPIRE
-and either
-.B MNT_DETACH
-or
-.BR MNT_FORCE .
-.TP
-.BR EINVAL " (since Linux 2.6.34)"
-.BR umount2 ()
-was called with an invalid flag value in
-.IR flags .
-.TP
-.B ENAMETOOLONG
-A pathname was longer than
-.BR MAXPATHLEN .
-.TP
-.B ENOENT
-A pathname was empty or had a nonexistent component.
-.TP
-.B ENOMEM
-The kernel could not allocate a free page to copy filenames or data into.
-.TP
-.B EPERM
-The caller does not have the required privileges.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-.B MNT_DETACH
-and
-.B MNT_EXPIRE
-.\" http://sourceware.org/bugzilla/show_bug.cgi?id=10092
-are available since glibc 2.11.
-.P
-The original
-.BR umount ()
-function was called as \fIumount(device)\fP and would return
-.B ENOTBLK
-when called with something other than a block device.
-In Linux 0.98p4, a call \fIumount(dir)\fP was added, in order to
-support anonymous devices.
-In Linux 2.3.99-pre7, the call \fIumount(device)\fP was removed,
-leaving only \fIumount(dir)\fP (since now devices can be mounted
-in more than one place, so specifying the device does not suffice).
-.SH NOTES
-.SS umount() and shared mounts
-Shared mounts cause any mount activity on a mount, including
-.BR umount ()
-operations, to be forwarded to every shared mount in the
-peer group and every slave mount of that peer group.
-This means that
-.BR umount ()
-of any peer in a set of shared mounts will cause all of its
-peers to be unmounted and all of their slaves to be unmounted as well.
-.P
-This propagation of unmount activity can be particularly surprising
-on systems where every mount is shared by default.
-On such systems,
-recursively bind mounting the root directory of the filesystem
-onto a subdirectory and then later unmounting that subdirectory with
-.B MNT_DETACH
-will cause every mount in the mount namespace to be lazily unmounted.
-.P
-To ensure
-.BR umount ()
-does not propagate in this fashion,
-the mount may be remounted using a
-.BR mount (2)
-call with a
-.I mount_flags
-argument that includes both
-.B MS_REC
-and
-.B MS_PRIVATE
-prior to
-.BR umount ()
-being called.
-.SH SEE ALSO
-.BR mount (2),
-.BR mount_namespaces (7),
-.BR path_resolution (7),
-.BR mount (8),
-.BR umount (8)
diff --git a/man2/umount2.2 b/man2/umount2.2
deleted file mode 100644
index 84ea41905..000000000
--- a/man2/umount2.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/umount.2
diff --git a/man2/uname.2 b/man2/uname.2
deleted file mode 100644
index 99178fc22..000000000
--- a/man2/uname.2
+++ /dev/null
@@ -1,134 +0,0 @@
-.\" Copyright (C) 2001 Andries Brouwer <aeb@cwi.nl>.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 2007-07-05 mtk: Added details on underlying system call interfaces
-.\"
-.TH uname 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-uname \- get name and information about current kernel
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/utsname.h>
-.P
-.BI "int uname(struct utsname *" buf );
-.fi
-.SH DESCRIPTION
-.BR uname ()
-returns system information in the structure pointed to by
-.IR buf .
-The
-.I utsname
-struct is defined in
-.IR <sys/utsname.h> :
-.P
-.in +4n
-.EX
-struct utsname {
- char sysname[]; /* Operating system name (e.g., "Linux") */
- char nodename[]; /* Name within communications network
- to which the node is attached, if any */
- char release[]; /* Operating system release
- (e.g., "2.6.28") */
- char version[]; /* Operating system version */
- char machine[]; /* Hardware type identifier */
-#ifdef _GNU_SOURCE
- char domainname[]; /* NIS or YP domain name */
-#endif
-};
-.EE
-.in
-.P
-The length of the arrays in a
-.I struct utsname
-is unspecified (see NOTES);
-the fields are terminated by a null byte (\[aq]\e0\[aq]).
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I buf
-is not valid.
-.SH VERSIONS
-The
-.I domainname
-member (the NIS or YP domain name) is a GNU extension.
-.P
-The length of the fields in the struct varies.
-Some operating systems
-or libraries use a hardcoded 9 or 33 or 65 or 257.
-Other systems use
-.B SYS_NMLN
-or
-.B _SYS_NMLN
-or
-.B UTSLEN
-or
-.BR _UTSNAME_LENGTH .
-Clearly, it is a bad
-idea to use any of these constants; just use sizeof(...).
-SVr4 uses 257, "to support Internet hostnames"
-\[em] this is the largest value likely to be encountered in the wild.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-POSIX.1-2001, SVr4, 4.4BSD.
-.SS C library/kernel differences
-Over time, increases in the size of the
-.I utsname
-structure have led to three successive versions of
-.BR uname ():
-.IR sys_olduname ()
-(slot
-.IR __NR_oldolduname ),
-.IR sys_uname ()
-(slot
-.IR __NR_olduname ),
-and
-.IR sys_newuname ()
-(slot
-.IR __NR_uname ).
-The first one
-.\" That was back before Linux 1.0
-used length 9 for all fields;
-the second
-.\" That was also back before Linux 1.0
-used 65;
-the third also uses 65 but adds the
-.I domainname
-field.
-The glibc
-.BR uname ()
-wrapper function hides these details from applications,
-invoking the most recent version of the system call provided by the kernel.
-.SH NOTES
-The kernel has the name, release, version, and supported machine type built in.
-Conversely, the
-.I nodename
-field is configured by the administrator to match the network
-(this is what the BSD historically calls the "hostname",
-and is set via
-.BR sethostname (2)).
-Similarly, the
-.I domainname
-field is set via
-.BR setdomainname (2).
-.P
-Part of the utsname information is also accessible via
-.IR /proc/sys/kernel/ { ostype ,
-.IR hostname ,
-.IR osrelease ,
-.IR version ,
-.IR domainname }.
-.SH SEE ALSO
-.BR uname (1),
-.BR getdomainname (2),
-.BR gethostname (2),
-.BR uts_namespaces (7)
diff --git a/man2/unimplemented.2 b/man2/unimplemented.2
deleted file mode 100644
index 577380615..000000000
--- a/man2/unimplemented.2
+++ /dev/null
@@ -1,48 +0,0 @@
-.\" Copyright 1995 Michael Chastain (mec@shell.portal.com), 15 April 1995.
-.\"
-.\" SPDX-License-Identifier: GPL-2.0-or-later
-.\"
-.\" Updated, aeb, 980612
-.\"
-.TH UNIMPLEMENTED 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-afs_syscall, break, fattach, fdetach, ftime, getmsg, getpmsg, gtty, isastream,
-lock, madvise1, mpx, prof, profil, putmsg, putpmsg, security,
-stty, tuxcall, ulimit, vserver \- unimplemented system calls
-.SH SYNOPSIS
-.nf
-Unimplemented system calls.
-.fi
-.SH DESCRIPTION
-These system calls are not implemented in the Linux kernel.
-.SH RETURN VALUE
-These system calls always return \-1 and set
-.I errno
-to
-.BR ENOSYS .
-.SH NOTES
-Note that
-.BR ftime (3),
-.BR profil (3),
-and
-.BR ulimit (3)
-are implemented as library functions.
-.P
-Some system calls, like
-.BR alloc_hugepages (2),
-.BR free_hugepages (2),
-.BR ioperm (2),
-.BR iopl (2),
-and
-.BR vm86 (2)
-exist only on certain architectures.
-.P
-Some system calls, like
-.BR ipc (2),
-.BR create_module (2),
-.BR init_module (2),
-and
-.BR delete_module (2)
-exist only when the Linux kernel was built with support for them.
-.SH SEE ALSO
-.BR syscalls (2)
diff --git a/man2/unlink.2 b/man2/unlink.2
deleted file mode 100644
index ed729109d..000000000
--- a/man2/unlink.2
+++ /dev/null
@@ -1,298 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Ian Jackson
-.\" and Copyright (C) 2006, 2014 Michael Kerrisk.
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-09-08 by Arnt Gulbrandsen <agulbra@troll.no>
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-05-17 by aeb
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.TH unlink 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-unlink, unlinkat \- delete a name and possibly the file it refers to
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "int unlink(const char *" pathname );
-.P
-.BR "#include <fcntl.h> " "/* Definition of " AT_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int unlinkat(int " dirfd ", const char *" pathname ", int " flags );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR unlinkat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.SH DESCRIPTION
-.BR unlink ()
-deletes a name from the filesystem.
-If that name was the
-last link to a file and no processes have the file open, the file is
-deleted and the space it was using is made available for reuse.
-.P
-If the name was the last link to a file but any processes still have
-the file open, the file will remain in existence until the last file
-descriptor referring to it is closed.
-.P
-If the name referred to a symbolic link, the link is removed.
-.P
-If the name referred to a socket, FIFO, or device, the name for it is
-removed but processes which have the object open may continue to use
-it.
-.SS unlinkat()
-The
-.BR unlinkat ()
-system call operates in exactly the same way as either
-.BR unlink ()
-or
-.BR rmdir (2)
-(depending on whether or not
-.I flags
-includes the
-.B AT_REMOVEDIR
-flag)
-except for the differences described here.
-.P
-If the pathname given in
-.I pathname
-is relative, then it is interpreted relative to the directory
-referred to by the file descriptor
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR unlink ()
-and
-.BR rmdir (2)
-for a relative pathname).
-.P
-If the pathname given in
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR unlink ()
-and
-.BR rmdir (2)).
-.P
-If the pathname given in
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-.I flags
-is a bit mask that can either be specified as 0, or by ORing
-together flag values that control the operation of
-.BR unlinkat ().
-Currently, only one such flag is defined:
-.TP
-.B AT_REMOVEDIR
-By default,
-.BR unlinkat ()
-performs the equivalent of
-.BR unlink ()
-on
-.IR pathname .
-If the
-.B AT_REMOVEDIR
-flag is specified, it
-performs the equivalent of
-.BR rmdir (2)
-on
-.IR pathname .
-.P
-See
-.BR openat (2)
-for an explanation of the need for
-.BR unlinkat ().
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Write access to the directory containing
-.I pathname
-is not allowed for the process's effective UID, or one of the
-directories in
-.I pathname
-did not allow search permission.
-(See also
-.BR path_resolution (7).)
-.TP
-.B EBUSY
-The file
-.I pathname
-cannot be unlinked because it is being used by the system
-or another process;
-for example, it is a mount point
-or the NFS client software created it to represent an
-active but otherwise nameless inode ("NFS silly renamed").
-.TP
-.B EFAULT
-.I pathname
-points outside your accessible address space.
-.TP
-.B EIO
-An I/O error occurred.
-.TP
-.B EISDIR
-.I pathname
-refers to a directory.
-(This is the non-POSIX value returned since Linux 2.1.132.)
-.TP
-.B ELOOP
-Too many symbolic links were encountered in translating
-.IR pathname .
-.TP
-.B ENAMETOOLONG
-.IR pathname " was too long."
-.TP
-.B ENOENT
-A component in
-.I pathname
-does not exist or is a dangling symbolic link, or
-.I pathname
-is empty.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.B ENOTDIR
-A component used as a directory in
-.I pathname
-is not, in fact, a directory.
-.TP
-.B EPERM
-The system does not allow unlinking of directories,
-or unlinking of directories requires privileges that the
-calling process doesn't have.
-(This is the POSIX prescribed error return;
-as noted above, Linux returns
-.B EISDIR
-for this case.)
-.TP
-.BR EPERM " (Linux only)"
-The filesystem does not allow unlinking of files.
-.TP
-.BR EPERM " or " EACCES
-The directory containing
-.I pathname
-has the sticky bit
-.RB ( S_ISVTX )
-set and the process's effective UID is neither the UID of the file to
-be deleted nor that of the directory containing it, and
-the process is not privileged (Linux: does not have the
-.B CAP_FOWNER
-capability).
-.TP
-.B EPERM
-The file to be unlinked is marked immutable or append-only.
-(See
-.BR ioctl_iflags (2).)
-.TP
-.B EROFS
-.I pathname
-refers to a file on a read-only filesystem.
-.P
-The same errors that occur for
-.BR unlink ()
-and
-.BR rmdir (2)
-can also occur for
-.BR unlinkat ().
-The following additional errors can occur for
-.BR unlinkat ():
-.TP
-.B EBADF
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EINVAL
-An invalid flag value was specified in
-.IR flags .
-.TP
-.B EISDIR
-.I pathname
-refers to a directory, and
-.B AT_REMOVEDIR
-was not specified in
-.IR flags .
-.TP
-.B ENOTDIR
-.I pathname
-is relative and
-.I dirfd
-is a file descriptor referring to a file other than a directory.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR unlink ()
-SVr4, 4.3BSD, POSIX.1-2001.
-.\" SVr4 documents additional error
-.\" conditions EINTR, EMULTIHOP, ETXTBSY, ENOLINK.
-.TP
-.BR unlinkat ()
-POSIX.1-2008.
-Linux 2.6.16,
-glibc 2.4.
-.SS glibc
-On older kernels where
-.BR unlinkat ()
-is unavailable, the glibc wrapper function falls back to the use of
-.BR unlink ()
-or
-.BR rmdir (2).
-When
-.I pathname
-is a relative pathname,
-glibc constructs a pathname based on the symbolic link in
-.I /proc/self/fd
-that corresponds to the
-.I dirfd
-argument.
-.SH BUGS
-Infelicities in the protocol underlying NFS can cause the unexpected
-disappearance of files which are still being used.
-.SH SEE ALSO
-.BR rm (1),
-.BR unlink (1),
-.BR chmod (2),
-.BR link (2),
-.BR mknod (2),
-.BR open (2),
-.BR rename (2),
-.BR rmdir (2),
-.BR mkfifo (3),
-.BR remove (3),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/unlinkat.2 b/man2/unlinkat.2
deleted file mode 100644
index 4921f7313..000000000
--- a/man2/unlinkat.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unlink.2
diff --git a/man2/unshare.2 b/man2/unshare.2
deleted file mode 100644
index 98d92cf72..000000000
--- a/man2/unshare.2
+++ /dev/null
@@ -1,572 +0,0 @@
-.\" Copyright (C) 2006, Janak Desai <janak@us.ibm.com>
-.\" and Copyright (C) 2006, 2012 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: GPL-1.0-or-later
-.\"
-.\" Patch Justification:
-.\" unshare system call is needed to implement, using PAM,
-.\" per-security_context and/or per-user namespace to provide
-.\" polyinstantiated directories. Using unshare and bind mounts, a
-.\" PAM module can create private namespace with appropriate
-.\" directories(based on user's security context) bind mounted on
-.\" public directories such as /tmp, thus providing an instance of
-.\" /tmp that is based on user's security context. Without the
-.\" unshare system call, namespace separation can only be achieved
-.\" by clone, which would require porting and maintaining all commands
-.\" such as login, and su, that establish a user session.
-.\"
-.TH unshare 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-unshare \- disassociate parts of the process execution context
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #define _GNU_SOURCE
-.B #include <sched.h>
-.P
-.BI "int unshare(int " flags );
-.fi
-.SH DESCRIPTION
-.BR unshare ()
-allows a process (or thread) to disassociate parts of its execution
-context that are currently being shared with other processes (or threads).
-Part of the execution context, such as the mount namespace, is shared
-implicitly when a new process is created using
-.BR fork (2)
-or
-.BR vfork (2),
-while other parts, such as virtual memory, may be
-shared by explicit request when creating a process or thread using
-.BR clone (2).
-.P
-The main use of
-.BR unshare ()
-is to allow a process to control its
-shared execution context without creating a new process.
-.P
-The
-.I flags
-argument is a bit mask that specifies which parts of
-the execution context should be unshared.
-This argument is specified by ORing together zero or more
-of the following constants:
-.TP
-.B CLONE_FILES
-Reverse the effect of the
-.BR clone (2)
-.B CLONE_FILES
-flag.
-Unshare the file descriptor table, so that the calling process
-no longer shares its file descriptors with any other process.
-.TP
-.B CLONE_FS
-Reverse the effect of the
-.BR clone (2)
-.B CLONE_FS
-flag.
-Unshare filesystem attributes, so that the calling process
-no longer shares its root directory
-.RB ( chroot (2)),
-current directory
-.RB ( chdir (2)),
-or umask
-.RB ( umask (2))
-attributes with any other process.
-.TP
-.BR CLONE_NEWCGROUP " (since Linux 4.6)"
-This flag has the same effect as the
-.BR clone (2)
-.B CLONE_NEWCGROUP
-flag.
-Unshare the cgroup namespace.
-Use of
-.B CLONE_NEWCGROUP
-requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR CLONE_NEWIPC " (since Linux 2.6.19)"
-This flag has the same effect as the
-.BR clone (2)
-.B CLONE_NEWIPC
-flag.
-Unshare the IPC namespace,
-so that the calling process has a private copy of the
-IPC namespace which is not shared with any other process.
-Specifying this flag automatically implies
-.B CLONE_SYSVSEM
-as well.
-Use of
-.B CLONE_NEWIPC
-requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR CLONE_NEWNET " (since Linux 2.6.24)"
-This flag has the same effect as the
-.BR clone (2)
-.B CLONE_NEWNET
-flag.
-Unshare the network namespace,
-so that the calling process is moved into a
-new network namespace which is not shared
-with any previously existing process.
-Use of
-.B CLONE_NEWNET
-requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.B CLONE_NEWNS
-.\" These flag names are inconsistent:
-.\" CLONE_NEWNS does the same thing in clone(), but CLONE_VM,
-.\" CLONE_FS, and CLONE_FILES reverse the action of the clone()
-.\" flags of the same name.
-This flag has the same effect as the
-.BR clone (2)
-.B CLONE_NEWNS
-flag.
-Unshare the mount namespace,
-so that the calling process has a private copy of
-its namespace which is not shared with any other process.
-Specifying this flag automatically implies
-.B CLONE_FS
-as well.
-Use of
-.B CLONE_NEWNS
-requires the
-.B CAP_SYS_ADMIN
-capability.
-For further information, see
-.BR mount_namespaces (7).
-.TP
-.BR CLONE_NEWPID " (since Linux 3.8)"
-This flag has the same effect as the
-.BR clone (2)
-.B CLONE_NEWPID
-flag.
-Unshare the PID namespace,
-so that the calling process has a new PID namespace for its children
-which is not shared with any previously existing process.
-The calling process is
-.I not
-moved into the new namespace.
-The first child created by the calling process will have
-the process ID 1 and will assume the role of
-.BR init (1)
-in the new namespace.
-.B CLONE_NEWPID
-automatically implies
-.B CLONE_THREAD
-as well.
-Use of
-.B CLONE_NEWPID
-requires the
-.B CAP_SYS_ADMIN
-capability.
-For further information, see
-.BR pid_namespaces (7).
-.TP
-.BR CLONE_NEWTIME " (since Linux 5.6)"
-Unshare the time namespace,
-so that the calling process has a new time namespace for its children
-which is not shared with any previously existing process.
-The calling process is
-.I not
-moved into the new namespace.
-Use of
-.B CLONE_NEWTIME
-requires the
-.B CAP_SYS_ADMIN
-capability.
-For further information, see
-.BR time_namespaces (7).
-.TP
-.BR CLONE_NEWUSER " (since Linux 3.8)"
-This flag has the same effect as the
-.BR clone (2)
-.B CLONE_NEWUSER
-flag.
-Unshare the user namespace,
-so that the calling process is moved into a new user namespace
-which is not shared with any previously existing process.
-As with the child process created by
-.BR clone (2)
-with the
-.B CLONE_NEWUSER
-flag, the caller obtains a full set of capabilities in the new namespace.
-.IP
-.B CLONE_NEWUSER
-requires that the calling process is not threaded; specifying
-.B CLONE_NEWUSER
-automatically implies
-.BR CLONE_THREAD .
-Since Linux 3.9,
-.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
-.\" https://lwn.net/Articles/543273/
-.B CLONE_NEWUSER
-also automatically implies
-.BR CLONE_FS .
-.B CLONE_NEWUSER
-requires that the user ID and group ID
-of the calling process are mapped to user IDs and group IDs in the
-user namespace of the calling process at the time of the call.
-.IP
-For further information on user namespaces, see
-.BR user_namespaces (7).
-.TP
-.BR CLONE_NEWUTS " (since Linux 2.6.19)"
-This flag has the same effect as the
-.BR clone (2)
-.B CLONE_NEWUTS
-flag.
-Unshare the UTS namespace,
-so that the calling process has a private copy of the
-UTS namespace which is not shared with any other process.
-Use of
-.B CLONE_NEWUTS
-requires the
-.B CAP_SYS_ADMIN
-capability.
-.TP
-.BR CLONE_SYSVSEM " (since Linux 2.6.26)"
-.\" commit 9edff4ab1f8d82675277a04e359d0ed8bf14a7b7
-This flag reverses the effect of the
-.BR clone (2)
-.B CLONE_SYSVSEM
-flag.
-Unshare System\ V semaphore adjustment
-.RI ( semadj )
-values,
-so that the calling process has a new empty
-.I semadj
-list that is not shared with any other process.
-If this is the last process that has a reference to the process's current
-.I semadj
-list, then the adjustments in that list are applied
-to the corresponding semaphores, as described in
-.BR semop (2).
-.\" CLONE_NEWNS If CLONE_SIGHAND is set and signals are also being shared
-.\" (i.e., current->signal->count > 1), force CLONE_THREAD.
-.P
-In addition,
-.BR CLONE_THREAD ,
-.BR CLONE_SIGHAND ,
-and
-.B CLONE_VM
-can be specified in
-.I flags
-if the caller is single threaded (i.e., it is not sharing
-its address space with another process or thread).
-In this case, these flags have no effect.
-(Note also that specifying
-.B CLONE_THREAD
-automatically implies
-.BR CLONE_VM ,
-and specifying
-.B CLONE_VM
-automatically implies
-.BR CLONE_SIGHAND .)
-.\" As at 3.9, the following forced implications also apply,
-.\" although the relevant flags are not yet implemented.
-.\" If CLONE_THREAD is set force CLONE_VM.
-.\" If CLONE_VM is set, force CLONE_SIGHAND.
-.\"
-If the process is multithreaded, then
-the use of these flags results in an error.
-.\" See kernel/fork.c::check_unshare_flags()
-.P
-If
-.I flags
-is specified as zero, then
-.BR unshare ()
-is a no-op;
-no changes are made to the calling process's execution context.
-.SH RETURN VALUE
-On success, zero is returned.
-On failure, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-An invalid bit was specified in
-.IR flags .
-.TP
-.B EINVAL
-.BR CLONE_THREAD ,
-.BR CLONE_SIGHAND ,
-or
-.B CLONE_VM
-was specified in
-.IR flags ,
-and the caller is multithreaded.
-.TP
-.B EINVAL
-.B CLONE_NEWIPC
-was specified in
-.IR flags ,
-but the kernel was not configured with the
-.B CONFIG_SYSVIPC
-and
-.B CONFIG_IPC_NS
-options.
-.TP
-.B EINVAL
-.B CLONE_NEWNET
-was specified in
-.IR flags ,
-but the kernel was not configured with the
-.B CONFIG_NET_NS
-option.
-.TP
-.B EINVAL
-.B CLONE_NEWPID
-was specified in
-.IR flags ,
-but the kernel was not configured with the
-.B CONFIG_PID_NS
-option.
-.TP
-.B EINVAL
-.B CLONE_NEWUSER
-was specified in
-.IR flags ,
-but the kernel was not configured with the
-.B CONFIG_USER_NS
-option.
-.TP
-.B EINVAL
-.B CLONE_NEWUTS
-was specified in
-.IR flags ,
-but the kernel was not configured with the
-.B CONFIG_UTS_NS
-option.
-.TP
-.B EINVAL
-.B CLONE_NEWPID
-was specified in
-.IR flags ,
-but the process has previously called
-.BR unshare ()
-with the
-.B CLONE_NEWPID
-flag.
-.TP
-.B ENOMEM
-Cannot allocate sufficient memory to copy parts of caller's
-context that need to be unshared.
-.TP
-.BR ENOSPC " (since Linux 3.7)"
-.\" commit f2302505775fd13ba93f034206f1e2a587017929
-.B CLONE_NEWPID
-was specified in \fIflags\fP,
-but the limit on the nesting depth of PID namespaces
-would have been exceeded; see
-.BR pid_namespaces (7).
-.TP
-.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
-.B CLONE_NEWUSER
-was specified in
-.IR flags ,
-and the call would cause the limit on the number of
-nested user namespaces to be exceeded.
-See
-.BR user_namespaces (7).
-.IP
-From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
-.BR EUSERS .
-.TP
-.BR ENOSPC " (since Linux 4.9)"
-One of the values in
-.I flags
-specified the creation of a new user namespace,
-but doing so would have caused the limit defined by the corresponding file in
-.I /proc/sys/user
-to be exceeded.
-For further details, see
-.BR namespaces (7).
-.TP
-.B EPERM
-The calling process did not have the required privileges for this operation.
-.TP
-.B EPERM
-.B CLONE_NEWUSER
-was specified in
-.IR flags ,
-but either the effective user ID or the effective group ID of the caller
-does not have a mapping in the parent namespace (see
-.BR user_namespaces (7)).
-.TP
-.BR EPERM " (since Linux 3.9)"
-.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
-.B CLONE_NEWUSER
-was specified in
-.I flags
-and the caller is in a chroot environment
-.\" FIXME What is the rationale for this restriction?
-(i.e., the caller's root directory does not match the root directory
-of the mount namespace in which it resides).
-.TP
-.BR EUSERS " (from Linux 3.11 to Linux 4.8)"
-.B CLONE_NEWUSER
-was specified in
-.IR flags ,
-and the limit on the number of nested user namespaces would be exceeded.
-See the discussion of the
-.B ENOSPC
-error above.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.16.
-.SH NOTES
-Not all of the process attributes that can be shared when
-a new process is created using
-.BR clone (2)
-can be unshared using
-.BR unshare ().
-In particular, as at kernel 3.8,
-.\" FIXME all of the following needs to be reviewed for the current kernel
-.BR unshare ()
-does not implement flags that reverse the effects of
-.BR CLONE_SIGHAND ,
-.\" However, we can do unshare(CLONE_SIGHAND) if CLONE_SIGHAND
-.\" was not specified when doing clone(); i.e., unsharing
-.\" signal handlers is permitted if we are not actually
-.\" sharing signal handlers. mtk
-.BR CLONE_THREAD ,
-or
-.BR CLONE_VM .
-.\" However, we can do unshare(CLONE_VM) if CLONE_VM
-.\" was not specified when doing clone(); i.e., unsharing
-.\" virtual memory is permitted if we are not actually
-.\" sharing virtual memory. mtk
-Such functionality may be added in the future, if required.
-.\"
-.\"9) Future Work
-.\"--------------
-.\"The current implementation of unshare does not allow unsharing of
-.\"signals and signal handlers. Signals are complex to begin with and
-.\"to unshare signals and/or signal handlers of a currently running
-.\"process is even more complex. If in the future there is a specific
-.\"need to allow unsharing of signals and/or signal handlers, it can
-.\"be incrementally added to unshare without affecting legacy
-.\"applications using unshare.
-.\"
-.P
-Creating all kinds of namespace, except user namespaces, requires the
-.B CAP_SYS_ADMIN
-capability.
-However, since creating a user namespace automatically confers a full set of
-capabilities,
-creating both a user namespace and any other type of namespace in the same
-.BR unshare ()
-call does not require the
-.B CAP_SYS_ADMIN
-capability in the original namespace.
-.SH EXAMPLES
-The program below provides a simple implementation of the
-.BR unshare (1)
-command, which unshares one or more namespaces and executes the
-command supplied in its command-line arguments.
-Here's an example of the use of this program,
-running a shell in a new mount namespace,
-and verifying that the original shell and the
-new shell are in separate mount namespaces:
-.P
-.in +4n
-.EX
-$ \fBreadlink /proc/$$/ns/mnt\fP
-mnt:[4026531840]
-$ \fBsudo ./unshare \-m /bin/bash\fP
-# \fBreadlink /proc/$$/ns/mnt\fP
-mnt:[4026532325]
-.EE
-.in
-.P
-The differing output of the two
-.BR readlink (1)
-commands shows that the two shells are in different mount namespaces.
-.SS Program source
-\&
-.\" SRC BEGIN (unshare.c)
-.EX
-/* unshare.c
-\&
- A simple implementation of the unshare(1) command: unshare
- namespaces and execute a command.
-*/
-#define _GNU_SOURCE
-#include <err.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-\&
-static void
-usage(char *pname)
-{
- fprintf(stderr, "Usage: %s [options] program [arg...]\en", pname);
- fprintf(stderr, "Options can be:\en");
- fprintf(stderr, " \-C unshare cgroup namespace\en");
- fprintf(stderr, " \-i unshare IPC namespace\en");
- fprintf(stderr, " \-m unshare mount namespace\en");
- fprintf(stderr, " \-n unshare network namespace\en");
- fprintf(stderr, " \-p unshare PID namespace\en");
- fprintf(stderr, " \-t unshare time namespace\en");
- fprintf(stderr, " \-u unshare UTS namespace\en");
- fprintf(stderr, " \-U unshare user namespace\en");
- exit(EXIT_FAILURE);
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int flags, opt;
-\&
- flags = 0;
-\&
- while ((opt = getopt(argc, argv, "CimnptuU")) != \-1) {
- switch (opt) {
- case \[aq]C\[aq]: flags |= CLONE_NEWCGROUP; break;
- case \[aq]i\[aq]: flags |= CLONE_NEWIPC; break;
- case \[aq]m\[aq]: flags |= CLONE_NEWNS; break;
- case \[aq]n\[aq]: flags |= CLONE_NEWNET; break;
- case \[aq]p\[aq]: flags |= CLONE_NEWPID; break;
- case \[aq]t\[aq]: flags |= CLONE_NEWTIME; break;
- case \[aq]u\[aq]: flags |= CLONE_NEWUTS; break;
- case \[aq]U\[aq]: flags |= CLONE_NEWUSER; break;
- default: usage(argv[0]);
- }
- }
-\&
- if (optind >= argc)
- usage(argv[0]);
-\&
- if (unshare(flags) == \-1)
- err(EXIT_FAILURE, "unshare");
-\&
- execvp(argv[optind], &argv[optind]);
- err(EXIT_FAILURE, "execvp");
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR unshare (1),
-.BR clone (2),
-.BR fork (2),
-.BR kcmp (2),
-.BR setns (2),
-.BR vfork (2),
-.BR namespaces (7)
-.P
-.I Documentation/userspace\-api/unshare.rst
-in the Linux kernel source tree
-.\" commit f504d47be5e8fa7ecf2bf660b18b42e6960c0eb2
-(or
-.I Documentation/unshare.txt
-before Linux 4.12)
diff --git a/man2/uselib.2 b/man2/uselib.2
deleted file mode 100644
index bb567c688..000000000
--- a/man2/uselib.2
+++ /dev/null
@@ -1,106 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1996-10-22 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2005-01-09 by aeb
-.\"
-.TH uselib 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-uselib \- load shared library
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "[[deprecated]] int uselib(const char *" library );
-.fi
-.SH DESCRIPTION
-The system call
-.BR uselib ()
-serves to load
-a shared library to be used by the calling process.
-It is given a pathname.
-The address where to load is found
-in the library itself.
-The library can have any recognized
-binary format.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-In addition to all of the error codes returned by
-.BR open (2)
-and
-.BR mmap (2),
-the following may also be returned:
-.TP
-.B EACCES
-The library specified by
-.I library
-does not have read or execute permission, or the caller does not have
-search permission for one of the directories in the path prefix.
-(See also
-.BR path_resolution (7).)
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been reached.
-.TP
-.B ENOEXEC
-The file specified by
-.I library
-is not an executable of a known type;
-for example, it does not have the correct magic numbers.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-This obsolete system call is not supported by glibc.
-No declaration is provided in glibc headers, but, through a quirk of history,
-glibc before glibc 2.23 did export an ABI for this system call.
-Therefore, in order to employ this system call,
-it was sufficient to manually declare the interface in your code;
-alternatively, you could invoke the system call using
-.BR syscall (2).
-.P
-In ancient libc versions (before glibc 2.0),
-.BR uselib ()
-was used to load
-the shared libraries with names found in an array of names
-in the binary.
-.\" .P
-.\" .\" libc 4.3.1f - changelog 1993-03-02
-.\" Since libc 4.3.2, startup code tries to prefix these names
-.\" with "/usr/lib", "/lib" and "" before giving up.
-.\" .\" libc 4.3.4 - changelog 1993-04-21
-.\" In libc 4.3.4 and later these names are looked for in the directories
-.\" found in
-.\" .BR LD_LIBRARY_PATH ,
-.\" and if not found there,
-.\" prefixes "/usr/lib", "/lib" and "/" are tried.
-.\" .P
-.\" From libc 4.4.4 on only the library "/lib/ld.so" is loaded,
-.\" so that this dynamic library can load the remaining libraries needed
-.\" (again using this call).
-.\" This is also the state of affairs in libc5.
-.\" .P
-.\" glibc2 does not use this call.
-.P
-Since Linux 3.15,
-.\" commit 69369a7003735d0d8ef22097e27a55a8bad9557a
-this system call is available only when the kernel is configured with the
-.B CONFIG_USELIB
-option.
-.SH SEE ALSO
-.BR ar (1),
-.BR gcc (1),
-.BR ld (1),
-.BR ldd (1),
-.BR mmap (2),
-.BR open (2),
-.BR dlopen (3),
-.BR capabilities (7),
-.BR ld.so (8)
diff --git a/man2/userfaultfd.2 b/man2/userfaultfd.2
deleted file mode 100644
index fa6038774..000000000
--- a/man2/userfaultfd.2
+++ /dev/null
@@ -1,951 +0,0 @@
-.\" Copyright (c) 2016, IBM Corporation.
-.\" Written by Mike Rapoport <rppt@linux.vnet.ibm.com>
-.\" and Copyright (C) 2017 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH userfaultfd 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-userfaultfd \- create a file descriptor for handling page faults in user space
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <fcntl.h>" " /* Definition of " O_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.BR "#include <linux/userfaultfd.h>" " /* Definition of " UFFD_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_userfaultfd, int " flags );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR userfaultfd (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-.BR userfaultfd ()
-creates a new userfaultfd object that can be used for delegation of page-fault
-handling to a user-space application,
-and returns a file descriptor that refers to the new object.
-The new userfaultfd object is configured using
-.BR ioctl (2).
-.P
-Once the userfaultfd object is configured, the application can use
-.BR read (2)
-to receive userfaultfd notifications.
-The reads from userfaultfd may be blocking or non-blocking,
-depending on the value of
-.I flags
-used for the creation of the userfaultfd or subsequent calls to
-.BR fcntl (2).
-.P
-The following values may be bitwise ORed in
-.I flags
-to change the behavior of
-.BR userfaultfd ():
-.TP
-.B O_CLOEXEC
-Enable the close-on-exec flag for the new userfaultfd file descriptor.
-See the description of the
-.B O_CLOEXEC
-flag in
-.BR open (2).
-.TP
-.B O_NONBLOCK
-Enables non-blocking operation for the userfaultfd object.
-See the description of the
-.B O_NONBLOCK
-flag in
-.BR open (2).
-.TP
-.B UFFD_USER_MODE_ONLY
-This is a userfaultfd-specific flag that was introduced in Linux 5.11.
-When set, the userfaultfd object will only be able to handle
-page faults originated from the user space on the registered regions.
-When a kernel-originated fault was triggered
-on the registered range with this userfaultfd, a
-.B SIGBUS
-signal will be delivered.
-.P
-When the last file descriptor referring to a userfaultfd object is closed,
-all memory ranges that were registered with the object are unregistered
-and unread events are flushed.
-.\"
-.P
-Userfaultfd supports three modes of registration:
-.TP
-.BR UFFDIO_REGISTER_MODE_MISSING " (since Linux 4.10)"
-When registered with
-.B UFFDIO_REGISTER_MODE_MISSING
-mode, user-space will receive a page-fault notification
-when a missing page is accessed.
-The faulted thread will be stopped from execution until the page fault is
-resolved from user-space by either an
-.B UFFDIO_COPY
-or an
-.B UFFDIO_ZEROPAGE
-ioctl.
-.TP
-.BR UFFDIO_REGISTER_MODE_MINOR " (since Linux 5.13)"
-When registered with
-.B UFFDIO_REGISTER_MODE_MINOR
-mode, user-space will receive a page-fault notification
-when a minor page fault occurs.
-That is,
-when a backing page is in the page cache,
-but page table entries don't yet exist.
-The faulted thread will be stopped from execution
-until the page fault is resolved from user-space by an
-.B UFFDIO_CONTINUE
-ioctl.
-.TP
-.BR UFFDIO_REGISTER_MODE_WP " (since Linux 5.7)"
-When registered with
-.B UFFDIO_REGISTER_MODE_WP
-mode, user-space will receive a page-fault notification
-when a write-protected page is written.
-The faulted thread will be stopped from execution
-until user-space write-unprotects the page using an
-.B UFFDIO_WRITEPROTECT
-ioctl.
-.P
-Multiple modes can be enabled at the same time for the same memory range.
-.P
-Since Linux 4.14, a userfaultfd page-fault notification can selectively embed
-faulting thread ID information into the notification.
-One needs to enable this feature explicitly using the
-.B UFFD_FEATURE_THREAD_ID
-feature bit when initializing the userfaultfd context.
-By default, thread ID reporting is disabled.
-.SS Usage
-The userfaultfd mechanism is designed to allow a thread in a multithreaded
-program to perform user-space paging for the other threads in the process.
-When a page fault occurs for one of the regions registered
-to the userfaultfd object,
-the faulting thread is put to sleep and
-an event is generated that can be read via the userfaultfd file descriptor.
-The fault-handling thread reads events from this file descriptor and services
-them using the operations described in
-.BR ioctl_userfaultfd (2).
-When servicing the page fault events,
-the fault-handling thread can trigger a wake-up for the sleeping thread.
-.P
-It is possible for the faulting threads and the fault-handling threads
-to run in the context of different processes.
-In this case, these threads may belong to different programs,
-and the program that executes the faulting threads
-will not necessarily cooperate with the program that handles the page faults.
-In such non-cooperative mode,
-the process that monitors userfaultfd and handles page faults
-needs to be aware of the changes in the virtual memory layout
-of the faulting process to avoid memory corruption.
-.P
-Since Linux 4.11,
-userfaultfd can also notify the fault-handling threads about changes
-in the virtual memory layout of the faulting process.
-In addition, if the faulting process invokes
-.BR fork (2),
-the userfaultfd objects associated with the parent may be duplicated
-into the child process and the userfaultfd monitor will be notified
-(via the
-.B UFFD_EVENT_FORK
-described below)
-about the file descriptor associated with the userfault objects
-created for the child process,
-which allows the userfaultfd monitor to perform user-space paging
-for the child process.
-Unlike page faults which have to be synchronous and require an
-explicit or implicit wakeup,
-all other events are delivered asynchronously and
-the non-cooperative process resumes execution as
-soon as the userfaultfd manager executes
-.BR read (2).
-The userfaultfd manager should carefully synchronize calls to
-.B UFFDIO_COPY
-with the processing of events.
-.P
-The current asynchronous model of the event delivery is optimal for
-single threaded non-cooperative userfaultfd manager implementations.
-.\" Regarding the preceding sentence, Mike Rapoport says:
-.\" The major point here is that current events delivery model could be
-.\" problematic for multi-threaded monitor. I even suspect that it would be
-.\" impossible to ensure synchronization between page faults and non-page
-.\" fault events in multi-threaded monitor.
-.\" .P
-.\" FIXME elaborate about non-cooperating mode, describe its limitations
-.\" for kernels before Linux 4.11, features added in Linux 4.11
-.\" and limitations remaining in Linux 4.11
-.\" Maybe it's worth adding a dedicated sub-section...
-.\"
-.P
-Since Linux 5.7, userfaultfd is able to do
-synchronous page dirty tracking using the new write-protect register mode.
-One should check against the feature bit
-.B UFFD_FEATURE_PAGEFAULT_FLAG_WP
-before using this feature.
-Similar to the original userfaultfd missing mode, the write-protect mode will
-generate a userfaultfd notification when the protected page is written.
-The user needs to resolve the page fault by unprotecting the faulted page and
-kicking the faulted thread to continue.
-For more information,
-please refer to the "Userfaultfd write-protect mode" section.
-.\"
-.SS Userfaultfd operation
-After the userfaultfd object is created with
-.BR userfaultfd (),
-the application must enable it using the
-.B UFFDIO_API
-.BR ioctl (2)
-operation.
-This operation allows a two-step handshake between the kernel and user space
-to determine what API version and features the kernel supports,
-and then to enable those features user space wants.
-This operation must be performed before any of the other
-.BR ioctl (2)
-operations described below (or those operations fail with the
-.B EINVAL
-error).
-.P
-After a successful
-.B UFFDIO_API
-operation,
-the application then registers memory address ranges using the
-.B UFFDIO_REGISTER
-.BR ioctl (2)
-operation.
-After successful completion of a
-.B UFFDIO_REGISTER
-operation,
-a page fault occurring in the requested memory range, and satisfying
-the mode defined at the registration time, will be forwarded by the kernel to
-the user-space application.
-The application can then use various (e.g.,
-.BR UFFDIO_COPY ,
-.BR UFFDIO_ZEROPAGE ,
-or
-.BR UFFDIO_CONTINUE )
-.BR ioctl (2)
-operations to resolve the page fault.
-.P
-Since Linux 4.14, if the application sets the
-.B UFFD_FEATURE_SIGBUS
-feature bit using the
-.B UFFDIO_API
-.BR ioctl (2),
-no page-fault notification will be forwarded to user space.
-Instead a
-.B SIGBUS
-signal is delivered to the faulting process.
-With this feature,
-userfaultfd can be used for robustness purposes to simply catch
-any access to areas within the registered address range that do not
-have pages allocated, without having to listen to userfaultfd events.
-No userfaultfd monitor will be required for dealing with such memory
-accesses.
-For example, this feature can be useful for applications that
-want to prevent the kernel from automatically allocating pages and filling
-holes in sparse files when the hole is accessed through a memory mapping.
-.P
-The
-.B UFFD_FEATURE_SIGBUS
-feature is implicitly inherited through
-.BR fork (2)
-if used in combination with
-.BR UFFD_FEATURE_EVENT_FORK .
-.P
-Details of the various
-.BR ioctl (2)
-operations can be found in
-.BR ioctl_userfaultfd (2).
-.P
-Since Linux 4.11, events other than page-fault may be enabled during
-.B UFFDIO_API
-operation.
-.P
-Up to Linux 4.11,
-userfaultfd can be used only with anonymous private memory mappings.
-Since Linux 4.11,
-userfaultfd can be also used with hugetlbfs and shared memory mappings.
-.\"
-.SS Userfaultfd write-protect mode (since Linux 5.7)
-Since Linux 5.7, userfaultfd supports write-protect mode for anonymous memory.
-The user needs to first check availability of this feature using
-.B UFFDIO_API
-ioctl against the feature bit
-.B UFFD_FEATURE_PAGEFAULT_FLAG_WP
-before using this feature.
-.P
-Since Linux 5.19,
-the write-protection mode was also supported on
-shmem and hugetlbfs memory types.
-It can be detected with the feature bit
-.BR UFFD_FEATURE_WP_HUGETLBFS_SHMEM .
-.P
-To register with userfaultfd write-protect mode, the user needs to initiate the
-.B UFFDIO_REGISTER
-ioctl with mode
-.B UFFDIO_REGISTER_MODE_WP
-set.
-Note that it is legal to monitor the same memory range with multiple modes.
-For example, the user can do
-.B UFFDIO_REGISTER
-with the mode set to
-.BR "UFFDIO_REGISTER_MODE_MISSING | UFFDIO_REGISTER_MODE_WP" .
-When there is only
-.B UFFDIO_REGISTER_MODE_WP
-registered, user-space will
-.I not
-receive any notification when a missing page is written.
-Instead, user-space will receive a write-protect page-fault notification
-only when an existing but write-protected page got written.
-.P
-After the
-.B UFFDIO_REGISTER
-ioctl completed with
-.B UFFDIO_REGISTER_MODE_WP
-mode set,
-the user can write-protect any existing memory within the range using the ioctl
-.B UFFDIO_WRITEPROTECT
-where
-.I uffdio_writeprotect.mode
-should be set to
-.BR UFFDIO_WRITEPROTECT_MODE_WP .
-.P
-When a write-protect event happens,
-user-space will receive a page-fault notification whose
-.I uffd_msg.pagefault.flags
-will have the
-.B UFFD_PAGEFAULT_FLAG_WP
-flag set.
-Note: since only writes can trigger this kind of fault,
-write-protect notifications will always have the
-.B UFFD_PAGEFAULT_FLAG_WRITE
-bit set along with the
-.B UFFD_PAGEFAULT_FLAG_WP
-bit.
-.P
-To resolve a write-protection page fault, the user should initiate another
-.B UFFDIO_WRITEPROTECT
-ioctl, whose
-.I uffdio_writeprotect.mode
-should have the flag
-.B UFFDIO_WRITEPROTECT_MODE_WP
-cleared upon the faulted page or range.
-.\"
-.SS Userfaultfd minor fault mode (since Linux 5.13)
-Since Linux 5.13,
-userfaultfd supports minor fault mode.
-In this mode,
-fault messages are produced not for major faults
-(where the page was missing),
-but rather for minor faults,
-where a page exists in the page cache,
-but the page table entries are not yet present.
-The user needs to first check availability of this feature using the
-.B UFFDIO_API
-ioctl with the appropriate feature bits set before using this feature:
-.B UFFD_FEATURE_MINOR_HUGETLBFS
-since Linux 5.13,
-or
-.B UFFD_FEATURE_MINOR_SHMEM
-since Linux 5.14.
-.P
-To register with userfaultfd minor fault mode,
-the user needs to initiate the
-.B UFFDIO_REGISTER
-ioctl with mode
-.B UFFDIO_REGISTER_MODE_MINOR
-set.
-.P
-When a minor fault occurs,
-user-space will receive a page-fault notification
-whose
-.I uffd_msg.pagefault.flags
-will have the
-.B UFFD_PAGEFAULT_FLAG_MINOR
-flag set.
-.P
-To resolve a minor page fault,
-the handler should decide whether or not
-the existing page contents need to be modified first.
-If so,
-this should be done in-place via a second,
-non-userfaultfd-registered mapping
-to the same backing page
-(e.g., by mapping the shmem or hugetlbfs file twice).
-Once the page is considered "up to date",
-the fault can be resolved by initiating an
-.B UFFDIO_CONTINUE
-ioctl,
-which installs the page table entries and
-(by default)
-wakes up the faulting thread(s).
-.P
-Minor fault mode supports only hugetlbfs-backed (since Linux 5.13)
-and shmem-backed (since Linux 5.14) memory.
-.\"
-.SS Reading from the userfaultfd structure
-Each
-.BR read (2)
-from the userfaultfd file descriptor returns one or more
-.I uffd_msg
-structures, each of which describes a page-fault event
-or an event required for the non-cooperative userfaultfd usage:
-.P
-.in +4n
-.EX
-struct uffd_msg {
- __u8 event; /* Type of event */
- ...
- union {
- struct {
- __u64 flags; /* Flags describing fault */
- __u64 address; /* Faulting address */
- union {
- __u32 ptid; /* Thread ID of the fault */
- } feat;
- } pagefault;
-\&
- struct { /* Since Linux 4.11 */
- __u32 ufd; /* Userfault file descriptor
- of the child process */
- } fork;
-\&
- struct { /* Since Linux 4.11 */
- __u64 from; /* Old address of remapped area */
- __u64 to; /* New address of remapped area */
- __u64 len; /* Original mapping length */
- } remap;
-\&
- struct { /* Since Linux 4.11 */
- __u64 start; /* Start address of removed area */
- __u64 end; /* End address of removed area */
- } remove;
- ...
- } arg;
-\&
- /* Padding fields omitted */
-} __packed;
-.EE
-.in
-.P
-If multiple events are available and the supplied buffer is large enough,
-.BR read (2)
-returns as many events as will fit in the supplied buffer.
-If the buffer supplied to
-.BR read (2)
-is smaller than the size of the
-.I uffd_msg
-structure, the
-.BR read (2)
-fails with the error
-.BR EINVAL .
-.P
-The fields set in the
-.I uffd_msg
-structure are as follows:
-.TP
-.I event
-The type of event.
-Depending on the event type,
-different fields of the
-.I arg
-union represent details required for the event processing.
-The non-page-fault events are generated only when appropriate feature
-is enabled during API handshake with
-.B UFFDIO_API
-.BR ioctl (2).
-.IP
-The following values can appear in the
-.I event
-field:
-.RS
-.TP
-.BR UFFD_EVENT_PAGEFAULT " (since Linux 4.3)"
-A page-fault event.
-The page-fault details are available in the
-.I pagefault
-field.
-.TP
-.BR UFFD_EVENT_FORK " (since Linux 4.11)"
-Generated when the faulting process invokes
-.BR fork (2)
-(or
-.BR clone (2)
-without the
-.B CLONE_VM
-flag).
-The event details are available in the
-.I fork
-field.
-.\" FIXME describe duplication of userfault file descriptor during fork
-.TP
-.BR UFFD_EVENT_REMAP " (since Linux 4.11)"
-Generated when the faulting process invokes
-.BR mremap (2).
-The event details are available in the
-.I remap
-field.
-.TP
-.BR UFFD_EVENT_REMOVE " (since Linux 4.11)"
-Generated when the faulting process invokes
-.BR madvise (2)
-with
-.B MADV_DONTNEED
-or
-.B MADV_REMOVE
-advice.
-The event details are available in the
-.I remove
-field.
-.TP
-.BR UFFD_EVENT_UNMAP " (since Linux 4.11)"
-Generated when the faulting process unmaps a memory range,
-either explicitly using
-.BR munmap (2)
-or implicitly during
-.BR mmap (2)
-or
-.BR mremap (2).
-The event details are available in the
-.I remove
-field.
-.RE
-.TP
-.I pagefault.address
-The address that triggered the page fault.
-.TP
-.I pagefault.flags
-A bit mask of flags that describe the event.
-For
-.BR UFFD_EVENT_PAGEFAULT ,
-the following flags may appear:
-.RS
-.TP
-.B UFFD_PAGEFAULT_FLAG_WP
-If this flag is set, then the fault was a write-protect fault.
-.TP
-.B UFFD_PAGEFAULT_FLAG_MINOR
-If this flag is set, then the fault was a minor fault.
-.TP
-.B UFFD_PAGEFAULT_FLAG_WRITE
-If this flag is set, then the fault was a write fault.
-.P
-If neither
-.B UFFD_PAGEFAULT_FLAG_WP
-nor
-.B UFFD_PAGEFAULT_FLAG_MINOR
-are set, then the fault was a missing fault.
-.RE
-.TP
-.I pagefault.feat.ptid
-The thread ID that triggered the page fault.
-.TP
-.I fork.ufd
-The file descriptor associated with the userfault object
-created for the child created by
-.BR fork (2).
-.TP
-.I remap.from
-The original address of the memory range that was remapped using
-.BR mremap (2).
-.TP
-.I remap.to
-The new address of the memory range that was remapped using
-.BR mremap (2).
-.TP
-.I remap.len
-The original length of the memory range that was remapped using
-.BR mremap (2).
-.TP
-.I remove.start
-The start address of the memory range that was freed using
-.BR madvise (2)
-or unmapped.
-.TP
-.I remove.end
-The end address of the memory range that was freed using
-.BR madvise (2)
-or unmapped.
-.P
-A
-.BR read (2)
-on a userfaultfd file descriptor can fail with the following errors:
-.TP
-.B EINVAL
-The userfaultfd object has not yet been enabled using the
-.B UFFDIO_API
-.BR ioctl (2)
-operation.
-.P
-If the
-.B O_NONBLOCK
-flag is enabled in the associated open file description,
-the userfaultfd file descriptor can be monitored with
-.BR poll (2),
-.BR select (2),
-and
-.BR epoll (7).
-When events are available, the file descriptor indicates as readable.
-If the
-.B O_NONBLOCK
-flag is not enabled, then
-.BR poll (2)
-(always) indicates the file as having a
-.B POLLERR
-condition, and
-.BR select (2)
-indicates the file descriptor as both readable and writable.
-.\" FIXME What is the reason for this seemingly odd behavior with respect
-.\" to the O_NONBLOCK flag? (see userfaultfd_poll() in fs/userfaultfd.c).
-.\" Something needs to be said about this.
-.SH RETURN VALUE
-On success,
-.BR userfaultfd ()
-returns a new file descriptor that refers to the userfaultfd object.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EINVAL
-An unsupported value was specified in
-.IR flags .
-.TP
-.B EMFILE
-The per-process limit on the number of open file descriptors has been
-reached.
-.TP
-.B ENFILE
-The system-wide limit on the total number of open files has been
-reached.
-.TP
-.B ENOMEM
-Insufficient kernel memory was available.
-.TP
-.BR EPERM " (since Linux 5.2)"
-.\" cefdca0a86be517bc390fc4541e3674b8e7803b0
-The caller is not privileged (does not have the
-.B CAP_SYS_PTRACE
-capability in the initial user namespace), and
-.I /proc/sys/vm/unprivileged_userfaultfd
-has the value 0.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 4.3.
-.P
-Support for hugetlbfs and shared memory areas and
-non-page-fault events was added in Linux 4.11.
-.SH NOTES
-The userfaultfd mechanism can be used as an alternative to
-traditional user-space paging techniques based on the use of the
-.B SIGSEGV
-signal and
-.BR mmap (2).
-It can also be used to implement lazy restore
-for checkpoint/restore mechanisms,
-as well as post-copy migration to allow (nearly) uninterrupted execution
-when transferring virtual machines and Linux containers
-from one host to another.
-.SH BUGS
-If the
-.B UFFD_FEATURE_EVENT_FORK
-is enabled and a system call from the
-.BR fork (2)
-family is interrupted by a signal or failed, a stale userfaultfd descriptor
-might be created.
-In this case, a spurious
-.B UFFD_EVENT_FORK
-will be delivered to the userfaultfd monitor.
-.SH EXAMPLES
-The program below demonstrates the use of the userfaultfd mechanism.
-The program creates two threads, one of which acts as the
-page-fault handler for the process, for the pages in a demand-page zero
-region created using
-.BR mmap (2).
-.P
-The program takes one command-line argument,
-which is the number of pages that will be created in a mapping
-whose page faults will be handled via userfaultfd.
-After creating a userfaultfd object,
-the program then creates an anonymous private mapping of the specified size
-and registers the address range of that mapping using the
-.B UFFDIO_REGISTER
-.BR ioctl (2)
-operation.
-The program then creates a second thread that will perform the
-task of handling page faults.
-.P
-The main thread then walks through the pages of the mapping fetching
-bytes from successive pages.
-Because the pages have not yet been accessed,
-the first access of a byte in each page will trigger a page-fault event
-on the userfaultfd file descriptor.
-.P
-Each of the page-fault events is handled by the second thread,
-which sits in a loop processing input from the userfaultfd file descriptor.
-In each loop iteration, the second thread first calls
-.BR poll (2)
-to check the state of the file descriptor,
-and then reads an event from the file descriptor.
-All such events should be
-.B UFFD_EVENT_PAGEFAULT
-events,
-which the thread handles by copying a page of data into
-the faulting region using the
-.B UFFDIO_COPY
-.BR ioctl (2)
-operation.
-.P
-The following is an example of what we see when running the program:
-.P
-.in +4n
-.EX
-$ \fB./userfaultfd_demo 3\fP
-Address returned by mmap() = 0x7fd30106c000
-\&
-fault_handler_thread():
- poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
- UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106c00f
- (uffdio_copy.copy returned 4096)
-Read address 0x7fd30106c00f in main(): A
-Read address 0x7fd30106c40f in main(): A
-Read address 0x7fd30106c80f in main(): A
-Read address 0x7fd30106cc0f in main(): A
-\&
-fault_handler_thread():
- poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
- UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106d00f
- (uffdio_copy.copy returned 4096)
-Read address 0x7fd30106d00f in main(): B
-Read address 0x7fd30106d40f in main(): B
-Read address 0x7fd30106d80f in main(): B
-Read address 0x7fd30106dc0f in main(): B
-\&
-fault_handler_thread():
- poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
- UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106e00f
- (uffdio_copy.copy returned 4096)
-Read address 0x7fd30106e00f in main(): C
-Read address 0x7fd30106e40f in main(): C
-Read address 0x7fd30106e80f in main(): C
-Read address 0x7fd30106ec0f in main(): C
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (userfaultfd.c)
-.EX
-/* userfaultfd_demo.c
-\&
- Licensed under the GNU General Public License version 2 or later.
-*/
-#define _GNU_SOURCE
-#include <err.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <linux/userfaultfd.h>
-#include <poll.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-\&
-static int page_size;
-\&
-static void *
-fault_handler_thread(void *arg)
-{
- int nready;
- long uffd; /* userfaultfd file descriptor */
- ssize_t nread;
- struct pollfd pollfd;
- struct uffdio_copy uffdio_copy;
-\&
- static int fault_cnt = 0; /* Number of faults so far handled */
- static char *page = NULL;
- static struct uffd_msg msg; /* Data read from userfaultfd */
-\&
- uffd = (long) arg;
-\&
- /* Create a page that will be copied into the faulting region. */
-\&
- if (page == NULL) {
- page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, \-1, 0);
- if (page == MAP_FAILED)
- err(EXIT_FAILURE, "mmap");
- }
-\&
- /* Loop, handling incoming events on the userfaultfd
- file descriptor. */
-\&
- for (;;) {
-\&
- /* See what poll() tells us about the userfaultfd. */
-\&
- pollfd.fd = uffd;
- pollfd.events = POLLIN;
- nready = poll(&pollfd, 1, \-1);
- if (nready == \-1)
- err(EXIT_FAILURE, "poll");
-\&
- printf("\enfault_handler_thread():\en");
- printf(" poll() returns: nready = %d; "
- "POLLIN = %d; POLLERR = %d\en", nready,
- (pollfd.revents & POLLIN) != 0,
- (pollfd.revents & POLLERR) != 0);
-\&
- /* Read an event from the userfaultfd. */
-\&
- nread = read(uffd, &msg, sizeof(msg));
- if (nread == 0) {
- printf("EOF on userfaultfd!\en");
- exit(EXIT_FAILURE);
- }
-\&
- if (nread == \-1)
- err(EXIT_FAILURE, "read");
-\&
- /* We expect only one kind of event; verify that assumption. */
-\&
- if (msg.event != UFFD_EVENT_PAGEFAULT) {
- fprintf(stderr, "Unexpected event on userfaultfd\en");
- exit(EXIT_FAILURE);
- }
-\&
- /* Display info about the page\-fault event. */
-\&
- printf(" UFFD_EVENT_PAGEFAULT event: ");
- printf("flags = %"PRIx64"; ", msg.arg.pagefault.flags);
- printf("address = %"PRIx64"\en", msg.arg.pagefault.address);
-\&
- /* Copy the page pointed to by \[aq]page\[aq] into the faulting
- region. Vary the contents that are copied in, so that it
- is more obvious that each fault is handled separately. */
-\&
- memset(page, \[aq]A\[aq] + fault_cnt % 20, page_size);
- fault_cnt++;
-\&
- uffdio_copy.src = (unsigned long) page;
-\&
- /* We need to handle page faults in units of pages(!).
- So, round faulting address down to page boundary. */
-\&
- uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address &
- \[ti](page_size \- 1);
- uffdio_copy.len = page_size;
- uffdio_copy.mode = 0;
- uffdio_copy.copy = 0;
- if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == \-1)
- err(EXIT_FAILURE, "ioctl\-UFFDIO_COPY");
-\&
- printf(" (uffdio_copy.copy returned %"PRId64")\en",
- uffdio_copy.copy);
- }
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int s;
- char c;
- char *addr; /* Start of region handled by userfaultfd */
- long uffd; /* userfaultfd file descriptor */
- size_t len, l; /* Length of region handled by userfaultfd */
- pthread_t thr; /* ID of thread that handles page faults */
- struct uffdio_api uffdio_api;
- struct uffdio_register uffdio_register;
-\&
- if (argc != 2) {
- fprintf(stderr, "Usage: %s num\-pages\en", argv[0]);
- exit(EXIT_FAILURE);
- }
-\&
- page_size = sysconf(_SC_PAGE_SIZE);
- len = strtoull(argv[1], NULL, 0) * page_size;
-\&
- /* Create and enable userfaultfd object. */
-\&
- uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd == \-1)
- err(EXIT_FAILURE, "userfaultfd");
-\&
- /* NOTE: Two-step feature handshake is not needed here, since this
- example doesn't require any specific features.
-\&
- Programs that *do* should call UFFDIO_API twice: once with
- `features = 0` to detect features supported by this kernel, and
- again with the subset of features the program actually wants to
- enable. */
- uffdio_api.api = UFFD_API;
- uffdio_api.features = 0;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api) == \-1)
- err(EXIT_FAILURE, "ioctl\-UFFDIO_API");
-\&
- /* Create a private anonymous mapping. The memory will be
- demand\-zero paged\-\-that is, not yet allocated. When we
- actually touch the memory, it will be allocated via
- the userfaultfd. */
-\&
- addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, \-1, 0);
- if (addr == MAP_FAILED)
- err(EXIT_FAILURE, "mmap");
-\&
- printf("Address returned by mmap() = %p\en", addr);
-\&
- /* Register the memory range of the mapping we just created for
- handling by the userfaultfd object. In mode, we request to track
- missing pages (i.e., pages that have not yet been faulted in). */
-\&
- uffdio_register.range.start = (unsigned long) addr;
- uffdio_register.range.len = len;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == \-1)
- err(EXIT_FAILURE, "ioctl\-UFFDIO_REGISTER");
-\&
- /* Create a thread that will process the userfaultfd events. */
-\&
- s = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
- if (s != 0) {
- errc(EXIT_FAILURE, s, "pthread_create");
- }
-\&
- /* Main thread now touches memory in the mapping, touching
- locations 1024 bytes apart. This will trigger userfaultfd
- events for all pages in the region. */
-\&
- l = 0xf; /* Ensure that faulting address is not on a page
- boundary, in order to test that we correctly
- handle that case in fault_handler_thread(). */
- while (l < len) {
- c = addr[l];
- printf("Read address %p in %s(): ", addr + l, __func__);
- printf("%c\en", c);
- l += 1024;
- usleep(100000); /* Slow things down a little */
- }
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR fcntl (2),
-.BR ioctl (2),
-.BR ioctl_userfaultfd (2),
-.BR madvise (2),
-.BR mmap (2)
-.P
-.I Documentation/admin\-guide/mm/userfaultfd.rst
-in the Linux kernel source tree
diff --git a/man2/ustat.2 b/man2/ustat.2
deleted file mode 100644
index 2b47f9f54..000000000
--- a/man2/ustat.2
+++ /dev/null
@@ -1,104 +0,0 @@
-.\" Copyright (C) 1995, Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Created 1995-08-09 Thomas K. Dyas <tdyas@eden.rutgers.edu>
-.\" Modified 1997-01-31 by Eric S. Raymond <esr@thyrsus.com>
-.\" Modified 2001-03-22 by aeb
-.\" Modified 2003-08-04 by aeb
-.\"
-.TH ustat 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-ustat \- get filesystem statistics
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/types.h>
-.BR "#include <unistd.h>" " /* libc[45] */"
-.BR "#include <ustat.h>" " /* glibc2 */"
-.P
-.BI "[[deprecated]] int ustat(dev_t " dev ", struct ustat *" ubuf );
-.fi
-.SH DESCRIPTION
-.BR ustat ()
-returns information about a mounted filesystem.
-.I dev
-is a device number identifying a device containing
-a mounted filesystem.
-.I ubuf
-is a pointer to a
-.I ustat
-structure that contains the following
-members:
-.P
-.in +4n
-.EX
-daddr_t f_tfree; /* Total free blocks */
-ino_t f_tinode; /* Number of free inodes */
-char f_fname[6]; /* Filsys name */
-char f_fpack[6]; /* Filsys pack name */
-.EE
-.in
-.P
-The last two fields,
-.I f_fname
-and
-.IR f_fpack ,
-are not implemented and will
-always be filled with null bytes (\[aq]\e0\[aq]).
-.SH RETURN VALUE
-On success, zero is returned and the
-.I ustat
-structure pointed to by
-.I ubuf
-will be filled in.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-.I ubuf
-points outside of your accessible address space.
-.TP
-.B EINVAL
-.I dev
-does not refer to a device containing a mounted filesystem.
-.TP
-.B ENOSYS
-The mounted filesystem referenced by
-.I dev
-does not support this operation,
-or the running kernel is older than Linux 1.3.16.
-.SH STANDARDS
-None.
-.SH HISTORY
-SVr4.
-Removed in glibc 2.28.
-.\" SVr4 documents additional error conditions ENOLINK, ECOMM, and EINTR
-.\" but has no ENOSYS condition.
-.P
-.BR ustat ()
-is deprecated and has been provided only for compatibility.
-All new programs should use
-.BR statfs (2)
-instead.
-.SS HP-UX notes
-The HP-UX version of the
-.I ustat
-structure has an additional field,
-.IR f_blksize ,
-that is unknown elsewhere.
-HP-UX warns:
-For some filesystems, the number of free inodes does not change.
-Such filesystems will return \-1 in the field
-.IR f_tinode .
-.\" Some software tries to use this in order to test whether the
-.\" underlying filesystem is NFS.
-For some filesystems, inodes are dynamically allocated.
-Such filesystems will return the current number of free inodes.
-.SH SEE ALSO
-.BR stat (2),
-.BR statfs (2)
diff --git a/man2/utime.2 b/man2/utime.2
deleted file mode 100644
index f4858d3e3..000000000
--- a/man2/utime.2
+++ /dev/null
@@ -1,179 +0,0 @@
-.\" Copyright (c) 1992 Drew Eckhardt (drew@cs.colorado.edu), March 28, 1992
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified by Michael Haardt <michael@moria.de>
-.\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
-.\" Modified 1995-06-10 by Andries Brouwer <aeb@cwi.nl>
-.\" Modified 2004-06-23 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Modified 2004-10-10 by Andries Brouwer <aeb@cwi.nl>
-.\"
-.TH utime 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-utime, utimes \- change file last access and modification times
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <utime.h>
-.P
-.BI "int utime(const char *" filename ,
-.BI " const struct utimbuf *_Nullable " times );
-.P
-.B #include <sys/time.h>
-.P
-.BI "int utimes(const char *" filename ,
-.BI " const struct timeval " times "[_Nullable 2]);"
-.fi
-.SH DESCRIPTION
-.B Note:
-modern applications may prefer to use the interfaces described in
-.BR utimensat (2).
-.P
-The
-.BR utime ()
-system call
-changes the access and modification times of the inode specified by
-.I filename
-to the
-.IR actime " and " modtime
-fields of
-.I times
-respectively.
-The status change time (ctime) will be set to the current time, even if the
-other timestamps don't actually change.
-.P
-If
-.I times
-is NULL, then the access and modification times of the file are set
-to the current time.
-.P
-Changing timestamps is permitted when: either
-the process has appropriate privileges,
-or the effective user ID equals the user ID
-of the file, or
-.I times
-is NULL and the process has write permission for the file.
-.P
-The
-.I utimbuf
-structure is:
-.P
-.in +4n
-.EX
-struct utimbuf {
- time_t actime; /* access time */
- time_t modtime; /* modification time */
-};
-.EE
-.in
-.P
-The
-.BR utime ()
-system call
-allows specification of timestamps with a resolution of 1 second.
-.P
-The
-.BR utimes ()
-system call
-is similar, but the
-.I times
-argument refers to an array rather than a structure.
-The elements of this array are
-.I timeval
-structures, which allow a precision of 1 microsecond for specifying timestamps.
-The
-.I timeval
-structure is:
-.P
-.in +4n
-.EX
-struct timeval {
- long tv_sec; /* seconds */
- long tv_usec; /* microseconds */
-};
-.EE
-.in
-.P
-.I times[0]
-specifies the new access time, and
-.I times[1]
-specifies the new modification time.
-If
-.I times
-is NULL, then analogously to
-.BR utime (),
-the access and modification times of the file are
-set to the current time.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-Search permission is denied for one of the directories in
-the path prefix of
-.I filename
-(see also
-.BR path_resolution (7)).
-.TP
-.B EACCES
-.I times
-is NULL,
-the caller's effective user ID does not match the owner of the file,
-the caller does not have write access to the file,
-and the caller is not privileged
-(Linux: does not have either the
-.B CAP_DAC_OVERRIDE
-or the
-.B CAP_FOWNER
-capability).
-.TP
-.B ENOENT
-.I filename
-does not exist.
-.TP
-.B EPERM
-.I times
-is not NULL,
-the caller's effective UID does not match the owner of the file,
-and the caller is not privileged
-(Linux: does not have the
-.B CAP_FOWNER
-capability).
-.TP
-.B EROFS
-.I filename
-resides on a read-only filesystem.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR utime ()
-SVr4, POSIX.1-2001.
-POSIX.1-2008 marks it as obsolete.
-.TP
-.BR utimes ()
-4.3BSD, POSIX.1-2001.
-.SH NOTES
-Linux does not allow changing the timestamps on an immutable file,
-or setting the timestamps to something other than the current time
-on an append-only file.
-.\"
-.\" In libc4 and libc5,
-.\" .BR utimes ()
-.\" is just a wrapper for
-.\" .BR utime ()
-.\" and hence does not allow a subsecond resolution.
-.SH SEE ALSO
-.BR chattr (1),
-.BR touch (1),
-.BR futimesat (2),
-.BR stat (2),
-.BR utimensat (2),
-.BR futimens (3),
-.BR futimes (3),
-.BR inode (7)
diff --git a/man2/utimensat.2 b/man2/utimensat.2
deleted file mode 100644
index d58b4d82e..000000000
--- a/man2/utimensat.2
+++ /dev/null
@@ -1,633 +0,0 @@
-'\" t
-.\" Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
-.\" <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH utimensat 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-utimensat, futimens \- change file timestamps with nanosecond precision
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#include <fcntl.h>" " /* Definition of " AT_* " constants */"
-.B #include <sys/stat.h>
-.P
-.BI "int utimensat(int " dirfd ", const char *" pathname ,
-.BI " const struct timespec " times "[_Nullable 2], int " flags );
-.BI "int futimens(int " fd ", const struct timespec " times "[_Nullable 2]);"
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR utimensat ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _ATFILE_SOURCE
-.fi
-.P
-.BR futimens ():
-.nf
- Since glibc 2.10:
- _POSIX_C_SOURCE >= 200809L
- Before glibc 2.10:
- _GNU_SOURCE
-.fi
-.SH DESCRIPTION
-.BR utimensat ()
-and
-.BR futimens ()
-update the timestamps of a file with nanosecond precision.
-This contrasts with the historical
-.BR utime (2)
-and
-.BR utimes (2),
-which permit only second and microsecond precision, respectively,
-when setting file timestamps.
-.P
-With
-.BR utimensat ()
-the file is specified via the pathname given in
-.IR pathname .
-With
-.BR futimens ()
-the file whose timestamps are to be updated is specified via
-an open file descriptor,
-.IR fd .
-.P
-For both calls, the new file timestamps are specified in the array
-.IR times :
-.I times[0]
-specifies the new "last access time" (\fIatime\fP);
-.I times[1]
-specifies the new "last modification time" (\fImtime\fP).
-Each of the elements of
-.I times
-specifies a time as the number of seconds and nanoseconds
-since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).
-This information is conveyed in a
-.BR timespec (3)
-structure.
-.P
-Updated file timestamps are set to the greatest value
-supported by the filesystem that is not greater than the specified time.
-.P
-If the
-.I tv_nsec
-field of one of the
-.I timespec
-structures has the special value
-.BR UTIME_NOW ,
-then the corresponding file timestamp is set to the current time.
-If the
-.I tv_nsec
-field of one of the
-.I timespec
-structures has the special value
-.BR UTIME_OMIT ,
-then the corresponding file timestamp is left unchanged.
-In both of these cases, the value of the corresponding
-.I tv_sec
-.\" 2.6.22 was broken: it is not ignored
-field is ignored.
-.P
-If
-.I times
-is NULL, then both timestamps are set to the current time.
-.\"
-.P
-The status change time (ctime) will be set to the current time, even if the
-other timestamps don't actually change.
-.SS Permissions requirements
-To set both file timestamps to the current time (i.e.,
-.I times
-is NULL, or both
-.I tv_nsec
-fields specify
-.BR UTIME_NOW ),
-either:
-.IP \[bu] 3
-the caller must have write access to the file;
-.\" 2.6.22 was broken here -- for futimens() the check is
-.\" based on whether or not the file descriptor is writable,
-.\" not on whether the caller's effective UID has write
-.\" permission for the file referred to by the descriptor.
-.IP \[bu]
-the caller's effective user ID must match the owner of the file; or
-.IP \[bu]
-the caller must have appropriate privileges.
-.P
-To make any change other than setting both timestamps to the
-current time (i.e.,
-.I times
-is not NULL, and neither
-.I tv_nsec
-field is
-.B UTIME_NOW
-.\" 2.6.22 was broken here:
-.\" both must be something other than *either* UTIME_OMIT *or* UTIME_NOW.
-and neither
-.I tv_nsec
-field is
-.BR UTIME_OMIT ),
-either condition 2 or 3 above must apply.
-.P
-If both
-.I tv_nsec
-fields are specified as
-.BR UTIME_OMIT ,
-then no file ownership or permission checks are performed,
-and the file timestamps are not modified,
-but other error conditions may still be detected.
-.\"
-.\"
-.SS utimensat() specifics
-If
-.I pathname
-is relative, then by default it is interpreted relative to the
-directory referred to by the open file descriptor,
-.I dirfd
-(rather than relative to the current working directory of
-the calling process, as is done by
-.BR utimes (2)
-for a relative pathname).
-See
-.BR openat (2)
-for an explanation of why this can be useful.
-.P
-If
-.I pathname
-is relative and
-.I dirfd
-is the special value
-.BR AT_FDCWD ,
-then
-.I pathname
-is interpreted relative to the current working
-directory of the calling process (like
-.BR utimes (2)).
-.P
-If
-.I pathname
-is absolute, then
-.I dirfd
-is ignored.
-.P
-The
-.I flags
-argument is a bit mask created by ORing together zero or more of
-the following values defined in
-.IR <fcntl.h> :
-.TP
-.BR AT_EMPTY_PATH " (since Linux 5.8)"
-If
-.I pathname
-is an empty string, operate on the file referred to by
-.I dirfd
-(which may have been obtained using the
-.BR open (2)
-.B O_PATH
-flag).
-In this case,
-.I dirfd
-can refer to any type of file, not just a directory.
-If
-.I dirfd
-is
-.BR AT_FDCWD ,
-the call operates on the current working directory.
-This flag is Linux-specific; define
-.B _GNU_SOURCE
-to obtain its definition.
-.TP
-.B AT_SYMLINK_NOFOLLOW
-If
-.I pathname
-specifies a symbolic link, then update the timestamps of the link,
-rather than the file to which it refers.
-.SH RETURN VALUE
-On success,
-.BR utimensat ()
-and
-.BR futimens ()
-return 0.
-On error, \-1 is returned and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EACCES
-.I times
-is NULL,
-or both
-.I tv_nsec
-values are
-.BR UTIME_NOW ,
-and the effective user ID of the caller does not match
-the owner of the file,
-the caller does not have write access to the file,
-and the caller is not privileged
-(Linux: does not have either the
-.B CAP_FOWNER
-or the
-.B CAP_DAC_OVERRIDE
-capability).
-.\" But Linux 2.6.22 was broken here.
-.\" Traditionally, utime()/utimes() gives the error EACCES for the case
-.\" where the timestamp pointer argument is NULL (i.e., set both timestamps
-.\" to the current time), and the file is owned by a user other than the
-.\" effective UID of the caller, and the file is not writable by the
-.\" effective UID of the program. utimensat() also gives this error in the
-.\" same case. However, in the same circumstances, when utimensat() is
-.\" given a 'times' array in which both tv_nsec fields are UTIME_NOW, which
-.\" provides equivalent functionality to specifying 'times' as NULL, the
-.\" call succeeds. It should fail with the error EACCES in this case.
-.\"
-.\" POSIX.1-2008 has the following:
-.\" .TP
-.\" .B EACCES
-.\" .RB ( utimensat ())
-.\" .I fd
-.\" was not opened with
-.\" .B O_SEARCH
-.\" and the permissions of the directory to which
-.\" .I fd
-.\" refers do not allow searches.
-.\" EXT2_IMMUTABLE_FL and similar flags for other filesystems.
-.TP
-.B EBADF
-.RB ( futimens ())
-.I fd
-is not a valid file descriptor.
-.TP
-.B EBADF
-.RB ( utimensat ())
-.I pathname
-is relative but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a valid file descriptor.
-.TP
-.B EFAULT
-.I times
-pointed to an invalid address; or,
-.I dirfd
-was
-.BR AT_FDCWD ,
-and
-.I pathname
-is NULL or an invalid address.
-.TP
-.B EINVAL
-Invalid value in
-.IR flags .
-.TP
-.B EINVAL
-Invalid value in one of the
-.I tv_nsec
-fields (value outside range [0, 999,999,999], and not
-.B UTIME_NOW
-or
-.BR UTIME_OMIT );
-or an invalid value in one of the
-.I tv_sec
-fields.
-.TP
-.B EINVAL
-.\" SUSv4 does not specify this error.
-.I pathname
-is NULL,
-.I dirfd
-is not
-.BR AT_FDCWD ,
-and
-.I flags
-contains
-.BR AT_SYMLINK_NOFOLLOW .
-.TP
-.B ELOOP
-.RB ( utimensat ())
-Too many symbolic links were encountered in resolving
-.IR pathname .
-.TP
-.B ENAMETOOLONG
-.RB ( utimensat ())
-.I pathname
-is too long.
-.TP
-.B ENOENT
-.RB ( utimensat ())
-A component of
-.I pathname
-does not refer to an existing directory or file,
-or
-.I pathname
-is an empty string.
-.TP
-.B ENOTDIR
-.RB ( utimensat ())
-.I pathname
-is a relative pathname, but
-.I dirfd
-is neither
-.B AT_FDCWD
-nor a file descriptor referring to a directory;
-or, one of the prefix components of
-.I pathname
-is not a directory.
-.TP
-.B EPERM
-The caller attempted to change one or both timestamps to a value
-other than the current time,
-or to change one of the timestamps to the current time while
-leaving the other timestamp unchanged,
-(i.e.,
-.I times
-is not NULL, neither
-.I tv_nsec
-field is
-.BR UTIME_NOW ,
-and neither
-.I tv_nsec
-field is
-.BR UTIME_OMIT )
-and either:
-.RS
-.IP \[bu] 3
-the caller's effective user ID does not match the owner of the file,
-and the caller is not privileged
-(Linux: does not have the
-.B CAP_FOWNER
-capability); or,
-.IP \[bu]
-.\" Linux 2.6.22 was broken here:
-.\" it was not consistent with the old utimes() implementation,
-.\" since the case when both tv_nsec fields are UTIME_NOW, was not
-.\" treated like the (times == NULL) case.
-the file is marked append-only or immutable (see
-.BR chattr (1)).
-.\" EXT2_IMMUTABLE_FL EXT_APPEND_FL and similar flags for
-.\" other filesystems.
-.\"
-.\" Why the inconsistency (which is described under NOTES) between
-.\" EACCES and EPERM, where only EPERM tests for append-only.
-.\" (This was also so for the older utimes() implementation.)
-.RE
-.TP
-.B EROFS
-The file is on a read-only filesystem.
-.TP
-.B ESRCH
-.RB ( utimensat ())
-Search permission is denied for one of the prefix components of
-.IR pathname .
-.SH ATTRIBUTES
-For an explanation of the terms used in this section, see
-.BR attributes (7).
-.TS
-allbox;
-lbx lb lb
-l l l.
-Interface Attribute Value
-T{
-.na
-.nh
-.BR utimensat (),
-.BR futimens ()
-T} Thread safety MT-Safe
-.TE
-.SH VERSIONS
-.SS C library/kernel ABI differences
-On Linux,
-.BR futimens ()
-is a library function implemented on top of the
-.BR utimensat ()
-system call.
-To support this, the Linux
-.BR utimensat ()
-system call implements a nonstandard feature: if
-.I pathname
-is NULL, then the call modifies the timestamps of
-the file referred to by the file descriptor
-.I dirfd
-(which may refer to any type of file).
-Using this feature, the call
-.I "futimens(fd,\ times)"
-is implemented as:
-.P
-.in +4n
-.EX
-utimensat(fd, NULL, times, 0);
-.EE
-.in
-.P
-Note, however, that the glibc wrapper for
-.BR utimensat ()
-disallows passing NULL as the value for
-.IR pathname :
-the wrapper function returns the error
-.B EINVAL
-in this case.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-.TP
-.BR utimensat ()
-Linux 2.6.22,
-glibc 2.6.
-POSIX.1-2008.
-.TP
-.BR futimens ()
-glibc 2.6.
-POSIX.1-2008.
-.SH NOTES
-.BR utimensat ()
-obsoletes
-.BR futimesat (2).
-.P
-On Linux, timestamps cannot be changed for a file marked immutable,
-and the only change permitted for files marked append-only is to
-set the timestamps to the current time.
-(This is consistent with the historical behavior of
-.BR utime (2)
-and
-.BR utimes (2)
-on Linux.)
-.P
-If both
-.I tv_nsec
-fields are specified as
-.BR UTIME_OMIT ,
-then the Linux implementation of
-.BR utimensat ()
-succeeds even if the file referred to by
-.I dirfd
-and
-.I pathname
-does not exist.
-.SH BUGS
-Several bugs afflict
-.BR utimensat ()
-and
-.BR futimens ()
-before Linux 2.6.26.
-These bugs are either nonconformances with the POSIX.1 draft specification
-or inconsistencies with historical Linux behavior.
-.IP \[bu] 3
-POSIX.1 specifies that if one of the
-.I tv_nsec
-fields has the value
-.B UTIME_NOW
-or
-.BR UTIME_OMIT ,
-then the value of the corresponding
-.I tv_sec
-field should be ignored.
-Instead, the value of the
-.I tv_sec
-field is required to be 0 (or the error
-.B EINVAL
-results).
-.IP \[bu]
-Various bugs mean that for the purposes of permission checking,
-the case where both
-.I tv_nsec
-fields are set to
-.B UTIME_NOW
-isn't always treated the same as specifying
-.I times
-as NULL,
-and the case where one
-.I tv_nsec
-value is
-.B UTIME_NOW
-and the other is
-.B UTIME_OMIT
-isn't treated the same as specifying
-.I times
-as a pointer to an array of structures containing arbitrary time values.
-As a result, in some cases:
-a) file timestamps can be updated by a process that shouldn't have
-permission to perform updates;
-b) file timestamps can't be updated by a process that should have
-permission to perform updates; and
-c) the wrong
-.I errno
-value is returned in case of an error.
-.\" Below, the long description of the errors from the previous bullet
-.\" point (abridged because it's too much detail for a man page).
-.\" .IP \[bu]
-.\" If one of the
-.\" .I tv_nsec
-.\" fields is
-.\" .BR UTIME_OMIT
-.\" and the other is
-.\" .BR UTIME_NOW ,
-.\" then the error
-.\" .B EPERM
-.\" should occur if the process's effective user ID does not match
-.\" the file owner and the process is not privileged.
-.\" Instead, the call successfully changes one of the timestamps.
-.\" .IP \[bu]
-.\" If file is not writable by the effective user ID of the process and
-.\" the process's effective user ID does not match the file owner and
-.\" the process is not privileged,
-.\" and
-.\" .I times
-.\" is NULL, then the error
-.\" .B EACCES
-.\" results.
-.\" This error should also occur if
-.\" .I times
-.\" points to an array of structures in which both
-.\" .I tv_nsec
-.\" fields are
-.\" .BR UTIME_NOW .
-.\" Instead the call succeeds.
-.\" .IP \[bu]
-.\" If a file is marked as append-only (see
-.\" .BR chattr (1)),
-.\" then Linux traditionally
-.\" (i.e.,
-.\" .BR utime (2),
-.\" .BR utimes (2)),
-.\" permits a NULL
-.\" .I times
-.\" argument to be used in order to update both timestamps to the current time.
-.\" For consistency,
-.\" .BR utimensat ()
-.\" and
-.\" .BR futimens ()
-.\" should also produce the same result when given a
-.\" .I times
-.\" argument that points to an array of structures in which both
-.\" .I tv_nsec
-.\" fields are
-.\" .BR UTIME_NOW .
-.\" Instead, the call fails with the error
-.\" .BR EPERM .
-.\" .IP \[bu]
-.\" If a file is marked as immutable (see
-.\" .BR chattr (1)),
-.\" then Linux traditionally
-.\" (i.e.,
-.\" .BR utime (2),
-.\" .BR utimes (2)),
-.\" gives an
-.\" .B EACCES
-.\" error if
-.\" .I times
-.\" is NULL.
-.\" For consistency,
-.\" .BR utimensat ()
-.\" and
-.\" .BR futimens ()
-.\" should also produce the same result when given a
-.\" .I times
-.\" that points to an array of structures in which both
-.\" .I tv_nsec
-.\" fields are
-.\" .BR UTIME_NOW .
-.\" Instead, the call fails with the error
-.\" .BR EPERM .
-.IP \[bu]
-POSIX.1 says that a process that has \fIwrite access to the file\fP
-can make a call with
-.I times
-as NULL, or with
-.I times
-pointing to an array of structures in which both
-.I tv_nsec
-fields are
-.BR UTIME_NOW ,
-in order to update both timestamps to the current time.
-However,
-.BR futimens ()
-instead checks whether the
-.IR "access mode of the file descriptor allows writing" .
-.\" This means that a process with a file descriptor that allows
-.\" writing could change the timestamps of a file for which it
-.\" does not have write permission;
-.\" conversely, a process with a read-only file descriptor won't
-.\" be able to update the timestamps of a file,
-.\" even if it has write permission on the file.
-.SH SEE ALSO
-.BR chattr (1),
-.BR touch (1),
-.BR futimesat (2),
-.BR openat (2),
-.BR stat (2),
-.BR utimes (2),
-.BR futimes (3),
-.BR timespec (3),
-.BR inode (7),
-.BR path_resolution (7),
-.BR symlink (7)
diff --git a/man2/utimes.2 b/man2/utimes.2
deleted file mode 100644
index 04372d49b..000000000
--- a/man2/utimes.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/utime.2
diff --git a/man2/vfork.2 b/man2/vfork.2
deleted file mode 100644
index 0436b0f42..000000000
--- a/man2/vfork.2
+++ /dev/null
@@ -1,316 +0,0 @@
-.\" Copyright (c) 1999 Andries Brouwer (aeb@cwi.nl), 1 Nov 1999
-.\" and Copyright 2006, 2012, 2017 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" 1999-11-10: Merged text taken from the page contributed by
-.\" Reed H. Petty (rhp@draper.net)
-.\"
-.TH vfork 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-vfork \- create a child process and block parent
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B pid_t vfork(void);
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR vfork ():
-.nf
- Since glibc 2.12:
- (_XOPEN_SOURCE >= 500) && ! (_POSIX_C_SOURCE >= 200809L)
- || /* Since glibc 2.19: */ _DEFAULT_SOURCE
- || /* glibc <= 2.19: */ _BSD_SOURCE
- Before glibc 2.12:
- _BSD_SOURCE || _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
-.fi
-.SH DESCRIPTION
-.SS Standard description
-(From POSIX.1)
-The
-.BR vfork ()
-function has the same effect as
-.BR fork (2),
-except that the behavior is undefined if the process created by
-.BR vfork ()
-either modifies any data other than a variable of type
-.I pid_t
-used to store the return value from
-.BR vfork (),
-or returns from the function in which
-.BR vfork ()
-was called, or calls any other function before successfully calling
-.BR _exit (2)
-or one of the
-.BR exec (3)
-family of functions.
-.SS Linux description
-.BR vfork (),
-just like
-.BR fork (2),
-creates a child process of the calling process.
-For details and return value and errors, see
-.BR fork (2).
-.P
-.BR vfork ()
-is a special case of
-.BR clone (2).
-It is used to create new processes without copying the page tables of
-the parent process.
-It may be useful in performance-sensitive applications
-where a child is created which then immediately issues an
-.BR execve (2).
-.P
-.BR vfork ()
-differs from
-.BR fork (2)
-in that the calling thread is suspended until the child terminates
-(either normally,
-by calling
-.BR _exit (2),
-or abnormally, after delivery of a fatal signal),
-or it makes a call to
-.BR execve (2).
-Until that point, the child shares all memory with its parent,
-including the stack.
-The child must not return from the current function or call
-.BR exit (3)
-(which would have the effect of calling exit handlers
-established by the parent process and flushing the parent's
-.BR stdio (3)
-buffers), but may call
-.BR _exit (2).
-.P
-As with
-.BR fork (2),
-the child process created by
-.BR vfork ()
-inherits copies of various of the caller's process attributes
-(e.g., file descriptors, signal dispositions, and current working directory);
-the
-.BR vfork ()
-call differs only in the treatment of the virtual address space,
-as described above.
-.P
-Signals sent to the parent
-arrive after the child releases the parent's memory
-(i.e., after the child terminates
-or calls
-.BR execve (2)).
-.SS Historic description
-Under Linux,
-.BR fork (2)
-is implemented using copy-on-write pages, so the only penalty incurred by
-.BR fork (2)
-is the time and memory required to duplicate the parent's page tables,
-and to create a unique task structure for the child.
-However, in the bad old days a
-.BR fork (2)
-would require making a complete copy of the caller's data space,
-often needlessly, since usually immediately afterward an
-.BR exec (3)
-is done.
-Thus, for greater efficiency, BSD introduced the
-.BR vfork ()
-system call, which did not fully copy the address space of
-the parent process, but borrowed the parent's memory and thread
-of control until a call to
-.BR execve (2)
-or an exit occurred.
-The parent process was suspended while the
-child was using its resources.
-The use of
-.BR vfork ()
-was tricky: for example, not modifying data
-in the parent process depended on knowing which variables were
-held in a register.
-.SH VERSIONS
-The requirements put on
-.BR vfork ()
-by the standards are weaker than those put on
-.BR fork (2),
-so an implementation where the two are synonymous is compliant.
-In particular, the programmer cannot rely on the parent
-remaining blocked until the child either terminates or calls
-.BR execve (2),
-and cannot rely on any specific behavior with respect to shared memory.
-.\" In AIXv3.1 vfork is equivalent to fork.
-.P
-Some consider the semantics of
-.BR vfork ()
-to be an architectural blemish, and the 4.2BSD man page stated:
-\[lq]This system call will be eliminated
-when proper system sharing mechanisms are implemented.
-Users should not depend on the memory sharing semantics of
-.I vfork
-as it will, in that case, be made synonymous to
-.IR fork .\[rq]
-However, even though modern memory management hardware
-has decreased the performance difference between
-.BR fork (2)
-and
-.BR vfork (),
-there are various reasons why Linux and other systems have retained
-.BR vfork ():
-.IP \[bu] 3
-Some performance-critical applications require the small performance
-advantage conferred by
-.BR vfork ().
-.IP \[bu]
-.BR vfork ()
-can be implemented on systems that lack a memory-management unit (MMU), but
-.BR fork (2)
-can't be implemented on such systems.
-(POSIX.1-2008 removed
-.BR vfork ()
-from the standard; the POSIX rationale for the
-.BR posix_spawn (3)
-function notes that that function,
-which provides functionality equivalent to
-.BR fork (2)+\c
-.BR exec (3),
-is designed to be implementable on systems that lack an MMU.)
-.\" http://stackoverflow.com/questions/4259629/what-is-the-difference-between-fork-and-vfork
-.\" http://developers.sun.com/solaris/articles/subprocess/subprocess.html
-.\" http://mailman.uclinux.org/pipermail/uclinux-dev/2009-April/000684.html
-.\"
-.IP \[bu]
-On systems where memory is constrained,
-.BR vfork ()
-avoids the need to temporarily commit memory (see the description of
-.I /proc/sys/vm/overcommit_memory
-in
-.BR proc (5))
-in order to execute a new program.
-(This can be especially beneficial where a large parent process wishes
-to execute a small helper program in a child process.)
-By contrast, using
-.BR fork (2)
-in this scenario requires either committing an amount of memory equal
-to the size of the parent process (if strict overcommitting is in force)
-or overcommitting memory with the risk that a process is terminated
-by the out-of-memory (OOM) killer.
-.SS Linux notes
-Fork handlers established using
-.BR pthread_atfork (3)
-are not called when a multithreaded program employing
-the NPTL threading library calls
-.BR vfork ().
-Fork handlers are called in this case in a program using the
-LinuxThreads threading library.
-(See
-.BR pthreads (7)
-for a description of Linux threading libraries.)
-.P
-A call to
-.BR vfork ()
-is equivalent to calling
-.BR clone (2)
-with
-.I flags
-specified as:
-.P
-.in +4n
-.EX
- CLONE_VM | CLONE_VFORK | SIGCHLD
-.EE
-.in
-.SH STANDARDS
-None.
-.SH HISTORY
-4.3BSD; POSIX.1-2001 (but marked OBSOLETE).
-POSIX.1-2008 removes the specification of
-.BR vfork ().
-.P
-The
-.BR vfork ()
-system call appeared in 3.0BSD.
-.\" In the release notes for 4.2BSD Sam Leffler wrote: `vfork: Is still
-.\" present, but definitely on its way out'.
-In 4.4BSD it was made synonymous to
-.BR fork (2)
-but NetBSD introduced it again;
-see
-.UR http://www.netbsd.org\:/Documentation\:/kernel\:/vfork.html
-.UE .
-In Linux, it was equivalent to
-.BR fork (2)
-until Linux 2.2.0-pre6 or so.
-Since Linux 2.2.0-pre9 (on i386, somewhat later on
-other architectures) it is an independent system call.
-Support was added in glibc 2.0.112.
-.\"
-.SH CAVEATS
-The child process should take care not to modify the memory in unintended ways,
-since such changes will be seen by the parent process once
-the child terminates or executes another program.
-In this regard, signal handlers can be especially problematic:
-if a signal handler that is invoked in the child of
-.BR vfork ()
-changes memory, those changes may result in an inconsistent process state
-from the perspective of the parent process
-(e.g., memory changes would be visible in the parent,
-but changes to the state of open file descriptors would not be visible).
-.P
-When
-.BR vfork ()
-is called in a multithreaded process,
-only the calling thread is suspended until the child terminates
-or executes a new program.
-This means that the child is sharing an address space with other running code.
-This can be dangerous if another thread in the parent process
-changes credentials (using
-.BR setuid (2)
-or similar),
-since there are now two processes with different privilege levels
-running in the same address space.
-As an example of the dangers,
-suppose that a multithreaded program running as root creates a child using
-.BR vfork ().
-After the
-.BR vfork (),
-a thread in the parent process drops the process to an unprivileged user
-in order to run some untrusted code
-(e.g., via a plug-in opened with
-.BR dlopen (3)).
-In this case, attacks are possible where the parent process uses
-.BR mmap (2)
-to map in code that will be executed by the privileged child process.
-.\"
-.SH BUGS
-Details of the signal handling are obscure and differ between systems.
-The BSD man page states:
-"To avoid a possible deadlock situation, processes that are children
-in the middle of a
-.BR vfork ()
-are never sent
-.B SIGTTOU
-or
-.B SIGTTIN
-signals; rather, output or
-.IR ioctl s
-are allowed and input attempts result in an end-of-file indication."
-.\"
-.\" As far as I can tell, the following is not true in Linux 2.6.19:
-.\" Currently (Linux 2.3.25),
-.\" .BR strace (1)
-.\" cannot follow
-.\" .BR vfork ()
-.\" and requires a kernel patch.
-.SH SEE ALSO
-.BR clone (2),
-.BR execve (2),
-.BR _exit (2),
-.BR fork (2),
-.BR unshare (2),
-.BR wait (2)
diff --git a/man2/vhangup.2 b/man2/vhangup.2
deleted file mode 100644
index c0f68f37a..000000000
--- a/man2/vhangup.2
+++ /dev/null
@@ -1,58 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Added notes on capability requirements
-.\"
-.TH vhangup 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-vhangup \- virtually hangup the current terminal
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.B int vhangup(void);
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR vhangup ():
-.nf
- Since glibc 2.21:
-.\" commit 266865c0e7b79d4196e2cc393693463f03c90bd8
- _DEFAULT_SOURCE
- In glibc 2.19 and 2.20:
- _DEFAULT_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
- Up to and including glibc 2.19:
- _BSD_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
-.fi
-.SH DESCRIPTION
-.BR vhangup ()
-simulates a hangup on the current terminal.
-This call arranges for other
-users to have a \*(lqclean\*(rq terminal at login time.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EPERM
-The calling process has insufficient privilege to call
-.BR vhangup ();
-the
-.B CAP_SYS_TTY_CONFIG
-capability is required.
-.SH STANDARDS
-Linux.
-.SH SEE ALSO
-.BR init (1),
-.BR capabilities (7)
diff --git a/man2/vm86.2 b/man2/vm86.2
deleted file mode 100644
index 2e0feaf5a..000000000
--- a/man2/vm86.2
+++ /dev/null
@@ -1,58 +0,0 @@
-.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
-.\" Copyright 1997 Andries E. Brouwer (aeb@cwi.nl)
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH vm86 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-vm86old, vm86 \- enter virtual 8086 mode
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/vm86.h>
-.P
-.BI "int vm86old(struct vm86_struct *" info );
-.BI "int vm86(unsigned long " fn ", struct vm86plus_struct *" v86 );
-.fi
-.SH DESCRIPTION
-The system call
-.BR vm86 ()
-was introduced in Linux 0.97p2.
-In Linux 2.1.15 and 2.0.28, it was renamed to
-.BR vm86old (),
-and a new
-.BR vm86 ()
-was introduced.
-The definition of
-.I struct vm86_struct
-was changed
-in Linux 1.1.8 and 1.1.9.
-.P
-These calls cause the process to enter VM86 mode (virtual-8086 in Intel
-literature), and are used by
-.BR dosemu .
-.P
-VM86 mode is an emulation of real mode within a protected mode task.
-.SH RETURN VALUE
-On success, zero is returned.
-On error, \-1 is returned, and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EFAULT
-This return value is specific to i386 and indicates a problem with getting
-user-space data.
-.TP
-.B ENOSYS
-This return value indicates the call is not implemented on the present
-architecture.
-.TP
-.B EPERM
-Saved kernel stack exists.
-(This is a kernel sanity check; the saved
-stack should exist only within vm86 mode itself.)
-.SH STANDARDS
-Linux on 32-bit Intel processors.
diff --git a/man2/vm86old.2 b/man2/vm86old.2
deleted file mode 100644
index bf2581d4c..000000000
--- a/man2/vm86old.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/vm86.2
diff --git a/man2/vmsplice.2 b/man2/vmsplice.2
deleted file mode 100644
index 819465b23..000000000
--- a/man2/vmsplice.2
+++ /dev/null
@@ -1,162 +0,0 @@
-.\" This manpage is Copyright (C) 2006 Jens Axboe
-.\" and Copyright (C) 2006 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH vmsplice 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-vmsplice \- splice user pages to/from a pipe
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
-.B #include <fcntl.h>
-.P
-.BI "ssize_t vmsplice(int " fd ", const struct iovec *" iov ,
-.BI " size_t " nr_segs ", unsigned int " flags );
-.fi
-.\" Return type was long before glibc 2.7
-.SH DESCRIPTION
-.\" Linus: vmsplice() system call to basically do a "write to
-.\" the buffer", but using the reference counting and VM traversal
-.\" to actually fill the buffer. This means that the user needs to
-.\" be careful not to reuse the user-space buffer it spliced into
-.\" the kernel-space one (contrast this to "write()", which copies
-.\" the actual data, and you can thus reuse the buffer immediately
-.\" after a successful write), but that is often easy to do.
-If
-.I fd
-is opened for writing, the
-.BR vmsplice ()
-system call maps
-.I nr_segs
-ranges of user memory described by
-.I iov
-into a pipe.
-If
-.I fd
-is opened for reading,
-.\" Since Linux 2.6.23
-.\" commit 6a14b90bb6bc7cd83e2a444bf457a2ea645cbfe7
-the
-.BR vmsplice ()
-system call fills
-.I nr_segs
-ranges of user memory described by
-.I iov
-from a pipe.
-The file descriptor
-.I fd
-must refer to a pipe.
-.P
-The pointer
-.I iov
-points to an array of
-.I iovec
-structures as described in
-.BR iovec (3type).
-.P
-The
-.I flags
-argument is a bit mask that is composed by ORing together
-zero or more of the following values:
-.TP
-.B SPLICE_F_MOVE
-Unused for
-.BR vmsplice ();
-see
-.BR splice (2).
-.TP
-.B SPLICE_F_NONBLOCK
-.\" Not used for vmsplice
-.\" May be in the future -- therefore EAGAIN
-Do not block on I/O; see
-.BR splice (2)
-for further details.
-.TP
-.B SPLICE_F_MORE
-Currently has no effect for
-.BR vmsplice (),
-but may be implemented in the future; see
-.BR splice (2).
-.TP
-.B SPLICE_F_GIFT
-The user pages are a gift to the kernel.
-The application may not modify this memory ever,
-.\" FIXME . Explain the following line in a little more detail:
-otherwise the page cache and on-disk data may differ.
-Gifting pages to the kernel means that a subsequent
-.BR splice (2)
-.B SPLICE_F_MOVE
-can successfully move the pages;
-if this flag is not specified, then a subsequent
-.BR splice (2)
-.B SPLICE_F_MOVE
-must copy the pages.
-Data must also be properly page aligned, both in memory and length.
-.\" FIXME
-.\" It looks like the page-alignment requirement went away with
-.\" commit bd1a68b59c8e3bce45fb76632c64e1e063c3962d
-.\"
-.\" .... if we expect to later SPLICE_F_MOVE to the cache.
-.SH RETURN VALUE
-Upon successful completion,
-.BR vmsplice ()
-returns the number of bytes transferred to the pipe.
-On error,
-.BR vmsplice ()
-returns \-1 and
-.I errno
-is set to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-.B SPLICE_F_NONBLOCK
-was specified in
-.IR flags ,
-and the operation would block.
-.TP
-.B EBADF
-.I fd
-is either not valid, or doesn't refer to a pipe.
-.TP
-.B EINVAL
-.I nr_segs
-is greater than
-.BR IOV_MAX ;
-or memory is not aligned if
-.B SPLICE_F_GIFT
-is set.
-.TP
-.B ENOMEM
-Out of memory.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 2.6.17,
-glibc 2.5.
-.SH NOTES
-.BR vmsplice ()
-follows the other vectorized read/write type functions when it comes to
-limitations on the number of segments being passed in.
-This limit is
-.B IOV_MAX
-as defined in
-.IR <limits.h> .
-Currently,
-.\" UIO_MAXIOV in kernel source
-this limit is 1024.
-.P
-.\" commit 6a14b90bb6bc7cd83e2a444bf457a2ea645cbfe7
-.BR vmsplice ()
-really supports true splicing only from user memory to a pipe.
-In the opposite direction, it actually just copies the data to user space.
-But this makes the interface nice and symmetric and enables people to build on
-.BR vmsplice ()
-with room for future improvement in performance.
-.SH SEE ALSO
-.BR splice (2),
-.BR tee (2),
-.BR pipe (7)
diff --git a/man2/vserver.2 b/man2/vserver.2
deleted file mode 100644
index 5d25ea6ce..000000000
--- a/man2/vserver.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/unimplemented.2
diff --git a/man2/wait.2 b/man2/wait.2
deleted file mode 100644
index 2e0e9c254..000000000
--- a/man2/wait.2
+++ /dev/null
@@ -1,720 +0,0 @@
-.\" Copyright (c) 1993 by Thomas Koenig <ig25@rz.uni-karlsruhe.de>
-.\" and Copyright (c) 2004 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Sat Jul 24 13:30:06 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Sun Aug 21 17:42:42 1994 by Rik Faith <faith@cs.unc.edu>
-.\" (Thanks to Koen Holtman <koen@win.tue.nl>)
-.\" Modified Wed May 17 15:54:12 1995 by Rik Faith <faith@cs.unc.edu>
-.\" To remove *'s from status in macros (Thanks to Michael Shields).
-.\" Modified as suggested by Nick Duffek <nsd@bbc.com>, aeb, 960426
-.\" Modified Mon Jun 23 14:09:52 1997 by aeb - add EINTR.
-.\" Modified Thu Nov 26 02:12:45 1998 by aeb - add SIGCHLD stuff.
-.\" Modified Mon Jul 24 21:37:38 2000 by David A. Wheeler
-.\" <dwheeler@dwheeler.com> - noted thread issues.
-.\" Modified 26 Jun 01 by Michael Kerrisk
-.\" Added __WCLONE, __WALL, and __WNOTHREAD descriptions
-.\" Modified 2001-09-25, aeb
-.\" Modified 26 Jun 01 by Michael Kerrisk, <mtk.manpages@gmail.com>
-.\" Updated notes on setting disposition of SIGCHLD to SIG_IGN
-.\" 2004-11-11, mtk
-.\" Added waitid(2); added WCONTINUED and WIFCONTINUED()
-.\" Added text on SA_NOCLDSTOP
-.\" Updated discussion of SA_NOCLDWAIT to reflect 2.6 behavior
-.\" Much other text rewritten
-.\" 2005-05-10, mtk, __W* flags can't be used with waitid()
-.\" 2008-07-04, mtk, removed erroneous text about SA_NOCLDSTOP
-.\"
-.TH wait 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-wait, waitpid, waitid \- wait for process to change state
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/wait.h>
-.P
-.BI "pid_t wait(int *_Nullable " "wstatus" );
-.BI "pid_t waitpid(pid_t " pid ", int *_Nullable " wstatus ", int " options );
-.P
-.BI "int waitid(idtype_t " idtype ", id_t " id \
-", siginfo_t *" infop ", int " options );
- /* This is the glibc and POSIX interface; see
- NOTES for information on the raw system call. */
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR waitid ():
-.nf
- Since glibc 2.26:
- _XOPEN_SOURCE >= 500 || _POSIX_C_SOURCE >= 200809L
-.\" (_XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED)
- glibc 2.25 and earlier:
- _XOPEN_SOURCE
- || /* Since glibc 2.12: */ _POSIX_C_SOURCE >= 200809L
- || /* glibc <= 2.19: */ _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-All of these system calls are used to wait for state changes
-in a child of the calling process, and obtain information
-about the child whose state has changed.
-A state change is considered to be: the child terminated;
-the child was stopped by a signal; or the child was resumed by a signal.
-In the case of a terminated child, performing a wait allows
-the system to release the resources associated with the child;
-if a wait is not performed, then the terminated child remains in
-a "zombie" state (see NOTES below).
-.P
-If a child has already changed state, then these calls return immediately.
-Otherwise, they block until either a child changes state or
-a signal handler interrupts the call (assuming that system calls
-are not automatically restarted using the
-.B SA_RESTART
-flag of
-.BR sigaction (2)).
-In the remainder of this page, a child whose state has changed
-and which has not yet been waited upon by one of these system
-calls is termed
-.IR waitable .
-.SS wait() and waitpid()
-The
-.BR wait ()
-system call suspends execution of the calling thread until one of its
-children terminates.
-The call
-.I wait(&wstatus)
-is equivalent to:
-.P
-.in +4n
-.EX
-waitpid(\-1, &wstatus, 0);
-.EE
-.in
-.P
-The
-.BR waitpid ()
-system call suspends execution of the calling thread until a
-child specified by
-.I pid
-argument has changed state.
-By default,
-.BR waitpid ()
-waits only for terminated children, but this behavior is modifiable
-via the
-.I options
-argument, as described below.
-.P
-The value of
-.I pid
-can be:
-.TP
-.RB "< " \-1
-meaning wait for any child process whose process group ID is
-equal to the absolute value of
-.IR pid .
-.TP
-.B \-1
-meaning wait for any child process.
-.TP
-.B 0
-meaning wait for any child process whose process group ID is
-equal to that of the calling process at the time of the call to
-.BR waitpid ().
-.TP
-.RB "> " 0
-meaning wait for the child whose process ID is equal to the
-value of
-.IR pid .
-.P
-The value of
-.I options
-is an OR of zero or more of the following constants:
-.TP
-.B WNOHANG
-return immediately if no child has exited.
-.TP
-.B WUNTRACED
-also return if a child has stopped
-(but not traced via
-.BR ptrace (2)).
-Status for
-.I traced
-children which have stopped is provided
-even if this option is not specified.
-.TP
-.BR WCONTINUED " (since Linux 2.6.10)"
-also return if a stopped child has been resumed by delivery of
-.BR SIGCONT .
-.P
-(For Linux-only options, see below.)
-.P
-If
-.I wstatus
-is not NULL,
-.BR wait ()
-and
-.BR waitpid ()
-store status information in the \fIint\fP to which it points.
-This integer can be inspected with the following macros (which
-take the integer itself as an argument, not a pointer to it,
-as is done in
-.BR wait ()
-and
-.BR waitpid ()!):
-.TP
-.BI WIFEXITED( wstatus )
-returns true if the child terminated normally, that is,
-by calling
-.BR exit (3)
-or
-.BR _exit (2),
-or by returning from main().
-.TP
-.BI WEXITSTATUS( wstatus )
-returns the exit status of the child.
-This consists of the least significant 8 bits of the
-.I status
-argument that the child specified in a call to
-.BR exit (3)
-or
-.BR _exit (2)
-or as the argument for a return statement in main().
-This macro should be employed only if
-.B WIFEXITED
-returned true.
-.TP
-.BI WIFSIGNALED( wstatus )
-returns true if the child process was terminated by a signal.
-.TP
-.BI WTERMSIG( wstatus )
-returns the number of the signal that caused the child process to
-terminate.
-This macro should be employed only if
-.B WIFSIGNALED
-returned true.
-.TP
-.BI WCOREDUMP( wstatus )
-returns true if the child produced a core dump (see
-.BR core (5)).
-This macro should be employed only if
-.B WIFSIGNALED
-returned true.
-.IP
-This macro is not specified in POSIX.1-2001 and is not available on
-some UNIX implementations (e.g., AIX, SunOS).
-Therefore, enclose its use inside
-.IR "#ifdef WCOREDUMP ... #endif" .
-.TP
-.BI WIFSTOPPED( wstatus )
-returns true if the child process was stopped by delivery of a signal;
-this is possible only if the call was done using
-.B WUNTRACED
-or when the child is being traced (see
-.BR ptrace (2)).
-.TP
-.BI WSTOPSIG( wstatus )
-returns the number of the signal which caused the child to stop.
-This macro should be employed only if
-.B WIFSTOPPED
-returned true.
-.TP
-.BI WIFCONTINUED( wstatus )
-(since Linux 2.6.10)
-returns true if the child process was resumed by delivery of
-.BR SIGCONT .
-.SS waitid()
-The
-.BR waitid ()
-system call (available since Linux 2.6.9) provides more precise
-control over which child state changes to wait for.
-.P
-The
-.I idtype
-and
-.I id
-arguments select the child(ren) to wait for, as follows:
-.TP
-.IR idtype " == " \fBP_PID\fP
-Wait for the child whose process ID matches
-.IR id .
-.TP
-.IR idtype " == " \fBP_PIDFD\fP " (since Linux 5.4)"
-.\" commit 3695eae5fee0605f316fbaad0b9e3de791d7dfaf
-Wait for the child referred to by the PID file descriptor specified in
-.IR id .
-(See
-.BR pidfd_open (2)
-for further information on PID file descriptors.)
-.TP
-.IR idtype " == " \fBP_PGID\fP
-Wait for any child whose process group ID matches
-.IR id .
-Since Linux 5.4,
-.\" commit 821cc7b0b205c0df64cce59aacc330af251fa8f7
-if
-.I id
-is zero, then wait for any child that is in the same process group
-as the caller's process group at the time of the call.
-.TP
-.IR idtype " == " \fBP_ALL\fP
-Wait for any child;
-.I id
-is ignored.
-.P
-The child state changes to wait for are specified by ORing
-one or more of the following flags in
-.IR options :
-.TP
-.B WEXITED
-Wait for children that have terminated.
-.TP
-.B WSTOPPED
-Wait for children that have been stopped by delivery of a signal.
-.TP
-.B WCONTINUED
-Wait for (previously stopped) children that have been
-resumed by delivery of
-.BR SIGCONT .
-.P
-The following flags may additionally be ORed in
-.IR options :
-.TP
-.B WNOHANG
-As for
-.BR waitpid ().
-.TP
-.B WNOWAIT
-Leave the child in a waitable state; a later wait call
-can be used to again retrieve the child status information.
-.P
-Upon successful return,
-.BR waitid ()
-fills in the following fields of the
-.I siginfo_t
-structure pointed to by
-.IR infop :
-.TP
-\fIsi_pid\fP
-The process ID of the child.
-.TP
-\fIsi_uid\fP
-The real user ID of the child.
-(This field is not set on most other implementations.)
-.TP
-\fIsi_signo\fP
-Always set to
-.BR SIGCHLD .
-.TP
-\fIsi_status\fP
-Either the exit status of the child, as given to
-.BR _exit (2)
-(or
-.BR exit (3)),
-or the signal that caused the child to terminate, stop, or continue.
-The
-.I si_code
-field can be used to determine how to interpret this field.
-.TP
-\fIsi_code\fP
-Set to one of:
-.B CLD_EXITED
-(child called
-.BR _exit (2));
-.B CLD_KILLED
-(child killed by signal);
-.B CLD_DUMPED
-(child killed by signal, and dumped core);
-.B CLD_STOPPED
-(child stopped by signal);
-.B CLD_TRAPPED
-(traced child has trapped); or
-.B CLD_CONTINUED
-(child continued by
-.BR SIGCONT ).
-.P
-If
-.B WNOHANG
-was specified in
-.I options
-and there were no children in a waitable state, then
-.BR waitid ()
-returns 0 immediately and
-the state of the
-.I siginfo_t
-structure pointed to by
-.I infop
-depends on the implementation.
-To (portably) distinguish this case from that where a child was in a
-waitable state, zero out the
-.I si_pid
-field before the call and check for a nonzero value in this field
-after the call returns.
-.P
-POSIX.1-2008 Technical Corrigendum 1 (2013) adds the requirement that when
-.B WNOHANG
-is specified in
-.I options
-and there were no children in a waitable state, then
-.BR waitid ()
-should zero out the
-.I si_pid
-and
-.I si_signo
-fields of the structure.
-On Linux and other implementations that adhere to this requirement,
-it is not necessary to zero out the
-.I si_pid
-field before calling
-.BR waitid ().
-However,
-not all implementations follow the POSIX.1 specification on this point.
-.\" POSIX.1-2001 leaves this possibility unspecified; most
-.\" implementations (including Linux) zero out the structure
-.\" in this case, but at least one implementation (AIX 5.1)
-.\" does not -- MTK Nov 04
-.SH RETURN VALUE
-.BR wait ():
-on success, returns the process ID of the terminated child;
-on failure, \-1 is returned.
-.P
-.BR waitpid ():
-on success, returns the process ID of the child whose state has changed;
-if
-.B WNOHANG
-was specified and one or more child(ren) specified by
-.I pid
-exist, but have not yet changed state, then 0 is returned.
-On failure, \-1 is returned.
-.P
-.BR waitid ():
-returns 0 on success or
-if
-.B WNOHANG
-was specified and no child(ren) specified by
-.I id
-has yet changed state;
-on failure, \-1 is returned.
-.\" FIXME As reported by Vegard Nossum, if infop is NULL, then waitid()
-.\" returns the PID of the child. Either this is a bug, or it is intended
-.\" behavior that needs to be documented. See my Jan 2009 LKML mail
-.\" "waitid() return value strangeness when infop is NULL".
-.P
-On failure, each of these calls sets
-.I errno
-to indicate the error.
-.SH ERRORS
-.TP
-.B EAGAIN
-The PID file descriptor specified in
-.I id
-is nonblocking and the process that it refers to has not terminated.
-.TP
-.B ECHILD
-(for
-.BR wait ())
-The calling process does not have any unwaited-for children.
-.TP
-.B ECHILD
-(for
-.BR waitpid ()
-or
-.BR waitid ())
-The process specified by
-.I pid
-.RB ( waitpid ())
-or
-.I idtype
-and
-.I id
-.RB ( waitid ())
-does not exist or is not a child of the calling process.
-(This can happen for one's own child if the action for
-.B SIGCHLD
-is set to
-.BR SIG_IGN .
-See also the \fILinux Notes\fP section about threads.)
-.TP
-.B EINTR
-.B WNOHANG
-was not set and an unblocked signal or a
-.B SIGCHLD
-was caught; see
-.BR signal (7).
-.TP
-.B EINVAL
-The
-.I options
-argument was invalid.
-.TP
-.B ESRCH
-(for
-.BR wait ()
-or
-.BR waitpid ())
-.I pid
-is equal to
-.BR INT_MIN .
-.SH VERSIONS
-.SS C library/kernel differences
-.BR wait ()
-is actually a library function that (in glibc) is implemented as a call to
-.BR wait4 (2).
-.P
-On some architectures, there is no
-.BR waitpid ()
-system call;
-.\" e.g., i386 has the system call, but not x86-64
-instead, this interface is implemented via a C library
-wrapper function that calls
-.BR wait4 (2).
-.P
-The raw
-.BR waitid ()
-system call takes a fifth argument, of type
-.IR "struct rusage\ *" .
-If this argument is non-NULL,
-then it is used to return resource usage information about the child,
-in the same manner as
-.BR wait4 (2).
-See
-.BR getrusage (2)
-for details.
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-SVr4, 4.3BSD, POSIX.1-2001.
-.SH NOTES
-A child that terminates, but has not been waited for, becomes a "zombie".
-The kernel maintains a minimal set of information about the zombie
-process (PID, termination status, resource usage information)
-in order to allow the parent to later perform a wait to obtain
-information about the child.
-As long as a zombie is not removed from the system via a wait,
-it will consume a slot in the kernel process table, and if
-this table fills, it will not be possible to create further processes.
-If a parent process terminates, then its "zombie" children (if any)
-are adopted by
-.BR init (1),
-(or by the nearest "subreaper" process as defined through the use of the
-.BR prctl (2)
-.B PR_SET_CHILD_SUBREAPER
-operation);
-.BR init (1)
-automatically performs a wait to remove the zombies.
-.P
-POSIX.1-2001 specifies that if the disposition of
-.B SIGCHLD
-is set to
-.B SIG_IGN
-or the
-.B SA_NOCLDWAIT
-flag is set for
-.B SIGCHLD
-(see
-.BR sigaction (2)),
-then children that terminate do not become zombies and a call to
-.BR wait ()
-or
-.BR waitpid ()
-will block until all children have terminated, and then fail with
-.I errno
-set to
-.BR ECHILD .
-(The original POSIX standard left the behavior of setting
-.B SIGCHLD
-to
-.B SIG_IGN
-unspecified.
-Note that even though the default disposition of
-.B SIGCHLD
-is "ignore", explicitly setting the disposition to
-.B SIG_IGN
-results in different treatment of zombie process children.)
-.P
-Linux 2.6 conforms to the POSIX requirements.
-However, Linux 2.4 (and earlier) does not:
-if a
-.BR wait ()
-or
-.BR waitpid ()
-call is made while
-.B SIGCHLD
-is being ignored, the call behaves just as though
-.B SIGCHLD
-were not being ignored, that is, the call blocks until the next child
-terminates and then returns the process ID and status of that child.
-.SS Linux notes
-In the Linux kernel, a kernel-scheduled thread is not a distinct
-construct from a process.
-Instead, a thread is simply a process
-that is created using the Linux-unique
-.BR clone (2)
-system call; other routines such as the portable
-.BR pthread_create (3)
-call are implemented using
-.BR clone (2).
-Before Linux 2.4, a thread was just a special case of a process,
-and as a consequence one thread could not wait on the children
-of another thread, even when the latter belongs to the same thread group.
-However, POSIX prescribes such functionality, and since Linux 2.4
-a thread can, and by default will, wait on children of other threads
-in the same thread group.
-.P
-The following Linux-specific
-.I options
-are for use with children created using
-.BR clone (2);
-they can also, since Linux 4.7,
-.\" commit 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba
-be used with
-.BR waitid ():
-.TP
-.B __WCLONE
-.\" since 0.99pl10
-Wait for "clone" children only.
-If omitted, then wait for "non-clone" children only.
-(A "clone" child is one which delivers no signal, or a signal other than
-.B SIGCHLD
-to its parent upon termination.)
-This option is ignored if
-.B __WALL
-is also specified.
-.TP
-.BR __WALL " (since Linux 2.4)"
-.\" since patch-2.3.48
-Wait for all children, regardless of
-type ("clone" or "non-clone").
-.TP
-.BR __WNOTHREAD " (since Linux 2.4)"
-.\" since patch-2.4.0-test8
-Do not wait for children of other threads in
-the same thread group.
-This was the default before Linux 2.4.
-.P
-Since Linux 4.7,
-.\" commit bf959931ddb88c4e4366e96dd22e68fa0db9527c
-.\" prevents cases where an unreapable zombie is created if
-.\" /sbin/init doesn't use __WALL.
-the
-.B __WALL
-flag is automatically implied if the child is being ptraced.
-.SH BUGS
-According to POSIX.1-2008, an application calling
-.BR waitid ()
-must ensure that
-.I infop
-points to a
-.I siginfo_t
-structure (i.e., that it is a non-null pointer).
-On Linux, if
-.I infop
-is NULL,
-.BR waitid ()
-succeeds, and returns the process ID of the waited-for child.
-Applications should avoid relying on this inconsistent,
-nonstandard, and unnecessary feature.
-.SH EXAMPLES
-.\" fork.2 refers to this example program.
-The following program demonstrates the use of
-.BR fork (2)
-and
-.BR waitpid ().
-The program creates a child process.
-If no command-line argument is supplied to the program,
-then the child suspends its execution using
-.BR pause (2),
-to allow the user to send signals to the child.
-Otherwise, if a command-line argument is supplied,
-then the child exits immediately,
-using the integer supplied on the command line as the exit status.
-The parent process executes a loop that monitors the child using
-.BR waitpid (),
-and uses the W*() macros described above to analyze the wait status value.
-.P
-The following shell session demonstrates the use of the program:
-.P
-.in +4n
-.EX
-.RB "$" " ./a.out &"
-Child PID is 32360
-[1] 32359
-.RB "$" " kill \-STOP 32360"
-stopped by signal 19
-.RB "$" " kill \-CONT 32360"
-continued
-.RB "$" " kill \-TERM 32360"
-killed by signal 15
-[1]+ Done ./a.out
-$
-.EE
-.in
-.SS Program source
-\&
-.\" SRC BEGIN (wait.c)
-.EX
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/wait.h>
-#include <unistd.h>
-\&
-int
-main(int argc, char *argv[])
-{
- int wstatus;
- pid_t cpid, w;
-\&
- cpid = fork();
- if (cpid == \-1) {
- perror("fork");
- exit(EXIT_FAILURE);
- }
-\&
- if (cpid == 0) { /* Code executed by child */
- printf("Child PID is %jd\en", (intmax_t) getpid());
- if (argc == 1)
- pause(); /* Wait for signals */
- _exit(atoi(argv[1]));
-\&
- } else { /* Code executed by parent */
- do {
- w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED);
- if (w == \-1) {
- perror("waitpid");
- exit(EXIT_FAILURE);
- }
-\&
- if (WIFEXITED(wstatus)) {
- printf("exited, status=%d\en", WEXITSTATUS(wstatus));
- } else if (WIFSIGNALED(wstatus)) {
- printf("killed by signal %d\en", WTERMSIG(wstatus));
- } else if (WIFSTOPPED(wstatus)) {
- printf("stopped by signal %d\en", WSTOPSIG(wstatus));
- } else if (WIFCONTINUED(wstatus)) {
- printf("continued\en");
- }
- } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));
- exit(EXIT_SUCCESS);
- }
-}
-.EE
-.\" SRC END
-.SH SEE ALSO
-.BR _exit (2),
-.BR clone (2),
-.BR fork (2),
-.BR kill (2),
-.BR ptrace (2),
-.BR sigaction (2),
-.BR signal (2),
-.BR wait4 (2),
-.BR pthread_create (3),
-.BR core (5),
-.BR credentials (7),
-.BR signal (7)
diff --git a/man2/wait3.2 b/man2/wait3.2
deleted file mode 100644
index 097794b11..000000000
--- a/man2/wait3.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/wait4.2
diff --git a/man2/wait4.2 b/man2/wait4.2
deleted file mode 100644
index 27d908997..000000000
--- a/man2/wait4.2
+++ /dev/null
@@ -1,169 +0,0 @@
-.\" Copyright (c) 1993 by Thomas Koenig (ig25@rz.uni-karlsruhe.de)
-.\" and Copyright (c) 2004 by Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Sat Jul 24 13:32:44 1993 by Rik Faith (faith@cs.unc.edu)
-.\" Modified Mon Jun 23 14:09:52 1997 by aeb - add EINTR.
-.\" Modified Tue Jul 7 12:26:42 1998 by aeb - changed return value wait3
-.\" Modified 2004-11-11, Michael Kerrisk <mtk.manpages@gmail.com>
-.\" Rewrote much of this page, and removed much duplicated text,
-.\" replacing with pointers to wait.2
-.\"
-.TH wait4 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-wait3, wait4 \- wait for process to change state, BSD style
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <sys/wait.h>
-.P
-.BI "pid_t wait3(int *_Nullable " "wstatus" ", int " options ,
-.BI " struct rusage *_Nullable " rusage );
-.BI "pid_t wait4(pid_t " pid ", int *_Nullable " wstatus ", int " options ,
-.BI " struct rusage *_Nullable " rusage );
-.fi
-.P
-.RS -4
-Feature Test Macro Requirements for glibc (see
-.BR feature_test_macros (7)):
-.RE
-.P
-.BR wait3 ():
-.nf
- Since glibc 2.26:
- _DEFAULT_SOURCE
- || (_XOPEN_SOURCE >= 500 &&
- ! (_POSIX_C_SOURCE >= 200112L
- || _XOPEN_SOURCE >= 600))
- From glibc 2.19 to glibc 2.25:
- _DEFAULT_SOURCE || _XOPEN_SOURCE >= 500
- glibc 2.19 and earlier:
- _BSD_SOURCE || _XOPEN_SOURCE >= 500
-.\" || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED
-.fi
-.P
-.BR wait4 ():
-.nf
- Since glibc 2.19:
- _DEFAULT_SOURCE
- glibc 2.19 and earlier:
- _BSD_SOURCE
-.fi
-.SH DESCRIPTION
-These functions are nonstandard; in new programs, the use of
-.BR waitpid (2)
-or
-.BR waitid (2)
-is preferable.
-.P
-The
-.BR wait3 ()
-and
-.BR wait4 ()
-system calls are similar to
-.BR waitpid (2),
-but additionally return resource usage information about the
-child in the structure pointed to by
-.IR rusage .
-.P
-Other than the use of the
-.I rusage
-argument, the following
-.BR wait3 ()
-call:
-.P
-.in +4n
-.EX
-wait3(wstatus, options, rusage);
-.EE
-.in
-.P
-is equivalent to:
-.P
-.in +4n
-.EX
-waitpid(\-1, wstatus, options);
-.EE
-.in
-.P
-Similarly, the following
-.BR wait4 ()
-call:
-.P
-.in +4n
-.EX
-wait4(pid, wstatus, options, rusage);
-.EE
-.in
-.P
-is equivalent to:
-.P
-.in +4n
-.EX
-waitpid(pid, wstatus, options);
-.EE
-.in
-.P
-In other words,
-.BR wait3 ()
-waits for any child, while
-.BR wait4 ()
-can be used to select a specific child, or children, on which to wait.
-See
-.BR wait (2)
-for further details.
-.P
-If
-.I rusage
-is not NULL, the
-.I struct rusage
-to which it points will be filled with accounting information
-about the child.
-See
-.BR getrusage (2)
-for details.
-.SH RETURN VALUE
-As for
-.BR waitpid (2).
-.SH ERRORS
-As for
-.BR waitpid (2).
-.SH STANDARDS
-None.
-.SH HISTORY
-4.3BSD.
-.P
-SUSv1 included a specification of
-.BR wait3 ();
-SUSv2 included
-.BR wait3 (),
-but marked it LEGACY;
-SUSv3 removed it.
-.P
-Including
-.I <sys/time.h>
-is not required these days, but increases portability.
-(Indeed,
-.I <sys/resource.h>
-defines the
-.I rusage
-structure with fields of type
-.I struct timeval
-defined in
-.IR <sys/time.h> .)
-.SS C library/kernel differences
-On Linux,
-.BR wait3 ()
-is a library function implemented on top of the
-.BR wait4 ()
-system call.
-.SH SEE ALSO
-.BR fork (2),
-.BR getrusage (2),
-.BR sigaction (2),
-.BR signal (2),
-.BR wait (2),
-.BR signal (7)
diff --git a/man2/waitid.2 b/man2/waitid.2
deleted file mode 100644
index 0605b3543..000000000
--- a/man2/waitid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/wait.2
diff --git a/man2/waitpid.2 b/man2/waitpid.2
deleted file mode 100644
index 0605b3543..000000000
--- a/man2/waitpid.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/wait.2
diff --git a/man2/write.2 b/man2/write.2
deleted file mode 100644
index a24cbdc31..000000000
--- a/man2/write.2
+++ /dev/null
@@ -1,329 +0,0 @@
-.\" This manpage is Copyright (C) 1992 Drew Eckhardt;
-.\" and Copyright (C) 1993 Michael Haardt, Ian Jackson.
-.\" and Copyright (C) 2007 Michael Kerrisk <mtk.manpages@gmail.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.\" Modified Sat Jul 24 13:35:59 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Sun Nov 28 17:19:01 1993 by Rik Faith <faith@cs.unc.edu>
-.\" Modified Sat Jan 13 12:58:08 1996 by Michael Haardt
-.\" <michael@cantor.informatik.rwth-aachen.de>
-.\" Modified Sun Jul 21 18:59:33 1996 by Andries Brouwer <aeb@cwi.nl>
-.\" 2001-12-13 added remark by Zack Weinberg
-.\" 2007-06-18 mtk:
-.\" Added details about seekable files and file offset.
-.\" Noted that write() may write less than 'count' bytes, and
-.\" gave some examples of why this might occur.
-.\" Noted what happens if write() is interrupted by a signal.
-.\"
-.TH write 2 (date) "Linux man-pages (unreleased)"
-.SH NAME
-write \- write to a file descriptor
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.B #include <unistd.h>
-.P
-.BI "ssize_t write(int " fd ", const void " buf [. count "], size_t " count );
-.fi
-.SH DESCRIPTION
-.BR write ()
-writes up to
-.I count
-bytes from the buffer starting at
-.I buf
-to the file referred to by the file descriptor
-.IR fd .
-.P
-The number of bytes written may be less than
-.I count
-if, for example,
-there is insufficient space on the underlying physical medium, or the
-.B RLIMIT_FSIZE
-resource limit is encountered (see
-.BR setrlimit (2)),
-or the call was interrupted by a signal
-handler after having written less than
-.I count
-bytes.
-(See also
-.BR pipe (7).)
-.P
-For a seekable file (i.e., one to which
-.BR lseek (2)
-may be applied, for example, a regular file)
-writing takes place at the file offset,
-and the file offset is incremented by
-the number of bytes actually written.
-If the file was
-.BR open (2)ed
-with
-.BR O_APPEND ,
-the file offset is first set to the end of the file before writing.
-The adjustment of the file offset and the write operation
-are performed as an atomic step.
-.P
-POSIX requires that a
-.BR read (2)
-that can be proved to occur after a
-.BR write ()
-has returned will return the new data.
-Note that not all filesystems are POSIX conforming.
-.P
-According to POSIX.1, if
-.I count
-is greater than
-.BR SSIZE_MAX ,
-the result is implementation-defined;
-see NOTES for the upper limit on Linux.
-.SH RETURN VALUE
-On success, the number of bytes written is returned.
-On error, \-1 is returned, and \fIerrno\fP is set
-to indicate the error.
-.P
-Note that a successful
-.BR write ()
-may transfer fewer than
-.I count
-bytes.
-Such partial writes can occur for various reasons;
-for example, because there was insufficient space on the disk device
-to write all of the requested bytes, or because a blocked
-.BR write ()
-to a socket, pipe, or similar was interrupted by a signal handler
-after it had transferred some, but before it had transferred all
-of the requested bytes.
-In the event of a partial write, the caller can make another
-.BR write ()
-call to transfer the remaining bytes.
-The subsequent call will either transfer further bytes or
-may result in an error (e.g., if the disk is now full).
-.P
-If \fIcount\fP is zero and
-.I fd
-refers to a regular file, then
-.BR write ()
-may return a failure status if one of the errors below is detected.
-If no errors are detected, or error detection is not performed,
-0 is returned without causing any other effect.
-If
-\fIcount\fP is zero and
-.I fd
-refers to a file other than a regular file,
-the results are not specified.
-.SH ERRORS
-.TP
-.B EAGAIN
-The file descriptor
-.I fd
-refers to a file other than a socket and has been marked nonblocking
-.RB ( O_NONBLOCK ),
-and the write would block.
-See
-.BR open (2)
-for further details on the
-.B O_NONBLOCK
-flag.
-.TP
-.BR EAGAIN " or " EWOULDBLOCK
-.\" Actually EAGAIN on Linux
-The file descriptor
-.I fd
-refers to a socket and has been marked nonblocking
-.RB ( O_NONBLOCK ),
-and the write would block.
-POSIX.1-2001 allows either error to be returned for this case,
-and does not require these constants to have the same value,
-so a portable application should check for both possibilities.
-.TP
-.B EBADF
-.I fd
-is not a valid file descriptor or is not open for writing.
-.TP
-.B EDESTADDRREQ
-.I fd
-refers to a datagram socket for which a peer address has not been set using
-.BR connect (2).
-.TP
-.B EDQUOT
-The user's quota of disk blocks on the filesystem containing the file
-referred to by
-.I fd
-has been exhausted.
-.TP
-.B EFAULT
-.I buf
-is outside your accessible address space.
-.TP
-.B EFBIG
-An attempt was made to write a file that exceeds the implementation-defined
-maximum file size or the process's file size limit,
-or to write at a position past the maximum allowed offset.
-.TP
-.B EINTR
-The call was interrupted by a signal before any data was written; see
-.BR signal (7).
-.TP
-.B EINVAL
-.I fd
-is attached to an object which is unsuitable for writing;
-or the file was opened with the
-.B O_DIRECT
-flag, and either the address specified in
-.IR buf ,
-the value specified in
-.IR count ,
-or the file offset is not suitably aligned.
-.TP
-.B EIO
-A low-level I/O error occurred while modifying the inode.
-This error may relate to the write-back of data written by an earlier
-.BR write (),
-which may have been issued to a different file descriptor on
-the same file.
-Since Linux 4.13, errors from write-back come
-with a promise that they
-.I may
-be reported by subsequent
-.BR write ()
-requests, and
-.I will
-be reported by a subsequent
-.BR fsync (2)
-(whether or not they were also reported by
-.BR write ()).
-.\" commit 088737f44bbf6378745f5b57b035e57ee3dc4750
-An alternate cause of
-.B EIO
-on networked filesystems is when an advisory lock had been taken out
-on the file descriptor and this lock has been lost.
-See the
-.I "Lost locks"
-section of
-.BR fcntl (2)
-for further details.
-.TP
-.B ENOSPC
-The device containing the file referred to by
-.I fd
-has no room for the data.
-.TP
-.B EPERM
-The operation was prevented by a file seal; see
-.BR fcntl (2).
-.TP
-.B EPIPE
-.I fd
-is connected to a pipe or socket whose reading end is closed.
-When this happens the writing process will also receive a
-.B SIGPIPE
-signal.
-(Thus, the write return value is seen only if the program
-catches, blocks or ignores this signal.)
-.P
-Other errors may occur, depending on the object connected to
-.IR fd .
-.SH STANDARDS
-POSIX.1-2008.
-.SH HISTORY
-SVr4, 4.3BSD, POSIX.1-2001.
-.\" SVr4 documents additional error
-.\" conditions EDEADLK, ENOLCK, ENOLNK, ENOSR, ENXIO, or ERANGE.
-.P
-Under SVr4 a write may be interrupted and return
-.B EINTR
-at any point,
-not just before any data is written.
-.SH NOTES
-A successful return from
-.BR write ()
-does not make any guarantee that data has been committed to disk.
-On some filesystems, including NFS, it does not even guarantee
-that space has successfully been reserved for the data.
-In this case,
-some errors might be delayed until a future
-.BR write (),
-.BR fsync (2),
-or even
-.BR close (2).
-The only way to be sure is to call
-.BR fsync (2)
-after you are done writing all your data.
-.P
-If a
-.BR write ()
-is interrupted by a signal handler before any bytes are written,
-then the call fails with the error
-.BR EINTR ;
-if it is interrupted after at least one byte has been written,
-the call succeeds, and returns the number of bytes written.
-.P
-On Linux,
-.BR write ()
-(and similar system calls) will transfer at most
-0x7ffff000 (2,147,479,552) bytes,
-returning the number of bytes actually transferred.
-.\" commit e28cc71572da38a5a12c1cfe4d7032017adccf69
-(This is true on both 32-bit and 64-bit systems.)
-.P
-An error return value while performing
-.BR write ()
-using direct I/O does not mean the
-entire write has failed.
-Partial data may be written
-and the data at the file offset on which the
-.BR write ()
-was attempted should be considered inconsistent.
-.SH BUGS
-According to POSIX.1-2008/SUSv4 Section XSI 2.9.7
-("Thread Interactions with Regular File Operations"):
-.P
-.RS 4
-All of the following functions shall be atomic with respect to
-each other in the effects specified in POSIX.1-2008 when they
-operate on regular files or symbolic links: ...
-.RE
-.P
-Among the APIs subsequently listed are
-.BR write ()
-and
-.BR writev (2).
-And among the effects that should be atomic across threads (and processes)
-are updates of the file offset.
-However, before Linux 3.14,
-this was not the case: if two processes that share
-an open file description (see
-.BR open (2))
-perform a
-.BR write ()
-(or
-.BR writev (2))
-at the same time, then the I/O operations were not atomic
-with respect to updating the file offset,
-with the result that the blocks of data output by the two processes
-might (incorrectly) overlap.
-This problem was fixed in Linux 3.14.
-.\" http://thread.gmane.org/gmane.linux.kernel/1649458
-.\" From: Michael Kerrisk (man-pages) <mtk.manpages <at> gmail.com>
-.\" Subject: Update of file offset on write() etc. is non-atomic with I/O
-.\" Date: 2014-02-17 15:41:37 GMT
-.\" Newsgroups: gmane.linux.kernel, gmane.linux.file-systems
-.\" commit 9c225f2655e36a470c4f58dbbc99244c5fc7f2d4
-.\" Author: Linus Torvalds <torvalds@linux-foundation.org>
-.\" Date: Mon Mar 3 09:36:58 2014 -0800
-.\"
-.\" vfs: atomic f_pos accesses as per POSIX
-.SH SEE ALSO
-.BR close (2),
-.BR fcntl (2),
-.BR fsync (2),
-.BR ioctl (2),
-.BR lseek (2),
-.BR open (2),
-.BR pwrite (2),
-.BR read (2),
-.BR select (2),
-.BR writev (2),
-.BR fwrite (3)
diff --git a/man2/writev.2 b/man2/writev.2
deleted file mode 100644
index 54e3384ef..000000000
--- a/man2/writev.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/readv.2