summaryrefslogtreecommitdiffstats
path: root/man/man7/netlink.7
diff options
context:
space:
mode:
Diffstat (limited to 'man/man7/netlink.7')
-rw-r--r--man/man7/netlink.7611
1 files changed, 611 insertions, 0 deletions
diff --git a/man/man7/netlink.7 b/man/man7/netlink.7
new file mode 100644
index 000000000..4198bf157
--- /dev/null
+++ b/man/man7/netlink.7
@@ -0,0 +1,611 @@
+'\" t
+.\" This man page is Copyright (c) 1998 by Andi Kleen.
+.\"
+.\" SPDX-License-Identifier: GPL-1.0-or-later
+.\"
+.\" Based on the original comments from Alexey Kuznetsov
+.\" Modified 2005-12-27 by Hasso Tepper <hasso@estpak.ee>
+.\" $Id: netlink.7,v 1.8 2000/06/22 13:23:00 ak Exp $
+.TH netlink 7 (date) "Linux man-pages (unreleased)"
+.SH NAME
+netlink \- communication between kernel and user space (AF_NETLINK)
+.SH SYNOPSIS
+.nf
+.B #include <asm/types.h>
+.B #include <sys/socket.h>
+.B #include <linux/netlink.h>
+.P
+.BI "netlink_socket = socket(AF_NETLINK, " socket_type ", " netlink_family );
+.fi
+.SH DESCRIPTION
+Netlink is used to transfer information between the kernel and
+user-space processes.
+It consists of a standard sockets-based interface for user space
+processes and an internal kernel API for kernel modules.
+The internal kernel interface is not documented in this manual page.
+There is also an obsolete netlink interface
+via netlink character devices; this interface is not documented here
+and is provided only for backward compatibility.
+.P
+Netlink is a datagram-oriented service.
+Both
+.B SOCK_RAW
+and
+.B SOCK_DGRAM
+are valid values for
+.IR socket_type .
+However, the netlink protocol does not distinguish between datagram
+and raw sockets.
+.P
+.I netlink_family
+selects the kernel module or netlink group to communicate with.
+The currently assigned netlink families are:
+.TP
+.B NETLINK_ROUTE
+Receives routing and link updates and may be used to modify the routing
+tables (both IPv4 and IPv6), IP addresses, link parameters,
+neighbor setups, queueing disciplines, traffic classes, and
+packet classifiers (see
+.BR rtnetlink (7)).
+.TP
+.BR NETLINK_W1 " (Linux 2.6.13 to Linux 2.16.17)"
+Messages from 1-wire subsystem.
+.TP
+.B NETLINK_USERSOCK
+Reserved for user-mode socket protocols.
+.TP
+.BR NETLINK_FIREWALL " (up to and including Linux 3.4)"
+.\" removed by commit d16cf20e2f2f13411eece7f7fb72c17d141c4a84
+Transport IPv4 packets from netfilter to user space.
+Used by
+.I ip_queue
+kernel module.
+After a long period of being declared obsolete (in favor of the more advanced
+.I nfnetlink_queue
+feature),
+.B NETLINK_FIREWALL
+was removed in Linux 3.5.
+.TP
+.BR NETLINK_SOCK_DIAG " (since Linux 3.3)"
+.\" commit 7f1fb60c4fc9fb29fbb406ac8c4cfb4e59e168d6
+Query information about sockets of various protocol families from the kernel
+(see
+.BR sock_diag (7)).
+.TP
+.BR NETLINK_INET_DIAG " (since Linux 2.6.14)"
+An obsolete synonym for
+.BR NETLINK_SOCK_DIAG .
+.TP
+.BR NETLINK_NFLOG " (up to and including Linux 3.16)"
+Netfilter/iptables ULOG.
+.TP
+.B NETLINK_XFRM
+.\" FIXME More details on NETLINK_XFRM needed.
+IPsec.
+.TP
+.BR NETLINK_SELINUX " (since Linux 2.6.4)"
+SELinux event notifications.
+.TP
+.BR NETLINK_ISCSI " (since Linux 2.6.15)"
+.\" FIXME More details on NETLINK_ISCSI needed.
+Open-iSCSI.
+.TP
+.BR NETLINK_AUDIT " (since Linux 2.6.6)"
+.\" FIXME More details on NETLINK_AUDIT needed.
+Auditing.
+.TP
+.BR NETLINK_FIB_LOOKUP " (since Linux 2.6.13)"
+.\" FIXME More details on NETLINK_FIB_LOOKUP needed.
+Access to FIB lookup from user space.
+.TP
+.BR NETLINK_CONNECTOR " (since Linux 2.6.14)"
+Kernel connector.
+See
+.I Documentation/driver\-api/connector.rst
+(or
+.I /Documentation/connector/connector.*
+.\" commit baa293e9544bea71361950d071579f0e4d5713ed
+in Linux 5.2 and earlier)
+in the Linux kernel source tree for further information.
+.TP
+.BR NETLINK_NETFILTER " (since Linux 2.6.14)"
+.\" FIXME More details on NETLINK_NETFILTER needed.
+Netfilter subsystem.
+.TP
+.BR NETLINK_SCSITRANSPORT " (since Linux 2.6.19)"
+.\" commit 84314fd4740ad73550c76dee4a9578979d84af48
+.\" FIXME More details on NETLINK_SCSITRANSPORT needed.
+SCSI Transports.
+.TP
+.BR NETLINK_RDMA " (since Linux 3.0)"
+.\" commit b2cbae2c248776d81cc265ff7d48405b6a4cc463
+.\" FIXME More details on NETLINK_RDMA needed.
+Infiniband RDMA.
+.TP
+.BR NETLINK_IP6_FW " (up to and including Linux 3.4)"
+Transport IPv6 packets from netfilter to user space.
+Used by
+.I ip6_queue
+kernel module.
+.TP
+.B NETLINK_DNRTMSG
+DECnet routing messages.
+.TP
+.BR NETLINK_KOBJECT_UEVENT " (since Linux 2.6.10)"
+.\" FIXME More details on NETLINK_KOBJECT_UEVENT needed.
+Kernel messages to user space.
+.TP
+.BR NETLINK_GENERIC " (since Linux 2.6.15)"
+Generic netlink family for simplified netlink usage.
+.TP
+.BR NETLINK_CRYPTO " (since Linux 3.2)"
+.\" commit a38f7907b926e4c6c7d389ad96cc38cec2e5a9e9
+.\" Author: Steffen Klassert <steffen.klassert@secunet.com>
+Netlink interface to request information about ciphers registered
+with the kernel crypto API as well as allow configuration of the
+kernel crypto API.
+.P
+Netlink messages consist of a byte stream with one or multiple
+.I nlmsghdr
+headers and associated payload.
+The byte stream should be accessed only with the standard
+.B NLMSG_*
+macros.
+See
+.BR netlink (3)
+for further information.
+.P
+In multipart messages (multiple
+.I nlmsghdr
+headers with associated payload in one byte stream) the first and all
+following headers have the
+.B NLM_F_MULTI
+flag set, except for the last header which has the type
+.BR NLMSG_DONE .
+.P
+After each
+.I nlmsghdr
+the payload follows.
+.P
+.in +4n
+.EX
+struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including header */
+ __u16 nlmsg_type; /* Type of message content */
+ __u16 nlmsg_flags; /* Additional flags */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Sender port ID */
+};
+.EE
+.in
+.P
+.I nlmsg_type
+can be one of the standard message types:
+.B NLMSG_NOOP
+message is to be ignored,
+.B NLMSG_ERROR
+message signals an error and the payload contains an
+.I nlmsgerr
+structure,
+.B NLMSG_DONE
+message terminates a multipart message.
+Error messages get the
+original request appended, unless the user requests to cap the
+error message, and get extra error data if requested.
+.P
+.in +4n
+.EX
+struct nlmsgerr {
+ int error; /* Negative errno or 0 for acknowledgements */
+ struct nlmsghdr msg; /* Message header that caused the error */
+ /*
+ * followed by the message contents
+ * unless NETLINK_CAP_ACK was set
+ * or the ACK indicates success (error == 0).
+ * For example Generic Netlink message with attributes.
+ * message length is aligned with NLMSG_ALIGN()
+ */
+ /*
+ * followed by TLVs defined in enum nlmsgerr_attrs
+ * if NETLINK_EXT_ACK was set
+ */
+};
+.EE
+.in
+.P
+A netlink family usually specifies more message types, see the
+appropriate manual pages for that, for example,
+.BR rtnetlink (7)
+for
+.BR NETLINK_ROUTE .
+.TS
+tab(:);
+l s
+lB lx.
+Standard flag bits in \fInlmsg_flags\fP
+_
+NLM_F_REQUEST:T{
+Must be set on all request messages.
+T}
+NLM_F_MULTI:T{
+The message is part of a multipart message terminated by
+.BR NLMSG_DONE .
+T}
+NLM_F_ACK:T{
+Request for an acknowledgement on success.
+T}
+NLM_F_ECHO:T{
+Echo this request.
+T}
+.TE
+.\" No right adjustment for text blocks in tables
+.TS
+tab(:);
+l s
+lB lx.
+Additional flag bits for GET requests
+_
+NLM_F_ROOT:T{
+Return the complete table instead of a single entry.
+T}
+NLM_F_MATCH:T{
+Return all entries matching criteria passed in message content.
+Not implemented yet.
+T}
+NLM_F_ATOMIC:T{
+Return an atomic snapshot of the table.
+T}
+NLM_F_DUMP:T{
+Convenience macro; equivalent to
+(NLM_F_ROOT|NLM_F_MATCH).
+T}
+.TE
+.\" FIXME NLM_F_ATOMIC is not used anymore?
+.P
+Note that
+.B NLM_F_ATOMIC
+requires the
+.B CAP_NET_ADMIN
+capability or an effective UID of 0.
+.TS
+tab(:);
+l s
+lB lx.
+Additional flag bits for NEW requests
+_
+NLM_F_REPLACE:T{
+Replace existing matching object.
+T}
+NLM_F_EXCL:T{
+Don't replace if the object already exists.
+T}
+NLM_F_CREATE:T{
+Create object if it doesn't already exist.
+T}
+NLM_F_APPEND:T{
+Add to the end of the object list.
+T}
+.TE
+.P
+.I nlmsg_seq
+and
+.I nlmsg_pid
+are used to track messages.
+.I nlmsg_pid
+shows the origin of the message.
+Note that there isn't a 1:1 relationship between
+.I nlmsg_pid
+and the PID of the process if the message originated from a netlink
+socket.
+See the
+.B ADDRESS FORMATS
+section for further information.
+.P
+Both
+.I nlmsg_seq
+and
+.I nlmsg_pid
+.\" FIXME Explain more about nlmsg_seq and nlmsg_pid.
+are opaque to netlink core.
+.P
+Netlink is not a reliable protocol.
+It tries its best to deliver a message to its destination(s),
+but may drop messages when an out-of-memory condition or
+other error occurs.
+For reliable transfer the sender can request an
+acknowledgement from the receiver by setting the
+.B NLM_F_ACK
+flag.
+An acknowledgement is an
+.B NLMSG_ERROR
+packet with the error field set to 0.
+The application must generate acknowledgements for
+received messages itself.
+The kernel tries to send an
+.B NLMSG_ERROR
+message for every failed packet.
+A user process should follow this convention too.
+.P
+However, reliable transmissions from kernel to user are impossible
+in any case.
+The kernel can't send a netlink message if the socket buffer is full:
+the message will be dropped and the kernel and the user-space process will
+no longer have the same view of kernel state.
+It is up to the application to detect when this happens (via the
+.B ENOBUFS
+error returned by
+.BR recvmsg (2))
+and resynchronize.
+.SS Address formats
+The
+.I sockaddr_nl
+structure describes a netlink client in user space or in the kernel.
+A
+.I sockaddr_nl
+can be either unicast (only sent to one peer) or sent to
+netlink multicast groups
+.RI ( nl_groups
+not equal 0).
+.P
+.in +4n
+.EX
+struct sockaddr_nl {
+ sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* Zero */
+ pid_t nl_pid; /* Port ID */
+ __u32 nl_groups; /* Multicast groups mask */
+};
+.EE
+.in
+.P
+.I nl_pid
+is the unicast address of netlink socket.
+It's always 0 if the destination is in the kernel.
+For a user-space process,
+.I nl_pid
+is usually the PID of the process owning the destination socket.
+However,
+.I nl_pid
+identifies a netlink socket, not a process.
+If a process owns several netlink
+sockets, then
+.I nl_pid
+can be equal to the process ID only for at most one socket.
+There are two ways to assign
+.I nl_pid
+to a netlink socket.
+If the application sets
+.I nl_pid
+before calling
+.BR bind (2),
+then it is up to the application to make sure that
+.I nl_pid
+is unique.
+If the application sets it to 0, the kernel takes care of assigning it.
+The kernel assigns the process ID to the first netlink socket the process
+opens and assigns a unique
+.I nl_pid
+to every netlink socket that the process subsequently creates.
+.P
+.I nl_groups
+is a bit mask with every bit representing a netlink group number.
+Each netlink family has a set of 32 multicast groups.
+When
+.BR bind (2)
+is called on the socket, the
+.I nl_groups
+field in the
+.I sockaddr_nl
+should be set to a bit mask of the groups which it wishes to listen to.
+The default value for this field is zero which means that no multicasts
+will be received.
+A socket may multicast messages to any of the multicast groups by setting
+.I nl_groups
+to a bit mask of the groups it wishes to send to when it calls
+.BR sendmsg (2)
+or does a
+.BR connect (2).
+Only processes with an effective UID of 0 or the
+.B CAP_NET_ADMIN
+capability may send or listen to a netlink multicast group.
+Since Linux 2.6.13,
+.\" commit d629b836d151d43332492651dd841d32e57ebe3b
+messages can't be broadcast to multiple groups.
+Any replies to a message received for a multicast group should be
+sent back to the sending PID and the multicast group.
+Some Linux kernel subsystems may additionally allow other users
+to send and/or receive messages.
+As at Linux 3.0, the
+.BR NETLINK_KOBJECT_UEVENT ,
+.BR NETLINK_GENERIC ,
+.BR NETLINK_ROUTE ,
+and
+.B NETLINK_SELINUX
+groups allow other users to receive messages.
+No groups allow other users to send messages.
+.SS Socket options
+To set or get a netlink socket option, call
+.BR getsockopt (2)
+to read or
+.BR setsockopt (2)
+to write the option with the option level argument set to
+.BR SOL_NETLINK .
+Unless otherwise noted,
+.I optval
+is a pointer to an
+.IR int .
+.TP
+.BR NETLINK_PKTINFO " (since Linux 2.6.14)"
+.\" commit 9a4595bc7e67962f13232ee55a64e063062c3a99
+.\" Author: Patrick McHardy <kaber@trash.net>
+Enable
+.B nl_pktinfo
+control messages for received packets to get the extended
+destination group number.
+.TP
+.B NETLINK_ADD_MEMBERSHIP
+.TQ
+.BR NETLINK_DROP_MEMBERSHIP " (since Linux 2.6.14)"
+.\" commit 9a4595bc7e67962f13232ee55a64e063062c3a99
+.\" Author: Patrick McHardy <kaber@trash.net>
+Join/leave a group specified by
+.IR optval .
+.TP
+.BR NETLINK_LIST_MEMBERSHIPS " (since Linux 4.2)"
+.\" commit b42be38b2778eda2237fc759e55e3b698b05b315
+.\" Author: David Herrmann <dh.herrmann@gmail.com>
+Retrieve all groups a socket is a member of.
+.I optval
+is a pointer to
+.B __u32
+and
+.I optlen
+is the size of the array.
+The array is filled with the full membership set of the
+socket, and the required array size is returned in
+.IR optlen .
+.TP
+.BR NETLINK_BROADCAST_ERROR " (since Linux 2.6.30)"
+.\" commit be0c22a46cfb79ab2342bb28fde99afa94ef868e
+.\" Author: Pablo Neira Ayuso <pablo@netfilter.org>
+When not set,
+.B netlink_broadcast()
+only reports
+.B ESRCH
+errors and silently ignore
+.B ENOBUFS
+errors.
+.TP
+.BR NETLINK_NO_ENOBUFS " (since Linux 2.6.30)"
+.\" commit 38938bfe3489394e2eed5e40c9bb8f66a2ce1405
+.\" Author: Pablo Neira Ayuso <pablo@netfilter.org>
+This flag can be used by unicast and broadcast listeners to avoid receiving
+.B ENOBUFS
+errors.
+.TP
+.BR NETLINK_LISTEN_ALL_NSID " (since Linux 4.2)"
+.\" commit 59324cf35aba5336b611074028777838a963d03b
+.\" Author: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+When set, this socket will receive netlink notifications from
+all network namespaces that have an
+.I nsid
+assigned into the network namespace where the socket has been opened.
+The
+.I nsid
+is sent to user space via an ancillary data.
+.TP
+.BR NETLINK_CAP_ACK " (since Linux 4.3)"
+.\" commit 0a6a3a23ea6efde079a5b77688541a98bf202721
+.\" Author: Christophe Ricard <christophe.ricard@gmail.com>
+The kernel may fail to allocate the necessary room for the acknowledgement
+message back to user space.
+This option trims off the payload of the original netlink message.
+The netlink message header is still included, so the user can guess from the
+sequence number which message triggered the acknowledgement.
+.SH VERSIONS
+The socket interface to netlink first appeared Linux 2.2.
+.P
+Linux 2.0 supported a more primitive device-based netlink interface
+(which is still available as a compatibility option).
+This obsolete interface is not described here.
+.SH NOTES
+It is often better to use netlink via
+.I libnetlink
+or
+.I libnl
+than via the low-level kernel interface.
+.SH BUGS
+This manual page is not complete.
+.SH EXAMPLES
+The following example creates a
+.B NETLINK_ROUTE
+netlink socket which will listen to the
+.B RTMGRP_LINK
+(network interface create/delete/up/down events) and
+.B RTMGRP_IPV4_IFADDR
+(IPv4 addresses add/delete events) multicast groups.
+.P
+.in +4n
+.EX
+struct sockaddr_nl sa;
+\&
+memset(&sa, 0, sizeof(sa));
+sa.nl_family = AF_NETLINK;
+sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR;
+\&
+fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+bind(fd, (struct sockaddr *) &sa, sizeof(sa));
+.EE
+.in
+.P
+The next example demonstrates how to send a netlink message to the
+kernel (pid 0).
+Note that the application must take care of message sequence numbers
+in order to reliably track acknowledgements.
+.P
+.in +4n
+.EX
+struct nlmsghdr *nh; /* The nlmsghdr with payload to send */
+struct sockaddr_nl sa;
+struct iovec iov = { nh, nh\->nlmsg_len };
+struct msghdr msg;
+\&
+msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
+memset(&sa, 0, sizeof(sa));
+sa.nl_family = AF_NETLINK;
+nh\->nlmsg_pid = 0;
+nh\->nlmsg_seq = ++sequence_number;
+/* Request an ack from kernel by setting NLM_F_ACK */
+nh\->nlmsg_flags |= NLM_F_ACK;
+\&
+sendmsg(fd, &msg, 0);
+.EE
+.in
+.P
+And the last example is about reading netlink message.
+.P
+.in +4n
+.EX
+int len;
+/* 8192 to avoid message truncation on platforms with
+ page size > 4096 */
+struct nlmsghdr buf[8192/sizeof(struct nlmsghdr)];
+struct iovec iov = { buf, sizeof(buf) };
+struct sockaddr_nl sa;
+struct msghdr msg;
+struct nlmsghdr *nh;
+\&
+msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
+len = recvmsg(fd, &msg, 0);
+\&
+for (nh = (struct nlmsghdr *) buf; NLMSG_OK (nh, len);
+ nh = NLMSG_NEXT (nh, len)) {
+ /* The end of multipart message */
+ if (nh\->nlmsg_type == NLMSG_DONE)
+ return;
+\&
+ if (nh\->nlmsg_type == NLMSG_ERROR)
+ /* Do some error handling */
+ ...
+\&
+ /* Continue with parsing payload */
+ ...
+}
+.EE
+.in
+.SH SEE ALSO
+.BR cmsg (3),
+.BR netlink (3),
+.BR capabilities (7),
+.BR rtnetlink (7),
+.BR sock_diag (7)
+.P
+.UR ftp://ftp.inr.ac.ru\:/ip\-routing\:/iproute2*
+information about libnetlink
+.UE
+.P
+.UR http://www.infradead.org\:/\[ti]tgr\:/libnl/
+information about libnl
+.UE
+.P
+RFC 3549 "Linux Netlink as an IP Services Protocol"