From aa19b9765c44c089af294f6a4e895ab080338086 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Fri, 20 Sep 2024 13:24:12 +0300 Subject: [PATCH 01/28] feat: Faster UDP/IO on Apple platforms WIP --- quinn-udp/Cargo.toml | 3 + quinn-udp/build.rs | 40 + quinn-udp/src/bindings/socket.h | 1541 +++++++++++++++++++++++++++++++ quinn-udp/src/unix.rs | 76 +- 4 files changed, 1648 insertions(+), 12 deletions(-) create mode 100644 quinn-udp/build.rs create mode 100644 quinn-udp/src/bindings/socket.h diff --git a/quinn-udp/Cargo.toml b/quinn-udp/Cargo.toml index 257ee0015..44e1de3e3 100644 --- a/quinn-udp/Cargo.toml +++ b/quinn-udp/Cargo.toml @@ -29,6 +29,9 @@ tracing = { workspace = true, optional = true } once_cell = { workspace = true } windows-sys = { workspace = true } +[target.'cfg(any(target_os = "macos", target_os = "ios"))'.build-dependencies] +bindgen = "0.70.1" + [dev-dependencies] criterion = "0.5" diff --git a/quinn-udp/build.rs b/quinn-udp/build.rs new file mode 100644 index 000000000..2ed3b68f5 --- /dev/null +++ b/quinn-udp/build.rs @@ -0,0 +1,40 @@
+//! Build script: generates Rust bindings for Apple's private `recvmsg_x`.
+//!
+//! A build script is compiled for the *host*, so the `cfg(target_os)` gates
+//! below describe the machine running the build; the platform being compiled
+//! for is checked at run time through `CARGO_CFG_TARGET_OS`.
+
+/// No-op entry point for hosts on which no bindings are generated.
+///
+/// A crate-level `#![cfg(...)]` would strip `main` as well and fail the build
+/// on every non-Apple host, so each `main` is gated individually instead.
+#[cfg(not(any(target_os = "macos", target_os = "ios")))]
+fn main() {}
+
+/// Generates `$OUT_DIR/bindings.rs` from the vendored `src/bindings/socket.h`.
+#[cfg(any(target_os = "macos", target_os = "ios"))]
+fn main() {
+    use std::{env, path::PathBuf};
+
+    // Nothing to generate when cross-compiling to a non-Apple target.
+    let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
+    if !matches!(target_os.as_str(), "macos" | "ios") {
+        return;
+    }
+
+    // Generate the bindings for Apple's private `recvmsg_x` from
+    // https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/socket.h.
+    let bindings = bindgen::Builder::default()
+        .clang_arg("-DPRIVATE=1")
+        .allowlist_function("recvmsg_x") // TODO: sendmsg_x
+        .no_copy("iovec") // msghdr_x
+        .header("src/bindings/socket.h")
+        .generate()
+        .expect("Unable to generate bindings");
+
+    // Write the bindings to the $OUT_DIR/bindings.rs file.
+    let out_dir = env::var("OUT_DIR").expect("Cargo sets OUT_DIR for build scripts");
+    bindings
+        .write_to_file(PathBuf::from(out_dir).join("bindings.rs"))
+        .expect("Couldn't write bindings!");
+}
diff --git a/quinn-udp/src/bindings/socket.h b/quinn-udp/src/bindings/socket.h new file mode 100644 index 000000000..6d4f416f1 --- /dev/null +++ b/quinn-udp/src/bindings/socket.h @@ -0,0 +1,1541 @@ +/* + * Copyright (c) 2000-2022 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */ +/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ +/* + * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994 + * The Regents of the University of California. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)socket.h 8.4 (Berkeley) 2/21/94 + * $FreeBSD: src/sys/sys/socket.h,v 1.39.2.7 2001/07/03 11:02:01 ume Exp $ + */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. 
This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +#ifndef _SYS_SOCKET_H_ +#define _SYS_SOCKET_H_ + +#include +#include +#include +#include +#include + +#ifdef PRIVATE +#include +#include +#endif /* PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE +#include +#include +#endif /* XNU_KERNEL_PRIVATE */ + +#ifndef KERNEL +#include +#endif + +/* + * Definitions related to sockets: types, address families, options. + */ + +/* + * Data types. + */ + +#include +#include +#include +#include +#include + +/* XXX Not explicitly defined by POSIX, but function return types are */ +#include + +/* XXX Not explicitly defined by POSIX, but function return types are */ +#include + +/* + * [XSI] The iovec structure shall be defined as described in sys/uio.h. + */ +#include + +/* + * Types + */ +#define SOCK_STREAM 1 /* stream socket */ +#define SOCK_DGRAM 2 /* datagram socket */ +#define SOCK_RAW 3 /* raw-protocol interface */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define SOCK_RDM 4 /* reliably-delivered message */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#define SOCK_SEQPACKET 5 /* sequenced packet stream */ + +/* + * Option flags per-socket.
+ */ +#define SO_DEBUG 0x0001 /* turn on debugging info recording */ +#define SO_ACCEPTCONN 0x0002 /* socket has had listen() */ +#define SO_REUSEADDR 0x0004 /* allow local address reuse */ +#define SO_KEEPALIVE 0x0008 /* keep connections alive */ +#define SO_DONTROUTE 0x0010 /* just use interface addresses */ +#define SO_BROADCAST 0x0020 /* permit sending of broadcast msgs */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define SO_USELOOPBACK 0x0040 /* bypass hardware when possible */ +#define SO_LINGER 0x0080 /* linger on close if data present (in ticks) */ +#define SO_LINGER_SEC 0x1080 /* linger on close if data present (in seconds) */ +#else +#define SO_LINGER 0x1080 /* linger on close if data present (in seconds) */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#define SO_OOBINLINE 0x0100 /* leave received OOB data in line */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define SO_REUSEPORT 0x0200 /* allow local address & port reuse */ +#define SO_TIMESTAMP 0x0400 /* timestamp received dgram traffic */ +#define SO_TIMESTAMP_MONOTONIC 0x0800 /* Monotonically increasing timestamp on rcvd dgram */ +#ifndef __APPLE__ +#define SO_ACCEPTFILTER 0x1000 /* there is an accept filter */ +#else +#define SO_DONTTRUNC 0x2000 /* APPLE: Retain unread data */ + /* (ATOMIC proto) */ +#define SO_WANTMORE 0x4000 /* APPLE: Give hint when more data ready */ +#define SO_WANTOOBFLAG 0x8000 /* APPLE: Want OOB in MSG_FLAG on receive */ + +#ifdef PRIVATE +#define SO_NOWAKEFROMSLEEP 0x10000 /* Don't wake for traffic to this socket */ +#define SO_NOAPNFALLBK 0x20000 /* Don't attempt APN fallback for the socket */ +#define SO_TIMESTAMP_CONTINUOUS 0x40000 /* Continuous monotonic timestamp on rcvd dgram */ +#endif + +#endif /* (!__APPLE__) */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +/* + * Additional options, not kept in so_options. 
+ */ +#define SO_SNDBUF 0x1001 /* send buffer size */ +#define SO_RCVBUF 0x1002 /* receive buffer size */ +#define SO_SNDLOWAT 0x1003 /* send low-water mark */ +#define SO_RCVLOWAT 0x1004 /* receive low-water mark */ +#define SO_SNDTIMEO 0x1005 /* send timeout */ +#define SO_RCVTIMEO 0x1006 /* receive timeout */ +#define SO_ERROR 0x1007 /* get error status and clear */ +#define SO_TYPE 0x1008 /* get socket type */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define SO_LABEL 0x1010 /* deprecated */ +#define SO_PEERLABEL 0x1011 /* deprecated */ +#ifdef __APPLE__ +#define SO_NREAD 0x1020 /* APPLE: get 1st-packet byte count */ +#define SO_NKE 0x1021 /* APPLE: Install socket-level NKE */ +#define SO_NOSIGPIPE 0x1022 /* APPLE: No SIGPIPE on EPIPE */ +#define SO_NOADDRERR 0x1023 /* APPLE: Returns EADDRNOTAVAIL when src is not available anymore */ +#define SO_NWRITE 0x1024 /* APPLE: Get number of bytes currently in send socket buffer */ +#define SO_REUSESHAREUID 0x1025 /* APPLE: Allow reuse of port/socket by different userids */ +#ifdef __APPLE_API_PRIVATE +#define SO_NOTIFYCONFLICT 0x1026 /* APPLE: send notification if there is a bind on a port which is already in use */ +#define SO_UPCALLCLOSEWAIT 0x1027 /* APPLE: block on close until an upcall returns */ +#endif +#ifdef PRIVATE +#define SO_RESTRICTIONS 0x1081 /* APPLE: deny flag set */ +#define SO_RESTRICT_DENY_IN 0x1 /* deny inbound (trapdoor) */ +#define SO_RESTRICT_DENY_OUT 0x2 /* deny outbound (trapdoor) */ +#define SO_RESTRICT_DENY_CELLULAR 0x4 /* deny use of cellular (trapdoor) */ +#define SO_RESTRICT_DENY_EXPENSIVE 0x8 /* deny use of expensive if (trapdoor) */ +#define SO_RESTRICT_DENY_CONSTRAINED 0x10 /* deny use of constrained if (trapdoor) */ +#endif /* PRIVATE */ +#define SO_RANDOMPORT 0x1082 /* APPLE: request local port randomization */ +#define SO_NP_EXTENSIONS 0x1083 /* To turn off some POSIX behavior */ +#endif + +#ifdef PRIVATE +#define SO_EXECPATH 0x1085 /* Application Firewall Socket
option */ + +/* + * Traffic service class definitions (lowest to highest): + * + * SO_TC_BK_SYS + * "Background System-Initiated", high delay tolerant, high loss + * tolerant, elastic flow, variable size & long-lived. E.g: system- + * initiated iCloud synching or Time Capsule backup, for which there + * is no progress feedback. + * + * SO_TC_BK + * "Background", user-initiated, high delay tolerant, high loss tolerant, + * elastic flow, variable size. E.g. user-initiated iCloud synching or + * Time Capsule backup; or traffic of background applications, for which + * there is some progress feedback. + * + * SO_TC_BE + * "Best Effort", unclassified/standard. This is the default service + * class; pretty much a mix of everything. + * + * SO_TC_RD + * "Responsive Data", a notch higher than "Best Effort", medium delay + * tolerant, elastic & inelastic flow, bursty, long-lived. E.g. email, + * instant messaging, for which there is a sense of interactivity and + * urgency (user waiting for output). + * + * SO_TC_OAM + * "Operations, Administration, and Management", medium delay tolerant, + * low-medium loss tolerant, elastic & inelastic flows, variable size. + * E.g. VPN tunnels. + * + * SO_TC_AV + * "Multimedia Audio/Video Streaming", medium delay tolerant, low-medium + * loss tolerant, elastic flow, constant packet interval, variable rate & + * size. E.g. AirPlay playback (both video and audio). + * + * SO_TC_RV + * "Responsive Multimedia Audio/Video", low delay tolerant, low-medium + * loss tolerant, elastic flow, variable packet interval, rate and size. + * E.g. AirPlay mirroring, screen sharing. + * + * SO_TC_VI + * "Interactive Video", low delay tolerant, low-medium loss tolerant, + * elastic flow, constant packet interval, variable rate & size. E.g. + * FaceTime video. + * + * SO_TC_VO + * "Interactive Voice", low delay tolerant, low loss tolerant, inelastic + * flow, constant packet rate, somewhat fixed size. E.g. VoIP including + * FaceTime audio.
+ * + * SO_TC_CTL + * "Network Control", low delay tolerant, low loss tolerant, inelastic + * flow, rate is bursty but short, variable size. E.g. DNS queries; + * certain types of locally-originated ICMP, ICMPv6; IGMP/MLD join/leave, + * ARP. + */ +#define SO_TRAFFIC_CLASS 0x1086 /* Traffic service class (int) */ +#define SO_TC_BK_SYS 100 /* lowest class */ +#define SO_TC_BK 200 +#define SO_TC_BE 0 +#define SO_TC_RD 300 +#define SO_TC_OAM 400 +#define SO_TC_AV 500 +#define SO_TC_RV 600 +#define SO_TC_VI 700 +#define SO_TC_VO 800 +#define SO_TC_CTL 900 /* highest class */ +#define SO_TC_MAX 10 /* Total # of traffic classes */ +#ifdef XNU_KERNEL_PRIVATE +#define _SO_TC_BK 1 /* deprecated */ +#define _SO_TC_VI 2 /* deprecated */ +#define _SO_TC_VO 3 /* deprecated */ +#define _SO_TC_MAX 4 /* deprecated */ + +#define SO_VALID_TC(c) \ + (c == SO_TC_BK_SYS || c == SO_TC_BK || c == SO_TC_BE || \ + c == SO_TC_RD || c == SO_TC_OAM || c == SO_TC_AV || \ + c == SO_TC_RV || c == SO_TC_VI || c == SO_TC_VO || \ + c == SO_TC_CTL || c == SO_TC_NETSVC_SIG) + +#define SO_TC_UNSPEC ((int)-1) /* Traffic class not specified */ + +#define SO_TC_SIG SO_TC_VI /* to be removed XXX */ + +#define SOTCIX_BK_SYS 0 +#define SOTCIX_BK 1 +#define SOTCIX_BE 2 +#define SOTCIX_RD 3 +#define SOTCIX_OAM 4 +#define SOTCIX_AV 5 +#define SOTCIX_RV 6 +#define SOTCIX_VI 7 +#define SOTCIX_VO 8 +#define SOTCIX_CTL 9 +#endif /* XNU_KERNEL_PRIVATE */ + +/* Background socket configuration flags */ +#define TRAFFIC_MGT_SO_BACKGROUND 0x0001 /* background socket */ +#define TRAFFIC_MGT_TCP_RECVBG 0x0002 /* Only TCP sockets, receiver throttling */ + +#define SO_RECV_TRAFFIC_CLASS 0x1087 /* Receive traffic class (bool) */ +#define SO_TRAFFIC_CLASS_DBG 0x1088 /* Debug traffic class (struct so_tcdbg) */ +#define SO_OPTION_UNUSED_0 0x1089 /* Traffic class statistics */ +#define SO_PRIVILEGED_TRAFFIC_CLASS 0x1090 /* Privileged traffic class (bool) */ +#define SO_DEFUNCTIT 0x1091 /* Defunct a socket (only in internal 
builds) */ +#define SO_DEFUNCTOK 0x1100 /* can be defunct'd */ +#define SO_ISDEFUNCT 0x1101 /* get defunct status */ + +#define SO_OPPORTUNISTIC 0x1102 /* deprecated; use SO_TRAFFIC_CLASS */ + +/* + * SO_FLUSH flushes any unsent data generated by a given socket. It takes + * an integer parameter, which can be any of the SO_TC traffic class values, + * or the special SO_TC_ALL value. + */ +#define SO_FLUSH 0x1103 /* flush unsent data (int) */ +#define SO_TC_ALL (-1) + +#define SO_RECV_ANYIF 0x1104 /* unrestricted inbound processing */ +#define SO_TRAFFIC_MGT_BACKGROUND 0x1105 /* Background traffic management */ + +#define SO_FLOW_DIVERT_TOKEN 0x1106 /* flow divert token */ + +#define SO_DELEGATED 0x1107 /* set socket as delegate (pid_t) */ +#define SO_DELEGATED_UUID 0x1108 /* set socket as delegate (uuid_t) */ +#define SO_NECP_ATTRIBUTES 0x1109 /* NECP socket attributes (domain, account, etc.) */ +#define SO_CFIL_SOCK_ID 0x1110 /* get content filter socket ID (cfil_sock_id_t) */ +#define SO_NECP_CLIENTUUID 0x1111 /* NECP Client uuid */ +#endif /* PRIVATE */ +#define SO_NUMRCVPKT 0x1112 /* number of datagrams in receive socket buffer */ +#ifdef PRIVATE +#define SO_AWDL_UNRESTRICTED 0x1113 /* try to use AWDL in restricted mode */ +#define SO_EXTENDED_BK_IDLE 0x1114 /* extended time to keep socket idle after app is suspended (int) */ +#define SO_MARK_CELLFALLBACK 0x1115 /* Mark as initiated by cell fallback */ +#endif /* PRIVATE */ +#define SO_NET_SERVICE_TYPE 0x1116 /* Network service type */ + +#ifdef PRIVATE +#define SO_QOSMARKING_POLICY_OVERRIDE 0x1117 /* int */ +#define SO_INTCOPROC_ALLOW 0x1118 /* Try to use internal co-processor interfaces. 
*/ +#endif /* PRIVATE */ + +#define SO_NETSVC_MARKING_LEVEL 0x1119 /* Get QoS marking in effect for socket */ + +#ifdef PRIVATE +#define SO_NECP_LISTENUUID 0x1120 /* NECP client UUID for listener */ +#define SO_MPKL_SEND_INFO 0x1122 /* (struct so_mpkl_send_info) */ +#define SO_STATISTICS_EVENT 0x1123 /* int64 argument, an event in statistics collection */ +#define SO_WANT_KEV_SOCKET_CLOSED 0x1124 /* want delivery of KEV_SOCKET_CLOSED (int) */ +#define SO_MARK_KNOWN_TRACKER 0x1125 /* Mark as a connection to a known tracker */ +#define SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED 0x1126 /* Mark tracker connection to be non-app initiated */ +#define SO_MARK_WAKE_PKT 0x1127 /* Mark next packet as a wake packet, one shot (int) */ +#define SO_RECV_WAKE_PKT 0x1128 /* Receive wake packet indication as ancillary data (int) */ +#define SO_MARK_APPROVED_APP_DOMAIN 0x1129 /* Mark connection as being for an approved associated app domain */ +#define SO_FALLBACK_MODE 0x1130 /* Indicates the mode of fallback used */ +#endif /* PRIVATE */ + +#define SO_RESOLVER_SIGNATURE 0x1131 /* A signed data blob from the system resolver */ +#ifdef PRIVATE +#define SO_MARK_CELLFALLBACK_UUID 0x1132 /* Mark as initiated by cell fallback using UUID of the connection */ +#define SO_APPLICATION_ID 0x1133 /* ID of attributing app - so_application_id_t */ + +struct so_mark_cellfallback_uuid_args { + uuid_t flow_uuid; + int flow_cellfallback; +}; + +typedef struct { + uid_t uid; + uuid_t effective_uuid; + uid_t persona_id; +} so_application_id_t; + +#endif + + +/* When adding new socket-options, you need to make sure MPTCP supports these as well! */ + +/* + * Network Service Type for option SO_NET_SERVICE_TYPE + * + * The vast majority of sockets should use Best Effort that is the default + * Network Service Type. Other Network Service Types have to be used only if + * the traffic actually matches the description of the Network Service Type. 
+ * + * Network Service Types do not represent priorities but rather describe + * different categories of delay, jitter and loss parameters. + * Those parameters may influence protocols from layer 4 protocols like TCP + * to layer 2 protocols like Wi-Fi. The Network Service Type can determine + * how the traffic is queued and scheduled by the host networking stack and + * by other entities on the network like switches and routers. For example + * for Wi-Fi, the Network Service Type can select the marking of the + * layer 2 packet with the appropriate WMM Access Category. + * + * There is no point in attempting to game the system and use + * a Network Service Type that does not correspond to the actual + * traffic characteristic but one that seems to have a higher precedence. + * The reason is that for service classes that have lower tolerance + * for delay and jitter, the queues size is lower than for service + * classes that are more tolerant to delay and jitter. + * + * For example using a voice service type for bulk data transfer will lead + * to disastrous results as soon as congestion happens because the voice + * queue overflows and packets get dropped. This is not only bad for the bulk + * data transfer but it is also bad for VoIP apps that legitimately are using + * the voice service type. + * + * The characteristics of the Network Service Types are based on the service + * classes defined in RFC 4594 "Configuration Guidelines for DiffServ Service + * Classes" + * + * When the system detects the outgoing interface belongs to a DiffServ domain + * that follows the recommendation of the IETF draft "Guidelines for DiffServ to + * IEEE 802.11 Mapping", the packet will be marked at layer 3 with a DSCP value + * that corresponds to Network Service Type. + * + * NET_SERVICE_TYPE_BE + * "Best Effort", unclassified/standard. This is the default service + * class and covers the majority of the traffic.
+ * + * NET_SERVICE_TYPE_BK + * "Background", high delay tolerant, loss tolerant, elastic flow, + * variable size & long-lived. E.g: non-interactive network bulk transfer + * like synching or backup. + * + * NET_SERVICE_TYPE_RD + * "Responsive Data", a notch higher than "Best Effort", medium delay + * tolerant, elastic & inelastic flow, bursty, long-lived. E.g. email, + * instant messaging, for which there is a sense of interactivity and + * urgency (user waiting for output). + * + * NET_SERVICE_TYPE_OAM + * "Operations, Administration, and Management", medium delay tolerant, + * low-medium loss tolerant, elastic & inelastic flows, variable size. + * E.g. VPN tunnels. + * + * NET_SERVICE_TYPE_AV + * "Multimedia Audio/Video Streaming", medium delay tolerant, low-medium + * loss tolerant, elastic flow, constant packet interval, variable rate + * and size. E.g. video and audio playback with buffering. + * + * NET_SERVICE_TYPE_RV + * "Responsive Multimedia Audio/Video", low delay tolerant, low-medium + * loss tolerant, elastic flow, variable packet interval, rate and size. + * E.g. screen sharing. + * + * NET_SERVICE_TYPE_VI + * "Interactive Video", low delay tolerant, low-medium loss tolerant, + * elastic flow, constant packet interval, variable rate & size. E.g. + * video telephony. + * + * NET_SERVICE_TYPE_SIG + * "Signaling", low delay tolerant, low loss tolerant, inelastic flow, + * jitter tolerant, rate is bursty but short, variable size. E.g. SIP. + * + * NET_SERVICE_TYPE_VO + * "Interactive Voice", very low delay tolerant, very low loss tolerant, + * inelastic flow, constant packet rate, somewhat fixed size. + * E.g. VoIP.
+ */ + +#define NET_SERVICE_TYPE_BE 0 /* Best effort */ +#define NET_SERVICE_TYPE_BK 1 /* Background system initiated */ +#define NET_SERVICE_TYPE_SIG 2 /* Signaling */ +#define NET_SERVICE_TYPE_VI 3 /* Interactive Video */ +#define NET_SERVICE_TYPE_VO 4 /* Interactive Voice */ +#define NET_SERVICE_TYPE_RV 5 /* Responsive Multimedia Audio/Video */ +#define NET_SERVICE_TYPE_AV 6 /* Multimedia Audio/Video Streaming */ +#define NET_SERVICE_TYPE_OAM 7 /* Operations, Administration, and Management */ +#define NET_SERVICE_TYPE_RD 8 /* Responsive Data */ + +#if PRIVATE +#define _NET_SERVICE_TYPE_COUNT 9 +#define _NET_SERVICE_TYPE_UNSPEC ((int)-1) + +#define IS_VALID_NET_SERVICE_TYPE(c) \ + (c >= NET_SERVICE_TYPE_BE && c <= NET_SERVICE_TYPE_RD) + +extern const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT]; + +/* + * Facility to pass Network Service Type values using SO_TRAFFIC_CLASS + * Mostly useful to simplify implementation of frameworks to adopt the new + * Network Service Type values for Signaling. 
+ */ +#define SO_TC_NET_SERVICE_OFFSET 10000 +#define SO_TC_NETSVC_SIG (SO_TC_NET_SERVICE_OFFSET + NET_SERVICE_TYPE_SIG) +#endif /* PRIVATE */ + +/* These are supported values for SO_NETSVC_MARKING_LEVEL */ +#define NETSVC_MRKNG_UNKNOWN 0 /* The outgoing network interface is not known */ +#define NETSVC_MRKNG_LVL_L2 1 /* Default marking at layer 2 (for example Wi-Fi WMM) */ +#define NETSVC_MRKNG_LVL_L3L2_ALL 2 /* Layer 3 DSCP marking and layer 2 marking for all Network Service Types */ +#define NETSVC_MRKNG_LVL_L3L2_BK 3 /* The system policy limits layer 3 DSCP marking and layer 2 marking + * to background Network Service Types */ + + +typedef __uint32_t sae_associd_t; +#define SAE_ASSOCID_ANY 0 +#define SAE_ASSOCID_ALL ((sae_associd_t)(-1ULL)) + +typedef __uint32_t sae_connid_t; +#define SAE_CONNID_ANY 0 +#define SAE_CONNID_ALL ((sae_connid_t)(-1ULL)) + +/* connectx() flag parameters */ +#define CONNECT_RESUME_ON_READ_WRITE 0x1 /* resume connect() on read/write */ +#define CONNECT_DATA_IDEMPOTENT 0x2 /* data is idempotent */ +#define CONNECT_DATA_AUTHENTICATED 0x4 /* data includes security that replaces the TFO-cookie */ + +/* sockaddr endpoints */ +typedef struct sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + const struct sockaddr *sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + const struct sockaddr *sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +} sa_endpoints_t; +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +/* + * Structure used for manipulating linger option. 
+ */ +struct linger { + int l_onoff; /* option on/off */ + int l_linger; /* linger time */ +}; + +#ifndef __APPLE__ +struct accept_filter_arg { + char af_name[16]; + char af_arg[256 - 16]; +}; +#endif + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#ifdef __APPLE__ + +/* + * Structure to control non-portable Sockets extension to POSIX + */ +struct so_np_extensions { + u_int32_t npx_flags; + u_int32_t npx_mask; +}; + +#define SONPX_SETOPTSHUT 0x000000001 /* flag for allowing setsockopt after shutdown */ + + +#ifdef KERNEL_PRIVATE +#define SONPX_MASK_VALID (SONPX_SETOPTSHUT) +#define IS_SO_TC_BACKGROUND(_tc_) ((_tc_) == SO_TC_BK || (_tc_) == SO_TC_BK_SYS) +#define IS_SO_TC_BACKGROUNDSYSTEM(_tc_) ((_tc_) == SO_TC_BK_SYS) +#endif /* KERNEL_PRIVATE */ + +#endif +#endif + +/* + * Level number for (get/set)sockopt() to apply to socket itself. + */ +#define SOL_SOCKET 0xffff /* options for socket level */ + + +/* + * Address families. + */ +#define AF_UNSPEC 0 /* unspecified */ +#define AF_UNIX 1 /* local to host (pipes) */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define AF_LOCAL AF_UNIX /* backward compatibility */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#define AF_INET 2 /* internetwork: UDP, TCP, etc. */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define AF_IMPLINK 3 /* arpanet imp addresses */ +#define AF_PUP 4 /* pup protocols: e.g. 
BSP */ +#define AF_CHAOS 5 /* mit CHAOS protocols */ +#define AF_NS 6 /* XEROX NS protocols */ +#define AF_ISO 7 /* ISO protocols */ +#define AF_OSI AF_ISO +#define AF_ECMA 8 /* European computer manufacturers */ +#define AF_DATAKIT 9 /* datakit protocols */ +#define AF_CCITT 10 /* CCITT protocols, X.25 etc */ +#define AF_SNA 11 /* IBM SNA */ +#define AF_DECnet 12 /* DECnet */ +#define AF_DLI 13 /* DEC Direct data link interface */ +#define AF_LAT 14 /* LAT */ +#define AF_HYLINK 15 /* NSC Hyperchannel */ +#define AF_APPLETALK 16 /* Apple Talk */ +#define AF_ROUTE 17 /* Internal Routing Protocol */ +#define AF_LINK 18 /* Link layer interface */ +#define pseudo_AF_XTP 19 /* eXpress Transfer Protocol (no AF) */ +#define AF_COIP 20 /* connection-oriented IP, aka ST II */ +#define AF_CNT 21 /* Computer Network Technology */ +#define pseudo_AF_RTIP 22 /* Help Identify RTIP packets */ +#define AF_IPX 23 /* Novell Internet Protocol */ +#define AF_SIP 24 /* Simple Internet Protocol */ +#define pseudo_AF_PIP 25 /* Help Identify PIP packets */ +#define AF_NDRV 27 /* Network Driver 'raw' access */ +#define AF_ISDN 28 /* Integrated Services Digital Network */ +#define AF_E164 AF_ISDN /* CCITT E.164 recommendation */ +#define pseudo_AF_KEY 29 /* Internal key-management function */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#define AF_INET6 30 /* IPv6 */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define AF_NATM 31 /* native ATM access */ +#define AF_SYSTEM 32 /* Kernel event messages */ +#define AF_NETBIOS 33 /* NetBIOS */ +#define AF_PPP 34 /* PPP communication protocol */ +#define pseudo_AF_HDRCMPLT 35 /* Used by BPF to not rewrite headers + * in interface output routine */ +#ifdef PRIVATE +#define AF_AFP 36 /* Used by AFP */ +#else +#define AF_RESERVED_36 36 /* Reserved for internal usage */ +#endif +#define AF_IEEE80211 37 /* IEEE 802.11 protocol */ +#define AF_UTUN 38 +#ifdef PRIVATE +#define AF_MULTIPATH 39 +#endif /* PRIVATE */ +#define 
AF_VSOCK 40 /* VM Sockets */ +#define AF_MAX 41 +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +/* + * [XSI] Structure used by kernel to store most addresses. + */ +struct sockaddr { + __uint8_t sa_len; /* total length */ + sa_family_t sa_family; /* [XSI] address family */ + char sa_data[14]; /* [XSI] addr value */ +}; +__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct sockaddr, sockaddr); + +/* + * Least amount of information that a sockaddr requires. + * Sockaddr_header is a compatible prefix structure of + * all sockaddr objects. + */ +struct __sockaddr_header { + __uint8_t sa_len; + sa_family_t sa_family; +}; + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define SOCK_MAXADDRLEN 255 /* longest possible addresses */ + +/* + * Structure used by kernel to pass protocol + * information in raw sockets. + */ +struct sockproto { + __uint16_t sp_family; /* address family */ + __uint16_t sp_protocol; /* protocol */ +}; +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +/* + * RFC 2553: protocol-independent placeholder for socket addresses + */ +#define _SS_MAXSIZE 128 +#define _SS_ALIGNSIZE (sizeof(__int64_t)) +#define _SS_PAD1SIZE \ + (_SS_ALIGNSIZE - sizeof(__uint8_t) - sizeof(sa_family_t)) +#define _SS_PAD2SIZE \ + (_SS_MAXSIZE - sizeof(__uint8_t) - sizeof(sa_family_t) - \ + _SS_PAD1SIZE - _SS_ALIGNSIZE) + +/* + * [XSI] sockaddr_storage + */ +struct sockaddr_storage { + __uint8_t ss_len; /* address length */ + sa_family_t ss_family; /* [XSI] address family */ + char __ss_pad1[_SS_PAD1SIZE]; + __int64_t __ss_align; /* force structure storage alignment */ + char __ss_pad2[_SS_PAD2SIZE]; +}; +__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct sockaddr_storage, sockaddr_storage); + +/* + * Protocol families, same as address families for now. 
+ */ +#define PF_UNSPEC AF_UNSPEC +#define PF_LOCAL AF_LOCAL +#define PF_UNIX PF_LOCAL /* backward compatibility */ +#define PF_INET AF_INET +#define PF_IMPLINK AF_IMPLINK +#define PF_PUP AF_PUP +#define PF_CHAOS AF_CHAOS +#define PF_NS AF_NS +#define PF_ISO AF_ISO +#define PF_OSI AF_ISO +#define PF_ECMA AF_ECMA +#define PF_DATAKIT AF_DATAKIT +#define PF_CCITT AF_CCITT +#define PF_SNA AF_SNA +#define PF_DECnet AF_DECnet +#define PF_DLI AF_DLI +#define PF_LAT AF_LAT +#define PF_HYLINK AF_HYLINK +#define PF_APPLETALK AF_APPLETALK +#define PF_ROUTE AF_ROUTE +#define PF_LINK AF_LINK +#define PF_XTP pseudo_AF_XTP /* really just proto family, no AF */ +#define PF_COIP AF_COIP +#define PF_CNT AF_CNT +#define PF_SIP AF_SIP +#define PF_IPX AF_IPX /* same format as AF_NS */ +#define PF_RTIP pseudo_AF_RTIP /* same format as AF_INET */ +#define PF_PIP pseudo_AF_PIP +#define PF_NDRV AF_NDRV +#define PF_ISDN AF_ISDN +#define PF_KEY pseudo_AF_KEY +#define PF_INET6 AF_INET6 +#define PF_NATM AF_NATM +#define PF_SYSTEM AF_SYSTEM +#define PF_NETBIOS AF_NETBIOS +#define PF_PPP AF_PPP +#ifdef PRIVATE +#define PF_AFP AF_AFP +#else +#define PF_RESERVED_36 AF_RESERVED_36 +#endif +#define PF_UTUN AF_UTUN +#ifdef PRIVATE +#define PF_MULTIPATH AF_MULTIPATH +#endif /* PRIVATE */ +#define PF_VSOCK AF_VSOCK +#define PF_MAX AF_MAX + +/* + * These do not have socket-layer support: + */ +#define PF_VLAN ((uint32_t)0x766c616e) /* 'vlan' */ +#define PF_BOND ((uint32_t)0x626f6e64) /* 'bond' */ +#ifdef KERNEL_PRIVATE +#define PF_BRIDGE ((uint32_t)0x62726467) /* 'brdg' */ +#define PF_NULL ((uint32_t)0x6e756c6c) /* 'null' */ +#endif /* KERNEL_PRIVATE */ + +/* + * Definitions for network related sysctl, CTL_NET. + * + * Second level is protocol family. + * Third level is protocol number. + * + * Further levels are defined by the individual families below. 
+ */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define NET_MAXID AF_MAX +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +#ifdef KERNEL_PRIVATE +#define CTL_NET_NAMES { \ + { 0, 0 }, \ + { "local", CTLTYPE_NODE }, \ + { "inet", CTLTYPE_NODE }, \ + { "implink", CTLTYPE_NODE }, \ + { "pup", CTLTYPE_NODE }, \ + { "chaos", CTLTYPE_NODE }, \ + { "xerox_ns", CTLTYPE_NODE }, \ + { "iso", CTLTYPE_NODE }, \ + { "emca", CTLTYPE_NODE }, \ + { "datakit", CTLTYPE_NODE }, \ + { "ccitt", CTLTYPE_NODE }, \ + { "ibm_sna", CTLTYPE_NODE }, \ + { "decnet", CTLTYPE_NODE }, \ + { "dec_dli", CTLTYPE_NODE }, \ + { "lat", CTLTYPE_NODE }, \ + { "hylink", CTLTYPE_NODE }, \ + { "appletalk", CTLTYPE_NODE }, \ + { "route", CTLTYPE_NODE }, \ + { "link_layer", CTLTYPE_NODE }, \ + { "xtp", CTLTYPE_NODE }, \ + { "coip", CTLTYPE_NODE }, \ + { "cnt", CTLTYPE_NODE }, \ + { "rtip", CTLTYPE_NODE }, \ + { "ipx", CTLTYPE_NODE }, \ + { "sip", CTLTYPE_NODE }, \ + { "pip", CTLTYPE_NODE }, \ + { 0, 0 }, \ + { "ndrv", CTLTYPE_NODE }, \ + { "isdn", CTLTYPE_NODE }, \ + { "key", CTLTYPE_NODE }, \ + { "inet6", CTLTYPE_NODE }, \ + { "natm", CTLTYPE_NODE }, \ + { "sys", CTLTYPE_NODE }, \ + { "netbios", CTLTYPE_NODE }, \ + { "ppp", CTLTYPE_NODE }, \ + { "hdrcomplete", CTLTYPE_NODE }, \ + { "vsock", CTLTYPE_NODE }, \ +} +#endif /* KERNEL_PRIVATE */ + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +/* + * PF_ROUTE - Routing table + * + * Three additional levels are defined: + * Fourth: address family, 0 is wildcard + * Fifth: type of info, defined below + * Sixth: flag(s) to mask with for NET_RT_FLAGS + */ +#define NET_RT_DUMP 1 /* dump; may limit to a.f. */ +#define NET_RT_FLAGS 2 /* by flags, e.g.
RESOLVING */ +#define NET_RT_IFLIST 3 /* survey interface list */ +#define NET_RT_STAT 4 /* routing statistics */ +#define NET_RT_TRASH 5 /* routes not in table but not freed */ +#define NET_RT_IFLIST2 6 /* interface list with addresses */ +#define NET_RT_DUMP2 7 /* dump; may limit to a.f. */ +#ifdef PRIVATE +#define NET_RT_DUMPX 8 /* private */ +#define NET_RT_DUMPX_FLAGS 9 /* private */ +#endif /* PRIVATE */ +/* + * Allows read access non-local host's MAC address + * if the process has neighbor cache entitlement. + */ +#define NET_RT_FLAGS_PRIV 10 +#define NET_RT_MAXID 11 +#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ + +#ifdef PRIVATE +/* These are supported values for SO_STATISTICS_EVENT */ +#define SO_STATISTICS_EVENT_ENTER_CELLFALLBACK (1 << 0) +#define SO_STATISTICS_EVENT_EXIT_CELLFALLBACK (1 << 1) +#define SO_STATISTICS_EVENT_ATTRIBUTION_CHANGE (1 << 2) +#define SO_STATISTICS_EVENT_RESERVED_2 (1 << 3) +#endif /* PRIVATE */ + + +#ifdef KERNEL_PRIVATE +#define CTL_NET_RT_NAMES { \ + { 0, 0 }, \ + { "dump", CTLTYPE_STRUCT }, \ + { "flags", CTLTYPE_STRUCT }, \ + { "iflist", CTLTYPE_STRUCT }, \ + { "stat", CTLTYPE_STRUCT }, \ + { "trash", CTLTYPE_INT }, \ + { "iflist2", CTLTYPE_STRUCT }, \ + { "dump2", CTLTYPE_STRUCT }, \ + { "dumpx", CTLTYPE_STRUCT }, \ + { "dumpx_flags", CTLTYPE_STRUCT }, \ +} + +#endif /* KERNEL_PRIVATE */ + +/* + * Maximum queue length specifiable by listen. + */ +#define SOMAXCONN 128 + +/* + * [XSI] Message header for recvmsg and sendmsg calls. + * Used value-result for recvmsg, value only for sendmsg. 
+ */ +struct msghdr { + void *msg_name; /* [XSI] optional address */ + socklen_t msg_namelen; /* [XSI] size of address */ + struct iovec *msg_iov; /* [XSI] scatter/gather array */ + int msg_iovlen; /* [XSI] # elements in msg_iov */ + void *msg_control; /* [XSI] ancillary data, see below */ + socklen_t msg_controllen; /* [XSI] ancillary data buffer len */ + int msg_flags; /* [XSI] flags on received message */ +}; + +#ifdef PRIVATE +/* + * Extended version for sendmsg_x() and recvmsg_x() calls + * + * For recvmsg_x(), the size of the data received is given by the field + * msg_datalen. + * + * For sendmsg_x(), the size of the data to send is given by the length of + * the iovec array -- like sendmsg(). The field msg_datalen is ignored. + */ +struct msghdr_x { + void *msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + struct iovec *msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + void *msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ + size_t msg_datalen; /* byte length of buffer in msg_iov */ +}; +#endif /* PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE +/* + * In-kernel representation of "struct msghdr" from + * userspace. Has enough precision for 32-bit or + * 64-bit clients, but does not need to be packed. + */ + +struct user_msghdr { + user_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + vm_address_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ +}; +__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user_msghdr, user_msghdr); + +/* + * LP64 user version of struct msghdr. 
+ * WARNING - keep in sync with struct msghdr + */ + +struct user64_msghdr { + user64_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user64_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user64_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ +}; +__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user64_msghdr, user64_msghdr); + +/* + * ILP32 user version of struct msghdr. + * WARNING - keep in sync with struct msghdr + */ + +struct user32_msghdr { + user32_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user32_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user32_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ +}; +__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user32_msghdr, user32_msghdr); + +/* + * In-kernel representation of "struct msghdr_x" from + * userspace. Has enough precision for 32-bit or + * 64-bit clients, but does not need to be packed. 
+ */ + +struct user_msghdr_x { + user_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + vm_address_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ + size_t msg_datalen; /* byte length of buffer in msg_iov */ +}; +__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user_msghdr_x, user_msghdr_x); + + +/* + * LP64 user version of struct msghdr_x + * WARNING - keep in sync with struct msghdr_x + */ + +struct user64_msghdr_x { + user64_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user64_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user64_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ + user64_size_t msg_datalen; /* byte length of buffer in msg_iov */ +}; +__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user64_msghdr_x, user64_msghdr_x); + +/* + * ILP32 user version of struct msghdr_x + * WARNING - keep in sync with struct msghdr_x + */ + +struct user32_msghdr_x { + user32_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user32_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user32_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ + user32_size_t msg_datalen; /* byte length of buffer in msg_iov */ +}; +__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user32_msghdr_x, user32_msghdr_x); + +/* + * In-kernel representation of "struct sa_endpoints" from + * userspace. Has enough precision for 32-bit or + * 64-bit clients, but does not need to be packed. 
+ */ + +struct user_sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + user_addr_t sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + user_addr_t sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +}; + +/* + * LP64 user version of struct sa_endpoints + * WARNING - keep in sync with struct sa_endpoints + */ + +struct user64_sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + user64_addr_t sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + user64_addr_t sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +}; + +/* + * ILP32 user version of struct sa_endpoints + * WARNING - keep in sync with struct sa_endpoints + */ + +struct user32_sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + user32_addr_t sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + user32_addr_t sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +}; + +#endif /* XNU_KERNEL_PRIVATE */ + +#define MSG_OOB 0x1 /* process out-of-band data */ +#define MSG_PEEK 0x2 /* peek at incoming message */ +#define MSG_DONTROUTE 0x4 /* send without using routing tables */ +#define MSG_EOR 0x8 /* data completes record */ +#define MSG_TRUNC 0x10 /* data discarded before delivery */ +#define MSG_CTRUNC 0x20 /* control data lost before delivery */ +#define MSG_WAITALL 0x40 /* wait for full request or error */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define MSG_DONTWAIT 0x80 /* this message should be nonblocking */ +#define MSG_EOF 0x100 /* data completes connection */ +#ifdef __APPLE__ +#ifndef PRIVATE +#ifdef __APPLE_API_OBSOLETE +#define MSG_WAITSTREAM 0x200 /* wait up to full request.. 
may return partial */ +#endif +#else +#define MSG_WAITSTREAM 0x200 /* wait up to full request.. may return partial */ +#endif +#define MSG_FLUSH 0x400 /* Start of 'hold' seq; dump so_temp, deprecated */ +#define MSG_HOLD 0x800 /* Hold frag in so_temp, deprecated */ +#define MSG_SEND 0x1000 /* Send the packet in so_temp, deprecated */ +#define MSG_HAVEMORE 0x2000 /* Data ready to be read */ +#define MSG_RCVMORE 0x4000 /* Data remains in current pkt */ +#endif +#ifdef KERNEL_PRIVATE +#define MSG_COMPAT 0x8000 /* deprecated */ +#endif /* KERNEL_PRIVATE */ +#define MSG_NEEDSA 0x10000 /* Fail receive if socket address cannot be allocated */ +#ifdef KERNEL_PRIVATE +#define MSG_NBIO 0x20000 /* FIONBIO mode, used by fifofs */ +#define MSG_SKIPCFIL 0x40000 /* skip pass content filter */ +#endif +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +#if __DARWIN_C_LEVEL >= 200809L +#define MSG_NOSIGNAL 0x80000 /* do not generate SIGPIPE on EOF */ +#endif /* __DARWIN_C_LEVEL */ + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#ifdef KERNEL +#define MSG_USEUPCALL 0x80000000 /* Inherit upcall in sock_accept */ +#endif +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +/* + * Header for ancillary data objects in msg_control buffer. + * Used for additional information with/about a datagram + * not expressible by flags. The format is a sequence + * of message elements headed by cmsghdr structures. + */ +struct cmsghdr { + socklen_t cmsg_len; /* [XSI] data byte count, including hdr */ + int cmsg_level; /* [XSI] originating protocol */ + int cmsg_type; /* [XSI] protocol-specific type */ +/* followed by unsigned char cmsg_data[]; */ +}; + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#ifndef __APPLE__ +/* + * While we may have more groups than this, the cmsgcred struct must + * be able to fit in an mbuf, and NGROUPS_MAX is too large to allow + * this. 
+ */ +#define CMGROUP_MAX 16 + +/* + * Credentials structure, used to verify the identity of a peer + * process that has sent us a message. This is allocated by the + * peer process but filled in by the kernel. This prevents the + * peer from lying about its identity. (Note that cmcred_groups[0] + * is the effective GID.) + */ +struct cmsgcred { + pid_t cmcred_pid; /* PID of sending process */ + uid_t cmcred_uid; /* real UID of sending process */ + uid_t cmcred_euid; /* effective UID of sending process */ + gid_t cmcred_gid; /* real GID of sending process */ + short cmcred_ngroups; /* number or groups */ + gid_t cmcred_groups[CMGROUP_MAX]; /* groups */ +}; +#endif +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +/* given pointer to struct cmsghdr, return pointer to data */ +#define CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ + __DARWIN_ALIGN32(sizeof(struct cmsghdr))) + +/* + * RFC 2292 requires to check msg_controllen, in case that the kernel returns + * an empty list for some reasons. + */ +#define CMSG_FIRSTHDR(mhdr) \ + ((mhdr)->msg_controllen >= sizeof(struct cmsghdr) ? \ + (struct cmsghdr *)(mhdr)->msg_control : \ + (struct cmsghdr *)0L) + + +/* + * Given pointer to struct cmsghdr, return pointer to next cmsghdr + * RFC 2292 says that CMSG_NXTHDR(mhdr, NULL) is equivalent to CMSG_FIRSTHDR(mhdr) + */ +#define CMSG_NXTHDR(mhdr, cmsg) \ + ((char *)(cmsg) == (char *)0L ? CMSG_FIRSTHDR(mhdr) : \ + ((((unsigned char *)(cmsg) + \ + __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len) + \ + __DARWIN_ALIGN32(sizeof(struct cmsghdr))) > \ + ((unsigned char *)(mhdr)->msg_control + \ + (mhdr)->msg_controllen)) ? 
\ + (struct cmsghdr *)0L /* NULL */ : \ + (struct cmsghdr *)(void *)((unsigned char *)(cmsg) + \ + __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len)))) + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +/* RFC 2292 additions */ +#define CMSG_SPACE(l) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + __DARWIN_ALIGN32(l)) +#define CMSG_LEN(l) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + (l)) + +#ifdef KERNEL +#define CMSG_ALIGN(n) ((typeof(n))__DARWIN_ALIGN32(n)) +#endif +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +/* "Socket"-level control message types: */ +#define SCM_RIGHTS 0x01 /* access rights (array of int) */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define SCM_TIMESTAMP 0x02 /* timestamp (struct timeval) */ +#define SCM_CREDS 0x03 /* process creds (struct cmsgcred) */ +#define SCM_TIMESTAMP_MONOTONIC 0x04 /* timestamp (uint64_t) */ + +#ifdef PRIVATE +#define SCM_TIMESTAMP_CONTINUOUS 0x07 /* timestamp (uint64_t) */ +#define SCM_MPKL_SEND_INFO 0x08 /* send info for multi-layer packet logging (struct so_mpkl_send_info) */ +#define SCM_MPKL_RECV_INFO 0x09 /* receive info for multi-layer packet logging (struct so_mpkl_recv_info */ +#endif /* PRIVATE */ + +#ifdef KERNEL_PRIVATE +/* + * 4.3 compat sockaddr (deprecated) + */ +struct osockaddr { + __uint16_t sa_family; /* address family */ + char sa_data[14]; /* up to 14 bytes of direct address */ +}; + +/* + * 4.3-compat message header (deprecated) + */ +struct omsghdr { + void *msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + struct iovec *msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + void *msg_accrights; /* access rights sent/rcvd */ + int msg_accrightslen; +}; + +#define SA(s) ((struct sockaddr *)(void *)(s)) +#endif /* KERNEL_PRIVATE */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +/* + * howto arguments for shutdown(2), specified by Posix.1g. 
+ */ +#define SHUT_RD 0 /* shut down the reading side */ +#define SHUT_WR 1 /* shut down the writing side */ +#define SHUT_RDWR 2 /* shut down both sides */ + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +/* + * sendfile(2) header/trailer struct + */ +struct sf_hdtr { + struct iovec *headers; /* pointer to an array of header struct iovec's */ + int hdr_cnt; /* number of header iovec's */ + struct iovec *trailers; /* pointer to an array of trailer struct iovec's */ + int trl_cnt; /* number of trailer iovec's */ +}; + +#ifdef KERNEL + +/* In-kernel representation */ +struct user_sf_hdtr { + user_addr_t headers; /* pointer to an array of header struct iovec's */ + int hdr_cnt; /* number of header iovec's */ + user_addr_t trailers; /* pointer to an array of trailer struct iovec's */ + int trl_cnt; /* number of trailer iovec's */ +}; + +/* LP64 user version of struct sf_hdtr */ +struct user64_sf_hdtr { + user64_addr_t headers; /* pointer to an array of header struct iovec's */ + int hdr_cnt; /* number of header iovec's */ + user64_addr_t trailers; /* pointer to an array of trailer struct iovec's */ + int trl_cnt; /* number of trailer iovec's */ +}; + +/* ILP32 user version of struct sf_hdtr */ +struct user32_sf_hdtr { + user32_addr_t headers; /* pointer to an array of header struct iovec's */ + int hdr_cnt; /* number of header iovec's */ + user32_addr_t trailers; /* pointer to an array of trailer struct iovec's */ + int trl_cnt; /* number of trailer iovec's */ +}; + +#endif /* KERNEL */ + +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +#ifdef PRIVATE +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) + +/* + * Structure for SIOCGASSOCIDS + */ +struct so_aidreq { + __uint32_t sar_cnt; /* number of associations */ + sae_associd_t *sar_aidp; /* array of association IDs */ +}; + +#ifdef BSD_KERNEL_PRIVATE +struct so_aidreq32 { + __uint32_t sar_cnt; + user32_addr_t sar_aidp; +}; + +struct so_aidreq64 { + __uint32_t sar_cnt; + user64_addr_t 
sar_aidp __attribute__((aligned(8))); +}; +#endif /* BSD_KERNEL_PRIVATE */ + +/* + * Structure for SIOCGCONNIDS + */ +struct so_cidreq { + sae_associd_t scr_aid; /* association ID */ + __uint32_t scr_cnt; /* number of connections */ + sae_connid_t *scr_cidp; /* array of connection IDs */ +}; + +#ifdef BSD_KERNEL_PRIVATE +struct so_cidreq32 { + sae_associd_t scr_aid; + __uint32_t scr_cnt; + user32_addr_t scr_cidp; +}; + +struct so_cidreq64 { + sae_associd_t scr_aid; + __uint32_t scr_cnt; + user64_addr_t scr_cidp __attribute__((aligned(8))); +}; +#endif /* BSD_KERNEL_PRIVATE */ + +/* + * Structure for SIOCGCONNINFO + */ +struct so_cinforeq { + sae_connid_t scir_cid; /* connection ID */ + __uint32_t scir_flags; /* see flags below */ + __uint32_t scir_ifindex; /* (last) outbound interface */ + __int32_t scir_error; /* most recent error */ + struct sockaddr *scir_src; /* source address */ + socklen_t scir_src_len; /* source address len */ + struct sockaddr *scir_dst; /* destination address */ + socklen_t scir_dst_len; /* destination address len */ + __uint32_t scir_aux_type; /* aux data type (CIAUX) */ + void *scir_aux_data; /* aux data */ + __uint32_t scir_aux_len; /* aux data len */ +}; + +#ifdef BSD_KERNEL_PRIVATE +struct so_cinforeq32 { + sae_connid_t scir_cid; + __uint32_t scir_flags; + __uint32_t scir_ifindex; + __int32_t scir_error; + user32_addr_t scir_src; + socklen_t scir_src_len; + user32_addr_t scir_dst; + socklen_t scir_dst_len; + __uint32_t scir_aux_type; + user32_addr_t scir_aux_data; + __uint32_t scir_aux_len; +}; + +struct so_cinforeq64 { + sae_connid_t scir_cid; + __uint32_t scir_flags; + __uint32_t scir_ifindex; + __int32_t scir_error; + user64_addr_t scir_src __attribute__((aligned(8))); + socklen_t scir_src_len; + user64_addr_t scir_dst __attribute__((aligned(8))); + socklen_t scir_dst_len; + __uint32_t scir_aux_type; + user64_addr_t scir_aux_data __attribute__((aligned(8))); + __uint32_t scir_aux_len; +}; +#endif /* BSD_KERNEL_PRIVATE */ + +/* 
valid connection info flags */ +#define CIF_CONNECTING 0x1 /* connection was attempted */ +#define CIF_CONNECTED 0x2 /* connection is established */ +#define CIF_DISCONNECTING 0x4 /* disconnection was attempted */ +#define CIF_DISCONNECTED 0x8 /* has been disconnected */ +#define CIF_BOUND_IF 0x10 /* bound to an interface */ +#define CIF_BOUND_IP 0x20 /* bound to a src address */ +#define CIF_BOUND_PORT 0x40 /* bound to a src port */ +#define CIF_PREFERRED 0x80 /* connection is primary/preferred */ +#define CIF_MP_CAPABLE 0x100 /* supports multipath protocol */ +#define CIF_MP_READY 0x200 /* multipath protocol confirmed */ +#define CIF_MP_DEGRADED 0x400 /* has lost its multipath capability */ +#define CIF_MP_ACTIVE 0x800 /* this is the active subflow */ +#define CIF_MP_V1 0x1000 /* MPTCP v1 is used */ + +/* valid connection info auxiliary data types */ +#define CIAUX_TCP 0x1 /* TCP auxiliary data (conninfo_tcp_t) */ +#define CIAUX_MPTCP 0x2 /* MPTCP auxiliary data (conninfo_multipathtcp) */ + +/* + * Structure for SIOC{S,G}CONNORDER + */ +struct so_cordreq { + sae_connid_t sco_cid; /* connection ID */ + __uint32_t sco_rank; /* rank (0 means unspecified) */ +}; + +/* + * Common structure for KEV_NETPOLICY_SUBCLASS + */ +struct netpolicy_event_data { + __uint64_t eupid; /* effective unique PID */ + __uint64_t epid; /* effective PID */ + uuid_t euuid; /* effective UUID */ +}; + +/* + * NETPOLICY_IFDENIED event structure + */ +struct kev_netpolicy_ifdenied { + struct netpolicy_event_data ev_data; + __uint32_t ev_if_functional_type; +}; + +/* + * KEV_NETPOLICY_NETDENIED event structure + */ +struct kev_netpolicy_netdenied { + struct netpolicy_event_data ev_data; + __uint32_t ev_network_type; +}; + +/* + * Network Service Type to DiffServ Code Point mapping + */ +struct netsvctype_dscp_map { + int netsvctype; + u_int8_t dscp; /* 6 bits diffserv code point */ +}; + +/* + * Multi-layer packet logging require SO_MPK_LOG to be set + */ +struct so_mpkl_send_info { + uuid_t 
mpkl_uuid; + __uint8_t mpkl_proto; /* see net/multi_layer_pkt_log.h */ +}; + +struct so_mpkl_recv_info { + __uint32_t mpkl_seq; + __uint8_t mpkl_proto; /* see net/multi_layer_pkt_log.h */ +}; + +#ifndef KERNEL +__BEGIN_DECLS + +extern int peeloff(int s, sae_associd_t); +extern int socket_delegate(int, int, int, pid_t); + +/* + * recvmsg_x() is a system call similar to recvmsg(2) to receive + * several datagrams at once in the array of message headers "msgp". + * + * recvmsg_x() can be used only with protocols handlers that have been specially + * modified to support sending and receiving several datagrams at once. + * + * The size of the array "msgp" is given by the argument "cnt". + * + * The "flags" arguments supports only the value MSG_DONTWAIT. + * + * Each member of "msgp" array is of type "struct msghdr_x". + * + * The "msg_iov" and "msg_iovlen" are input parameters that describe where to + * store a datagram in a scatter gather locations of buffers -- see recvmsg(2). + * On output the field "msg_datalen" gives the length of the received datagram. + * + * The field "msg_flags" must be set to zero on input. On output, "msg_flags" + * may have MSG_TRUNC set to indicate the trailing portion of the datagram was + * discarded because the datagram was larger than the buffer supplied. + * recvmsg_x() returns as soon as a datagram is truncated. + * + * recvmsg_x() may return with less than "cnt" datagrams received based on + * the low water mark and the amount of data pending in the socket buffer. + * + * recvmsg_x() returns the number of datagrams that have been received, + * or -1 if an error occurred. + * + * NOTE: This a private system call, the API is subject to change. + */ +ssize_t recvmsg_x(int s, const struct msghdr_x *msgp, u_int cnt, int flags); + +/* + * sendmsg_x() is a system call similar to send(2) to send + * several datagrams at once in the array of message headers "msgp". 
+ * + * sendmsg_x() can be used only with protocols handlers that have been specially + * modified to support sending and receiving several datagrams at once. + * + * The size of the array "msgp" is given by the argument "cnt". + * + * The "flags" arguments supports only the value MSG_DONTWAIT. + * + * Each member of "msgp" array is of type "struct msghdr_x". + * + * The "msg_iov" and "msg_iovlen" are input parameters that specify the + * data to be sent in a scatter gather locations of buffers -- see sendmsg(2). + * + * sendmsg_x() fails with EMSGSIZE if the sum of the length of the datagrams + * is greater than the high water mark. + * + * Address and ancillary data are not supported so the following fields + * must be set to zero on input: + * "msg_name", "msg_namelen", "msg_control" and "msg_controllen". + * + * The field "msg_flags" and "msg_datalen" must be set to zero on input. + * + * sendmsg_x() returns the number of datagrams that have been sent, + * or -1 if an error occurred. + * + * NOTE: This a private system call, the API is subject to change. 
+ */ +ssize_t sendmsg_x(int s, const struct msghdr_x *msgp, u_int cnt, int flags); +__END_DECLS +#endif /* !KERNEL */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#endif /* PRIVATE */ + +#ifndef KERNEL +__BEGIN_DECLS + +int accept(int, struct sockaddr * __restrict, socklen_t * __restrict) +__DARWIN_ALIAS_C(accept); +int bind(int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS(bind); +int connect(int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS_C(connect); +int getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict) +__DARWIN_ALIAS(getpeername); +int getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict) +__DARWIN_ALIAS(getsockname); +int getsockopt(int, int, int, void * __restrict, socklen_t * __restrict); +int listen(int, int) __DARWIN_ALIAS(listen); +ssize_t recv(int, void *, size_t, int) __DARWIN_ALIAS_C(recv); +ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, + socklen_t * __restrict) __DARWIN_ALIAS_C(recvfrom); +ssize_t recvmsg(int, struct msghdr *, int) __DARWIN_ALIAS_C(recvmsg); +ssize_t send(int, const void *, size_t, int) __DARWIN_ALIAS_C(send); +ssize_t sendmsg(int, const struct msghdr *, int) __DARWIN_ALIAS_C(sendmsg); +ssize_t sendto(int, const void *, size_t, + int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS_C(sendto); +int setsockopt(int, int, int, const void *, socklen_t); +int shutdown(int, int); +int sockatmark(int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int socket(int, int, int); +int socketpair(int, int, int, int *) __DARWIN_ALIAS(socketpair); + +#if !defined(_POSIX_C_SOURCE) +int sendfile(int, int, off_t, off_t *, struct sf_hdtr *, int); +#endif /* !_POSIX_C_SOURCE */ + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +void pfctlinput(int, struct sockaddr *); + +__API_AVAILABLE(macosx(10.11), ios(9.0), tvos(9.0), watchos(2.0)) +int connectx(int, const sa_endpoints_t *, sae_associd_t, unsigned int, + const struct iovec *, unsigned int, 
size_t *, sae_connid_t *); + +__API_AVAILABLE(macosx(10.11), ios(9.0), tvos(9.0), watchos(2.0)) +int disconnectx(int, sae_associd_t, sae_connid_t); +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +__END_DECLS +#endif /* !KERNEL */ + +#ifdef KERNEL +#include +#endif + +#endif /* !_SYS_SOCKET_H_ */ diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 0650a7bd8..b14bf6ba6 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -1,3 +1,8 @@ +#![allow(non_camel_case_types)] +#![allow(unreachable_pub)] +#[cfg(any(target_os = "macos", target_os = "ios"))] +include!(concat!(env!("OUT_DIR"), "/bindings.rs")); + #[cfg(not(any( target_os = "macos", target_os = "ios", @@ -24,6 +29,25 @@ use super::{ IO_ERROR_LOG_INTERVAL, }; +#[cfg(any(target_os = "macos", target_os = "ios"))] +#[allow(non_camel_case_types)] +type msghdr = msghdr_x; + +#[cfg(any(target_os = "macos", target_os = "ios"))] +impl From for libc::msghdr { + fn from(val: msghdr_x) -> Self { + Self { + msg_name: val.msg_name, + msg_namelen: val.msg_namelen, + msg_iov: val.msg_iov as _, + msg_iovlen: val.msg_iovlen, + msg_control: val.msg_control, + msg_controllen: val.msg_controllen, + msg_flags: val.msg_flags, + } + } +} + // Defined in netinet6/in6.h on OpenBSD, this is not yet exported by the libc crate // directly. See https://github.com/rust-lang/libc/issues/3704 for when we might be able to // rely on this from the libc crate. 
@@ -418,12 +442,40 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> Ok(msg_count as usize) } -#[cfg(any( - target_os = "macos", - target_os = "ios", - target_os = "openbsd", - target_os = "solaris", -))] +#[cfg(any(target_os = "macos", target_os = "ios"))] +fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { + let mut names = [MaybeUninit::::uninit(); BATCH_SIZE]; + let mut ctrls = [cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); BATCH_SIZE]; + let mut hdrs = unsafe { mem::zeroed::<[msghdr_x; BATCH_SIZE]>() }; + let max_msg_count = bufs.len().min(BATCH_SIZE); + for i in 0..max_msg_count { + prepare_recv(&mut bufs[i], &mut names[i], &mut ctrls[i], &mut hdrs[i]); + } + let msg_count = loop { + let n = unsafe { + recvmsg_x( + io.as_raw_fd(), + hdrs.as_mut_ptr(), + bufs.len().min(BATCH_SIZE) as _, + 0, + ) + }; + if n == -1 { + let e = io::Error::last_os_error(); + if e.kind() == io::ErrorKind::Interrupted { + continue; + } + return Err(e); + } + break n; + }; + for i in 0..(msg_count as usize) { + meta[i] = decode_recv(&names[i], &hdrs[i].into(), hdrs[i].msg_datalen as usize); + } + Ok(msg_count as usize) +} + +#[cfg(any(target_os = "openbsd", target_os = "solaris",))] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut name = MaybeUninit::::uninit(); let mut ctrl = cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); @@ -546,15 +598,19 @@ fn prepare_recv( buf: &mut IoSliceMut, name: &mut MaybeUninit, ctrl: &mut cmsg::Aligned>, - hdr: &mut libc::msghdr, + hdr: &mut msghdr, ) { hdr.msg_name = name.as_mut_ptr() as _; hdr.msg_namelen = mem::size_of::() as _; - hdr.msg_iov = buf as *mut IoSliceMut as *mut libc::iovec; + hdr.msg_iov = buf as *mut IoSliceMut as *mut iovec; hdr.msg_iovlen = 1; hdr.msg_control = ctrl.0.as_mut_ptr() as _; hdr.msg_controllen = CMSG_LEN as _; hdr.msg_flags = 0; + #[cfg(any(target_os = "macos", target_os = 
"ios"))] + { + hdr.msg_datalen = buf.len(); + } } fn decode_recv( @@ -654,13 +710,9 @@ fn decode_recv( } } -#[cfg(not(any(target_os = "macos", target_os = "ios")))] // Chosen somewhat arbitrarily; might benefit from additional tuning. pub(crate) const BATCH_SIZE: usize = 32; -#[cfg(any(target_os = "macos", target_os = "ios"))] -pub(crate) const BATCH_SIZE: usize = 1; - #[cfg(target_os = "linux")] mod gso { use super::*; From c36d95424221d4afaca75664e966e69c8289aaba Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Fri, 20 Sep 2024 13:29:47 +0300 Subject: [PATCH 02/28] `no_main` on non-Apple platforms --- quinn-udp/build.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/quinn-udp/build.rs b/quinn-udp/build.rs index 2ed3b68f5..5b1dcb333 100644 --- a/quinn-udp/build.rs +++ b/quinn-udp/build.rs @@ -1,9 +1,10 @@ -#![cfg(any(target_os = "macos", target_os = "ios"))] - -use std::env; -use std::path::PathBuf; +#![cfg_attr(not(any(target_os = "macos", target_os = "ios")), no_main)] +#[cfg(any(target_os = "macos", target_os = "ios"))] fn main() { + use std::env; + use std::path::PathBuf; + // Generate the bindings for Apple's private `recvmsg_x` from // https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/socket.h. 
let bindings = bindgen::Builder::default() From adf614df24317cba6d41698e67004a6f6610e78c Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Fri, 20 Sep 2024 13:32:21 +0300 Subject: [PATCH 03/28] Empty main on non-Apple platforms --- quinn-udp/build.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/quinn-udp/build.rs b/quinn-udp/build.rs index 5b1dcb333..95dca7903 100644 --- a/quinn-udp/build.rs +++ b/quinn-udp/build.rs @@ -1,5 +1,3 @@ -#![cfg_attr(not(any(target_os = "macos", target_os = "ios")), no_main)] - #[cfg(any(target_os = "macos", target_os = "ios"))] fn main() { use std::env; @@ -21,3 +19,6 @@ fn main() { .write_to_file(out_path.join("bindings.rs")) .expect("Couldn't write bindings!"); } + +#[cfg(not(any(target_os = "macos", target_os = "ios")))] +fn main() {} From c8dba515d3d6af18fae98db11cc4ea82163c682d Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Fri, 20 Sep 2024 13:35:11 +0300 Subject: [PATCH 04/28] Use `libc::msghdr` and `libc::iovec` on non-Apple platforms --- quinn-udp/src/unix.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index b14bf6ba6..08f31a34a 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -29,6 +29,11 @@ use super::{ IO_ERROR_LOG_INTERVAL, }; +#[cfg(not(any(target_os = "macos", target_os = "ios")))] +type msghdr = libc::msghdr; +#[cfg(not(any(target_os = "macos", target_os = "ios")))] +type iovec = libc::iovec; + #[cfg(any(target_os = "macos", target_os = "ios"))] #[allow(non_camel_case_types)] type msghdr = msghdr_x; From c642dbb803611ee0892202f7167c33cd86b8017b Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 08:50:13 +0300 Subject: [PATCH 05/28] Static bindings --- fuzz/fuzz_targets/packet.rs | 42 +- fuzz/fuzz_targets/streams.rs | 128 +-- quinn-udp/Cargo.toml | 3 - quinn-udp/build.rs | 24 - quinn-udp/src/bindings/socket.h | 1541 ------------------------------- quinn-udp/src/cmsg/unix.rs | 28 + quinn-udp/src/unix.rs | 81 
+- 7 files changed, 163 insertions(+), 1684 deletions(-) delete mode 100644 quinn-udp/build.rs delete mode 100644 quinn-udp/src/bindings/socket.h diff --git a/fuzz/fuzz_targets/packet.rs b/fuzz/fuzz_targets/packet.rs index a8320a87a..4f227a877 100644 --- a/fuzz/fuzz_targets/packet.rs +++ b/fuzz/fuzz_targets/packet.rs @@ -1,25 +1,25 @@ #![no_main] -extern crate proto; +// extern crate proto; -use libfuzzer_sys::fuzz_target; -use proto::{ - fuzzing::{PacketParams, PartialDecode}, - FixedLengthConnectionIdParser, DEFAULT_SUPPORTED_VERSIONS, -}; +// use libfuzzer_sys::fuzz_target; +// use proto::{ +// fuzzing::{PacketParams, PartialDecode}, +// FixedLengthConnectionIdParser, DEFAULT_SUPPORTED_VERSIONS, +// }; -fuzz_target!(|data: PacketParams| { - let len = data.buf.len(); - let supported_versions = DEFAULT_SUPPORTED_VERSIONS.to_vec(); - if let Ok(decoded) = PartialDecode::new( - data.buf, - &FixedLengthConnectionIdParser::new(data.local_cid_len), - &supported_versions, - data.grease_quic_bit, - ) { - match decoded.1 { - Some(x) => assert_eq!(len, decoded.0.len() + x.len()), - None => assert_eq!(len, decoded.0.len()), - } - } -}); +// fuzz_target!(|data: PacketParams| { +// let len = data.buf.len(); +// let supported_versions = DEFAULT_SUPPORTED_VERSIONS.to_vec(); +// if let Ok(decoded) = PartialDecode::new( +// data.buf, +// &FixedLengthConnectionIdParser::new(data.local_cid_len), +// &supported_versions, +// data.grease_quic_bit, +// ) { +// match decoded.1 { +// Some(x) => assert_eq!(len, decoded.0.len() + x.len()), +// None => assert_eq!(len, decoded.0.len()), +// } +// } +// }); diff --git a/fuzz/fuzz_targets/streams.rs b/fuzz/fuzz_targets/streams.rs index 340078e5f..34fa6e3ad 100644 --- a/fuzz/fuzz_targets/streams.rs +++ b/fuzz/fuzz_targets/streams.rs @@ -1,71 +1,71 @@ #![no_main] -use arbitrary::Arbitrary; -use libfuzzer_sys::fuzz_target; +// use arbitrary::Arbitrary; +// use libfuzzer_sys::fuzz_target; -extern crate proto; -use proto::fuzzing::{ConnectionState, 
ResetStream, Retransmits, StreamsState}; -use proto::{Dir, Side, StreamId, VarInt}; -use proto::{SendStream, Streams}; +// extern crate proto; +// use proto::fuzzing::{ConnectionState, ResetStream, Retransmits, StreamsState}; +// use proto::{Dir, Side, StreamId, VarInt}; +// use proto::{SendStream, Streams}; -#[derive(Arbitrary, Debug)] -struct StreamParams { - side: Side, - max_remote_uni: u16, - max_remote_bi: u16, - send_window: u16, - receive_window: u16, - stream_receive_window: u16, - dir: Dir, -} +// #[derive(Arbitrary, Debug)] +// struct StreamParams { +// side: Side, +// max_remote_uni: u16, +// max_remote_bi: u16, +// send_window: u16, +// receive_window: u16, +// stream_receive_window: u16, +// dir: Dir, +// } -#[derive(Arbitrary, Debug)] -enum Operation { - Open, - Accept(Dir), - Finish(StreamId), - ReceivedStopSending(StreamId, VarInt), - ReceivedReset(ResetStream), - Reset(StreamId), -} +// #[derive(Arbitrary, Debug)] +// enum Operation { +// Open, +// Accept(Dir), +// Finish(StreamId), +// ReceivedStopSending(StreamId, VarInt), +// ReceivedReset(ResetStream), +// Reset(StreamId), +// } -fuzz_target!(|input: (StreamParams, Vec)| { - let (params, operations) = input; - let (mut pending, conn_state) = (Retransmits::default(), ConnectionState::Established); - let mut state = StreamsState::new( - params.side, - params.max_remote_uni.into(), - params.max_remote_bi.into(), - params.send_window.into(), - params.receive_window.into(), - params.stream_receive_window.into(), - ); +// fuzz_target!(|input: (StreamParams, Vec)| { +// let (params, operations) = input; +// let (mut pending, conn_state) = (Retransmits::default(), ConnectionState::Established); +// let mut state = StreamsState::new( +// params.side, +// params.max_remote_uni.into(), +// params.max_remote_bi.into(), +// params.send_window.into(), +// params.receive_window.into(), +// params.stream_receive_window.into(), +// ); - for operation in operations { - match operation { - Operation::Open => { - 
Streams::new(&mut state, &conn_state).open(params.dir); - } - Operation::Accept(dir) => { - Streams::new(&mut state, &conn_state).accept(dir); - } - Operation::Finish(id) => { - let _ = SendStream::new(id, &mut state, &mut pending, &conn_state).finish(); - } - Operation::ReceivedStopSending(sid, err_code) => { - Streams::new(&mut state, &conn_state) - .state() - .received_stop_sending(sid, err_code); - } - Operation::ReceivedReset(rs) => { - let _ = Streams::new(&mut state, &conn_state) - .state() - .received_reset(rs); - } - Operation::Reset(id) => { - let _ = - SendStream::new(id, &mut state, &mut pending, &conn_state).reset(0u32.into()); - } - } - } -}); +// for operation in operations { +// match operation { +// Operation::Open => { +// Streams::new(&mut state, &conn_state).open(params.dir); +// } +// Operation::Accept(dir) => { +// Streams::new(&mut state, &conn_state).accept(dir); +// } +// Operation::Finish(id) => { +// let _ = SendStream::new(id, &mut state, &mut pending, &conn_state).finish(); +// } +// Operation::ReceivedStopSending(sid, err_code) => { +// Streams::new(&mut state, &conn_state) +// .state() +// .received_stop_sending(sid, err_code); +// } +// Operation::ReceivedReset(rs) => { +// let _ = Streams::new(&mut state, &conn_state) +// .state() +// .received_reset(rs); +// } +// Operation::Reset(id) => { +// let _ = +// SendStream::new(id, &mut state, &mut pending, &conn_state).reset(0u32.into()); +// } +// } +// } +// }); diff --git a/quinn-udp/Cargo.toml b/quinn-udp/Cargo.toml index 44e1de3e3..257ee0015 100644 --- a/quinn-udp/Cargo.toml +++ b/quinn-udp/Cargo.toml @@ -29,9 +29,6 @@ tracing = { workspace = true, optional = true } once_cell = { workspace = true } windows-sys = { workspace = true } -[target.'cfg(any(target_os = "macos", target_os = "ios"))'.build-dependencies] -bindgen = "0.70.1" - [dev-dependencies] criterion = "0.5" diff --git a/quinn-udp/build.rs b/quinn-udp/build.rs deleted file mode 100644 index 95dca7903..000000000 --- 
a/quinn-udp/build.rs +++ /dev/null @@ -1,24 +0,0 @@ -#[cfg(any(target_os = "macos", target_os = "ios"))] -fn main() { - use std::env; - use std::path::PathBuf; - - // Generate the bindings for Apple's private `recvmsg_x` from - // https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/socket.h. - let bindings = bindgen::Builder::default() - .clang_arg("-DPRIVATE=1") - .allowlist_function("recvmsg_x") // TODO: sendmsg_x - .no_copy("iovec") // msghdr_x - .header("src/bindings/socket.h") - .generate() - .expect("Unable to generate bindings"); - - // Write the bindings to the $OUT_DIR/bindings.rs file. - let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); - bindings - .write_to_file(out_path.join("bindings.rs")) - .expect("Couldn't write bindings!"); -} - -#[cfg(not(any(target_os = "macos", target_os = "ios")))] -fn main() {} diff --git a/quinn-udp/src/bindings/socket.h b/quinn-udp/src/bindings/socket.h deleted file mode 100644 index 6d4f416f1..000000000 --- a/quinn-udp/src/bindings/socket.h +++ /dev/null @@ -1,1541 +0,0 @@ -/* - * Copyright (c) 2000-2022 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)socket.h 8.4 (Berkeley) 2/21/94 - * $FreeBSD: src/sys/sys/socket.h,v 1.39.2.7 2001/07/03 11:02:01 ume Exp $ - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ - -#ifndef _SYS_SOCKET_H_ -#define _SYS_SOCKET_H_ - -#include -#include -#include -#include -#include - -#ifdef PRIVATE -#include -#include -#endif /* PRIVATE */ - -#ifdef XNU_KERNEL_PRIVATE -#include -#include -#endif /* XNU_KERNEL_PRIVATE */ - -#ifndef KERNEL -#include -#endif - -/* - * Definitions related to sockets: types, address families, options. - */ - -/* - * Data types. - */ - -#include -#include -#include -#include -#include - -/* XXX Not explicitly defined by POSIX, but function return types are */ -#include - -/* XXX Not explicitly defined by POSIX, but function return types are */ -#include - -/* - * [XSI] The iovec structure shall be defined as described in . 
- */ -#include - -/* - * Types - */ -#define SOCK_STREAM 1 /* stream socket */ -#define SOCK_DGRAM 2 /* datagram socket */ -#define SOCK_RAW 3 /* raw-protocol interface */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define SOCK_RDM 4 /* reliably-delivered message */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ -#define SOCK_SEQPACKET 5 /* sequenced packet stream */ - -/* - * Option flags per-socket. - */ -#define SO_DEBUG 0x0001 /* turn on debugging info recording */ -#define SO_ACCEPTCONN 0x0002 /* socket has had listen() */ -#define SO_REUSEADDR 0x0004 /* allow local address reuse */ -#define SO_KEEPALIVE 0x0008 /* keep connections alive */ -#define SO_DONTROUTE 0x0010 /* just use interface addresses */ -#define SO_BROADCAST 0x0020 /* permit sending of broadcast msgs */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define SO_USELOOPBACK 0x0040 /* bypass hardware when possible */ -#define SO_LINGER 0x0080 /* linger on close if data present (in ticks) */ -#define SO_LINGER_SEC 0x1080 /* linger on close if data present (in seconds) */ -#else -#define SO_LINGER 0x1080 /* linger on close if data present (in seconds) */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ -#define SO_OOBINLINE 0x0100 /* leave received OOB data in line */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define SO_REUSEPORT 0x0200 /* allow local address & port reuse */ -#define SO_TIMESTAMP 0x0400 /* timestamp received dgram traffic */ -#define SO_TIMESTAMP_MONOTONIC 0x0800 /* Monotonically increasing timestamp on rcvd dgram */ -#ifndef __APPLE__ -#define SO_ACCEPTFILTER 0x1000 /* there is an accept filter */ -#else -#define SO_DONTTRUNC 0x2000 /* APPLE: Retain unread data */ - /* (ATOMIC proto) */ -#define SO_WANTMORE 0x4000 /* APPLE: Give hint when more data ready */ -#define SO_WANTOOBFLAG 0x8000 /* APPLE: Want OOB in MSG_FLAG on receive */ - -#ifdef PRIVATE -#define SO_NOWAKEFROMSLEEP 0x10000 /* Don't wake for traffic to this 
socket */ -#define SO_NOAPNFALLBK 0x20000 /* Don't attempt APN fallback for the socket */ -#define SO_TIMESTAMP_CONTINUOUS 0x40000 /* Continuous monotonic timestamp on rcvd dgram */ -#endif - -#endif /* (!__APPLE__) */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -/* - * Additional options, not kept in so_options. - */ -#define SO_SNDBUF 0x1001 /* send buffer size */ -#define SO_RCVBUF 0x1002 /* receive buffer size */ -#define SO_SNDLOWAT 0x1003 /* send low-water mark */ -#define SO_RCVLOWAT 0x1004 /* receive low-water mark */ -#define SO_SNDTIMEO 0x1005 /* send timeout */ -#define SO_RCVTIMEO 0x1006 /* receive timeout */ -#define SO_ERROR 0x1007 /* get error status and clear */ -#define SO_TYPE 0x1008 /* get socket type */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define SO_LABEL 0x1010 /* deprecated */ -#define SO_PEERLABEL 0x1011 /* deprecated */ -#ifdef __APPLE__ -#define SO_NREAD 0x1020 /* APPLE: get 1st-packet byte count */ -#define SO_NKE 0x1021 /* APPLE: Install socket-level NKE */ -#define SO_NOSIGPIPE 0x1022 /* APPLE: No SIGPIPE on EPIPE */ -#define SO_NOADDRERR 0x1023 /* APPLE: Returns EADDRNOTAVAIL when src is not available anymore */ -#define SO_NWRITE 0x1024 /* APPLE: Get number of bytes currently in send socket buffer */ -#define SO_REUSESHAREUID 0x1025 /* APPLE: Allow reuse of port/socket by different userids */ -#ifdef __APPLE_API_PRIVATE -#define SO_NOTIFYCONFLICT 0x1026 /* APPLE: send notification if there is a bind on a port which is already in use */ -#define SO_UPCALLCLOSEWAIT 0x1027 /* APPLE: block on close until an upcall returns */ -#endif -#ifdef PRIVATE -#define SO_RESTRICTIONS 0x1081 /* APPLE: deny flag set */ -#define SO_RESTRICT_DENY_IN 0x1 /* deny inbound (trapdoor) */ -#define SO_RESTRICT_DENY_OUT 0x2 /* deny outbound (trapdoor) */ -#define SO_RESTRICT_DENY_CELLULAR 0x4 /* deny use of cellular (trapdoor) */ -#define SO_RESTRICT_DENY_EXPENSIVE 0x8 /* deny use of expensive if (trapdoor) */ -#define 
SO_RESTRICT_DENY_CONSTRAINED 0x10 /* deny use of expensive if (trapdoor) */ -#endif /* PRIVATE */ -#define SO_RANDOMPORT 0x1082 /* APPLE: request local port randomization */ -#define SO_NP_EXTENSIONS 0x1083 /* To turn off some POSIX behavior */ -#endif - -#ifdef PRIVATE -#define SO_EXECPATH 0x1085 /* Application Firewall Socket option */ - -/* - * Traffic service class definitions (lowest to highest): - * - * SO_TC_BK_SYS - * "Background System-Initiated", high delay tolerant, high loss - * tolerant, elastic flow, variable size & long-lived. E.g: system- - * initiated iCloud synching or Time Capsule backup, for which there - * is no progress feedbacks. - * - * SO_TC_BK - * "Background", user-initiated, high delay tolerant, high loss tolerant, - * elastic flow, variable size. E.g. user-initiated iCloud synching or - * Time Capsule backup; or traffics of background applications, for which - * there is some progress feedbacks. - * - * SO_TC_BE - * "Best Effort", unclassified/standard. This is the default service - * class; pretty much a mix of everything. - * - * SO_TC_RD - * "Responsive Data", a notch higher than "Best Effort", medium delay - * tolerant, elastic & inelastic flow, bursty, long-lived. E.g. email, - * instant messaging, for which there is a sense of interactivity and - * urgency (user waiting for output). - * - * SO_TC_OAM - * "Operations, Administration, and Management", medium delay tolerant, - * low-medium loss tolerant, elastic & inelastic flows, variable size. - * E.g. VPN tunnels. - * - * SO_TC_AV - * "Multimedia Audio/Video Streaming", medium delay tolerant, low-medium - * loss tolerant, elastic flow, constant packet interval, variable rate & - * size. E.g. AirPlay playback (both video and audio). - * - * SO_TC_RV - * "Responsive Multimedia Audio/Video", low delay tolerant, low-medium - * loss tolerant, elastic flow, variable packet interval, rate and size. - * E.g. AirPlay mirroring, screen sharing. 
- * - * SO_TC_VI - * "Interactive Video", low delay tolerant, low-medium loss tolerant, - * elastic flow, constant packet interval, variable rate & size. E.g. - * FaceTime video. - * - * SO_TC_VO - * "Interactive Voice", low delay tolerant, low loss tolerant, inelastic - * flow, constant packet rate, somewhat fixed size. E.g. VoIP including - * FaceTime audio. - * - * SO_TC_CTL - * "Network Control", low delay tolerant, low loss tolerant, inelastic - * flow, rate is bursty but short, variable size. E.g. DNS queries; - * certain types of locally-originated ICMP, ICMPv6; IGMP/MLD join/leave, - * ARP. - */ -#define SO_TRAFFIC_CLASS 0x1086 /* Traffic service class (int) */ -#define SO_TC_BK_SYS 100 /* lowest class */ -#define SO_TC_BK 200 -#define SO_TC_BE 0 -#define SO_TC_RD 300 -#define SO_TC_OAM 400 -#define SO_TC_AV 500 -#define SO_TC_RV 600 -#define SO_TC_VI 700 -#define SO_TC_VO 800 -#define SO_TC_CTL 900 /* highest class */ -#define SO_TC_MAX 10 /* Total # of traffic classes */ -#ifdef XNU_KERNEL_PRIVATE -#define _SO_TC_BK 1 /* deprecated */ -#define _SO_TC_VI 2 /* deprecated */ -#define _SO_TC_VO 3 /* deprecated */ -#define _SO_TC_MAX 4 /* deprecated */ - -#define SO_VALID_TC(c) \ - (c == SO_TC_BK_SYS || c == SO_TC_BK || c == SO_TC_BE || \ - c == SO_TC_RD || c == SO_TC_OAM || c == SO_TC_AV || \ - c == SO_TC_RV || c == SO_TC_VI || c == SO_TC_VO || \ - c == SO_TC_CTL || c == SO_TC_NETSVC_SIG) - -#define SO_TC_UNSPEC ((int)-1) /* Traffic class not specified */ - -#define SO_TC_SIG SO_TC_VI /* to be removed XXX */ - -#define SOTCIX_BK_SYS 0 -#define SOTCIX_BK 1 -#define SOTCIX_BE 2 -#define SOTCIX_RD 3 -#define SOTCIX_OAM 4 -#define SOTCIX_AV 5 -#define SOTCIX_RV 6 -#define SOTCIX_VI 7 -#define SOTCIX_VO 8 -#define SOTCIX_CTL 9 -#endif /* XNU_KERNEL_PRIVATE */ - -/* Background socket configuration flags */ -#define TRAFFIC_MGT_SO_BACKGROUND 0x0001 /* background socket */ -#define TRAFFIC_MGT_TCP_RECVBG 0x0002 /* Only TCP sockets, receiver throttling */ - -#define 
SO_RECV_TRAFFIC_CLASS 0x1087 /* Receive traffic class (bool) */ -#define SO_TRAFFIC_CLASS_DBG 0x1088 /* Debug traffic class (struct so_tcdbg) */ -#define SO_OPTION_UNUSED_0 0x1089 /* Traffic class statistics */ -#define SO_PRIVILEGED_TRAFFIC_CLASS 0x1090 /* Privileged traffic class (bool) */ -#define SO_DEFUNCTIT 0x1091 /* Defunct a socket (only in internal builds) */ -#define SO_DEFUNCTOK 0x1100 /* can be defunct'd */ -#define SO_ISDEFUNCT 0x1101 /* get defunct status */ - -#define SO_OPPORTUNISTIC 0x1102 /* deprecated; use SO_TRAFFIC_CLASS */ - -/* - * SO_FLUSH flushes any unsent data generated by a given socket. It takes - * an integer parameter, which can be any of the SO_TC traffic class values, - * or the special SO_TC_ALL value. - */ -#define SO_FLUSH 0x1103 /* flush unsent data (int) */ -#define SO_TC_ALL (-1) - -#define SO_RECV_ANYIF 0x1104 /* unrestricted inbound processing */ -#define SO_TRAFFIC_MGT_BACKGROUND 0x1105 /* Background traffic management */ - -#define SO_FLOW_DIVERT_TOKEN 0x1106 /* flow divert token */ - -#define SO_DELEGATED 0x1107 /* set socket as delegate (pid_t) */ -#define SO_DELEGATED_UUID 0x1108 /* set socket as delegate (uuid_t) */ -#define SO_NECP_ATTRIBUTES 0x1109 /* NECP socket attributes (domain, account, etc.) 
*/ -#define SO_CFIL_SOCK_ID 0x1110 /* get content filter socket ID (cfil_sock_id_t) */ -#define SO_NECP_CLIENTUUID 0x1111 /* NECP Client uuid */ -#endif /* PRIVATE */ -#define SO_NUMRCVPKT 0x1112 /* number of datagrams in receive socket buffer */ -#ifdef PRIVATE -#define SO_AWDL_UNRESTRICTED 0x1113 /* try to use AWDL in restricted mode */ -#define SO_EXTENDED_BK_IDLE 0x1114 /* extended time to keep socket idle after app is suspended (int) */ -#define SO_MARK_CELLFALLBACK 0x1115 /* Mark as initiated by cell fallback */ -#endif /* PRIVATE */ -#define SO_NET_SERVICE_TYPE 0x1116 /* Network service type */ - -#ifdef PRIVATE -#define SO_QOSMARKING_POLICY_OVERRIDE 0x1117 /* int */ -#define SO_INTCOPROC_ALLOW 0x1118 /* Try to use internal co-processor interfaces. */ -#endif /* PRIVATE */ - -#define SO_NETSVC_MARKING_LEVEL 0x1119 /* Get QoS marking in effect for socket */ - -#ifdef PRIVATE -#define SO_NECP_LISTENUUID 0x1120 /* NECP client UUID for listener */ -#define SO_MPKL_SEND_INFO 0x1122 /* (struct so_mpkl_send_info) */ -#define SO_STATISTICS_EVENT 0x1123 /* int64 argument, an event in statistics collection */ -#define SO_WANT_KEV_SOCKET_CLOSED 0x1124 /* want delivery of KEV_SOCKET_CLOSED (int) */ -#define SO_MARK_KNOWN_TRACKER 0x1125 /* Mark as a connection to a known tracker */ -#define SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED 0x1126 /* Mark tracker connection to be non-app initiated */ -#define SO_MARK_WAKE_PKT 0x1127 /* Mark next packet as a wake packet, one shot (int) */ -#define SO_RECV_WAKE_PKT 0x1128 /* Receive wake packet indication as ancillary data (int) */ -#define SO_MARK_APPROVED_APP_DOMAIN 0x1129 /* Mark connection as being for an approved associated app domain */ -#define SO_FALLBACK_MODE 0x1130 /* Indicates the mode of fallback used */ -#endif /* PRIVATE */ - -#define SO_RESOLVER_SIGNATURE 0x1131 /* A signed data blob from the system resolver */ -#ifdef PRIVATE -#define SO_MARK_CELLFALLBACK_UUID 0x1132 /* Mark as initiated by cell fallback using UUID of 
the connection */ -#define SO_APPLICATION_ID 0x1133 /* ID of attributing app - so_application_id_t */ - -struct so_mark_cellfallback_uuid_args { - uuid_t flow_uuid; - int flow_cellfallback; -}; - -typedef struct { - uid_t uid; - uuid_t effective_uuid; - uid_t persona_id; -} so_application_id_t; - -#endif - - -/* When adding new socket-options, you need to make sure MPTCP supports these as well! */ - -/* - * Network Service Type for option SO_NET_SERVICE_TYPE - * - * The vast majority of sockets should use Best Effort that is the default - * Network Service Type. Other Network Service Types have to be used only if - * the traffic actually matches the description of the Network Service Type. - * - * Network Service Types do not represent priorities but rather describe - * different categories of delay, jitter and loss parameters. - * Those parameters may influence protocols from layer 4 protocols like TCP - * to layer 2 protocols like Wi-Fi. The Network Service Type can determine - * how the traffic is queued and scheduled by the host networking stack and - * by other entities on the network like switches and routers. For example - * for Wi-Fi, the Network Service Type can select the marking of the - * layer 2 packet with the appropriate WMM Access Category. - * - * There is no point in attempting to game the system and use - * a Network Service Type that does not correspond to the actual - * traffic characteristic but one that seems to have a higher precedence. - * The reason is that for service classes that have lower tolerance - * for delay and jitter, the queues size is lower than for service - * classes that are more tolerant to delay and jitter. - * - * For example using a voice service type for bulk data transfer will lead - * to disastrous results as soon as congestion happens because the voice - * queue overflows and packets get dropped. 
This is not only bad for the bulk - * data transfer but it is also bad for VoIP apps that legitimately are using - * the voice service type. - * - * The characteristics of the Network Service Types are based on the service - * classes defined in RFC 4594 "Configuration Guidelines for DiffServ Service - * Classes" - * - * When system detects the outgoing interface belongs to a DiffServ domain - * that follows the recommendation of the IETF draft "Guidelines for DiffServ to - * IEEE 802.11 Mapping", the packet will marked at layer 3 with a DSCP value - * that corresponds to Network Service Type. - * - * NET_SERVICE_TYPE_BE - * "Best Effort", unclassified/standard. This is the default service - * class and cover the majority of the traffic. - * - * NET_SERVICE_TYPE_BK - * "Background", high delay tolerant, loss tolerant. elastic flow, - * variable size & long-lived. E.g: non-interactive network bulk transfer - * like synching or backup. - * - * NET_SERVICE_TYPE_RD - * "Responsive Data", a notch higher than "Best Effort", medium delay - * tolerant, elastic & inelastic flow, bursty, long-lived. E.g. email, - * instant messaging, for which there is a sense of interactivity and - * urgency (user waiting for output). - * - * NET_SERVICE_TYPE_OAM - * "Operations, Administration, and Management", medium delay tolerant, - * low-medium loss tolerant, elastic & inelastic flows, variable size. - * E.g. VPN tunnels. - * - * NET_SERVICE_TYPE_AV - * "Multimedia Audio/Video Streaming", medium delay tolerant, low-medium - * loss tolerant, elastic flow, constant packet interval, variable rate - * and size. E.g. video and audio playback with buffering. - * - * NET_SERVICE_TYPE_RV - * "Responsive Multimedia Audio/Video", low delay tolerant, low-medium - * loss tolerant, elastic flow, variable packet interval, rate and size. - * E.g. screen sharing. 
- * - * NET_SERVICE_TYPE_VI - * "Interactive Video", low delay tolerant, low-medium loss tolerant, - * elastic flow, constant packet interval, variable rate & size. E.g. - * video telephony. - * - * NET_SERVICE_TYPE_SIG - * "Signaling", low delay tolerant, low loss tolerant, inelastic flow, - * jitter tolerant, rate is bursty but short, variable size. E.g. SIP. - * - * NET_SERVICE_TYPE_VO - * "Interactive Voice", very low delay tolerant, very low loss tolerant, - * inelastic flow, constant packet rate, somewhat fixed size. - * E.g. VoIP. - */ - -#define NET_SERVICE_TYPE_BE 0 /* Best effort */ -#define NET_SERVICE_TYPE_BK 1 /* Background system initiated */ -#define NET_SERVICE_TYPE_SIG 2 /* Signaling */ -#define NET_SERVICE_TYPE_VI 3 /* Interactive Video */ -#define NET_SERVICE_TYPE_VO 4 /* Interactive Voice */ -#define NET_SERVICE_TYPE_RV 5 /* Responsive Multimedia Audio/Video */ -#define NET_SERVICE_TYPE_AV 6 /* Multimedia Audio/Video Streaming */ -#define NET_SERVICE_TYPE_OAM 7 /* Operations, Administration, and Management */ -#define NET_SERVICE_TYPE_RD 8 /* Responsive Data */ - -#if PRIVATE -#define _NET_SERVICE_TYPE_COUNT 9 -#define _NET_SERVICE_TYPE_UNSPEC ((int)-1) - -#define IS_VALID_NET_SERVICE_TYPE(c) \ - (c >= NET_SERVICE_TYPE_BE && c <= NET_SERVICE_TYPE_RD) - -extern const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT]; - -/* - * Facility to pass Network Service Type values using SO_TRAFFIC_CLASS - * Mostly useful to simplify implementation of frameworks to adopt the new - * Network Service Type values for Signaling. 
- */ -#define SO_TC_NET_SERVICE_OFFSET 10000 -#define SO_TC_NETSVC_SIG (SO_TC_NET_SERVICE_OFFSET + NET_SERVICE_TYPE_SIG) -#endif /* PRIVATE */ - -/* These are supported values for SO_NETSVC_MARKING_LEVEL */ -#define NETSVC_MRKNG_UNKNOWN 0 /* The outgoing network interface is not known */ -#define NETSVC_MRKNG_LVL_L2 1 /* Default marking at layer 2 (for example Wi-Fi WMM) */ -#define NETSVC_MRKNG_LVL_L3L2_ALL 2 /* Layer 3 DSCP marking and layer 2 marking for all Network Service Types */ -#define NETSVC_MRKNG_LVL_L3L2_BK 3 /* The system policy limits layer 3 DSCP marking and layer 2 marking - * to background Network Service Types */ - - -typedef __uint32_t sae_associd_t; -#define SAE_ASSOCID_ANY 0 -#define SAE_ASSOCID_ALL ((sae_associd_t)(-1ULL)) - -typedef __uint32_t sae_connid_t; -#define SAE_CONNID_ANY 0 -#define SAE_CONNID_ALL ((sae_connid_t)(-1ULL)) - -/* connectx() flag parameters */ -#define CONNECT_RESUME_ON_READ_WRITE 0x1 /* resume connect() on read/write */ -#define CONNECT_DATA_IDEMPOTENT 0x2 /* data is idempotent */ -#define CONNECT_DATA_AUTHENTICATED 0x4 /* data includes security that replaces the TFO-cookie */ - -/* sockaddr endpoints */ -typedef struct sa_endpoints { - unsigned int sae_srcif; /* optional source interface */ - const struct sockaddr *sae_srcaddr; /* optional source address */ - socklen_t sae_srcaddrlen; /* size of source address */ - const struct sockaddr *sae_dstaddr; /* destination address */ - socklen_t sae_dstaddrlen; /* size of destination address */ -} sa_endpoints_t; -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -/* - * Structure used for manipulating linger option. 
- */ -struct linger { - int l_onoff; /* option on/off */ - int l_linger; /* linger time */ -}; - -#ifndef __APPLE__ -struct accept_filter_arg { - char af_name[16]; - char af_arg[256 - 16]; -}; -#endif - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifdef __APPLE__ - -/* - * Structure to control non-portable Sockets extension to POSIX - */ -struct so_np_extensions { - u_int32_t npx_flags; - u_int32_t npx_mask; -}; - -#define SONPX_SETOPTSHUT 0x000000001 /* flag for allowing setsockopt after shutdown */ - - -#ifdef KERNEL_PRIVATE -#define SONPX_MASK_VALID (SONPX_SETOPTSHUT) -#define IS_SO_TC_BACKGROUND(_tc_) ((_tc_) == SO_TC_BK || (_tc_) == SO_TC_BK_SYS) -#define IS_SO_TC_BACKGROUNDSYSTEM(_tc_) ((_tc_) == SO_TC_BK_SYS) -#endif /* KERNEL_PRIVATE */ - -#endif -#endif - -/* - * Level number for (get/set)sockopt() to apply to socket itself. - */ -#define SOL_SOCKET 0xffff /* options for socket level */ - - -/* - * Address families. - */ -#define AF_UNSPEC 0 /* unspecified */ -#define AF_UNIX 1 /* local to host (pipes) */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define AF_LOCAL AF_UNIX /* backward compatibility */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ -#define AF_INET 2 /* internetwork: UDP, TCP, etc. */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define AF_IMPLINK 3 /* arpanet imp addresses */ -#define AF_PUP 4 /* pup protocols: e.g. 
BSP */ -#define AF_CHAOS 5 /* mit CHAOS protocols */ -#define AF_NS 6 /* XEROX NS protocols */ -#define AF_ISO 7 /* ISO protocols */ -#define AF_OSI AF_ISO -#define AF_ECMA 8 /* European computer manufacturers */ -#define AF_DATAKIT 9 /* datakit protocols */ -#define AF_CCITT 10 /* CCITT protocols, X.25 etc */ -#define AF_SNA 11 /* IBM SNA */ -#define AF_DECnet 12 /* DECnet */ -#define AF_DLI 13 /* DEC Direct data link interface */ -#define AF_LAT 14 /* LAT */ -#define AF_HYLINK 15 /* NSC Hyperchannel */ -#define AF_APPLETALK 16 /* Apple Talk */ -#define AF_ROUTE 17 /* Internal Routing Protocol */ -#define AF_LINK 18 /* Link layer interface */ -#define pseudo_AF_XTP 19 /* eXpress Transfer Protocol (no AF) */ -#define AF_COIP 20 /* connection-oriented IP, aka ST II */ -#define AF_CNT 21 /* Computer Network Technology */ -#define pseudo_AF_RTIP 22 /* Help Identify RTIP packets */ -#define AF_IPX 23 /* Novell Internet Protocol */ -#define AF_SIP 24 /* Simple Internet Protocol */ -#define pseudo_AF_PIP 25 /* Help Identify PIP packets */ -#define AF_NDRV 27 /* Network Driver 'raw' access */ -#define AF_ISDN 28 /* Integrated Services Digital Network */ -#define AF_E164 AF_ISDN /* CCITT E.164 recommendation */ -#define pseudo_AF_KEY 29 /* Internal key-management function */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ -#define AF_INET6 30 /* IPv6 */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define AF_NATM 31 /* native ATM access */ -#define AF_SYSTEM 32 /* Kernel event messages */ -#define AF_NETBIOS 33 /* NetBIOS */ -#define AF_PPP 34 /* PPP communication protocol */ -#define pseudo_AF_HDRCMPLT 35 /* Used by BPF to not rewrite headers - * in interface output routine */ -#ifdef PRIVATE -#define AF_AFP 36 /* Used by AFP */ -#else -#define AF_RESERVED_36 36 /* Reserved for internal usage */ -#endif -#define AF_IEEE80211 37 /* IEEE 802.11 protocol */ -#define AF_UTUN 38 -#ifdef PRIVATE -#define AF_MULTIPATH 39 -#endif /* PRIVATE */ -#define 
AF_VSOCK 40 /* VM Sockets */ -#define AF_MAX 41 -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -/* - * [XSI] Structure used by kernel to store most addresses. - */ -struct sockaddr { - __uint8_t sa_len; /* total length */ - sa_family_t sa_family; /* [XSI] address family */ - char sa_data[14]; /* [XSI] addr value */ -}; -__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct sockaddr, sockaddr); - -/* - * Least amount of information that a sockaddr requires. - * Sockaddr_header is a compatible prefix structure of - * all sockaddr objects. - */ -struct __sockaddr_header { - __uint8_t sa_len; - sa_family_t sa_family; -}; - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define SOCK_MAXADDRLEN 255 /* longest possible addresses */ - -/* - * Structure used by kernel to pass protocol - * information in raw sockets. - */ -struct sockproto { - __uint16_t sp_family; /* address family */ - __uint16_t sp_protocol; /* protocol */ -}; -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -/* - * RFC 2553: protocol-independent placeholder for socket addresses - */ -#define _SS_MAXSIZE 128 -#define _SS_ALIGNSIZE (sizeof(__int64_t)) -#define _SS_PAD1SIZE \ - (_SS_ALIGNSIZE - sizeof(__uint8_t) - sizeof(sa_family_t)) -#define _SS_PAD2SIZE \ - (_SS_MAXSIZE - sizeof(__uint8_t) - sizeof(sa_family_t) - \ - _SS_PAD1SIZE - _SS_ALIGNSIZE) - -/* - * [XSI] sockaddr_storage - */ -struct sockaddr_storage { - __uint8_t ss_len; /* address length */ - sa_family_t ss_family; /* [XSI] address family */ - char __ss_pad1[_SS_PAD1SIZE]; - __int64_t __ss_align; /* force structure storage alignment */ - char __ss_pad2[_SS_PAD2SIZE]; -}; -__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct sockaddr_storage, sockaddr_storage); - -/* - * Protocol families, same as address families for now. 
- */ -#define PF_UNSPEC AF_UNSPEC -#define PF_LOCAL AF_LOCAL -#define PF_UNIX PF_LOCAL /* backward compatibility */ -#define PF_INET AF_INET -#define PF_IMPLINK AF_IMPLINK -#define PF_PUP AF_PUP -#define PF_CHAOS AF_CHAOS -#define PF_NS AF_NS -#define PF_ISO AF_ISO -#define PF_OSI AF_ISO -#define PF_ECMA AF_ECMA -#define PF_DATAKIT AF_DATAKIT -#define PF_CCITT AF_CCITT -#define PF_SNA AF_SNA -#define PF_DECnet AF_DECnet -#define PF_DLI AF_DLI -#define PF_LAT AF_LAT -#define PF_HYLINK AF_HYLINK -#define PF_APPLETALK AF_APPLETALK -#define PF_ROUTE AF_ROUTE -#define PF_LINK AF_LINK -#define PF_XTP pseudo_AF_XTP /* really just proto family, no AF */ -#define PF_COIP AF_COIP -#define PF_CNT AF_CNT -#define PF_SIP AF_SIP -#define PF_IPX AF_IPX /* same format as AF_NS */ -#define PF_RTIP pseudo_AF_RTIP /* same format as AF_INET */ -#define PF_PIP pseudo_AF_PIP -#define PF_NDRV AF_NDRV -#define PF_ISDN AF_ISDN -#define PF_KEY pseudo_AF_KEY -#define PF_INET6 AF_INET6 -#define PF_NATM AF_NATM -#define PF_SYSTEM AF_SYSTEM -#define PF_NETBIOS AF_NETBIOS -#define PF_PPP AF_PPP -#ifdef PRIVATE -#define PF_AFP AF_AFP -#else -#define PF_RESERVED_36 AF_RESERVED_36 -#endif -#define PF_UTUN AF_UTUN -#ifdef PRIVATE -#define PF_MULTIPATH AF_MULTIPATH -#endif /* PRIVATE */ -#define PF_VSOCK AF_VSOCK -#define PF_MAX AF_MAX - -/* - * These do not have socket-layer support: - */ -#define PF_VLAN ((uint32_t)0x766c616e) /* 'vlan' */ -#define PF_BOND ((uint32_t)0x626f6e64) /* 'bond' */ -#ifdef KERNEL_PRIVATE -#define PF_BRIDGE ((uint32_t)0x62726467) /* 'brdg' */ -#define PF_NULL ((uint32_t)0x6e756c6c) /* 'null' */ -#endif /* KERNEL_PRIVATE */ - -/* - * Definitions for network related sysctl, CTL_NET. - * - * Second level is protocol family. - * Third level is protocol number. - * - * Further levels are defined by the individual families below. 
- */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define NET_MAXID AF_MAX -#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ - -#ifdef KERNEL_PRIVATE -#define CTL_NET_NAMES { \ - { 0, 0 }, \ - { "local", CTLTYPE_NODE }, \ - { "inet", CTLTYPE_NODE }, \ - { "implink", CTLTYPE_NODE }, \ - { "pup", CTLTYPE_NODE }, \ - { "chaos", CTLTYPE_NODE }, \ - { "xerox_ns", CTLTYPE_NODE }, \ - { "iso", CTLTYPE_NODE }, \ - { "emca", CTLTYPE_NODE }, \ - { "datakit", CTLTYPE_NODE }, \ - { "ccitt", CTLTYPE_NODE }, \ - { "ibm_sna", CTLTYPE_NODE }, \ - { "decnet", CTLTYPE_NODE }, \ - { "dec_dli", CTLTYPE_NODE }, \ - { "lat", CTLTYPE_NODE }, \ - { "hylink", CTLTYPE_NODE }, \ - { "appletalk", CTLTYPE_NODE }, \ - { "route", CTLTYPE_NODE }, \ - { "link_layer", CTLTYPE_NODE }, \ - { "xtp", CTLTYPE_NODE }, \ - { "coip", CTLTYPE_NODE }, \ - { "cnt", CTLTYPE_NODE }, \ - { "rtip", CTLTYPE_NODE }, \ - { "ipx", CTLTYPE_NODE }, \ - { "sip", CTLTYPE_NODE }, \ - { "pip", CTLTYPE_NODE }, \ - { 0, 0 }, \ - { "ndrv", CTLTYPE_NODE }, \ - { "isdn", CTLTYPE_NODE }, \ - { "key", CTLTYPE_NODE }, \ - { "inet6", CTLTYPE_NODE }, \ - { "natm", CTLTYPE_NODE }, \ - { "sys", CTLTYPE_NODE }, \ - { "netbios", CTLTYPE_NODE }, \ - { "ppp", CTLTYPE_NODE }, \ - { "hdrcomplete", CTLTYPE_NODE }, \ - { "vsock", CTLTYPE_NODE }, \ -} -#endif /* KERNEL_PRIVATE */ - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -/* - * PF_ROUTE - Routing table - * - * Three additional levels are defined: - * Fourth: address family, 0 is wildcard - * Fifth: type of info, defined below - * Sixth: flag(s) to mask with for NET_RT_FLAGS - */ -#define NET_RT_DUMP 1 /* dump; may limit to a.f. */ -#define NET_RT_FLAGS 2 /* by flags, e.g. 
RESOLVING */ -#define NET_RT_IFLIST 3 /* survey interface list */ -#define NET_RT_STAT 4 /* routing statistics */ -#define NET_RT_TRASH 5 /* routes not in table but not freed */ -#define NET_RT_IFLIST2 6 /* interface list with addresses */ -#define NET_RT_DUMP2 7 /* dump; may limit to a.f. */ -#ifdef PRIVATE -#define NET_RT_DUMPX 8 /* private */ -#define NET_RT_DUMPX_FLAGS 9 /* private */ -#endif /* PRIVATE */ -/* - * Allows read access non-local host's MAC address - * if the process has neighbor cache entitlement. - */ -#define NET_RT_FLAGS_PRIV 10 -#define NET_RT_MAXID 11 -#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ - -#ifdef PRIVATE -/* These are supported values for SO_STATISTICS_EVENT */ -#define SO_STATISTICS_EVENT_ENTER_CELLFALLBACK (1 << 0) -#define SO_STATISTICS_EVENT_EXIT_CELLFALLBACK (1 << 1) -#define SO_STATISTICS_EVENT_ATTRIBUTION_CHANGE (1 << 2) -#define SO_STATISTICS_EVENT_RESERVED_2 (1 << 3) -#endif /* PRIVATE */ - - -#ifdef KERNEL_PRIVATE -#define CTL_NET_RT_NAMES { \ - { 0, 0 }, \ - { "dump", CTLTYPE_STRUCT }, \ - { "flags", CTLTYPE_STRUCT }, \ - { "iflist", CTLTYPE_STRUCT }, \ - { "stat", CTLTYPE_STRUCT }, \ - { "trash", CTLTYPE_INT }, \ - { "iflist2", CTLTYPE_STRUCT }, \ - { "dump2", CTLTYPE_STRUCT }, \ - { "dumpx", CTLTYPE_STRUCT }, \ - { "dumpx_flags", CTLTYPE_STRUCT }, \ -} - -#endif /* KERNEL_PRIVATE */ - -/* - * Maximum queue length specifiable by listen. - */ -#define SOMAXCONN 128 - -/* - * [XSI] Message header for recvmsg and sendmsg calls. - * Used value-result for recvmsg, value only for sendmsg. 
- */ -struct msghdr { - void *msg_name; /* [XSI] optional address */ - socklen_t msg_namelen; /* [XSI] size of address */ - struct iovec *msg_iov; /* [XSI] scatter/gather array */ - int msg_iovlen; /* [XSI] # elements in msg_iov */ - void *msg_control; /* [XSI] ancillary data, see below */ - socklen_t msg_controllen; /* [XSI] ancillary data buffer len */ - int msg_flags; /* [XSI] flags on received message */ -}; - -#ifdef PRIVATE -/* - * Extended version for sendmsg_x() and recvmsg_x() calls - * - * For recvmsg_x(), the size of the data received is given by the field - * msg_datalen. - * - * For sendmsg_x(), the size of the data to send is given by the length of - * the iovec array -- like sendmsg(). The field msg_datalen is ignored. - */ -struct msghdr_x { - void *msg_name; /* optional address */ - socklen_t msg_namelen; /* size of address */ - struct iovec *msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - void *msg_control; /* ancillary data, see below */ - socklen_t msg_controllen; /* ancillary data buffer len */ - int msg_flags; /* flags on received message */ - size_t msg_datalen; /* byte length of buffer in msg_iov */ -}; -#endif /* PRIVATE */ - -#ifdef XNU_KERNEL_PRIVATE -/* - * In-kernel representation of "struct msghdr" from - * userspace. Has enough precision for 32-bit or - * 64-bit clients, but does not need to be packed. - */ - -struct user_msghdr { - user_addr_t msg_name; /* optional address */ - socklen_t msg_namelen; /* size of address */ - vm_address_t msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - user_addr_t msg_control; /* ancillary data, see below */ - socklen_t msg_controllen; /* ancillary data buffer len */ - int msg_flags; /* flags on received message */ -}; -__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user_msghdr, user_msghdr); - -/* - * LP64 user version of struct msghdr. 
- * WARNING - keep in sync with struct msghdr - */ - -struct user64_msghdr { - user64_addr_t msg_name; /* optional address */ - socklen_t msg_namelen; /* size of address */ - user64_addr_t msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - user64_addr_t msg_control; /* ancillary data, see below */ - socklen_t msg_controllen; /* ancillary data buffer len */ - int msg_flags; /* flags on received message */ -}; -__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user64_msghdr, user64_msghdr); - -/* - * ILP32 user version of struct msghdr. - * WARNING - keep in sync with struct msghdr - */ - -struct user32_msghdr { - user32_addr_t msg_name; /* optional address */ - socklen_t msg_namelen; /* size of address */ - user32_addr_t msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - user32_addr_t msg_control; /* ancillary data, see below */ - socklen_t msg_controllen; /* ancillary data buffer len */ - int msg_flags; /* flags on received message */ -}; -__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user32_msghdr, user32_msghdr); - -/* - * In-kernel representation of "struct msghdr_x" from - * userspace. Has enough precision for 32-bit or - * 64-bit clients, but does not need to be packed. 
- */ - -struct user_msghdr_x { - user_addr_t msg_name; /* optional address */ - socklen_t msg_namelen; /* size of address */ - vm_address_t msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - user_addr_t msg_control; /* ancillary data, see below */ - socklen_t msg_controllen; /* ancillary data buffer len */ - int msg_flags; /* flags on received message */ - size_t msg_datalen; /* byte length of buffer in msg_iov */ -}; -__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user_msghdr_x, user_msghdr_x); - - -/* - * LP64 user version of struct msghdr_x - * WARNING - keep in sync with struct msghdr_x - */ - -struct user64_msghdr_x { - user64_addr_t msg_name; /* optional address */ - socklen_t msg_namelen; /* size of address */ - user64_addr_t msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - user64_addr_t msg_control; /* ancillary data, see below */ - socklen_t msg_controllen; /* ancillary data buffer len */ - int msg_flags; /* flags on received message */ - user64_size_t msg_datalen; /* byte length of buffer in msg_iov */ -}; -__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user64_msghdr_x, user64_msghdr_x); - -/* - * ILP32 user version of struct msghdr_x - * WARNING - keep in sync with struct msghdr_x - */ - -struct user32_msghdr_x { - user32_addr_t msg_name; /* optional address */ - socklen_t msg_namelen; /* size of address */ - user32_addr_t msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - user32_addr_t msg_control; /* ancillary data, see below */ - socklen_t msg_controllen; /* ancillary data buffer len */ - int msg_flags; /* flags on received message */ - user32_size_t msg_datalen; /* byte length of buffer in msg_iov */ -}; -__CCT_DECLARE_CONSTRAINED_PTR_TYPES(struct user32_msghdr_x, user32_msghdr_x); - -/* - * In-kernel representation of "struct sa_endpoints" from - * userspace. Has enough precision for 32-bit or - * 64-bit clients, but does not need to be packed. 
- */ - -struct user_sa_endpoints { - unsigned int sae_srcif; /* optional source interface */ - user_addr_t sae_srcaddr; /* optional source address */ - socklen_t sae_srcaddrlen; /* size of source address */ - user_addr_t sae_dstaddr; /* destination address */ - socklen_t sae_dstaddrlen; /* size of destination address */ -}; - -/* - * LP64 user version of struct sa_endpoints - * WARNING - keep in sync with struct sa_endpoints - */ - -struct user64_sa_endpoints { - unsigned int sae_srcif; /* optional source interface */ - user64_addr_t sae_srcaddr; /* optional source address */ - socklen_t sae_srcaddrlen; /* size of source address */ - user64_addr_t sae_dstaddr; /* destination address */ - socklen_t sae_dstaddrlen; /* size of destination address */ -}; - -/* - * ILP32 user version of struct sa_endpoints - * WARNING - keep in sync with struct sa_endpoints - */ - -struct user32_sa_endpoints { - unsigned int sae_srcif; /* optional source interface */ - user32_addr_t sae_srcaddr; /* optional source address */ - socklen_t sae_srcaddrlen; /* size of source address */ - user32_addr_t sae_dstaddr; /* destination address */ - socklen_t sae_dstaddrlen; /* size of destination address */ -}; - -#endif /* XNU_KERNEL_PRIVATE */ - -#define MSG_OOB 0x1 /* process out-of-band data */ -#define MSG_PEEK 0x2 /* peek at incoming message */ -#define MSG_DONTROUTE 0x4 /* send without using routing tables */ -#define MSG_EOR 0x8 /* data completes record */ -#define MSG_TRUNC 0x10 /* data discarded before delivery */ -#define MSG_CTRUNC 0x20 /* control data lost before delivery */ -#define MSG_WAITALL 0x40 /* wait for full request or error */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define MSG_DONTWAIT 0x80 /* this message should be nonblocking */ -#define MSG_EOF 0x100 /* data completes connection */ -#ifdef __APPLE__ -#ifndef PRIVATE -#ifdef __APPLE_API_OBSOLETE -#define MSG_WAITSTREAM 0x200 /* wait up to full request.. 
may return partial */ -#endif -#else -#define MSG_WAITSTREAM 0x200 /* wait up to full request.. may return partial */ -#endif -#define MSG_FLUSH 0x400 /* Start of 'hold' seq; dump so_temp, deprecated */ -#define MSG_HOLD 0x800 /* Hold frag in so_temp, deprecated */ -#define MSG_SEND 0x1000 /* Send the packet in so_temp, deprecated */ -#define MSG_HAVEMORE 0x2000 /* Data ready to be read */ -#define MSG_RCVMORE 0x4000 /* Data remains in current pkt */ -#endif -#ifdef KERNEL_PRIVATE -#define MSG_COMPAT 0x8000 /* deprecated */ -#endif /* KERNEL_PRIVATE */ -#define MSG_NEEDSA 0x10000 /* Fail receive if socket address cannot be allocated */ -#ifdef KERNEL_PRIVATE -#define MSG_NBIO 0x20000 /* FIONBIO mode, used by fifofs */ -#define MSG_SKIPCFIL 0x40000 /* skip pass content filter */ -#endif -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -#if __DARWIN_C_LEVEL >= 200809L -#define MSG_NOSIGNAL 0x80000 /* do not generate SIGPIPE on EOF */ -#endif /* __DARWIN_C_LEVEL */ - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifdef KERNEL -#define MSG_USEUPCALL 0x80000000 /* Inherit upcall in sock_accept */ -#endif -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -/* - * Header for ancillary data objects in msg_control buffer. - * Used for additional information with/about a datagram - * not expressible by flags. The format is a sequence - * of message elements headed by cmsghdr structures. - */ -struct cmsghdr { - socklen_t cmsg_len; /* [XSI] data byte count, including hdr */ - int cmsg_level; /* [XSI] originating protocol */ - int cmsg_type; /* [XSI] protocol-specific type */ -/* followed by unsigned char cmsg_data[]; */ -}; - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef __APPLE__ -/* - * While we may have more groups than this, the cmsgcred struct must - * be able to fit in an mbuf, and NGROUPS_MAX is too large to allow - * this. 
- */ -#define CMGROUP_MAX 16 - -/* - * Credentials structure, used to verify the identity of a peer - * process that has sent us a message. This is allocated by the - * peer process but filled in by the kernel. This prevents the - * peer from lying about its identity. (Note that cmcred_groups[0] - * is the effective GID.) - */ -struct cmsgcred { - pid_t cmcred_pid; /* PID of sending process */ - uid_t cmcred_uid; /* real UID of sending process */ - uid_t cmcred_euid; /* effective UID of sending process */ - gid_t cmcred_gid; /* real GID of sending process */ - short cmcred_ngroups; /* number or groups */ - gid_t cmcred_groups[CMGROUP_MAX]; /* groups */ -}; -#endif -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -/* given pointer to struct cmsghdr, return pointer to data */ -#define CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ - __DARWIN_ALIGN32(sizeof(struct cmsghdr))) - -/* - * RFC 2292 requires to check msg_controllen, in case that the kernel returns - * an empty list for some reasons. - */ -#define CMSG_FIRSTHDR(mhdr) \ - ((mhdr)->msg_controllen >= sizeof(struct cmsghdr) ? \ - (struct cmsghdr *)(mhdr)->msg_control : \ - (struct cmsghdr *)0L) - - -/* - * Given pointer to struct cmsghdr, return pointer to next cmsghdr - * RFC 2292 says that CMSG_NXTHDR(mhdr, NULL) is equivalent to CMSG_FIRSTHDR(mhdr) - */ -#define CMSG_NXTHDR(mhdr, cmsg) \ - ((char *)(cmsg) == (char *)0L ? CMSG_FIRSTHDR(mhdr) : \ - ((((unsigned char *)(cmsg) + \ - __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len) + \ - __DARWIN_ALIGN32(sizeof(struct cmsghdr))) > \ - ((unsigned char *)(mhdr)->msg_control + \ - (mhdr)->msg_controllen)) ? 
\ - (struct cmsghdr *)0L /* NULL */ : \ - (struct cmsghdr *)(void *)((unsigned char *)(cmsg) + \ - __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len)))) - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -/* RFC 2292 additions */ -#define CMSG_SPACE(l) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + __DARWIN_ALIGN32(l)) -#define CMSG_LEN(l) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + (l)) - -#ifdef KERNEL -#define CMSG_ALIGN(n) ((typeof(n))__DARWIN_ALIGN32(n)) -#endif -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -/* "Socket"-level control message types: */ -#define SCM_RIGHTS 0x01 /* access rights (array of int) */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define SCM_TIMESTAMP 0x02 /* timestamp (struct timeval) */ -#define SCM_CREDS 0x03 /* process creds (struct cmsgcred) */ -#define SCM_TIMESTAMP_MONOTONIC 0x04 /* timestamp (uint64_t) */ - -#ifdef PRIVATE -#define SCM_TIMESTAMP_CONTINUOUS 0x07 /* timestamp (uint64_t) */ -#define SCM_MPKL_SEND_INFO 0x08 /* send info for multi-layer packet logging (struct so_mpkl_send_info) */ -#define SCM_MPKL_RECV_INFO 0x09 /* receive info for multi-layer packet logging (struct so_mpkl_recv_info */ -#endif /* PRIVATE */ - -#ifdef KERNEL_PRIVATE -/* - * 4.3 compat sockaddr (deprecated) - */ -struct osockaddr { - __uint16_t sa_family; /* address family */ - char sa_data[14]; /* up to 14 bytes of direct address */ -}; - -/* - * 4.3-compat message header (deprecated) - */ -struct omsghdr { - void *msg_name; /* optional address */ - socklen_t msg_namelen; /* size of address */ - struct iovec *msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - void *msg_accrights; /* access rights sent/rcvd */ - int msg_accrightslen; -}; - -#define SA(s) ((struct sockaddr *)(void *)(s)) -#endif /* KERNEL_PRIVATE */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -/* - * howto arguments for shutdown(2), specified by Posix.1g. 
- */ -#define SHUT_RD 0 /* shut down the reading side */ -#define SHUT_WR 1 /* shut down the writing side */ -#define SHUT_RDWR 2 /* shut down both sides */ - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -/* - * sendfile(2) header/trailer struct - */ -struct sf_hdtr { - struct iovec *headers; /* pointer to an array of header struct iovec's */ - int hdr_cnt; /* number of header iovec's */ - struct iovec *trailers; /* pointer to an array of trailer struct iovec's */ - int trl_cnt; /* number of trailer iovec's */ -}; - -#ifdef KERNEL - -/* In-kernel representation */ -struct user_sf_hdtr { - user_addr_t headers; /* pointer to an array of header struct iovec's */ - int hdr_cnt; /* number of header iovec's */ - user_addr_t trailers; /* pointer to an array of trailer struct iovec's */ - int trl_cnt; /* number of trailer iovec's */ -}; - -/* LP64 user version of struct sf_hdtr */ -struct user64_sf_hdtr { - user64_addr_t headers; /* pointer to an array of header struct iovec's */ - int hdr_cnt; /* number of header iovec's */ - user64_addr_t trailers; /* pointer to an array of trailer struct iovec's */ - int trl_cnt; /* number of trailer iovec's */ -}; - -/* ILP32 user version of struct sf_hdtr */ -struct user32_sf_hdtr { - user32_addr_t headers; /* pointer to an array of header struct iovec's */ - int hdr_cnt; /* number of header iovec's */ - user32_addr_t trailers; /* pointer to an array of trailer struct iovec's */ - int trl_cnt; /* number of trailer iovec's */ -}; - -#endif /* KERNEL */ - -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - -#ifdef PRIVATE -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) - -/* - * Structure for SIOCGASSOCIDS - */ -struct so_aidreq { - __uint32_t sar_cnt; /* number of associations */ - sae_associd_t *sar_aidp; /* array of association IDs */ -}; - -#ifdef BSD_KERNEL_PRIVATE -struct so_aidreq32 { - __uint32_t sar_cnt; - user32_addr_t sar_aidp; -}; - -struct so_aidreq64 { - __uint32_t sar_cnt; - user64_addr_t 
sar_aidp __attribute__((aligned(8))); -}; -#endif /* BSD_KERNEL_PRIVATE */ - -/* - * Structure for SIOCGCONNIDS - */ -struct so_cidreq { - sae_associd_t scr_aid; /* association ID */ - __uint32_t scr_cnt; /* number of connections */ - sae_connid_t *scr_cidp; /* array of connection IDs */ -}; - -#ifdef BSD_KERNEL_PRIVATE -struct so_cidreq32 { - sae_associd_t scr_aid; - __uint32_t scr_cnt; - user32_addr_t scr_cidp; -}; - -struct so_cidreq64 { - sae_associd_t scr_aid; - __uint32_t scr_cnt; - user64_addr_t scr_cidp __attribute__((aligned(8))); -}; -#endif /* BSD_KERNEL_PRIVATE */ - -/* - * Structure for SIOCGCONNINFO - */ -struct so_cinforeq { - sae_connid_t scir_cid; /* connection ID */ - __uint32_t scir_flags; /* see flags below */ - __uint32_t scir_ifindex; /* (last) outbound interface */ - __int32_t scir_error; /* most recent error */ - struct sockaddr *scir_src; /* source address */ - socklen_t scir_src_len; /* source address len */ - struct sockaddr *scir_dst; /* destination address */ - socklen_t scir_dst_len; /* destination address len */ - __uint32_t scir_aux_type; /* aux data type (CIAUX) */ - void *scir_aux_data; /* aux data */ - __uint32_t scir_aux_len; /* aux data len */ -}; - -#ifdef BSD_KERNEL_PRIVATE -struct so_cinforeq32 { - sae_connid_t scir_cid; - __uint32_t scir_flags; - __uint32_t scir_ifindex; - __int32_t scir_error; - user32_addr_t scir_src; - socklen_t scir_src_len; - user32_addr_t scir_dst; - socklen_t scir_dst_len; - __uint32_t scir_aux_type; - user32_addr_t scir_aux_data; - __uint32_t scir_aux_len; -}; - -struct so_cinforeq64 { - sae_connid_t scir_cid; - __uint32_t scir_flags; - __uint32_t scir_ifindex; - __int32_t scir_error; - user64_addr_t scir_src __attribute__((aligned(8))); - socklen_t scir_src_len; - user64_addr_t scir_dst __attribute__((aligned(8))); - socklen_t scir_dst_len; - __uint32_t scir_aux_type; - user64_addr_t scir_aux_data __attribute__((aligned(8))); - __uint32_t scir_aux_len; -}; -#endif /* BSD_KERNEL_PRIVATE */ - -/* 
valid connection info flags */ -#define CIF_CONNECTING 0x1 /* connection was attempted */ -#define CIF_CONNECTED 0x2 /* connection is established */ -#define CIF_DISCONNECTING 0x4 /* disconnection was attempted */ -#define CIF_DISCONNECTED 0x8 /* has been disconnected */ -#define CIF_BOUND_IF 0x10 /* bound to an interface */ -#define CIF_BOUND_IP 0x20 /* bound to a src address */ -#define CIF_BOUND_PORT 0x40 /* bound to a src port */ -#define CIF_PREFERRED 0x80 /* connection is primary/preferred */ -#define CIF_MP_CAPABLE 0x100 /* supports multipath protocol */ -#define CIF_MP_READY 0x200 /* multipath protocol confirmed */ -#define CIF_MP_DEGRADED 0x400 /* has lost its multipath capability */ -#define CIF_MP_ACTIVE 0x800 /* this is the active subflow */ -#define CIF_MP_V1 0x1000 /* MPTCP v1 is used */ - -/* valid connection info auxiliary data types */ -#define CIAUX_TCP 0x1 /* TCP auxiliary data (conninfo_tcp_t) */ -#define CIAUX_MPTCP 0x2 /* MPTCP auxiliary data (conninfo_multipathtcp) */ - -/* - * Structure for SIOC{S,G}CONNORDER - */ -struct so_cordreq { - sae_connid_t sco_cid; /* connection ID */ - __uint32_t sco_rank; /* rank (0 means unspecified) */ -}; - -/* - * Common structure for KEV_NETPOLICY_SUBCLASS - */ -struct netpolicy_event_data { - __uint64_t eupid; /* effective unique PID */ - __uint64_t epid; /* effective PID */ - uuid_t euuid; /* effective UUID */ -}; - -/* - * NETPOLICY_IFDENIED event structure - */ -struct kev_netpolicy_ifdenied { - struct netpolicy_event_data ev_data; - __uint32_t ev_if_functional_type; -}; - -/* - * KEV_NETPOLICY_NETDENIED event structure - */ -struct kev_netpolicy_netdenied { - struct netpolicy_event_data ev_data; - __uint32_t ev_network_type; -}; - -/* - * Network Service Type to DiffServ Code Point mapping - */ -struct netsvctype_dscp_map { - int netsvctype; - u_int8_t dscp; /* 6 bits diffserv code point */ -}; - -/* - * Multi-layer packet logging require SO_MPK_LOG to be set - */ -struct so_mpkl_send_info { - uuid_t 
mpkl_uuid; - __uint8_t mpkl_proto; /* see net/multi_layer_pkt_log.h */ -}; - -struct so_mpkl_recv_info { - __uint32_t mpkl_seq; - __uint8_t mpkl_proto; /* see net/multi_layer_pkt_log.h */ -}; - -#ifndef KERNEL -__BEGIN_DECLS - -extern int peeloff(int s, sae_associd_t); -extern int socket_delegate(int, int, int, pid_t); - -/* - * recvmsg_x() is a system call similar to recvmsg(2) to receive - * several datagrams at once in the array of message headers "msgp". - * - * recvmsg_x() can be used only with protocols handlers that have been specially - * modified to support sending and receiving several datagrams at once. - * - * The size of the array "msgp" is given by the argument "cnt". - * - * The "flags" arguments supports only the value MSG_DONTWAIT. - * - * Each member of "msgp" array is of type "struct msghdr_x". - * - * The "msg_iov" and "msg_iovlen" are input parameters that describe where to - * store a datagram in a scatter gather locations of buffers -- see recvmsg(2). - * On output the field "msg_datalen" gives the length of the received datagram. - * - * The field "msg_flags" must be set to zero on input. On output, "msg_flags" - * may have MSG_TRUNC set to indicate the trailing portion of the datagram was - * discarded because the datagram was larger than the buffer supplied. - * recvmsg_x() returns as soon as a datagram is truncated. - * - * recvmsg_x() may return with less than "cnt" datagrams received based on - * the low water mark and the amount of data pending in the socket buffer. - * - * recvmsg_x() returns the number of datagrams that have been received, - * or -1 if an error occurred. - * - * NOTE: This a private system call, the API is subject to change. - */ -ssize_t recvmsg_x(int s, const struct msghdr_x *msgp, u_int cnt, int flags); - -/* - * sendmsg_x() is a system call similar to send(2) to send - * several datagrams at once in the array of message headers "msgp". 
- * - * sendmsg_x() can be used only with protocols handlers that have been specially - * modified to support sending and receiving several datagrams at once. - * - * The size of the array "msgp" is given by the argument "cnt". - * - * The "flags" arguments supports only the value MSG_DONTWAIT. - * - * Each member of "msgp" array is of type "struct msghdr_x". - * - * The "msg_iov" and "msg_iovlen" are input parameters that specify the - * data to be sent in a scatter gather locations of buffers -- see sendmsg(2). - * - * sendmsg_x() fails with EMSGSIZE if the sum of the length of the datagrams - * is greater than the high water mark. - * - * Address and ancillary data are not supported so the following fields - * must be set to zero on input: - * "msg_name", "msg_namelen", "msg_control" and "msg_controllen". - * - * The field "msg_flags" and "msg_datalen" must be set to zero on input. - * - * sendmsg_x() returns the number of datagrams that have been sent, - * or -1 if an error occurred. - * - * NOTE: This a private system call, the API is subject to change. 
- */ -ssize_t sendmsg_x(int s, const struct msghdr_x *msgp, u_int cnt, int flags); -__END_DECLS -#endif /* !KERNEL */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ -#endif /* PRIVATE */ - -#ifndef KERNEL -__BEGIN_DECLS - -int accept(int, struct sockaddr * __restrict, socklen_t * __restrict) -__DARWIN_ALIAS_C(accept); -int bind(int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS(bind); -int connect(int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS_C(connect); -int getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict) -__DARWIN_ALIAS(getpeername); -int getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict) -__DARWIN_ALIAS(getsockname); -int getsockopt(int, int, int, void * __restrict, socklen_t * __restrict); -int listen(int, int) __DARWIN_ALIAS(listen); -ssize_t recv(int, void *, size_t, int) __DARWIN_ALIAS_C(recv); -ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, - socklen_t * __restrict) __DARWIN_ALIAS_C(recvfrom); -ssize_t recvmsg(int, struct msghdr *, int) __DARWIN_ALIAS_C(recvmsg); -ssize_t send(int, const void *, size_t, int) __DARWIN_ALIAS_C(send); -ssize_t sendmsg(int, const struct msghdr *, int) __DARWIN_ALIAS_C(sendmsg); -ssize_t sendto(int, const void *, size_t, - int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS_C(sendto); -int setsockopt(int, int, int, const void *, socklen_t); -int shutdown(int, int); -int sockatmark(int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); -int socket(int, int, int); -int socketpair(int, int, int, int *) __DARWIN_ALIAS(socketpair); - -#if !defined(_POSIX_C_SOURCE) -int sendfile(int, int, off_t, off_t *, struct sf_hdtr *, int); -#endif /* !_POSIX_C_SOURCE */ - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -void pfctlinput(int, struct sockaddr *); - -__API_AVAILABLE(macosx(10.11), ios(9.0), tvos(9.0), watchos(2.0)) -int connectx(int, const sa_endpoints_t *, sae_associd_t, unsigned int, - const struct iovec *, unsigned int, 
size_t *, sae_connid_t *); - -__API_AVAILABLE(macosx(10.11), ios(9.0), tvos(9.0), watchos(2.0)) -int disconnectx(int, sae_associd_t, sae_connid_t); -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ -__END_DECLS -#endif /* !KERNEL */ - -#ifdef KERNEL -#include -#endif - -#endif /* !_SYS_SOCKET_H_ */ diff --git a/quinn-udp/src/cmsg/unix.rs b/quinn-udp/src/cmsg/unix.rs index 112bd5ebe..4fcb6f48d 100644 --- a/quinn-udp/src/cmsg/unix.rs +++ b/quinn-udp/src/cmsg/unix.rs @@ -32,6 +32,34 @@ impl MsgHdr for libc::msghdr { } } +#[cfg(any(target_os = "macos", target_os = "ios"))] +impl MsgHdr for crate::imp::msghdr_x { + type ControlMessage = libc::cmsghdr; + + fn cmsg_first_hdr(&self) -> *mut Self::ControlMessage { + let selfp = self as *const _ as *mut libc::msghdr; + unsafe { libc::CMSG_FIRSTHDR(selfp) } + } + + fn cmsg_nxt_hdr(&self, cmsg: &Self::ControlMessage) -> *mut Self::ControlMessage { + let selfp = self as *const _ as *mut libc::msghdr; + unsafe { libc::CMSG_NXTHDR(selfp, cmsg) } + } + + fn set_control_len(&mut self, len: usize) { + self.msg_controllen = len as _; + if len == 0 { + // netbsd is particular about this being a NULL pointer if there are no control + // messages. 
+ self.msg_control = std::ptr::null_mut(); + } + } + + fn control_len(&self) -> usize { + self.msg_controllen as _ + } +} + /// Helpers for [`libc::cmsghdr`] impl CMsgHdr for libc::cmsghdr { fn cmsg_len(length: usize) -> usize { diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 08f31a34a..afc731ecc 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -1,8 +1,3 @@ -#![allow(non_camel_case_types)] -#![allow(unreachable_pub)] -#[cfg(any(target_os = "macos", target_os = "ios"))] -include!(concat!(env!("OUT_DIR"), "/bindings.rs")); - #[cfg(not(any( target_os = "macos", target_os = "ios", @@ -29,28 +24,37 @@ use super::{ IO_ERROR_LOG_INTERVAL, }; -#[cfg(not(any(target_os = "macos", target_os = "ios")))] -type msghdr = libc::msghdr; -#[cfg(not(any(target_os = "macos", target_os = "ios")))] -type iovec = libc::iovec; - +// Adapted from https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/socket.h #[cfg(any(target_os = "macos", target_os = "ios"))] +#[repr(C)] #[allow(non_camel_case_types)] -type msghdr = msghdr_x; +// #[derive(Debug, Copy, Clone)] +pub(crate) struct msghdr_x { + pub msg_name: *mut std::ffi::c_void, + pub msg_namelen: libc::socklen_t, + pub msg_iov: *mut libc::iovec, + pub msg_iovlen: std::ffi::c_int, + pub msg_control: *mut std::ffi::c_void, + pub msg_controllen: libc::socklen_t, + pub msg_flags: std::ffi::c_int, + pub msg_datalen: usize, +} #[cfg(any(target_os = "macos", target_os = "ios"))] -impl From for libc::msghdr { - fn from(val: msghdr_x) -> Self { - Self { - msg_name: val.msg_name, - msg_namelen: val.msg_namelen, - msg_iov: val.msg_iov as _, - msg_iovlen: val.msg_iovlen, - msg_control: val.msg_control, - msg_controllen: val.msg_controllen, - msg_flags: val.msg_flags, - } - } +extern "C" { + fn recvmsg_x( + s: std::ffi::c_int, + msgp: *const msghdr_x, + cnt: std::ffi::c_uint, + flags: std::ffi::c_int, + ) -> isize; + + // fn sendmsg_x( + // s: std::ffi::c_int, + // msgp: *const msghdr_x, + // cnt: 
std::ffi::c_uint, + // flags: std::ffi::c_int, + // ) -> isize; } // Defined in netinet6/in6.h on OpenBSD, this is not yet exported by the libc crate @@ -475,7 +479,7 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> break n; }; for i in 0..(msg_count as usize) { - meta[i] = decode_recv(&names[i], &hdrs[i].into(), hdrs[i].msg_datalen as usize); + meta[i] = decode_recv(&names[i], &hdrs[i], hdrs[i].msg_datalen as usize); } Ok(msg_count as usize) } @@ -599,28 +603,43 @@ fn prepare_msg( encoder.finish(); } +#[cfg(not(any(target_os = "macos", target_os = "ios")))] fn prepare_recv( buf: &mut IoSliceMut, name: &mut MaybeUninit, ctrl: &mut cmsg::Aligned>, - hdr: &mut msghdr, + hdr: &mut libc::msghdr, ) { hdr.msg_name = name.as_mut_ptr() as _; hdr.msg_namelen = mem::size_of::() as _; - hdr.msg_iov = buf as *mut IoSliceMut as *mut iovec; + hdr.msg_iov = buf as *mut IoSliceMut as *mut libc::iovec; hdr.msg_iovlen = 1; hdr.msg_control = ctrl.0.as_mut_ptr() as _; hdr.msg_controllen = CMSG_LEN as _; hdr.msg_flags = 0; - #[cfg(any(target_os = "macos", target_os = "ios"))] - { - hdr.msg_datalen = buf.len(); - } +} + +#[cfg(any(target_os = "macos", target_os = "ios"))] +fn prepare_recv( + buf: &mut IoSliceMut, + name: &mut MaybeUninit, + ctrl: &mut cmsg::Aligned>, + hdr: &mut msghdr_x, +) { + hdr.msg_name = name.as_mut_ptr() as _; + hdr.msg_namelen = mem::size_of::() as _; + hdr.msg_iov = buf as *mut IoSliceMut as *mut libc::iovec; + hdr.msg_iovlen = 1; + hdr.msg_control = ctrl.0.as_mut_ptr() as _; + hdr.msg_controllen = CMSG_LEN as _; + hdr.msg_flags = 0; + hdr.msg_datalen = buf.len(); } fn decode_recv( name: &MaybeUninit, - hdr: &libc::msghdr, + #[cfg(not(any(target_os = "macos", target_os = "ios")))] hdr: &libc::msghdr, + #[cfg(any(target_os = "macos", target_os = "ios"))] hdr: &msghdr_x, len: usize, ) -> RecvMeta { let name = unsafe { name.assume_init() }; From 0cc5f783244fde38ca60f3850c1c0242101d35e7 Mon Sep 17 00:00:00 2001 From: Lars Eggert 
Date: Mon, 23 Sep 2024 08:51:11 +0300 Subject: [PATCH 06/28] Undo --- fuzz/fuzz_targets/packet.rs | 42 ++++++------ fuzz/fuzz_targets/streams.rs | 128 +++++++++++++++++------------------ 2 files changed, 85 insertions(+), 85 deletions(-) diff --git a/fuzz/fuzz_targets/packet.rs b/fuzz/fuzz_targets/packet.rs index 4f227a877..a8320a87a 100644 --- a/fuzz/fuzz_targets/packet.rs +++ b/fuzz/fuzz_targets/packet.rs @@ -1,25 +1,25 @@ #![no_main] -// extern crate proto; +extern crate proto; -// use libfuzzer_sys::fuzz_target; -// use proto::{ -// fuzzing::{PacketParams, PartialDecode}, -// FixedLengthConnectionIdParser, DEFAULT_SUPPORTED_VERSIONS, -// }; +use libfuzzer_sys::fuzz_target; +use proto::{ + fuzzing::{PacketParams, PartialDecode}, + FixedLengthConnectionIdParser, DEFAULT_SUPPORTED_VERSIONS, +}; -// fuzz_target!(|data: PacketParams| { -// let len = data.buf.len(); -// let supported_versions = DEFAULT_SUPPORTED_VERSIONS.to_vec(); -// if let Ok(decoded) = PartialDecode::new( -// data.buf, -// &FixedLengthConnectionIdParser::new(data.local_cid_len), -// &supported_versions, -// data.grease_quic_bit, -// ) { -// match decoded.1 { -// Some(x) => assert_eq!(len, decoded.0.len() + x.len()), -// None => assert_eq!(len, decoded.0.len()), -// } -// } -// }); +fuzz_target!(|data: PacketParams| { + let len = data.buf.len(); + let supported_versions = DEFAULT_SUPPORTED_VERSIONS.to_vec(); + if let Ok(decoded) = PartialDecode::new( + data.buf, + &FixedLengthConnectionIdParser::new(data.local_cid_len), + &supported_versions, + data.grease_quic_bit, + ) { + match decoded.1 { + Some(x) => assert_eq!(len, decoded.0.len() + x.len()), + None => assert_eq!(len, decoded.0.len()), + } + } +}); diff --git a/fuzz/fuzz_targets/streams.rs b/fuzz/fuzz_targets/streams.rs index 34fa6e3ad..340078e5f 100644 --- a/fuzz/fuzz_targets/streams.rs +++ b/fuzz/fuzz_targets/streams.rs @@ -1,71 +1,71 @@ #![no_main] -// use arbitrary::Arbitrary; -// use libfuzzer_sys::fuzz_target; +use arbitrary::Arbitrary; 
+use libfuzzer_sys::fuzz_target; -// extern crate proto; -// use proto::fuzzing::{ConnectionState, ResetStream, Retransmits, StreamsState}; -// use proto::{Dir, Side, StreamId, VarInt}; -// use proto::{SendStream, Streams}; +extern crate proto; +use proto::fuzzing::{ConnectionState, ResetStream, Retransmits, StreamsState}; +use proto::{Dir, Side, StreamId, VarInt}; +use proto::{SendStream, Streams}; -// #[derive(Arbitrary, Debug)] -// struct StreamParams { -// side: Side, -// max_remote_uni: u16, -// max_remote_bi: u16, -// send_window: u16, -// receive_window: u16, -// stream_receive_window: u16, -// dir: Dir, -// } +#[derive(Arbitrary, Debug)] +struct StreamParams { + side: Side, + max_remote_uni: u16, + max_remote_bi: u16, + send_window: u16, + receive_window: u16, + stream_receive_window: u16, + dir: Dir, +} -// #[derive(Arbitrary, Debug)] -// enum Operation { -// Open, -// Accept(Dir), -// Finish(StreamId), -// ReceivedStopSending(StreamId, VarInt), -// ReceivedReset(ResetStream), -// Reset(StreamId), -// } +#[derive(Arbitrary, Debug)] +enum Operation { + Open, + Accept(Dir), + Finish(StreamId), + ReceivedStopSending(StreamId, VarInt), + ReceivedReset(ResetStream), + Reset(StreamId), +} -// fuzz_target!(|input: (StreamParams, Vec)| { -// let (params, operations) = input; -// let (mut pending, conn_state) = (Retransmits::default(), ConnectionState::Established); -// let mut state = StreamsState::new( -// params.side, -// params.max_remote_uni.into(), -// params.max_remote_bi.into(), -// params.send_window.into(), -// params.receive_window.into(), -// params.stream_receive_window.into(), -// ); +fuzz_target!(|input: (StreamParams, Vec)| { + let (params, operations) = input; + let (mut pending, conn_state) = (Retransmits::default(), ConnectionState::Established); + let mut state = StreamsState::new( + params.side, + params.max_remote_uni.into(), + params.max_remote_bi.into(), + params.send_window.into(), + params.receive_window.into(), + 
params.stream_receive_window.into(), + ); -// for operation in operations { -// match operation { -// Operation::Open => { -// Streams::new(&mut state, &conn_state).open(params.dir); -// } -// Operation::Accept(dir) => { -// Streams::new(&mut state, &conn_state).accept(dir); -// } -// Operation::Finish(id) => { -// let _ = SendStream::new(id, &mut state, &mut pending, &conn_state).finish(); -// } -// Operation::ReceivedStopSending(sid, err_code) => { -// Streams::new(&mut state, &conn_state) -// .state() -// .received_stop_sending(sid, err_code); -// } -// Operation::ReceivedReset(rs) => { -// let _ = Streams::new(&mut state, &conn_state) -// .state() -// .received_reset(rs); -// } -// Operation::Reset(id) => { -// let _ = -// SendStream::new(id, &mut state, &mut pending, &conn_state).reset(0u32.into()); -// } -// } -// } -// }); + for operation in operations { + match operation { + Operation::Open => { + Streams::new(&mut state, &conn_state).open(params.dir); + } + Operation::Accept(dir) => { + Streams::new(&mut state, &conn_state).accept(dir); + } + Operation::Finish(id) => { + let _ = SendStream::new(id, &mut state, &mut pending, &conn_state).finish(); + } + Operation::ReceivedStopSending(sid, err_code) => { + Streams::new(&mut state, &conn_state) + .state() + .received_stop_sending(sid, err_code); + } + Operation::ReceivedReset(rs) => { + let _ = Streams::new(&mut state, &conn_state) + .state() + .received_reset(rs); + } + Operation::Reset(id) => { + let _ = + SendStream::new(id, &mut state, &mut pending, &conn_state).reset(0u32.into()); + } + } + } +}); From a5523fd52e31e8bc9659d9e3c735eece762e0931 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 10:25:16 +0300 Subject: [PATCH 07/28] `sendmsg_x` --- quinn-udp/src/unix.rs | 89 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index afc731ecc..3c9162f6e 100644 --- a/quinn-udp/src/unix.rs +++ 
b/quinn-udp/src/unix.rs @@ -49,12 +49,12 @@ extern "C" { flags: std::ffi::c_int, ) -> isize; - // fn sendmsg_x( - // s: std::ffi::c_int, - // msgp: *const msghdr_x, - // cnt: std::ffi::c_uint, - // flags: std::ffi::c_int, - // ) -> isize; + fn sendmsg_x( + s: std::ffi::c_int, + msgp: *const msghdr_x, + cnt: std::ffi::c_uint, + flags: std::ffi::c_int, + ) -> isize; } // Defined in netinet6/in6.h on OpenBSD, this is not yet exported by the libc crate @@ -358,12 +358,66 @@ fn send( } } -#[cfg(any( - target_os = "macos", - target_os = "ios", - target_os = "openbsd", - target_os = "netbsd" -))] +#[cfg(any(target_os = "macos", target_os = "ios",))] +fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { + let mut hdrs = unsafe { mem::zeroed::<[msghdr_x; BATCH_SIZE]>() }; + let mut iovs = unsafe { mem::zeroed::<[libc::iovec; BATCH_SIZE]>() }; + let mut ctrls = [cmsg::Aligned([0u8; CMSG_LEN]); BATCH_SIZE]; + let addr = socket2::SockAddr::from(transmit.destination); + let max_msg_count = if let Some(segment_size) = transmit.segment_size { + panic!(); + transmit.contents.len() / segment_size + } else { + 1 + } + .min(BATCH_SIZE); + for i in 0..max_msg_count { + prepare_msg( + transmit, + &addr, + &mut hdrs[i], + &mut iovs[i], + &mut ctrls[i], + true, + state.sendmsg_einval(), + ); + hdrs[i].msg_datalen = if let Some(segment_size) = transmit.segment_size { + if i < max_msg_count - 1 { + segment_size + } else { + transmit.contents.len() - segment_size * i + } + } else { + transmit.contents.len() + }; + } + let n = unsafe { sendmsg_x(io.as_raw_fd(), hdrs.as_ptr(), max_msg_count as u32, 0) }; + if n == -1 { + let e = io::Error::last_os_error(); + match e.kind() { + io::ErrorKind::Interrupted => { + // Retry the transmission + } + io::ErrorKind::WouldBlock => return Err(e), + _ => { + // Other errors are ignored, since they will usually be handled + // by higher level retransmits and timeouts. 
+ // - PermissionDenied errors have been observed due to iptable rules. + // Those are not fatal errors, since the + // configuration can be dynamically changed. + // - Destination unreachable errors have been observed for other + // - EMSGSIZE is expected for MTU probes. Future work might be able to avoid + // these by automatically clamping the MTUD upper bound to the interface MTU. + if e.raw_os_error() != Some(libc::EMSGSIZE) { + log_sendmsg_error(&state.last_send_error, e, transmit); + } + } + } + } + Ok(()) +} + +#[cfg(any(target_os = "openbsd", target_os = "netbsd"))] fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { let mut hdr: libc::msghdr = unsafe { mem::zeroed() }; let mut iov: libc::iovec = unsafe { mem::zeroed() }; @@ -375,10 +429,7 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: &mut hdr, &mut iov, &mut ctrl, - cfg!(target_os = "macos") - || cfg!(target_os = "ios") - || cfg!(target_os = "openbsd") - || cfg!(target_os = "netbsd"), + true, state.sendmsg_einval(), ); let n = unsafe { libc::sendmsg(io.as_raw_fd(), &hdr, 0) }; @@ -513,7 +564,10 @@ const CMSG_LEN: usize = 88; fn prepare_msg( transmit: &Transmit<'_>, dst_addr: &socket2::SockAddr, + #[cfg(not(any(target_os = "macos", target_os = "ios")))] hdr: &mut libc::msghdr, + #[cfg(any(target_os = "macos", target_os = "ios"))] + hdr: &mut msghdr_x, iov: &mut libc::iovec, ctrl: &mut cmsg::Aligned<[u8; CMSG_LEN]>, #[allow(unused_variables)] // only used on FreeBSD & macOS @@ -553,6 +607,7 @@ fn prepare_msg( encoder.push(libc::IPPROTO_IPV6, libc::IPV6_TCLASS, ecn); } + #[cfg(not(any(target_os = "macos", target_os = "ios")))] if let Some(segment_size) = transmit.segment_size { gso::set_segment_size(&mut encoder, segment_size as u16); } @@ -768,12 +823,14 @@ mod gso { #[cfg(not(target_os = "linux"))] mod gso { + #[cfg(not(any(target_os = "macos", target_os = "ios")))] use super::*; pub(super) fn max_gso_segments() -> usize { 1 } 
+ #[cfg(not(any(target_os = "macos", target_os = "ios")))] pub(super) fn set_segment_size(_encoder: &mut cmsg::Encoder, _segment_size: u16) { panic!("Setting a segment size is not supported on current platform"); } From 450b7541fc0c90ebcffdd1c3400c9c038c904dd5 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 10:26:26 +0300 Subject: [PATCH 08/28] No panic --- quinn-udp/src/unix.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 3c9162f6e..c7f2978a4 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -365,7 +365,6 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: let mut ctrls = [cmsg::Aligned([0u8; CMSG_LEN]); BATCH_SIZE]; let addr = socket2::SockAddr::from(transmit.destination); let max_msg_count = if let Some(segment_size) = transmit.segment_size { - panic!(); transmit.contents.len() / segment_size } else { 1 From de86e7fa7d433d881287ed5eeba24164b66de6c3 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 10:27:42 +0300 Subject: [PATCH 09/28] fmt --- quinn-udp/src/unix.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index c7f2978a4..9d0ea8916 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -563,10 +563,8 @@ const CMSG_LEN: usize = 88; fn prepare_msg( transmit: &Transmit<'_>, dst_addr: &socket2::SockAddr, - #[cfg(not(any(target_os = "macos", target_os = "ios")))] - hdr: &mut libc::msghdr, - #[cfg(any(target_os = "macos", target_os = "ios"))] - hdr: &mut msghdr_x, + #[cfg(not(any(target_os = "macos", target_os = "ios")))] hdr: &mut libc::msghdr, + #[cfg(any(target_os = "macos", target_os = "ios"))] hdr: &mut msghdr_x, iov: &mut libc::iovec, ctrl: &mut cmsg::Aligned<[u8; CMSG_LEN]>, #[allow(unused_variables)] // only used on FreeBSD & macOS From fec606354cfbdad3116ff4174ed6afb4c4d7a959 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 
2024 15:18:55 +0300 Subject: [PATCH 10/28] Many fixes --- quinn-udp/Cargo.toml | 2 +- quinn-udp/benches/throughput.rs | 22 +++++---- quinn-udp/src/unix.rs | 81 ++++++++++++++++++++------------- 3 files changed, 62 insertions(+), 43 deletions(-) diff --git a/quinn-udp/Cargo.toml b/quinn-udp/Cargo.toml index 257ee0015..a8142785e 100644 --- a/quinn-udp/Cargo.toml +++ b/quinn-udp/Cargo.toml @@ -32,6 +32,6 @@ windows-sys = { workspace = true } [dev-dependencies] criterion = "0.5" -[target.'cfg(any(target_os = "linux", target_os = "windows"))'.bench] +[[bench]] name = "throughput" harness = false diff --git a/quinn-udp/benches/throughput.rs b/quinn-udp/benches/throughput.rs index 39ea6a291..8f6fc4adb 100644 --- a/quinn-udp/benches/throughput.rs +++ b/quinn-udp/benches/throughput.rs @@ -1,7 +1,7 @@ use criterion::{criterion_group, criterion_main, Criterion}; use quinn_udp::{RecvMeta, Transmit, UdpSocketState}; use std::cmp::min; -use std::{io::IoSliceMut, net::UdpSocket, slice}; +use std::{io::IoSliceMut, net::UdpSocket}; pub fn criterion_benchmark(c: &mut Criterion) { const TOTAL_BYTES: usize = 10 * 1024 * 1024; @@ -27,8 +27,13 @@ pub fn criterion_benchmark(c: &mut Criterion) { // Reverse non-blocking flag set by `UdpSocketState` to make the test non-racy recv.set_nonblocking(false).unwrap(); - let mut receive_buffer = vec![0; MAX_BUFFER_SIZE]; - let mut meta = RecvMeta::default(); + let gro_segments = UdpSocketState::new((&send).into()).unwrap().gro_segments(); + let mut receive_buffers = vec![[0; SEGMENT_SIZE]; gro_segments]; + let mut receive_slives = receive_buffers + .iter_mut() + .map(|buf| IoSliceMut::new(buf)) + .collect::>(); + let mut meta = vec![RecvMeta::default(); gro_segments]; for gso_enabled in [false, true] { let mut group = c.benchmark_group(format!("gso_{}", gso_enabled)); @@ -55,14 +60,11 @@ pub fn criterion_benchmark(c: &mut Criterion) { let mut received_segments = 0; while received_segments < segments { let n = recv_state - .recv( - (&recv).into(), 
- &mut [IoSliceMut::new(&mut receive_buffer)], - slice::from_mut(&mut meta), - ) + .recv((&recv).into(), &mut receive_slives, &mut meta) .unwrap(); - assert_eq!(n, 1); - received_segments += meta.len / meta.stride; + for i in meta.iter().take(n) { + received_segments += i.len / i.stride; + } } assert_eq!(received_segments, segments); } diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 9d0ea8916..72498158b 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -364,33 +364,32 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: let mut iovs = unsafe { mem::zeroed::<[libc::iovec; BATCH_SIZE]>() }; let mut ctrls = [cmsg::Aligned([0u8; CMSG_LEN]); BATCH_SIZE]; let addr = socket2::SockAddr::from(transmit.destination); - let max_msg_count = if let Some(segment_size) = transmit.segment_size { - transmit.contents.len() / segment_size - } else { - 1 - } - .min(BATCH_SIZE); - for i in 0..max_msg_count { - prepare_msg( - transmit, - &addr, - &mut hdrs[i], - &mut iovs[i], - &mut ctrls[i], - true, - state.sendmsg_einval(), - ); - hdrs[i].msg_datalen = if let Some(segment_size) = transmit.segment_size { - if i < max_msg_count - 1 { - segment_size - } else { - transmit.contents.len() - segment_size * i - } - } else { - transmit.contents.len() - }; - } - let n = unsafe { sendmsg_x(io.as_raw_fd(), hdrs.as_ptr(), max_msg_count as u32, 0) }; + let segment_size = transmit.segment_size.unwrap_or(transmit.contents.len()); + let cnt = transmit + .contents + .chunks(segment_size) + .enumerate() + .map(|(i, chunk)| { + prepare_msg( + &Transmit { + destination: transmit.destination, + ecn: transmit.ecn, + contents: chunk, + segment_size: Some(chunk.len()), + src_ip: transmit.src_ip, + }, + &addr, + &mut hdrs[i], + &mut iovs[i], + &mut ctrls[i], + true, + state.sendmsg_einval(), + ); + hdrs[i].msg_datalen = chunk.len(); + }) + .count(); + let n = unsafe { sendmsg_x(io.as_raw_fd(), hdrs.as_ptr(), cnt as u32, 0) }; + // print!("o{} ", 
n); if n == -1 { let e = io::Error::last_os_error(); match e.kind() { @@ -528,6 +527,7 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> } break n; }; + // print!("i{} ", msg_count); for i in 0..(msg_count as usize) { meta[i] = decode_recv(&names[i], &hdrs[i], hdrs[i].msg_datalen as usize); } @@ -604,7 +604,6 @@ fn prepare_msg( encoder.push(libc::IPPROTO_IPV6, libc::IPV6_TCLASS, ecn); } - #[cfg(not(any(target_os = "macos", target_os = "ios")))] if let Some(segment_size) = transmit.segment_size { gso::set_segment_size(&mut encoder, segment_size as u16); } @@ -818,16 +817,25 @@ mod gso { } } -#[cfg(not(target_os = "linux"))] +#[cfg(any(target_os = "macos", target_os = "ios"))] +mod gso { + use super::*; + + pub(super) fn max_gso_segments() -> usize { + BATCH_SIZE + } + + pub(super) fn set_segment_size(_encoder: &mut cmsg::Encoder, _segment_size: u16) {} +} + +#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "ios")))] mod gso { - #[cfg(not(any(target_os = "macos", target_os = "ios")))] use super::*; pub(super) fn max_gso_segments() -> usize { 1 } - #[cfg(not(any(target_os = "macos", target_os = "ios")))] pub(super) fn set_segment_size(_encoder: &mut cmsg::Encoder, _segment_size: u16) { panic!("Setting a segment size is not supported on current platform"); } @@ -900,7 +908,16 @@ fn set_socket_option( const OPTION_ON: libc::c_int = 1; -#[cfg(not(target_os = "linux"))] +#[cfg(any(target_os = "macos", target_os = "ios"))] +mod gro { + use super::BATCH_SIZE; + + pub(super) fn gro_segments() -> usize { + BATCH_SIZE + } +} + +#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "ios")))] mod gro { pub(super) fn gro_segments() -> usize { 1 From 83ae3e79958fe5692be420a02ee8d5533a021ded Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 15:32:33 +0300 Subject: [PATCH 11/28] bench fix --- quinn-udp/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/quinn-udp/Cargo.toml 
b/quinn-udp/Cargo.toml index a8142785e..ca9b3a374 100644 --- a/quinn-udp/Cargo.toml +++ b/quinn-udp/Cargo.toml @@ -32,6 +32,10 @@ windows-sys = { workspace = true } [dev-dependencies] criterion = "0.5" +[lib] +# See https://github.com/bheisler/criterion.rs/blob/master/book/src/faq.md#cargo-bench-gives-unrecognized-option-errors-for-valid-command-line-options +bench = false + [[bench]] name = "throughput" harness = false From 05dc111487e6e7dfc6621ed3d539e5a5063718bb Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 16:01:40 +0300 Subject: [PATCH 12/28] Fix typo --- quinn-udp/benches/throughput.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/quinn-udp/benches/throughput.rs b/quinn-udp/benches/throughput.rs index 8f6fc4adb..cd5152689 100644 --- a/quinn-udp/benches/throughput.rs +++ b/quinn-udp/benches/throughput.rs @@ -29,7 +29,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { let gro_segments = UdpSocketState::new((&send).into()).unwrap().gro_segments(); let mut receive_buffers = vec![[0; SEGMENT_SIZE]; gro_segments]; - let mut receive_slives = receive_buffers + let mut receive_slices = receive_buffers .iter_mut() .map(|buf| IoSliceMut::new(buf)) .collect::>(); @@ -60,7 +60,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { let mut received_segments = 0; while received_segments < segments { let n = recv_state - .recv((&recv).into(), &mut receive_slives, &mut meta) + .recv((&recv).into(), &mut receive_slices, &mut meta) .unwrap(); for i in meta.iter().take(n) { received_segments += i.len / i.stride; From f0ff0d7f4a56e3cd220e26c0ea0c3aa55095c2fd Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 16:03:22 +0300 Subject: [PATCH 13/28] Remove commented-out code --- quinn-udp/src/unix.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 72498158b..1f7174f5c 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -389,7 +389,6 @@ fn send(state: 
&UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: }) .count(); let n = unsafe { sendmsg_x(io.as_raw_fd(), hdrs.as_ptr(), cnt as u32, 0) }; - // print!("o{} ", n); if n == -1 { let e = io::Error::last_os_error(); match e.kind() { @@ -527,7 +526,6 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> } break n; }; - // print!("i{} ", msg_count); for i in 0..(msg_count as usize) { meta[i] = decode_recv(&names[i], &hdrs[i], hdrs[i].msg_datalen as usize); } From ff73229097e2aa6ec5430690ed87843e90b47480 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 16:13:10 +0300 Subject: [PATCH 14/28] Address review comment --- quinn-udp/src/unix.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 1f7174f5c..d56fb2de2 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -517,14 +517,16 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> 0, ) }; - if n == -1 { - let e = io::Error::last_os_error(); - if e.kind() == io::ErrorKind::Interrupted { - continue; + match n { + -1 => { + let e = io::Error::last_os_error(); + if e.kind() == io::ErrorKind::Interrupted { + continue; + } + return Err(e); } - return Err(e); + n => break n, } - break n; }; for i in 0..(msg_count as usize) { meta[i] = decode_recv(&names[i], &hdrs[i], hdrs[i].msg_datalen as usize); From 8df7cabe9513958604b0cec152dde8c50d5b24b6 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Mon, 23 Sep 2024 16:35:24 +0300 Subject: [PATCH 15/28] Remove comment --- quinn-udp/src/unix.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index d56fb2de2..3609e50c6 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -28,7 +28,6 @@ use super::{ #[cfg(any(target_os = "macos", target_os = "ios"))] #[repr(C)] #[allow(non_camel_case_types)] -// #[derive(Debug, Copy, Clone)] pub(crate) struct msghdr_x 
{ pub msg_name: *mut std::ffi::c_void, pub msg_namelen: libc::socklen_t, From 10cf609e0f5cc88e05c4f0a633d2aeb49678525d Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 24 Sep 2024 11:47:51 +0300 Subject: [PATCH 16/28] Make sure ECN works --- quinn-udp/benches/throughput.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/quinn-udp/benches/throughput.rs b/quinn-udp/benches/throughput.rs index cd5152689..52f24a2fb 100644 --- a/quinn-udp/benches/throughput.rs +++ b/quinn-udp/benches/throughput.rs @@ -44,7 +44,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { let transmit = Transmit { destination: dst_addr, - ecn: None, + ecn: Some(quinn_udp::EcnCodepoint::Ect0), contents: &msg, segment_size: gso_enabled.then_some(SEGMENT_SIZE), src_ip: None, @@ -64,6 +64,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { .unwrap(); for i in meta.iter().take(n) { received_segments += i.len / i.stride; + assert_eq!(i.ecn, Some(quinn_udp::EcnCodepoint::Ect0)); } } assert_eq!(received_segments, segments); From ef51f1f5731d7546af4ecd07647ef4c360976c72 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 24 Sep 2024 11:48:25 +0300 Subject: [PATCH 17/28] Address review comments --- quinn-udp/src/cmsg/unix.rs | 5 -- quinn-udp/src/unix.rs | 115 ++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 64 deletions(-) diff --git a/quinn-udp/src/cmsg/unix.rs b/quinn-udp/src/cmsg/unix.rs index 4fcb6f48d..4a87dc37a 100644 --- a/quinn-udp/src/cmsg/unix.rs +++ b/quinn-udp/src/cmsg/unix.rs @@ -48,11 +48,6 @@ impl MsgHdr for crate::imp::msghdr_x { fn set_control_len(&mut self, len: usize) { self.msg_controllen = len as _; - if len == 0 { - // netbsd is particular about this being a NULL pointer if there are no control - // messages. 
- self.msg_control = std::ptr::null_mut(); - } } fn control_len(&self) -> usize { diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 3609e50c6..bec9f5962 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -17,6 +17,9 @@ use std::{ time::Instant, }; +#[cfg(any(target_os = "macos", target_os = "ios"))] +use std::ffi::{c_int, c_uint, c_void}; + use socket2::SockRef; use super::{ @@ -29,31 +32,21 @@ use super::{ #[repr(C)] #[allow(non_camel_case_types)] pub(crate) struct msghdr_x { - pub msg_name: *mut std::ffi::c_void, + pub msg_name: *mut c_void, pub msg_namelen: libc::socklen_t, pub msg_iov: *mut libc::iovec, - pub msg_iovlen: std::ffi::c_int, - pub msg_control: *mut std::ffi::c_void, + pub msg_iovlen: c_int, + pub msg_control: *mut c_void, pub msg_controllen: libc::socklen_t, - pub msg_flags: std::ffi::c_int, + pub msg_flags: c_int, pub msg_datalen: usize, } #[cfg(any(target_os = "macos", target_os = "ios"))] extern "C" { - fn recvmsg_x( - s: std::ffi::c_int, - msgp: *const msghdr_x, - cnt: std::ffi::c_uint, - flags: std::ffi::c_int, - ) -> isize; - - fn sendmsg_x( - s: std::ffi::c_int, - msgp: *const msghdr_x, - cnt: std::ffi::c_uint, - flags: std::ffi::c_int, - ) -> isize; + fn recvmsg_x(s: c_int, msgp: *const msghdr_x, cnt: c_uint, flags: c_int) -> isize; + + fn sendmsg_x(s: c_int, msgp: *const msghdr_x, cnt: c_uint, flags: c_int) -> isize; } // Defined in netinet6/in6.h on OpenBSD, this is not yet exported by the libc crate @@ -364,49 +357,47 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: let mut ctrls = [cmsg::Aligned([0u8; CMSG_LEN]); BATCH_SIZE]; let addr = socket2::SockAddr::from(transmit.destination); let segment_size = transmit.segment_size.unwrap_or(transmit.contents.len()); - let cnt = transmit - .contents - .chunks(segment_size) - .enumerate() - .map(|(i, chunk)| { - prepare_msg( - &Transmit { - destination: transmit.destination, - ecn: transmit.ecn, - contents: chunk, - segment_size: 
Some(chunk.len()), - src_ip: transmit.src_ip, - }, - &addr, - &mut hdrs[i], - &mut iovs[i], - &mut ctrls[i], - true, - state.sendmsg_einval(), - ); - hdrs[i].msg_datalen = chunk.len(); - }) - .count(); + let mut cnt = 0; + for (i, chunk) in transmit.contents.chunks(segment_size).enumerate() { + prepare_msg( + &Transmit { + destination: transmit.destination, + ecn: transmit.ecn, + contents: chunk, + segment_size: Some(chunk.len()), + src_ip: transmit.src_ip, + }, + &addr, + &mut hdrs[i], + &mut iovs[i], + &mut ctrls[i], + true, + state.sendmsg_einval(), + ); + hdrs[i].msg_datalen = chunk.len(); + cnt += 1; + } let n = unsafe { sendmsg_x(io.as_raw_fd(), hdrs.as_ptr(), cnt as u32, 0) }; - if n == -1 { - let e = io::Error::last_os_error(); - match e.kind() { - io::ErrorKind::Interrupted => { - // Retry the transmission - } - io::ErrorKind::WouldBlock => return Err(e), - _ => { - // Other errors are ignored, since they will usually be handled - // by higher level retransmits and timeouts. - // - PermissionDenied errors have been observed due to iptable rules. - // Those are not fatal errors, since the - // configuration can be dynamically changed. - // - Destination unreachable errors have been observed for other - // - EMSGSIZE is expected for MTU probes. Future work might be able to avoid - // these by automatically clamping the MTUD upper bound to the interface MTU. - if e.raw_os_error() != Some(libc::EMSGSIZE) { - log_sendmsg_error(&state.last_send_error, e, transmit); - } + if n >= 0 { + return Ok(()); + } + let e = io::Error::last_os_error(); + match e.kind() { + io::ErrorKind::Interrupted => { + // Retry the transmission + } + io::ErrorKind::WouldBlock => return Err(e), + _ => { + // Other errors are ignored, since they will usually be handled + // by higher level retransmits and timeouts. + // - PermissionDenied errors have been observed due to iptable rules. + // Those are not fatal errors, since the + // configuration can be dynamically changed. 
+ // - Destination unreachable errors have been observed for other + // - EMSGSIZE is expected for MTU probes. Future work might be able to avoid + // these by automatically clamping the MTUD upper bound to the interface MTU. + if e.raw_os_error() != Some(libc::EMSGSIZE) { + log_sendmsg_error(&state.last_send_error, e, transmit); } } } @@ -817,6 +808,9 @@ mod gso { } #[cfg(any(target_os = "macos", target_os = "ios"))] +// On Apple platforms using the `sendmsg_x` call, UDP datagram segmentation is not +// offloaded to the NIC or even the kernel, but instead done here in user space in +// [`send`]) and then passed to the OS as individual `iovec`s (up to `BATCH_SIZE`). mod gso { use super::*; @@ -908,6 +902,9 @@ fn set_socket_option( const OPTION_ON: libc::c_int = 1; #[cfg(any(target_os = "macos", target_os = "ios"))] +// On Apple platforms using the `recvmsg_x` call, UDP datagram reassembly is not +// offloaded to the NIC or even the kernel; [`recv`] will instead return multiple +// individual `iovec`s (up to `BATCH_SIZE`). 
mod gro { use super::BATCH_SIZE; From 2c92fb01324ecc402c810abda5d0a7a71ace442a Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 24 Sep 2024 14:56:04 +0300 Subject: [PATCH 18/28] Suggestion from @mxinden --- quinn-udp/benches/throughput.rs | 7 +++---- quinn-udp/src/unix.rs | 14 +------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/quinn-udp/benches/throughput.rs b/quinn-udp/benches/throughput.rs index 52f24a2fb..a247e1c56 100644 --- a/quinn-udp/benches/throughput.rs +++ b/quinn-udp/benches/throughput.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use quinn_udp::{RecvMeta, Transmit, UdpSocketState}; +use quinn_udp::{RecvMeta, Transmit, UdpSocketState, BATCH_SIZE}; use std::cmp::min; use std::{io::IoSliceMut, net::UdpSocket}; @@ -27,13 +27,12 @@ pub fn criterion_benchmark(c: &mut Criterion) { // Reverse non-blocking flag set by `UdpSocketState` to make the test non-racy recv.set_nonblocking(false).unwrap(); - let gro_segments = UdpSocketState::new((&send).into()).unwrap().gro_segments(); - let mut receive_buffers = vec![[0; SEGMENT_SIZE]; gro_segments]; + let mut receive_buffers = vec![[0; SEGMENT_SIZE]; BATCH_SIZE]; let mut receive_slices = receive_buffers .iter_mut() .map(|buf| IoSliceMut::new(buf)) .collect::>(); - let mut meta = vec![RecvMeta::default(); gro_segments]; + let mut meta = vec![RecvMeta::default(); BATCH_SIZE]; for gso_enabled in [false, true] { let mut group = c.benchmark_group(format!("gso_{}", gso_enabled)); diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index bec9f5962..4558e9b6c 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -901,19 +901,7 @@ fn set_socket_option( const OPTION_ON: libc::c_int = 1; -#[cfg(any(target_os = "macos", target_os = "ios"))] -// On Apple platforms using the `recvmsg_x` call, UDP datagram reassembly is not -// offloaded to the NIC or even the kernel; [`recv`] will instead return multiple -// individual `iovec`s (up to 
`BATCH_SIZE`). -mod gro { - use super::BATCH_SIZE; - - pub(super) fn gro_segments() -> usize { - BATCH_SIZE - } -} - -#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "ios")))] +#[cfg(not(target_os = "linux"))] mod gro { pub(super) fn gro_segments() -> usize { 1 From 8e88f603228474590a2c6d5ce3eee4b535cf83d6 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Wed, 25 Sep 2024 08:39:12 +0300 Subject: [PATCH 19/28] Undo --- quinn-udp/benches/throughput.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/quinn-udp/benches/throughput.rs b/quinn-udp/benches/throughput.rs index a247e1c56..d7b94b5b6 100644 --- a/quinn-udp/benches/throughput.rs +++ b/quinn-udp/benches/throughput.rs @@ -43,7 +43,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { let transmit = Transmit { destination: dst_addr, - ecn: Some(quinn_udp::EcnCodepoint::Ect0), + ecn: None, contents: &msg, segment_size: gso_enabled.then_some(SEGMENT_SIZE), src_ip: None, @@ -63,7 +63,6 @@ pub fn criterion_benchmark(c: &mut Criterion) { .unwrap(); for i in meta.iter().take(n) { received_segments += i.len / i.stride; - assert_eq!(i.ecn, Some(quinn_udp::EcnCodepoint::Ect0)); } } assert_eq!(received_segments, segments); From 3b5ed21964c9764bd8c289dc7ffa904101e67787 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 8 Oct 2024 17:43:51 +0300 Subject: [PATCH 20/28] Update quinn-udp/benches/throughput.rs Co-authored-by: Max Inden --- quinn-udp/benches/throughput.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quinn-udp/benches/throughput.rs b/quinn-udp/benches/throughput.rs index d7b94b5b6..feae79a66 100644 --- a/quinn-udp/benches/throughput.rs +++ b/quinn-udp/benches/throughput.rs @@ -27,7 +27,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { // Reverse non-blocking flag set by `UdpSocketState` to make the test non-racy recv.set_nonblocking(false).unwrap(); - let mut receive_buffers = vec![[0; SEGMENT_SIZE]; BATCH_SIZE]; + let mut receive_buffers = vec![[0; 
SEGMENT_SIZE * recv.gro_segments]; BATCH_SIZE]; let mut receive_slices = receive_buffers .iter_mut() .map(|buf| IoSliceMut::new(buf)) From c69940e4c8c0eb3874d86a4cff1d388d5237d409 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 8 Oct 2024 17:44:44 +0300 Subject: [PATCH 21/28] Update quinn-udp/src/unix.rs Co-authored-by: Max Inden --- quinn-udp/src/unix.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 4558e9b6c..61922f603 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -358,7 +358,8 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: let addr = socket2::SockAddr::from(transmit.destination); let segment_size = transmit.segment_size.unwrap_or(transmit.contents.len()); let mut cnt = 0; - for (i, chunk) in transmit.contents.chunks(segment_size).enumerate() { + debug_assert!(transmit.contents.div_ceil(segment_size) <= BATCH_SIZE); + for (i, chunk) in transmit.contents.chunks(segment_size).enumerate().take(BATCH_SIZE) { prepare_msg( &Transmit { destination: transmit.destination, From 04be03406aa8615c83d58776dac0ae4f22dac756 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 8 Oct 2024 17:45:11 +0300 Subject: [PATCH 22/28] Update quinn-udp/src/unix.rs Co-authored-by: Max Inden --- quinn-udp/src/unix.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 61922f603..823e316d8 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -808,10 +808,10 @@ mod gso { } } -#[cfg(any(target_os = "macos", target_os = "ios"))] // On Apple platforms using the `sendmsg_x` call, UDP datagram segmentation is not // offloaded to the NIC or even the kernel, but instead done here in user space in // [`send`]) and then passed to the OS as individual `iovec`s (up to `BATCH_SIZE`). 
+#[cfg(any(target_os = "macos", target_os = "ios"))] mod gso { use super::*; From 155708ed06bd5488703bb505e7295ab19782191b Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 8 Oct 2024 17:58:20 +0300 Subject: [PATCH 23/28] Fixes --- quinn-udp/benches/throughput.rs | 2 +- quinn-udp/src/unix.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/quinn-udp/benches/throughput.rs b/quinn-udp/benches/throughput.rs index feae79a66..7231208ee 100644 --- a/quinn-udp/benches/throughput.rs +++ b/quinn-udp/benches/throughput.rs @@ -27,7 +27,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { // Reverse non-blocking flag set by `UdpSocketState` to make the test non-racy recv.set_nonblocking(false).unwrap(); - let mut receive_buffers = vec![[0; SEGMENT_SIZE * recv.gro_segments]; BATCH_SIZE]; + let mut receive_buffers = vec![vec![0; SEGMENT_SIZE * recv_state.gro_segments()]; BATCH_SIZE]; let mut receive_slices = receive_buffers .iter_mut() .map(|buf| IoSliceMut::new(buf)) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 823e316d8..245760112 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -358,7 +358,7 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: let addr = socket2::SockAddr::from(transmit.destination); let segment_size = transmit.segment_size.unwrap_or(transmit.contents.len()); let mut cnt = 0; - debug_assert!(transmit.contents.div_ceil(segment_size) <= BATCH_SIZE); + debug_assert!(transmit.contents.len().div_ceil(segment_size) <= BATCH_SIZE); for (i, chunk) in transmit.contents.chunks(segment_size).enumerate().take(BATCH_SIZE) { prepare_msg( &Transmit { From d191358ab406d812b6ec77e7fb26f51c02b1d951 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 8 Oct 2024 18:39:23 +0300 Subject: [PATCH 24/28] Add `fast-apple-datapath` feature --- quinn-udp/Cargo.toml | 2 + quinn-udp/src/cmsg/unix.rs | 10 +- quinn-udp/src/unix.rs | 218 ++++++++++++++++++++++++++++++------- 3 files changed, 192 
insertions(+), 38 deletions(-) diff --git a/quinn-udp/Cargo.toml b/quinn-udp/Cargo.toml index ca9b3a374..f98c8ec73 100644 --- a/quinn-udp/Cargo.toml +++ b/quinn-udp/Cargo.toml @@ -18,6 +18,8 @@ default = ["tracing", "log"] # Configure `tracing` to log events via `log` if no `tracing` subscriber exists. log = ["tracing/log"] direct-log = ["dep:log"] +# Use private Apple APIs to send multiple packets in a single syscall. +fast-apple-datapath = [] [dependencies] libc = "0.2.158" diff --git a/quinn-udp/src/cmsg/unix.rs b/quinn-udp/src/cmsg/unix.rs index 4a87dc37a..8c7aca62d 100644 --- a/quinn-udp/src/cmsg/unix.rs +++ b/quinn-udp/src/cmsg/unix.rs @@ -32,7 +32,15 @@ impl MsgHdr for libc::msghdr { } } -#[cfg(any(target_os = "macos", target_os = "ios"))] +#[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) +))] impl MsgHdr for crate::imp::msghdr_x { type ControlMessage = libc::cmsghdr; diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 60e5f17a1..b353f40a8 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -19,7 +19,15 @@ use std::{ time::Instant, }; -#[cfg(any(target_os = "macos", target_os = "ios"))] +#[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) +))] use std::ffi::{c_int, c_uint, c_void}; use socket2::SockRef; @@ -30,7 +38,15 @@ use super::{ }; // Adapted from https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/socket.h -#[cfg(any(target_os = "macos", target_os = "ios"))] +#[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) +))] #[repr(C)] #[allow(non_camel_case_types)] pub(crate) struct msghdr_x { @@ -44,7 +60,15 @@ pub(crate) struct msghdr_x { pub msg_datalen: usize, } -#[cfg(any(target_os = "macos", target_os = "ios"))] +#[cfg(all( + 
feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) +))] extern "C" { fn recvmsg_x(s: c_int, msgp: *const msghdr_x, cnt: c_uint, flags: c_int) -> isize; @@ -366,11 +390,14 @@ fn send( } } -#[cfg(any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", +#[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) ))] fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { let mut hdrs = unsafe { mem::zeroed::<[msghdr_x; BATCH_SIZE]>() }; @@ -380,7 +407,12 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: let segment_size = transmit.segment_size.unwrap_or(transmit.contents.len()); let mut cnt = 0; debug_assert!(transmit.contents.len().div_ceil(segment_size) <= BATCH_SIZE); - for (i, chunk) in transmit.contents.chunks(segment_size).enumerate().take(BATCH_SIZE) { + for (i, chunk) in transmit + .contents + .chunks(segment_size) + .enumerate() + .take(BATCH_SIZE) + { prepare_msg( &Transmit { destination: transmit.destination, @@ -426,7 +458,17 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: Ok(()) } -#[cfg(any(target_os = "openbsd", target_os = "netbsd"))] +#[cfg(all( + not(feature = "fast-apple-datapath"), + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + target_os = "openbsd", + target_os = "netbsd" + ) +))] fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { let mut hdr: libc::msghdr = unsafe { mem::zeroed() }; let mut iov: libc::iovec = unsafe { mem::zeroed() }; @@ -438,7 +480,12 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: &mut hdr, &mut iov, &mut ctrl, - true, + cfg!(target_os = "macos") + || cfg!(target_os = "ios") + || 
cfg!(target_os = "tvos") + || cfg!(target_os = "visionos") + || cfg!(target_os = "openbsd") + || cfg!(target_os = "netbsd"), state.sendmsg_einval(), ); let n = unsafe { libc::sendmsg(io.as_raw_fd(), &hdr, 0) }; @@ -513,11 +560,14 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> Ok(msg_count as usize) } -#[cfg(any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", +#[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) ))] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut names = [MaybeUninit::::uninit(); BATCH_SIZE]; @@ -553,7 +603,17 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> Ok(msg_count as usize) } -#[cfg(any(target_os = "openbsd", target_os = "solaris",))] +#[cfg(all( + not(feature = "fast-apple-datapath"), + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + target_os = "openbsd", + target_os = "solaris" + ) +))] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut name = MaybeUninit::::uninit(); let mut ctrl = cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); @@ -582,8 +642,26 @@ const CMSG_LEN: usize = 88; fn prepare_msg( transmit: &Transmit<'_>, dst_addr: &socket2::SockAddr, - #[cfg(not(any(target_os = "macos", target_os = "ios")))] hdr: &mut libc::msghdr, - #[cfg(any(target_os = "macos", target_os = "ios"))] hdr: &mut msghdr_x, + #[cfg(not(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) + )))] + hdr: &mut libc::msghdr, + #[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) + ))] + hdr: &mut msghdr_x, iov: &mut 
libc::iovec, ctrl: &mut cmsg::Aligned<[u8; CMSG_LEN]>, #[allow(unused_variables)] // only used on FreeBSD & macOS @@ -675,7 +753,17 @@ fn prepare_msg( encoder.finish(); } -#[cfg(not(any(target_os = "macos", target_os = "ios")))] +#[cfg(all( + not(feature = "fast-apple-datapath"), + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + target_os = "openbsd", + target_os = "netbsd" + ) +))] fn prepare_recv( buf: &mut IoSliceMut, name: &mut MaybeUninit, @@ -691,7 +779,15 @@ fn prepare_recv( hdr.msg_flags = 0; } -#[cfg(any(target_os = "macos", target_os = "ios"))] +#[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) +))] fn prepare_recv( buf: &mut IoSliceMut, name: &mut MaybeUninit, @@ -710,8 +806,26 @@ fn prepare_recv( fn decode_recv( name: &MaybeUninit, - #[cfg(not(any(target_os = "macos", target_os = "ios")))] hdr: &libc::msghdr, - #[cfg(any(target_os = "macos", target_os = "ios"))] hdr: &msghdr_x, + #[cfg(not(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) + )))] + hdr: &libc::msghdr, + #[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) + ))] + hdr: &msghdr_x, len: usize, ) -> RecvMeta { let name = unsafe { name.assume_init() }; @@ -811,9 +925,29 @@ fn decode_recv( } } +#[cfg(any( + feature = "fast-apple-datapath", + not(any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos" + )) +))] // Chosen somewhat arbitrarily; might benefit from additional tuning. 
pub(crate) const BATCH_SIZE: usize = 32; +#[cfg(not(any( + feature = "fast-apple-datapath", + not(any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos" + )) +)))] +pub(crate) const BATCH_SIZE: usize = 1; + #[cfg(target_os = "linux")] mod gso { use super::*; @@ -846,7 +980,7 @@ mod gso { // On Apple platforms using the `sendmsg_x` call, UDP datagram segmentation is not // offloaded to the NIC or even the kernel, but instead done here in user space in // [`send`]) and then passed to the OS as individual `iovec`s (up to `BATCH_SIZE`). -#[cfg(any(target_os = "macos", target_os = "ios"))] +#[cfg(not(target_os = "linux"))] mod gso { use super::*; @@ -854,19 +988,29 @@ mod gso { BATCH_SIZE } - pub(super) fn set_segment_size(_encoder: &mut cmsg::Encoder, _segment_size: u16) {} -} - -#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "ios")))] -mod gso { - use super::*; - - pub(super) fn max_gso_segments() -> usize { - 1 - } - - pub(super) fn set_segment_size(_encoder: &mut cmsg::Encoder, _segment_size: u16) { - panic!("Setting a segment size is not supported on current platform"); + pub(super) fn set_segment_size( + #[cfg(not(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) + )))] + _encoder: &mut cmsg::Encoder, + #[cfg(all( + feature = "fast-apple-datapath", + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ) + ))] + _encoder: &mut cmsg::Encoder, + _segment_size: u16, + ) { } } From 5d9677dbe15cec1ac72150cbfbc0b286e20187aa Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Wed, 9 Oct 2024 09:27:44 +0300 Subject: [PATCH 25/28] Use `cfg_aliases` to simplify the code --- quinn-udp/Cargo.toml | 5 +- quinn-udp/build.rs | 19 +++ quinn-udp/src/cmsg/unix.rs | 10 +- quinn-udp/src/unix.rs | 258 ++++--------------------------------- 4 files changed, 52 insertions(+), 240 deletions(-) 
create mode 100644 quinn-udp/build.rs diff --git a/quinn-udp/Cargo.toml b/quinn-udp/Cargo.toml index f98c8ec73..c4c0ada25 100644 --- a/quinn-udp/Cargo.toml +++ b/quinn-udp/Cargo.toml @@ -32,7 +32,10 @@ once_cell = { workspace = true } windows-sys = { workspace = true } [dev-dependencies] -criterion = "0.5" +criterion = { version = "0.5", default-features = false } + +[build-dependencies] +cfg_aliases = "0.2" [lib] # See https://github.com/bheisler/criterion.rs/blob/master/book/src/faq.md#cargo-bench-gives-unrecognized-option-errors-for-valid-command-line-options diff --git a/quinn-udp/build.rs b/quinn-udp/build.rs new file mode 100644 index 000000000..5a3390f19 --- /dev/null +++ b/quinn-udp/build.rs @@ -0,0 +1,19 @@ +use cfg_aliases::cfg_aliases; + +fn main() { + // Setup cfg aliases + cfg_aliases! { + // Platforms + apple: { + any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos" + ) + }, + // Convenience aliases + apple_fast: { all(apple, feature = "fast-apple-datapath") }, + apple_slow: { all(apple, not(feature = "fast-apple-datapath")) }, + } +} diff --git a/quinn-udp/src/cmsg/unix.rs b/quinn-udp/src/cmsg/unix.rs index 8c7aca62d..93ac76ba8 100644 --- a/quinn-udp/src/cmsg/unix.rs +++ b/quinn-udp/src/cmsg/unix.rs @@ -32,15 +32,7 @@ impl MsgHdr for libc::msghdr { } } -#[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) -))] +#[cfg(apple_fast)] impl MsgHdr for crate::imp::msghdr_x { type ControlMessage = libc::cmsghdr; diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index b353f40a8..482df834a 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -1,11 +1,4 @@ -#[cfg(not(any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - target_os = "openbsd", - target_os = "solaris", -)))] +#[cfg(not(any(apple, target_os = "openbsd", target_os = "solaris",)))] use std::ptr; 
use std::{ io::{self, IoSliceMut}, @@ -19,15 +12,7 @@ use std::{ time::Instant, }; -#[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) -))] +#[cfg(apple_fast)] use std::ffi::{c_int, c_uint, c_void}; use socket2::SockRef; @@ -38,15 +23,7 @@ use super::{ }; // Adapted from https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/socket.h -#[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) -))] +#[cfg(apple_fast)] #[repr(C)] #[allow(non_camel_case_types)] pub(crate) struct msghdr_x { @@ -60,15 +37,7 @@ pub(crate) struct msghdr_x { pub msg_datalen: usize, } -#[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) -))] +#[cfg(apple_fast)] extern "C" { fn recvmsg_x(s: c_int, msgp: *const msghdr_x, cnt: c_uint, flags: c_int) -> isize; @@ -115,10 +84,7 @@ impl UdpSocketState { || cfg!(target_os = "freebsd") || cfg!(target_os = "openbsd") || cfg!(target_os = "netbsd") - || cfg!(target_os = "macos") - || cfg!(target_os = "ios") - || cfg!(target_os = "tvos") - || cfg!(target_os = "visionos") + || cfg!(apple) || cfg!(target_os = "android") || cfg!(target_os = "solaris") { @@ -180,13 +146,7 @@ impl UdpSocketState { )?; } } - #[cfg(any( - target_os = "freebsd", - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos" - ))] + #[cfg(any(target_os = "freebsd", apple,))] { if is_ipv4 { // Set `may_fragment` to `true` if this option is not supported on the platform. 
@@ -202,10 +162,7 @@ impl UdpSocketState { target_os = "freebsd", target_os = "openbsd", target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", + apple, target_os = "solaris", ))] // IP_RECVDSTADDR == IP_SENDSRCADDR on FreeBSD @@ -285,27 +242,13 @@ impl UdpSocketState { } /// Sets the flag indicating we got EINVAL error from `sendmsg` syscall. - #[cfg(not(any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - target_os = "openbsd", - target_os = "netbsd" - )))] + #[cfg(not(any(apple, target_os = "openbsd", target_os = "netbsd")))] fn set_sendmsg_einval(&self) { self.sendmsg_einval.store(true, Ordering::Relaxed) } } -#[cfg(not(any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - target_os = "openbsd", - target_os = "netbsd" -)))] +#[cfg(not(any(apple, target_os = "openbsd", target_os = "netbsd")))] fn send( #[allow(unused_variables)] // only used on Linux state: &UdpSocketState, @@ -390,15 +333,7 @@ fn send( } } -#[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) -))] +#[cfg(apple_fast)] fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { let mut hdrs = unsafe { mem::zeroed::<[msghdr_x; BATCH_SIZE]>() }; let mut iovs = unsafe { mem::zeroed::<[libc::iovec; BATCH_SIZE]>() }; @@ -458,17 +393,7 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: Ok(()) } -#[cfg(all( - not(feature = "fast-apple-datapath"), - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - target_os = "openbsd", - target_os = "netbsd" - ) -))] +#[cfg(any(target_os = "openbsd", target_os = "netbsd", apple_slow))] fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { let mut hdr: libc::msghdr = unsafe { 
mem::zeroed() }; let mut iov: libc::iovec = unsafe { mem::zeroed() }; @@ -480,12 +405,7 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: &mut hdr, &mut iov, &mut ctrl, - cfg!(target_os = "macos") - || cfg!(target_os = "ios") - || cfg!(target_os = "tvos") - || cfg!(target_os = "visionos") - || cfg!(target_os = "openbsd") - || cfg!(target_os = "netbsd"), + cfg!(apple) || cfg!(target_os = "openbsd") || cfg!(target_os = "netbsd"), state.sendmsg_einval(), ); let n = unsafe { libc::sendmsg(io.as_raw_fd(), &hdr, 0) }; @@ -514,14 +434,7 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: Ok(()) } -#[cfg(not(any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - target_os = "openbsd", - target_os = "solaris", -)))] +#[cfg(not(any(apple, target_os = "openbsd", target_os = "solaris",)))] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut names = [MaybeUninit::::uninit(); BATCH_SIZE]; let mut ctrls = [cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); BATCH_SIZE]; @@ -560,15 +473,7 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> Ok(msg_count as usize) } -#[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) -))] +#[cfg(apple_fast)] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut names = [MaybeUninit::::uninit(); BATCH_SIZE]; let mut ctrls = [cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); BATCH_SIZE]; @@ -603,17 +508,7 @@ fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> Ok(msg_count as usize) } -#[cfg(all( - not(feature = "fast-apple-datapath"), - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - target_os = "openbsd", - target_os = "solaris" - ) 
-))] +#[cfg(any(target_os = "openbsd", target_os = "netbsd", apple_slow))] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut name = MaybeUninit::::uninit(); let mut ctrl = cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); @@ -642,26 +537,8 @@ const CMSG_LEN: usize = 88; fn prepare_msg( transmit: &Transmit<'_>, dst_addr: &socket2::SockAddr, - #[cfg(not(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) - )))] - hdr: &mut libc::msghdr, - #[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) - ))] - hdr: &mut msghdr_x, + #[cfg(not(apple_fast))] hdr: &mut libc::msghdr, + #[cfg(apple_fast)] hdr: &mut msghdr_x, iov: &mut libc::iovec, ctrl: &mut cmsg::Aligned<[u8; CMSG_LEN]>, #[allow(unused_variables)] // only used on FreeBSD & macOS @@ -723,10 +600,7 @@ fn prepare_msg( target_os = "freebsd", target_os = "openbsd", target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", + apple, target_os = "solaris", ))] { @@ -753,17 +627,7 @@ fn prepare_msg( encoder.finish(); } -#[cfg(all( - not(feature = "fast-apple-datapath"), - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - target_os = "openbsd", - target_os = "netbsd" - ) -))] +#[cfg(not(apple_fast))] fn prepare_recv( buf: &mut IoSliceMut, name: &mut MaybeUninit, @@ -779,15 +643,7 @@ fn prepare_recv( hdr.msg_flags = 0; } -#[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) -))] +#[cfg(apple_fast)] fn prepare_recv( buf: &mut IoSliceMut, name: &mut MaybeUninit, @@ -806,26 +662,8 @@ fn prepare_recv( fn decode_recv( name: &MaybeUninit, - #[cfg(not(all( - feature = "fast-apple-datapath", - any( 
- target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) - )))] - hdr: &libc::msghdr, - #[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) - ))] - hdr: &msghdr_x, + #[cfg(not(apple_fast))] hdr: &libc::msghdr, + #[cfg(apple_fast)] hdr: &msghdr_x, len: usize, ) -> RecvMeta { let name = unsafe { name.assume_init() }; @@ -849,10 +687,7 @@ fn decode_recv( // Temporary hack around broken macos ABI. Remove once upstream fixes it. // https://bugreport.apple.com/web/?problemID=48761855 #[allow(clippy::unnecessary_cast)] // cmsg.cmsg_len defined as size_t - if (cfg!(target_os = "macos") - || cfg!(target_os = "ios") - || cfg!(target_os = "tvos") - || cfg!(target_os = "visionos")) + if cfg!(apple) && cmsg.cmsg_len as usize == libc::CMSG_LEN(mem::size_of::() as _) as usize { ecn_bits = cmsg::decode::(cmsg); @@ -871,10 +706,7 @@ fn decode_recv( target_os = "freebsd", target_os = "openbsd", target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", + apple ))] (libc::IPPROTO_IP, libc::IP_RECVDSTADDR) => { let in_addr = unsafe { cmsg::decode::(cmsg) }; @@ -925,27 +757,11 @@ fn decode_recv( } } -#[cfg(any( - feature = "fast-apple-datapath", - not(any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos" - )) -))] +#[cfg(not(apple_slow))] // Chosen somewhat arbitrarily; might benefit from additional tuning. 
pub(crate) const BATCH_SIZE: usize = 32; -#[cfg(not(any( - feature = "fast-apple-datapath", - not(any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos" - )) -)))] +#[cfg(apple_slow)] pub(crate) const BATCH_SIZE: usize = 1; #[cfg(target_os = "linux")] @@ -989,26 +805,8 @@ mod gso { } pub(super) fn set_segment_size( - #[cfg(not(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) - )))] - _encoder: &mut cmsg::Encoder, - #[cfg(all( - feature = "fast-apple-datapath", - any( - target_os = "macos", - target_os = "ios", - target_os = "tvos", - target_os = "visionos", - ) - ))] - _encoder: &mut cmsg::Encoder, + #[cfg(not(apple_fast))] _encoder: &mut cmsg::Encoder, + #[cfg(apple_fast)] _encoder: &mut cmsg::Encoder, _segment_size: u16, ) { } From 25db226980f52f9721a284920af91e644199f04d Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Wed, 9 Oct 2024 09:41:04 +0300 Subject: [PATCH 26/28] Nits --- quinn-udp/src/unix.rs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index 482df834a..b2a107919 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -12,9 +12,6 @@ use std::{ time::Instant, }; -#[cfg(apple_fast)] -use std::ffi::{c_int, c_uint, c_void}; - use socket2::SockRef; use super::{ @@ -22,26 +19,36 @@ use super::{ IO_ERROR_LOG_INTERVAL, }; -// Adapted from https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/socket.h +// Adapted from https://github.com/apple-oss-distributions/xnu/blob/8d741a5de7ff4191bf97d57b9f54c2f6d4a15585/bsd/sys/socket_private.h #[cfg(apple_fast)] #[repr(C)] #[allow(non_camel_case_types)] pub(crate) struct msghdr_x { - pub msg_name: *mut c_void, + pub msg_name: *mut libc::c_void, pub msg_namelen: libc::socklen_t, pub msg_iov: *mut libc::iovec, - pub msg_iovlen: c_int, - pub msg_control: *mut c_void, + pub 
msg_iovlen: libc::c_int, + pub msg_control: *mut libc::c_void, pub msg_controllen: libc::socklen_t, - pub msg_flags: c_int, + pub msg_flags: libc::c_int, pub msg_datalen: usize, } #[cfg(apple_fast)] extern "C" { - fn recvmsg_x(s: c_int, msgp: *const msghdr_x, cnt: c_uint, flags: c_int) -> isize; - - fn sendmsg_x(s: c_int, msgp: *const msghdr_x, cnt: c_uint, flags: c_int) -> isize; + fn recvmsg_x( + s: libc::c_int, + msgp: *const msghdr_x, + cnt: libc::c_uint, + flags: libc::c_int, + ) -> isize; + + fn sendmsg_x( + s: libc::c_int, + msgp: *const msghdr_x, + cnt: libc::c_uint, + flags: libc::c_int, + ) -> isize; } // Defined in netinet6/in6.h on OpenBSD, this is not yet exported by the libc crate From 137422d72a44551b892a97b72abd6fa5809d5323 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Wed, 9 Oct 2024 11:08:09 +0300 Subject: [PATCH 27/28] `bsd` macro --- quinn-udp/build.rs | 7 +++++++ quinn-udp/src/unix.rs | 27 ++++----------------------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/quinn-udp/build.rs b/quinn-udp/build.rs index 5a3390f19..ef89ff70f 100644 --- a/quinn-udp/build.rs +++ b/quinn-udp/build.rs @@ -12,6 +12,13 @@ fn main() { target_os = "visionos" ) }, + bsd: { + any( + target_os = "freebsd", + target_os = "openbsd", + target_os = "netbsd" + ) + }, // Convenience aliases apple_fast: { all(apple, feature = "fast-apple-datapath") }, apple_slow: { all(apple, not(feature = "fast-apple-datapath")) }, diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index b2a107919..f7b74804e 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -88,9 +88,7 @@ impl UdpSocketState { let io = sock.0; let mut cmsg_platform_space = 0; if cfg!(target_os = "linux") - || cfg!(target_os = "freebsd") - || cfg!(target_os = "openbsd") - || cfg!(target_os = "netbsd") + || cfg!(bsd) || cfg!(apple) || cfg!(target_os = "android") || cfg!(target_os = "solaris") @@ -165,13 +163,7 @@ impl UdpSocketState { )?; } } - #[cfg(any( - target_os = "freebsd", - 
target_os = "openbsd", - target_os = "netbsd", - apple, - target_os = "solaris", - ))] + #[cfg(any(bsd, apple, target_os = "solaris"))] // IP_RECVDSTADDR == IP_SENDSRCADDR on FreeBSD // macOS uses only IP_RECVDSTADDR, no IP_SENDSRCADDR on macOS (the same on Solaris) // macOS also supports IP_PKTINFO @@ -603,13 +595,7 @@ fn prepare_msg( }; encoder.push(libc::IPPROTO_IP, libc::IP_PKTINFO, pktinfo); } - #[cfg(any( - target_os = "freebsd", - target_os = "openbsd", - target_os = "netbsd", - apple, - target_os = "solaris", - ))] + #[cfg(any(bsd, apple, target_os = "solaris"))] { if encode_src_ip { let addr = libc::in_addr { @@ -709,12 +695,7 @@ fn decode_recv( pktinfo.ipi_addr.s_addr.to_ne_bytes(), ))); } - #[cfg(any( - target_os = "freebsd", - target_os = "openbsd", - target_os = "netbsd", - apple - ))] + #[cfg(any(bsd, apple))] (libc::IPPROTO_IP, libc::IP_RECVDSTADDR) => { let in_addr = unsafe { cmsg::decode::(cmsg) }; dst_ip = Some(IpAddr::V4(Ipv4Addr::from(in_addr.s_addr.to_ne_bytes()))); From c10487aad9a331319ca20b24e95069a2c75c4557 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Wed, 9 Oct 2024 11:09:30 +0300 Subject: [PATCH 28/28] fmt --- quinn-udp/src/unix.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/quinn-udp/src/unix.rs b/quinn-udp/src/unix.rs index f7b74804e..ce5a4d9d8 100644 --- a/quinn-udp/src/unix.rs +++ b/quinn-udp/src/unix.rs @@ -1,4 +1,4 @@ -#[cfg(not(any(apple, target_os = "openbsd", target_os = "solaris",)))] +#[cfg(not(any(apple, target_os = "openbsd", target_os = "solaris")))] use std::ptr; use std::{ io::{self, IoSliceMut}, @@ -151,7 +151,7 @@ impl UdpSocketState { )?; } } - #[cfg(any(target_os = "freebsd", apple,))] + #[cfg(any(target_os = "freebsd", apple))] { if is_ipv4 { // Set `may_fragment` to `true` if this option is not supported on the platform. 
@@ -433,7 +433,7 @@ fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io: Ok(()) } -#[cfg(not(any(apple, target_os = "openbsd", target_os = "solaris",)))] +#[cfg(not(any(apple, target_os = "openbsd", target_os = "solaris")))] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut names = [MaybeUninit::::uninit(); BATCH_SIZE]; let mut ctrls = [cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); BATCH_SIZE];