summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/act_api.h14
-rw-r--r--include/net/bluetooth/bluetooth.h3
-rw-r--r--include/net/bluetooth/hci.h1
-rw-r--r--include/net/bluetooth/hci_core.h32
-rw-r--r--include/net/bluetooth/hci_drv.h2
-rw-r--r--include/net/bluetooth/hci_sync.h2
-rw-r--r--include/net/bluetooth/mgmt.h9
-rw-r--r--include/net/bond_3ad.h2
-rw-r--r--include/net/bond_options.h1
-rw-r--r--include/net/bonding.h2
-rw-r--r--include/net/cfg80211.h282
-rw-r--r--include/net/cls_cgroup.h2
-rw-r--r--include/net/devlink.h24
-rw-r--r--include/net/dropreason-core.h6
-rw-r--r--include/net/dst.h16
-rw-r--r--include/net/dst_metadata.h11
-rw-r--r--include/net/flow.h11
-rw-r--r--include/net/genetlink.h2
-rw-r--r--include/net/gro.h32
-rw-r--r--include/net/hotdata.h7
-rw-r--r--include/net/icmp.h10
-rw-r--r--include/net/inet6_hashtables.h20
-rw-r--r--include/net/inet_connection_sock.h13
-rw-r--r--include/net/inet_dscp.h6
-rw-r--r--include/net/inet_hashtables.h40
-rw-r--r--include/net/inet_timewait_sock.h11
-rw-r--r--include/net/ip.h15
-rw-r--r--include/net/ip6_route.h10
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/ip_tunnels.h4
-rw-r--r--include/net/libeth/xdp.h11
-rw-r--r--include/net/mac80211.h10
-rw-r--r--include/net/mana/mana.h4
-rw-r--r--include/net/net_namespace.h17
-rw-r--r--include/net/netdev_queues.h9
-rw-r--r--include/net/netfilter/ipv4/nf_reject.h8
-rw-r--r--include/net/netfilter/ipv6/nf_reject.h10
-rw-r--r--include/net/netfilter/nf_tables.h3
-rw-r--r--include/net/netfilter/nf_tables_core.h12
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--include/net/netns/nftables.h1
-rw-r--r--include/net/netns/sctp.h4
-rw-r--r--include/net/nfc/nci_core.h2
-rw-r--r--include/net/page_pool/helpers.h17
-rw-r--r--include/net/ping.h1
-rw-r--r--include/net/proto_memory.h4
-rw-r--r--include/net/psp.h12
-rw-r--r--include/net/psp/functions.h209
-rw-r--r--include/net/psp/types.h184
-rw-r--r--include/net/raw.h1
-rw-r--r--include/net/request_sock.h2
-rw-r--r--include/net/rose.h18
-rw-r--r--include/net/route.h4
-rw-r--r--include/net/rps.h92
-rw-r--r--include/net/sctp/auth.h17
-rw-r--r--include/net/sctp/constants.h9
-rw-r--r--include/net/sctp/structs.h35
-rw-r--r--include/net/seg6_hmac.h20
-rw-r--r--include/net/smc.h51
-rw-r--r--include/net/snmp.h5
-rw-r--r--include/net/sock.h155
-rw-r--r--include/net/tc_act/tc_skbmod.h1
-rw-r--r--include/net/tc_act/tc_tunnel_key.h1
-rw-r--r--include/net/tc_act/tc_vlan.h1
-rw-r--r--include/net/tcp.h108
-rw-r--r--include/net/tcp_ao.h1
-rw-r--r--include/net/tcp_ecn.h642
-rw-r--r--include/net/timewait_sock.h7
-rw-r--r--include/net/udp.h20
-rw-r--r--include/net/xdp.h69
-rw-r--r--include/net/xdp_sock_drv.h21
71 files changed, 1951 insertions, 442 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 2894cfff2da3..91a24b5e0b93 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -33,7 +33,10 @@ struct tc_action {
struct tcf_t tcfa_tm;
struct gnet_stats_basic_sync tcfa_bstats;
struct gnet_stats_basic_sync tcfa_bstats_hw;
- struct gnet_stats_queue tcfa_qstats;
+
+ atomic_t tcfa_drops;
+ atomic_t tcfa_overlimits;
+
struct net_rate_estimator __rcu *tcfa_rate_est;
spinlock_t tcfa_lock;
struct gnet_stats_basic_sync __percpu *cpu_bstats;
@@ -53,7 +56,6 @@ struct tc_action {
#define tcf_action common.tcfa_action
#define tcf_tm common.tcfa_tm
#define tcf_bstats common.tcfa_bstats
-#define tcf_qstats common.tcfa_qstats
#define tcf_rate_est common.tcfa_rate_est
#define tcf_lock common.tcfa_lock
@@ -241,9 +243,7 @@ static inline void tcf_action_inc_drop_qstats(struct tc_action *a)
qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
return;
}
- spin_lock(&a->tcfa_lock);
- qstats_drop_inc(&a->tcfa_qstats);
- spin_unlock(&a->tcfa_lock);
+ atomic_inc(&a->tcfa_drops);
}
static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
@@ -252,9 +252,7 @@ static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats));
return;
}
- spin_lock(&a->tcfa_lock);
- qstats_overlimit_inc(&a->tcfa_qstats);
- spin_unlock(&a->tcfa_lock);
+ atomic_inc(&a->tcfa_overlimits);
}
void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index e5751f3070b8..d46ed9011ee5 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -272,7 +272,8 @@ void bt_err_ratelimited(const char *fmt, ...);
#define BT_ERR(fmt, ...) bt_err(fmt "\n", ##__VA_ARGS__)
#if IS_ENABLED(CONFIG_BT_FEATURE_DEBUG)
-#define BT_DBG(fmt, ...) bt_dbg(fmt "\n", ##__VA_ARGS__)
+#define BT_DBG(fmt, ...) \
+ bt_dbg("%s:%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__)
#else
#define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__)
#endif
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index df1847b74e55..9ecc70baaca9 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -488,6 +488,7 @@ enum {
#define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
#define HCI_ACL_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */
#define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */
+#define HCI_ISO_TX_TIMEOUT usecs_to_jiffies(0x7fffff) /* 8388607 usecs */
/* HCI data types */
#define HCI_COMMAND_PKT 0x01
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6906af7a8f24..2924c2bf2a98 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -487,6 +487,7 @@ struct hci_dev {
unsigned long acl_last_tx;
unsigned long le_last_tx;
+ unsigned long iso_last_tx;
__u8 le_tx_def_phys;
__u8 le_rx_def_phys;
@@ -1245,6 +1246,27 @@ static inline struct hci_conn *hci_conn_hash_lookup_ba(struct hci_dev *hdev,
return NULL;
}
+static inline struct hci_conn *hci_conn_hash_lookup_role(struct hci_dev *hdev,
+ __u8 type, __u8 role,
+ bdaddr_t *ba)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type == type && c->role == role && !bacmp(&c->dst, ba)) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev,
bdaddr_t *ba,
__u8 ba_type)
@@ -1566,16 +1588,18 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
__u16 setting, struct bt_codec *codec,
u16 timeout);
struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos);
+ __u8 dst_type, struct bt_iso_qos *qos,
+ u16 timeout);
struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid,
struct bt_iso_qos *qos,
- __u8 base_len, __u8 *base);
+ __u8 base_len, __u8 *base, u16 timeout);
struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos);
+ __u8 dst_type, struct bt_iso_qos *qos,
+ u16 timeout);
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, __u8 sid,
struct bt_iso_qos *qos,
- __u8 data_len, __u8 *data);
+ __u8 data_len, __u8 *data, u16 timeout);
struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, __u8 sid, struct bt_iso_qos *qos);
int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
diff --git a/include/net/bluetooth/hci_drv.h b/include/net/bluetooth/hci_drv.h
index 2f01c44f05ec..3fd6fdbdb02e 100644
--- a/include/net/bluetooth/hci_drv.h
+++ b/include/net/bluetooth/hci_drv.h
@@ -47,7 +47,7 @@ struct hci_drv_ev_cmd_complete {
struct hci_drv_rp_read_info {
__u8 driver_name[HCI_DRV_MAX_DRIVER_NAME_LENGTH];
__le16 num_supported_commands;
- __le16 supported_commands[];
+ __le16 supported_commands[] __counted_by_le(num_supported_commands);
} __packed;
/* Driver specific OGF (Opcode Group Field)
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 5224f57f6af2..e352a4e0ef8d 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -93,7 +93,7 @@ int hci_update_class_sync(struct hci_dev *hdev);
int hci_update_eir_sync(struct hci_dev *hdev);
int hci_update_class_sync(struct hci_dev *hdev);
-int hci_update_name_sync(struct hci_dev *hdev);
+int hci_update_name_sync(struct hci_dev *hdev, const u8 *name);
int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode);
int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 3575cd16049a..74edea06985b 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -53,10 +53,15 @@ struct mgmt_hdr {
} __packed;
struct mgmt_tlv {
- __le16 type;
- __u8 length;
+ /* New members MUST be added within the __struct_group() macro below. */
+ __struct_group(mgmt_tlv_hdr, __hdr, __packed,
+ __le16 type;
+ __u8 length;
+ );
__u8 value[];
} __packed;
+static_assert(offsetof(struct mgmt_tlv, value) == sizeof(struct mgmt_tlv_hdr),
+ "struct member likely outside of __struct_group()");
struct mgmt_addr_info {
bdaddr_t bdaddr;
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index dba369a2cf27..c92d4a976246 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -26,6 +26,7 @@ enum {
BOND_AD_STABLE = 0,
BOND_AD_BANDWIDTH = 1,
BOND_AD_COUNT = 2,
+ BOND_AD_PRIO = 3,
};
/* rx machine states(43.4.11 in the 802.3ad standard) */
@@ -274,6 +275,7 @@ struct ad_slave_info {
struct port port; /* 802.3ad port structure */
struct bond_3ad_stats stats;
u16 id;
+ u16 port_priority;
};
static inline const char *bond_3ad_churn_desc(churn_state_t state)
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 022b122a9fb6..e6eedf23aea1 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -78,6 +78,7 @@ enum {
BOND_OPT_PRIO,
BOND_OPT_COUPLED_CONTROL,
BOND_OPT_BROADCAST_NEIGH,
+ BOND_OPT_ACTOR_PORT_PRIO,
BOND_OPT_LAST
};
diff --git a/include/net/bonding.h b/include/net/bonding.h
index e06f0d63b2c1..49edc7da0586 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -126,7 +126,6 @@ struct bond_params {
int arp_interval;
int arp_validate;
int arp_all_targets;
- int use_carrier;
int fail_over_mac;
int updelay;
int downdelay;
@@ -711,6 +710,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
void bond_work_init_all(struct bonding *bond);
+void bond_work_cancel_all(struct bonding *bond);
#ifdef CONFIG_PROC_FS
void bond_create_proc_entry(struct bonding *bond);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 406626ff6cc8..781624f5913a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -101,16 +101,6 @@ struct wiphy;
* @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted
* on this channel.
* @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel.
- * @IEEE80211_CHAN_1MHZ: 1 MHz bandwidth is permitted
- * on this channel.
- * @IEEE80211_CHAN_2MHZ: 2 MHz bandwidth is permitted
- * on this channel.
- * @IEEE80211_CHAN_4MHZ: 4 MHz bandwidth is permitted
- * on this channel.
- * @IEEE80211_CHAN_8MHZ: 8 MHz bandwidth is permitted
- * on this channel.
- * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted
- * on this channel.
* @IEEE80211_CHAN_NO_320MHZ: If the driver supports 320 MHz on the band,
* this flag indicates that a 320 MHz channel cannot use this
* channel as the control or any of the secondary channels.
@@ -129,6 +119,13 @@ struct wiphy;
* with very low power (VLP), even if otherwise set to NO_IR.
* @IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY: Allow activity on a 20 MHz channel,
* even if otherwise set to NO_IR.
+ * @IEEE80211_CHAN_S1G_NO_PRIMARY: Prevents the channel for use as an S1G
+ * primary channel. Does not prevent the wider operating channel
+ * described by the chandef from being used. In order for a 2MHz primary
+ * to be used, both 1MHz subchannels shall not contain this flag.
+ * @IEEE80211_CHAN_NO_4MHZ: 4 MHz bandwidth is not permitted on this channel.
+ * @IEEE80211_CHAN_NO_8MHZ: 8 MHz bandwidth is not permitted on this channel.
+ * @IEEE80211_CHAN_NO_16MHZ: 16 MHz bandwidth is not permitted on this channel.
*/
enum ieee80211_channel_flags {
IEEE80211_CHAN_DISABLED = BIT(0),
@@ -145,11 +142,7 @@ enum ieee80211_channel_flags {
IEEE80211_CHAN_NO_20MHZ = BIT(11),
IEEE80211_CHAN_NO_10MHZ = BIT(12),
IEEE80211_CHAN_NO_HE = BIT(13),
- IEEE80211_CHAN_1MHZ = BIT(14),
- IEEE80211_CHAN_2MHZ = BIT(15),
- IEEE80211_CHAN_4MHZ = BIT(16),
- IEEE80211_CHAN_8MHZ = BIT(17),
- IEEE80211_CHAN_16MHZ = BIT(18),
+ /* can use free bits here */
IEEE80211_CHAN_NO_320MHZ = BIT(19),
IEEE80211_CHAN_NO_EHT = BIT(20),
IEEE80211_CHAN_DFS_CONCURRENT = BIT(21),
@@ -158,6 +151,10 @@ enum ieee80211_channel_flags {
IEEE80211_CHAN_CAN_MONITOR = BIT(24),
IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25),
IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY = BIT(26),
+ IEEE80211_CHAN_S1G_NO_PRIMARY = BIT(27),
+ IEEE80211_CHAN_NO_4MHZ = BIT(28),
+ IEEE80211_CHAN_NO_8MHZ = BIT(29),
+ IEEE80211_CHAN_NO_16MHZ = BIT(30),
};
#define IEEE80211_CHAN_NO_HT40 \
@@ -821,6 +818,9 @@ struct key_params {
* @punctured: mask of the punctured 20 MHz subchannels, with
* bits turned on being disabled (punctured); numbered
* from lower to higher frequency (like in the spec)
+ * @s1g_primary_2mhz: Indicates if the control channel pointed to
+ * by 'chan' exists as a 1MHz primary subchannel within an
+ * S1G 2MHz primary channel.
*/
struct cfg80211_chan_def {
struct ieee80211_channel *chan;
@@ -830,6 +830,7 @@ struct cfg80211_chan_def {
struct ieee80211_edmg edmg;
u16 freq1_offset;
u16 punctured;
+ bool s1g_primary_2mhz;
};
/*
@@ -841,9 +842,12 @@ struct cfg80211_bitrate_mask {
u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
u16 vht_mcs[NL80211_VHT_NSS_MAX];
u16 he_mcs[NL80211_HE_NSS_MAX];
+ u16 eht_mcs[NL80211_EHT_NSS_MAX];
enum nl80211_txrate_gi gi;
enum nl80211_he_gi he_gi;
+ enum nl80211_eht_gi eht_gi;
enum nl80211_he_ltf he_ltf;
+ enum nl80211_eht_ltf eht_ltf;
} control[NUM_NL80211_BANDS];
};
@@ -988,6 +992,18 @@ cfg80211_chandef_is_edmg(const struct cfg80211_chan_def *chandef)
}
/**
+ * cfg80211_chandef_is_s1g - check if chandef represents an S1G channel
+ * @chandef: the channel definition
+ *
+ * Return: %true if S1G.
+ */
+static inline bool
+cfg80211_chandef_is_s1g(const struct cfg80211_chan_def *chandef)
+{
+ return chandef->chan->band == NL80211_BAND_S1GHZ;
+}
+
+/**
* cfg80211_chandef_compatible - check if two channel definitions are compatible
* @chandef1: first channel definition
* @chandef2: second channel definition
@@ -2457,6 +2473,29 @@ struct mpath_info {
};
/**
+ * enum wiphy_bss_param_flags - bit positions for supported bss parameters.
+ *
+ * @WIPHY_BSS_PARAM_CTS_PROT: support changing CTS protection.
+ * @WIPHY_BSS_PARAM_SHORT_PREAMBLE: support changing short preamble usage.
+ * @WIPHY_BSS_PARAM_SHORT_SLOT_TIME: support changing short slot time usage.
+ * @WIPHY_BSS_PARAM_BASIC_RATES: support reconfiguring basic rates.
+ * @WIPHY_BSS_PARAM_AP_ISOLATE: support changing AP isolation.
+ * @WIPHY_BSS_PARAM_HT_OPMODE: support changing HT operating mode.
+ * @WIPHY_BSS_PARAM_P2P_CTWINDOW: support reconfiguring ctwindow.
+ * @WIPHY_BSS_PARAM_P2P_OPPPS: support changing P2P opportunistic power-save.
+ */
+enum wiphy_bss_param_flags {
+ WIPHY_BSS_PARAM_CTS_PROT = BIT(0),
+ WIPHY_BSS_PARAM_SHORT_PREAMBLE = BIT(1),
+ WIPHY_BSS_PARAM_SHORT_SLOT_TIME = BIT(2),
+ WIPHY_BSS_PARAM_BASIC_RATES = BIT(3),
+ WIPHY_BSS_PARAM_AP_ISOLATE = BIT(4),
+ WIPHY_BSS_PARAM_HT_OPMODE = BIT(5),
+ WIPHY_BSS_PARAM_P2P_CTWINDOW = BIT(6),
+ WIPHY_BSS_PARAM_P2P_OPPPS = BIT(7),
+};
+
+/**
* struct bss_parameters - BSS parameters
*
* Used to change BSS parameters (mainly for AP mode).
@@ -3887,6 +3926,38 @@ struct cfg80211_qos_map {
};
/**
+ * struct cfg80211_nan_band_config - NAN band specific configuration
+ *
+ * @chan: Pointer to the IEEE 802.11 channel structure. The channel to be used
+ * for NAN operations on this band. For 2.4 GHz band, this is always
+ * channel 6. For 5 GHz band, the channel is either 44 or 149, according
+ * to the regulatory constraints. If chan pointer is NULL the entire band
+ * configuration entry is considered invalid and should not be used.
+ * @rssi_close: RSSI close threshold used for NAN state transition algorithm
+ * as described in chapters 3.3.6 and 3.3.7 "NAN Device Role and State
+ * Transition" of Wi-Fi Aware Specification v4.0. If not
+ * specified (set to 0), default device value is used. The value should
+ * be greater than -60 dBm.
+ * @rssi_middle: RSSI middle threshold used for NAN state transition algorithm.
+ * as described in chapters 3.3.6 and 3.3.7 "NAN Device Role and State
+ * Transition" of Wi-Fi Aware Specification v4.0. If not
+ * specified (set to 0), default device value is used. The value should be
+ * greater than -75 dBm and less than rssi_close.
+ * @awake_dw_interval: Committed DW interval. Valid values range: 0-5. 0
+ * indicates no wakeup for DW and can't be used on 2.4GHz band, otherwise
+ * 2^(n-1).
+ * @disable_scan: If true, the device will not scan this band for cluster
+ * merge. Disabling scan on 2.4 GHz band is not allowed.
+ */
+struct cfg80211_nan_band_config {
+ struct ieee80211_channel *chan;
+ s8 rssi_close;
+ s8 rssi_middle;
+ u8 awake_dw_interval;
+ bool disable_scan;
+};
+
+/**
* struct cfg80211_nan_conf - NAN configuration
*
* This struct defines NAN configuration parameters
@@ -3895,10 +3966,34 @@ struct cfg80211_qos_map {
* @bands: operating bands, a bitmap of &enum nl80211_band values.
* For instance, for NL80211_BAND_2GHZ, bit 0 would be set
* (i.e. BIT(NL80211_BAND_2GHZ)).
+ * @cluster_id: cluster ID used for NAN synchronization. This is a MAC address
+ * that can take a value from 50-6F-9A-01-00-00 to 50-6F-9A-01-FF-FF.
+ * If NULL, the device will pick a random Cluster ID.
+ * @scan_period: period (in seconds) between NAN scans.
+ * @scan_dwell_time: dwell time (in milliseconds) for NAN scans.
+ * @discovery_beacon_interval: interval (in TUs) for discovery beacons.
+ * @enable_dw_notification: flag to enable/disable discovery window
+ * notifications.
+ * @band_cfgs: array of band specific configurations, indexed by
+ * &enum nl80211_band values.
+ * @extra_nan_attrs: pointer to additional NAN attributes.
+ * @extra_nan_attrs_len: length of the additional NAN attributes.
+ * @vendor_elems: pointer to vendor-specific elements.
+ * @vendor_elems_len: length of the vendor-specific elements.
*/
struct cfg80211_nan_conf {
u8 master_pref;
u8 bands;
+ const u8 *cluster_id;
+ u16 scan_period;
+ u16 scan_dwell_time;
+ u8 discovery_beacon_interval;
+ bool enable_dw_notification;
+ struct cfg80211_nan_band_config band_cfgs[NUM_NL80211_BANDS];
+ const u8 *extra_nan_attrs;
+ u16 extra_nan_attrs_len;
+ const u8 *vendor_elems;
+ u16 vendor_elems_len;
};
/**
@@ -3907,10 +4002,17 @@ struct cfg80211_nan_conf {
*
* @CFG80211_NAN_CONF_CHANGED_PREF: master preference
* @CFG80211_NAN_CONF_CHANGED_BANDS: operating bands
+ * @CFG80211_NAN_CONF_CHANGED_CONFIG: changed additional configuration.
+ * When this flag is set, it indicates that some additional attribute(s)
+ * (other then master_pref and bands) have been changed. In this case,
+ * all the unchanged attributes will be properly configured to their
+ * previous values. The driver doesn't need to store any
+ * previous configuration besides master_pref and bands.
*/
enum cfg80211_nan_conf_changes {
CFG80211_NAN_CONF_CHANGED_PREF = BIT(0),
CFG80211_NAN_CONF_CHANGED_BANDS = BIT(1),
+ CFG80211_NAN_CONF_CHANGED_CONFIG = BIT(2),
};
/**
@@ -5622,6 +5724,42 @@ struct wiphy_radio {
u32 antenna_mask;
};
+/**
+ * enum wiphy_nan_flags - NAN capabilities
+ *
+ * @WIPHY_NAN_FLAGS_CONFIGURABLE_SYNC: Device supports NAN configurable
+ * synchronization.
+ * @WIPHY_NAN_FLAGS_USERSPACE_DE: Device doesn't support DE offload.
+ */
+enum wiphy_nan_flags {
+ WIPHY_NAN_FLAGS_CONFIGURABLE_SYNC = BIT(0),
+ WIPHY_NAN_FLAGS_USERSPACE_DE = BIT(1),
+};
+
+/**
+ * struct wiphy_nan_capa - NAN capabilities
+ *
+ * This structure describes the NAN capabilities of a wiphy.
+ *
+ * @flags: NAN capabilities flags, see &enum wiphy_nan_flags
+ * @op_mode: NAN operation mode, as defined in Wi-Fi Aware (TM) specification
+ * Table 81.
+ * @n_antennas: number of antennas supported by the device for Tx/Rx. Lower
+ * nibble indicates the number of TX antennas and upper nibble indicates the
+ * number of RX antennas. Value 0 indicates the information is not
+ * available.
+ * @max_channel_switch_time: maximum channel switch time in milliseconds.
+ * @dev_capabilities: NAN device capabilities as defined in Wi-Fi Aware (TM)
+ * specification Table 79 (Capabilities field).
+ */
+struct wiphy_nan_capa {
+ u32 flags;
+ u8 op_mode;
+ u8 n_antennas;
+ u16 max_channel_switch_time;
+ u8 dev_capabilities;
+};
+
#define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff
/**
@@ -5782,6 +5920,11 @@ struct wiphy_radio {
* and probe responses. This value should be set if the driver
* wishes to limit the number of csa counters. Default (0) means
* infinite.
+ * @bss_param_support: bitmask indicating which bss_parameters as defined in
+ * &struct bss_parameters the driver can actually handle in the
+ * .change_bss() callback. The bit positions are defined in &enum
+ * wiphy_bss_param_flags.
+ *
* @bss_select_support: bitmask indicating the BSS selection criteria supported
* by the driver in the .connect() callback. The bit position maps to the
* attribute indices defined in &enum nl80211_bss_select_attr.
@@ -5790,6 +5933,7 @@ struct wiphy_radio {
* bitmap of &enum nl80211_band values. For instance, for
* NL80211_BAND_2GHZ, bit 0 would be set
* (i.e. BIT(NL80211_BAND_2GHZ)).
+ * @nan_capa: NAN capabilities
*
* @txq_limit: configuration of internal TX queue frame limit
* @txq_memory_limit: configuration internal TX queue memory limit
@@ -5967,9 +6111,11 @@ struct wiphy {
u8 max_num_csa_counters;
+ u32 bss_param_support;
u32 bss_select_support;
u8 nan_supported_bands;
+ struct wiphy_nan_capa nan_capa;
u32 txq_limit;
u32 txq_memory_limit;
@@ -6548,6 +6694,9 @@ struct wireless_dev {
struct {
struct cfg80211_chan_def chandef;
} ocb;
+ struct {
+ u8 cluster_id[ETH_ALEN] __aligned(2);
+ } nan;
} u;
struct {
@@ -6656,16 +6805,6 @@ ieee80211_channel_to_khz(const struct ieee80211_channel *chan)
}
/**
- * ieee80211_s1g_channel_width - get allowed channel width from @chan
- *
- * Only allowed for band NL80211_BAND_S1GHZ
- * @chan: channel
- * Return: The allowed channel width for this center_freq
- */
-enum nl80211_chan_width
-ieee80211_s1g_channel_width(const struct ieee80211_channel *chan);
-
-/**
* ieee80211_channel_to_freq_khz - convert channel number to frequency
* @chan: channel number
* @band: band, necessary due to channel number overlap
@@ -9548,7 +9687,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
* @wiphy: the wiphy
* @chan: channel for which the supported radio index is required
*
- * Return: radio index on success or a negative error code
+ * Return: radio index on success or -EINVAL otherwise
*/
int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy,
const struct ieee80211_channel *chan);
@@ -9970,6 +10109,29 @@ void cfg80211_schedule_channels_check(struct wireless_dev *wdev);
*/
void cfg80211_epcs_changed(struct net_device *netdev, bool enabled);
+/**
+ * cfg80211_next_nan_dw_notif - Notify about the next NAN Discovery Window (DW)
+ * @wdev: Pointer to the wireless device structure
+ * @chan: DW channel (6, 44 or 149)
+ * @gfp: Memory allocation flags
+ */
+void cfg80211_next_nan_dw_notif(struct wireless_dev *wdev,
+ struct ieee80211_channel *chan, gfp_t gfp);
+
+/**
+ * cfg80211_nan_cluster_joined - Notify about NAN cluster join
+ * @wdev: Pointer to the wireless device structure
+ * @cluster_id: Cluster ID of the NAN cluster that was joined or started
+ * @new_cluster: Indicates if this is a new cluster or an existing one
+ * @gfp: Memory allocation flags
+ *
+ * This function is used to notify user space when a NAN cluster has been
+ * joined, providing the cluster ID and a flag whether it is a new cluster.
+ */
+void cfg80211_nan_cluster_joined(struct wireless_dev *wdev,
+ const u8 *cluster_id, bool new_cluster,
+ gfp_t gfp);
+
#ifdef CONFIG_CFG80211_DEBUGFS
/**
* wiphy_locked_debugfs_read - do a locked read in debugfs
@@ -10020,4 +10182,72 @@ ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file,
void *data);
#endif
+/**
+ * cfg80211_s1g_get_start_freq_khz - get S1G chandef start frequency
+ * @chandef: the chandef to use
+ *
+ * Return: the chandefs starting frequency in KHz
+ */
+static inline u32
+cfg80211_s1g_get_start_freq_khz(const struct cfg80211_chan_def *chandef)
+{
+ u32 bw_mhz = cfg80211_chandef_get_width(chandef);
+ u32 center_khz =
+ MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset;
+ return center_khz - bw_mhz * 500 + 500;
+}
+
+/**
+ * cfg80211_s1g_get_end_freq_khz - get S1G chandef end frequency
+ * @chandef: the chandef to use
+ *
+ * Return: the chandefs ending frequency in KHz
+ */
+static inline u32
+cfg80211_s1g_get_end_freq_khz(const struct cfg80211_chan_def *chandef)
+{
+ u32 bw_mhz = cfg80211_chandef_get_width(chandef);
+ u32 center_khz =
+ MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset;
+ return center_khz + bw_mhz * 500 - 500;
+}
+
+/**
+ * cfg80211_s1g_get_primary_sibling - retrieve the sibling 1MHz subchannel
+ * for an S1G chandef using a 2MHz primary channel.
+ * @wiphy: wiphy the channel belongs to
+ * @chandef: the chandef to use
+ *
+ * When chandef::s1g_primary_2mhz is set to true, we are operating on a 2MHz
+ * primary channel. The 1MHz subchannel designated by the primary channel
+ * location exists within chandef::chan, whilst the 'sibling' is denoted as
+ * being the other 1MHz subchannel that make up the 2MHz primary channel.
+ *
+ * Returns: the sibling 1MHz &struct ieee80211_channel, or %NULL on failure.
+ */
+static inline struct ieee80211_channel *
+cfg80211_s1g_get_primary_sibling(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef)
+{
+ int width_mhz = cfg80211_chandef_get_width(chandef);
+ u32 pri_1mhz_khz, sibling_1mhz_khz, op_low_1mhz_khz, pri_index;
+
+ if (!chandef->s1g_primary_2mhz || width_mhz < 2)
+ return NULL;
+
+ pri_1mhz_khz = ieee80211_channel_to_khz(chandef->chan);
+ op_low_1mhz_khz = cfg80211_s1g_get_start_freq_khz(chandef);
+
+ /*
+ * Compute the index of the primary 1 MHz subchannel within the
+ * operating channel, relative to the lowest 1 MHz center frequency.
+ * Flip the least significant bit to select the even/odd sibling,
+ * then translate that index back into a channel frequency.
+ */
+ pri_index = (pri_1mhz_khz - op_low_1mhz_khz) / 1000;
+ sibling_1mhz_khz = op_low_1mhz_khz + ((pri_index ^ 1) * 1000);
+
+ return ieee80211_get_channel_khz(wiphy, sibling_1mhz_khz);
+}
+
#endif /* __NET_CFG80211_H */
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 7e78e7d6f015..668aeee9b3f6 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -63,7 +63,7 @@ static inline u32 task_get_classid(const struct sk_buff *skb)
* calls by looking at the number of nested bh disable calls because
* softirqs always disables bh.
*/
- if (in_serving_softirq()) {
+ if (softirq_count()) {
struct sock *sk = skb_to_full_sk(skb);
/* If there is an sock_cgroup_classid we'll use that. */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index b32c9ceeb81d..9e824f61e40f 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -530,6 +530,8 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC,
DEVLINK_PARAM_GENERIC_ID_CLOCK_ID,
+ DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS,
+ DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -594,6 +596,12 @@ enum devlink_param_generic_id {
#define DEVLINK_PARAM_GENERIC_CLOCK_ID_NAME "clock_id"
#define DEVLINK_PARAM_GENERIC_CLOCK_ID_TYPE DEVLINK_PARAM_TYPE_U64
+#define DEVLINK_PARAM_GENERIC_TOTAL_VFS_NAME "total_vfs"
+#define DEVLINK_PARAM_GENERIC_TOTAL_VFS_TYPE DEVLINK_PARAM_TYPE_U32
+
+#define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_NAME "num_doorbells"
+#define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_TYPE DEVLINK_PARAM_TYPE_U32
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
@@ -746,6 +754,10 @@ enum devlink_health_reporter_state {
* if priv_ctx is NULL, run a full dump
* @diagnose: callback to diagnose the current status
* @test: callback to trigger a test event
+ * @default_graceful_period: default min time (in msec)
+ * between recovery attempts
+ * @default_burst_period: default time (in msec) for
+ * error recoveries before starting the grace period
*/
struct devlink_health_reporter_ops {
@@ -760,6 +772,8 @@ struct devlink_health_reporter_ops {
struct netlink_ext_ack *extack);
int (*test)(struct devlink_health_reporter *reporter,
struct netlink_ext_ack *extack);
+ u64 default_graceful_period;
+ u64 default_burst_period;
};
/**
@@ -1743,7 +1757,7 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
struct ib_device *ibdev);
void devlink_port_type_clear(struct devlink_port *devlink_port);
void devlink_port_attrs_set(struct devlink_port *devlink_port,
- struct devlink_port_attrs *devlink_port_attrs);
+ const struct devlink_port_attrs *attrs);
void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
u16 pf, bool external);
void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
@@ -1928,22 +1942,22 @@ void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
struct devlink_health_reporter *
devl_port_health_reporter_create(struct devlink_port *port,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv);
+ void *priv);
struct devlink_health_reporter *
devlink_port_health_reporter_create(struct devlink_port *port,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv);
+ void *priv);
struct devlink_health_reporter *
devl_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv);
+ void *priv);
struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv);
+ void *priv);
void
devl_health_reporter_destroy(struct devlink_health_reporter *reporter);
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index d8ff24a33459..58d91ccc56e0 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -127,6 +127,8 @@
FN(CANXL_RX_INVALID_FRAME) \
FN(PFMEMALLOC) \
FN(DUALPI2_STEP_DROP) \
+ FN(PSP_INPUT) \
+ FN(PSP_OUTPUT) \
FNe(MAX)
/**
@@ -610,6 +612,10 @@ enum skb_drop_reason {
* threshold of DualPI2 qdisc.
*/
SKB_DROP_REASON_DUALPI2_STEP_DROP,
+ /** @SKB_DROP_REASON_PSP_INPUT: PSP input checks failed */
+ SKB_DROP_REASON_PSP_INPUT,
+ /** @SKB_DROP_REASON_PSP_OUTPUT: PSP output checks failed */
+ SKB_DROP_REASON_PSP_OUTPUT,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
diff --git a/include/net/dst.h b/include/net/dst.h
index bab01363bb97..f8aa1239b4db 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -24,7 +24,10 @@
struct sk_buff;
struct dst_entry {
- struct net_device *dev;
+ union {
+ struct net_device *dev;
+ struct net_device __rcu *dev_rcu;
+ };
struct dst_ops *ops;
unsigned long _metrics;
unsigned long expires;
@@ -570,9 +573,12 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst)
static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
{
- /* In the future, use rcu_dereference(dst->dev) */
- WARN_ON_ONCE(!rcu_read_lock_held());
- return READ_ONCE(dst->dev);
+ return rcu_dereference(dst->dev_rcu);
+}
+
+static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst)
+{
+ return dev_net_rcu(dst_dev_rcu(dst));
}
static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
@@ -592,7 +598,7 @@ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
- return dev_net_rcu(skb_dst_dev(skb));
+ return dev_net_rcu(skb_dst_dev_rcu(skb));
}
struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 4160731dcb6e..1fc2fb03ce3f 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -3,6 +3,7 @@
#define __NET_DST_METADATA_H 1
#include <linux/skbuff.h>
+#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/macsec.h>
#include <net/dst.h>
@@ -220,9 +221,15 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
int md_size)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct metadata_dst *tun_dst;
+
+ tun_dst = __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
+ 0, flags, tunnel_id, md_size);
- return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
- 0, flags, tunnel_id, md_size);
+ if (tun_dst && (iph->frag_off & htons(IP_DF)))
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT,
+ tun_dst->u.tun_info.key.tun_flags);
+ return tun_dst;
}
static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr,
diff --git a/include/net/flow.h b/include/net/flow.h
index a1839c278d87..ae9481c40063 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -12,6 +12,7 @@
#include <linux/atomic.h>
#include <linux/container_of.h>
#include <linux/uidgid.h>
+#include <net/inet_dscp.h>
struct flow_keys;
@@ -32,7 +33,7 @@ struct flowi_common {
int flowic_iif;
int flowic_l3mdev;
__u32 flowic_mark;
- __u8 flowic_tos;
+ dscp_t flowic_dscp;
__u8 flowic_scope;
__u8 flowic_proto;
__u8 flowic_flags;
@@ -70,7 +71,7 @@ struct flowi4 {
#define flowi4_iif __fl_common.flowic_iif
#define flowi4_l3mdev __fl_common.flowic_l3mdev
#define flowi4_mark __fl_common.flowic_mark
-#define flowi4_tos __fl_common.flowic_tos
+#define flowi4_dscp __fl_common.flowic_dscp
#define flowi4_scope __fl_common.flowic_scope
#define flowi4_proto __fl_common.flowic_proto
#define flowi4_flags __fl_common.flowic_flags
@@ -103,7 +104,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_l3mdev = 0;
fl4->flowi4_mark = mark;
- fl4->flowi4_tos = tos;
+ fl4->flowi4_dscp = inet_dsfield_to_dscp(tos);
fl4->flowi4_scope = scope;
fl4->flowi4_proto = proto;
fl4->flowi4_flags = flags;
@@ -141,7 +142,7 @@ struct flowi6 {
#define flowi6_uid __fl_common.flowic_uid
struct in6_addr daddr;
struct in6_addr saddr;
- /* Note: flowi6_tos is encoded in flowlabel, too. */
+ /* Note: flowi6_dscp is encoded in flowlabel, too. */
__be32 flowlabel;
union flowi_uli uli;
#define fl6_sport uli.ports.sport
@@ -163,7 +164,7 @@ struct flowi {
#define flowi_iif u.__fl_common.flowic_iif
#define flowi_l3mdev u.__fl_common.flowic_l3mdev
#define flowi_mark u.__fl_common.flowic_mark
-#define flowi_tos u.__fl_common.flowic_tos
+#define flowi_dscp u.__fl_common.flowic_dscp
#define flowi_scope u.__fl_common.flowic_scope
#define flowi_proto u.__fl_common.flowic_proto
#define flowi_flags u.__fl_common.flowic_flags
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index a03d56765832..7b84f2cef8b1 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -62,7 +62,7 @@ struct genl_info;
* @small_ops: the small-struct operations supported by this family
* @n_small_ops: number of small-struct operations supported by this family
* @split_ops: the split do/dump form of operation definition
- * @n_split_ops: number of entries in @split_ops, not that with split do/dump
+ * @n_split_ops: number of entries in @split_ops, note that with split do/dump
* ops the number of entries is not the same as number of commands
* @sock_priv_size: the size of per-socket private memory
* @sock_priv_init: the per-socket private memory initializer
diff --git a/include/net/gro.h b/include/net/gro.h
index a0fca7ac6e7e..e3affb2e2ca8 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -71,14 +71,11 @@ struct napi_gro_cb {
/* Free the skb? */
u8 free:2;
- /* Used in foo-over-udp, set in udp[46]_gro_receive */
- u8 is_ipv6:1;
-
/* Used in GRE, set in fou/gue_gro_receive */
u8 is_fou:1;
/* Used to determine if ipid_offset can be ignored */
- u8 ip_fixedid:1;
+ u8 ip_fixedid:2;
/* Number of gro_receive callbacks this packet already went through */
u8 recursion_counter:4;
@@ -445,29 +442,25 @@ static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
}
static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2,
- struct sk_buff *p, bool outer)
+ struct sk_buff *p, bool inner)
{
const u32 id = ntohl(*(__be32 *)&iph->id);
const u32 id2 = ntohl(*(__be32 *)&iph2->id);
const u16 ipid_offset = (id >> 16) - (id2 >> 16);
const u16 count = NAPI_GRO_CB(p)->count;
- const u32 df = id & IP_DF;
- int flush;
/* All fields must match except length and checksum. */
- flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF));
-
- if (flush | (outer && df))
- return flush;
+ if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | ((id ^ id2) & IP_DF))
+ return true;
/* When we receive our second frame we can make a decision on if we
* continue this flow as an atomic flow with a fixed ID or if we use
* an incrementing ID.
*/
- if (count == 1 && df && !ipid_offset)
- NAPI_GRO_CB(p)->ip_fixedid = true;
+ if (count == 1 && !ipid_offset)
+ NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner;
- return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid);
+ return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner)));
}
static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2)
@@ -482,7 +475,7 @@ static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr
static inline int __gro_receive_network_flush(const void *th, const void *th2,
struct sk_buff *p, const u16 diff,
- bool outer)
+ bool inner)
{
const void *nh = th - diff;
const void *nh2 = th2 - diff;
@@ -490,19 +483,18 @@ static inline int __gro_receive_network_flush(const void *th, const void *th2,
if (((struct iphdr *)nh)->version == 6)
return ipv6_gro_flush(nh, nh2);
else
- return inet_gro_flush(nh, nh2, p, outer);
+ return inet_gro_flush(nh, nh2, p, inner);
}
static inline int gro_receive_network_flush(const void *th, const void *th2,
struct sk_buff *p)
{
- const bool encap_mark = NAPI_GRO_CB(p)->encap_mark;
int off = skb_transport_offset(p);
int flush;
- flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark);
- if (encap_mark)
- flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false);
+ flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, false);
+ if (NAPI_GRO_CB(p)->encap_mark)
+ flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, true);
return flush;
}
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index fda94b2647ff..4acec191c54a 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -2,10 +2,16 @@
#ifndef _NET_HOTDATA_H
#define _NET_HOTDATA_H
+#include <linux/llist.h>
#include <linux/types.h>
#include <linux/netdevice.h>
#include <net/protocol.h>
+struct skb_defer_node {
+ struct llist_head defer_list;
+ atomic_long_t defer_count;
+} ____cacheline_aligned_in_smp;
+
/* Read mostly data used in network fast paths. */
struct net_hotdata {
#if IS_ENABLED(CONFIG_INET)
@@ -30,6 +36,7 @@ struct net_hotdata {
struct rps_sock_flow_table __rcu *rps_sock_flow_table;
u32 rps_cpu_mask;
#endif
+ struct skb_defer_node __percpu *skb_defer_nodes;
int gro_normal_batch;
int netdev_budget;
int netdev_budget_usecs;
diff --git a/include/net/icmp.h b/include/net/icmp.h
index caddf4a59ad1..935ee13d9ae9 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -37,10 +37,10 @@ struct sk_buff;
struct net;
void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
- const struct ip_options *opt);
+ const struct inet_skb_parm *parm);
static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
- __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
+ __icmp_send(skb_in, type, code, info, IPCB(skb_in));
}
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -48,8 +48,10 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
#else
static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
- struct ip_options opts = { 0 };
- __icmp_send(skb_in, type, code, info, &opts);
+ struct inet_skb_parm parm;
+
+ memset(&parm, 0, sizeof(parm));
+ __icmp_send(skb_in, type, code, info, &parm);
}
#endif
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index ab3929a2a956..282e29237d93 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -41,7 +41,6 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash,
* The sockhash lock must be held as a reader here.
*/
struct sock *__inet6_lookup_established(const struct net *net,
- struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
@@ -65,7 +64,6 @@ struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk,
inet6_ehashfn_t *ehashfn);
struct sock *inet6_lookup_listener(const struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
@@ -83,7 +81,6 @@ struct sock *inet6_lookup_run_sk_lookup(const struct net *net,
inet6_ehashfn_t *ehashfn);
static inline struct sock *__inet6_lookup(const struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
@@ -92,14 +89,14 @@ static inline struct sock *__inet6_lookup(const struct net *net,
const int dif, const int sdif,
bool *refcounted)
{
- struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
- sport, daddr, hnum,
+ struct sock *sk = __inet6_lookup_established(net, saddr, sport,
+ daddr, hnum,
dif, sdif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
- return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
+ return inet6_lookup_listener(net, skb, doff, saddr, sport,
daddr, hnum, dif, sdif);
}
@@ -143,8 +140,7 @@ struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff,
return reuse_sk;
}
-static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
+static inline struct sock *__inet6_lookup_skb(struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
int iif, int sdif,
@@ -161,20 +157,16 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
if (sk)
return sk;
- return __inet6_lookup(net, hashinfo, skb,
- doff, &ip6h->saddr, sport,
+ return __inet6_lookup(net, skb, doff, &ip6h->saddr, sport,
&ip6h->daddr, ntohs(dport),
iif, sdif, refcounted);
}
-struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
+struct sock *inet6_lookup(const struct net *net, struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
const int dif);
-int inet6_hash(struct sock *sk);
-
static inline bool inet6_match(const struct net *net, const struct sock *sk,
const struct in6_addr *saddr,
const struct in6_addr *daddr,
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 1735db332aab..b4b886647607 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -299,14 +299,8 @@ reqsk_timeout(struct request_sock *req, unsigned long max_timeout)
return (unsigned long)min_t(u64, timeout, max_timeout);
}
-static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk)
-{
- /* The below has to be done to allow calling inet_csk_destroy_sock */
- sock_set_flag(sk, SOCK_DEAD);
- this_cpu_inc(*sk->sk_prot->orphan_count);
-}
-
void inet_csk_destroy_sock(struct sock *sk);
+void inet_csk_prepare_for_destroy_sock(struct sock *sk);
void inet_csk_prepare_forced_close(struct sock *sk);
/*
@@ -322,8 +316,9 @@ int inet_csk_listen_start(struct sock *sk);
void inet_csk_listen_stop(struct sock *sk);
/* update the fast reuse flag when adding a socket */
-void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
- struct sock *sk);
+void inet_csk_update_fastreuse(const struct sock *sk,
+ struct inet_bind_bucket *tb,
+ struct inet_bind2_bucket *tb2);
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h
index 72f250dffada..1aa9f04ed1ab 100644
--- a/include/net/inet_dscp.h
+++ b/include/net/inet_dscp.h
@@ -39,6 +39,12 @@ typedef u8 __bitwise dscp_t;
#define INET_DSCP_MASK 0xfc
+/* A few places in the IPv4 code need to ignore the three high order bits of
+ * DSCP because of backward compatibility (as these bits used to represent the
+ * IPv4 Precedence in RFC 791's TOS field and were ignored).
+ */
+#define INET_DSCP_LEGACY_TOS_MASK ((__force dscp_t)0x1c)
+
static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
{
return (__force dscp_t)(dsfield & INET_DSCP_MASK);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 19dbd9081d5a..ac05a52d9e13 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -108,6 +108,8 @@ struct inet_bind2_bucket {
struct hlist_node bhash_node;
/* List of sockets hashed to this bucket */
struct hlist_head owners;
+ signed char fastreuse;
+ signed char fastreuseport;
};
static inline struct net *ib_net(const struct inet_bind_bucket *ib)
@@ -289,12 +291,10 @@ int inet_hashinfo2_init_mod(struct inet_hashinfo *h);
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
bool *found_dup_sk);
-int __inet_hash(struct sock *sk, struct sock *osk);
int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
struct sock *__inet_lookup_listener(const struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr,
@@ -302,12 +302,12 @@ struct sock *__inet_lookup_listener(const struct net *net,
const int dif, const int sdif);
static inline struct sock *inet_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif, int sdif)
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, __be16 dport,
+ int dif, int sdif)
{
- return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
+ return __inet_lookup_listener(net, skb, doff, saddr, sport,
daddr, ntohs(dport), dif, sdif);
}
@@ -358,7 +358,6 @@ static inline bool inet_match(const struct net *net, const struct sock *sk,
* not check it for lookups anymore, thanks Alexey. -DaveM
*/
struct sock *__inet_lookup_established(const struct net *net,
- struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
const int dif, const int sdif);
@@ -384,18 +383,16 @@ struct sock *inet_lookup_run_sk_lookup(const struct net *net,
__be32 daddr, u16 hnum, const int dif,
inet_ehashfn_t *ehashfn);
-static inline struct sock *
- inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
- const __be32 saddr, const __be16 sport,
- const __be32 daddr, const __be16 dport,
- const int dif)
+static inline struct sock *inet_lookup_established(struct net *net,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const __be16 dport,
+ const int dif)
{
- return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
+ return __inet_lookup_established(net, saddr, sport, daddr,
ntohs(dport), dif, 0);
}
static inline struct sock *__inet_lookup(struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
@@ -405,18 +402,17 @@ static inline struct sock *__inet_lookup(struct net *net,
u16 hnum = ntohs(dport);
struct sock *sk;
- sk = __inet_lookup_established(net, hashinfo, saddr, sport,
+ sk = __inet_lookup_established(net, saddr, sport,
daddr, hnum, dif, sdif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
- return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
+ return __inet_lookup_listener(net, skb, doff, saddr,
sport, daddr, hnum, dif, sdif);
}
static inline struct sock *inet_lookup(struct net *net,
- struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
@@ -425,7 +421,7 @@ static inline struct sock *inet_lookup(struct net *net,
struct sock *sk;
bool refcounted;
- sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ sk = __inet_lookup(net, skb, doff, saddr, sport, daddr,
dport, dif, 0, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
@@ -473,8 +469,7 @@ struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff,
return reuse_sk;
}
-static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
- struct sk_buff *skb,
+static inline struct sock *__inet_lookup_skb(struct sk_buff *skb,
int doff,
const __be16 sport,
const __be16 dport,
@@ -492,8 +487,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
if (sk)
return sk;
- return __inet_lookup(net, hashinfo, skb,
- doff, iph->saddr, sport,
+ return __inet_lookup(net, skb, doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb), sdif,
refcounted);
}
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 67a313575780..63a644ff30de 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -70,7 +70,8 @@ struct inet_timewait_sock {
unsigned int tw_transparent : 1,
tw_flowlabel : 20,
tw_usec_ts : 1,
- tw_pad : 2, /* 2 bits hole */
+ tw_connect_bind : 1,
+ tw_pad : 1, /* 1 bit hole */
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
@@ -81,6 +82,14 @@ struct inet_timewait_sock {
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
struct inet_bind2_bucket *tw_tb2;
+#if IS_ENABLED(CONFIG_INET_PSP)
+ struct psp_assoc __rcu *psp_assoc;
+#endif
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sk_buff* (*tw_validate_xmit_skb)(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+#endif
};
#define tw_tclass tw_tos
diff --git a/include/net/ip.h b/include/net/ip.h
index befcba575129..380afb691c41 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -326,11 +326,12 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
}
#endif
-#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \
+#define snmp_get_cpu_field64_batch_cnt(buff64, stats_list, cnt, \
+ mib_statistic, offset) \
{ \
int i, c; \
for_each_possible_cpu(c) { \
- for (i = 0; stats_list[i].name; i++) \
+ for (i = 0; i < cnt; i++) \
buff64[i] += snmp_get_cpu_field64( \
mib_statistic, \
c, stats_list[i].entry, \
@@ -338,11 +339,11 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
} \
}
-#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \
+#define snmp_get_cpu_field_batch_cnt(buff, stats_list, cnt, mib_statistic) \
{ \
int i, c; \
for_each_possible_cpu(c) { \
- for (i = 0; stats_list[i].name; i++) \
+ for (i = 0; i < cnt; i++) \
buff[i] += snmp_get_cpu_field( \
mib_statistic, \
c, stats_list[i].entry); \
@@ -467,12 +468,14 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
bool forwarding)
{
const struct rtable *rt = dst_rtable(dst);
+ const struct net_device *dev;
unsigned int mtu, res;
struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ dev = dst_dev_rcu(dst);
+ net = dev_net_rcu(dev);
if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding) {
@@ -486,7 +489,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
if (mtu)
goto out;
- mtu = READ_ONCE(dst_dev(dst)->mtu);
+ mtu = READ_ONCE(dev->mtu);
if (unlikely(ip_mtu_locked(dst))) {
if (rt->rt_uses_gateway && mtu > 576)
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9255f21818ee..7c5512baa4b2 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -229,16 +229,16 @@ static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
* Store a destination cache entry in a socket
*/
static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+ bool daddr_set,
+ bool saddr_set)
{
struct ipv6_pinfo *np = inet6_sk(sk);
np->dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
sk_setup_caps(sk, dst);
- np->daddr_cache = daddr;
+ np->daddr_cache = daddr_set;
#ifdef CONFIG_IPV6_SUBTREES
- np->saddr_cache = saddr;
+ np->saddr_cache = saddr_set;
#endif
}
@@ -337,7 +337,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst
mtu = IPV6_MIN_MTU;
rcu_read_lock();
- idev = __in6_dev_get(dst_dev(dst));
+ idev = __in6_dev_get(dst_dev_rcu(dst));
if (idev)
mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 48bb3cf41469..b4495c38e0a0 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -440,7 +440,7 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4)
{
- return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos));
+ return dscp == (fl4->flowi4_dscp & INET_DSCP_LEGACY_TOS_MASK);
}
/* Exported by fib_frontend.c */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 8cf1380f3656..4314a97702ea 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -11,7 +11,9 @@
#include <linux/bitops.h>
#include <net/dsfield.h>
+#include <net/flow.h>
#include <net/gro_cells.h>
+#include <net/inet_dscp.h>
#include <net/inet_ecn.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
@@ -362,7 +364,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
fl4->daddr = daddr;
fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
+ fl4->flowi4_dscp = inet_dsfield_to_dscp(tos);
fl4->flowi4_proto = proto;
fl4->fl4_gre_key = key;
fl4->flowi4_mark = mark;
diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h
index f4880b50e804..bc3507edd589 100644
--- a/include/net/libeth/xdp.h
+++ b/include/net/libeth/xdp.h
@@ -1274,7 +1274,6 @@ bool libeth_xdp_buff_add_frag(struct libeth_xdp_buff *xdp,
* Internal, use libeth_xdp_process_buff() instead. Initializes XDP buffer
* head with the Rx buffer data: data pointer, length, headroom, and
* truesize/tailroom. Zeroes the flags.
- * Uses faster single u64 write instead of per-field access.
*/
static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp,
const struct libeth_fqe *fqe,
@@ -1282,17 +1281,9 @@ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp,
{
const struct page *page = __netmem_to_page(fqe->netmem);
-#ifdef __LIBETH_WORD_ACCESS
- static_assert(offsetofend(typeof(xdp->base), flags) -
- offsetof(typeof(xdp->base), frame_sz) ==
- sizeof(u64));
-
- *(u64 *)&xdp->base.frame_sz = fqe->truesize;
-#else
- xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq);
-#endif
xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset,
pp_page_to_nmdesc(page)->pp->p.offset, len, true);
+ xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq);
}
/**
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a45e4bee65d4..a55085cf4ec4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3192,6 +3192,10 @@ ieee80211_get_tx_rate(const struct ieee80211_hw *hw,
{
if (WARN_ON_ONCE(c->control.rates[0].idx < 0))
return NULL;
+
+ if (c->band >= NUM_NL80211_BANDS)
+ return NULL;
+
return &hw->wiphy->bands[c->band]->bitrates[c->control.rates[0].idx];
}
@@ -7834,4 +7838,10 @@ int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw,
int n_vifs,
enum ieee80211_chanctx_switch_mode mode);
+/**
+ * ieee80211_vif_nan_started - Return whether a NAN vif is started
+ * @vif: the vif
+ * Return: %true iff the vif is a NAN interface and NAN is started
+ */
+bool ieee80211_vif_nan_started(struct ieee80211_vif *vif);
#endif /* MAC80211_H */
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index e1030a7d2daa..0921485565c0 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -65,6 +65,8 @@ enum TRI_STATE {
#define MANA_STATS_RX_COUNT 5
#define MANA_STATS_TX_COUNT 11
+#define MANA_RX_FRAG_ALIGNMENT 64
+
struct mana_stats_rx {
u64 packets;
u64 bytes;
@@ -328,6 +330,7 @@ struct mana_rxq {
u32 datasize;
u32 alloc_size;
u32 headroom;
+ u32 frag_count;
mana_handle_t rxobj;
@@ -510,6 +513,7 @@ struct mana_port_context {
u32 rxbpre_datasize;
u32 rxbpre_alloc_size;
u32 rxbpre_headroom;
+ u32 rxbpre_frag_count;
struct bpf_prog *bpf_prog;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 025a7574b275..cb664f6e3558 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -204,7 +204,7 @@ struct net {
extern struct net init_net;
#ifdef CONFIG_NET_NS
-struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
+struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns,
struct net *old_net);
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
@@ -218,7 +218,7 @@ extern struct task_struct *cleanup_net_task;
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
-static inline struct net *copy_net_ns(unsigned long flags,
+static inline struct net *copy_net_ns(u64 flags,
struct user_namespace *user_ns, struct net *old_net)
{
if (flags & CLONE_NEWNET)
@@ -262,10 +262,15 @@ void ipx_unregister_sysctl(void);
#ifdef CONFIG_NET_NS
void __put_net(struct net *net);
+static inline struct net *to_net_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct net, ns);
+}
+
/* Try using get_net_track() instead */
static inline struct net *get_net(struct net *net)
{
- refcount_inc(&net->ns.count);
+ ns_ref_inc(net);
return net;
}
@@ -276,7 +281,7 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
- if (!refcount_inc_not_zero(&net->ns.count))
+ if (!ns_ref_get(net))
net = NULL;
return net;
}
@@ -284,7 +289,7 @@ static inline struct net *maybe_get_net(struct net *net)
/* Try using put_net_track() instead */
static inline void put_net(struct net *net)
{
- if (refcount_dec_and_test(&net->ns.count))
+ if (ns_ref_put(net))
__put_net(net);
}
@@ -296,7 +301,7 @@ int net_eq(const struct net *net1, const struct net *net2)
static inline int check_net(const struct net *net)
{
- return refcount_read(&net->ns.count) != 0;
+ return ns_ref_read(net) != 0;
}
void net_drop_ns(void *);
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 6e835972abd1..cd00e0406cf4 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -127,6 +127,9 @@ void netdev_stat_queue_sum(struct net_device *netdev,
* @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
* queue's memory is written at the specified address.
*
+ * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used
+ * for this queue. Return NULL on error.
+ *
* Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
* the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
* be called for an interface which is open.
@@ -144,8 +147,12 @@ struct netdev_queue_mgmt_ops {
int (*ndo_queue_stop)(struct net_device *dev,
void *per_queue_mem,
int idx);
+ struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev,
+ int idx);
};
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);
+
/**
* DOC: Lockless queue stopping / waking helpers.
*
@@ -321,4 +328,6 @@ static inline void netif_subqueue_sent(const struct net_device *dev,
get_desc, start_thrs); \
})
+struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx);
+
#endif
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index c653fcb88354..09de2f2686b5 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -10,14 +10,6 @@
void nf_send_unreach(struct sk_buff *skb_in, int code, int hook);
void nf_send_reset(struct net *net, struct sock *, struct sk_buff *oldskb,
int hook);
-const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
- struct tcphdr *_oth, int hook);
-struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- __u8 protocol, int ttl);
-void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
- const struct tcphdr *oth);
-
struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
struct sk_buff *oldskb,
const struct net_device *dev,
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index d729344ba644..94ec0b9f2838 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -9,16 +9,6 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char cod
unsigned int hooknum);
void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook);
-const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
- struct tcphdr *otcph,
- unsigned int *otcplen, int hook);
-struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- __u8 protocol, int hoplimit);
-void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
- const struct sk_buff *oldskb,
- const struct tcphdr *oth, unsigned int otcplen);
-
struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
struct sk_buff *oldskb,
const struct net_device *dev,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 891e43a01bdc..fab7dc73f738 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -556,6 +556,7 @@ struct nft_set_elem_expr {
* @size: maximum set size
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
+ * @in_update_walk: true during ->walk() in transaction phase
* @use: number of rules references to this set
* @nelems: number of elements
* @ndeact: number of deactivated elements queued for removal
@@ -590,6 +591,7 @@ struct nft_set {
u32 size;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
+ bool in_update_walk;
u32 use;
atomic_t nelems;
u32 ndeact;
@@ -1912,7 +1914,6 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
u64 tstamp;
- unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
struct work_struct destroy_work;
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 6c2f483d9828..b8df5acbb723 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -73,7 +73,7 @@ struct nft_ct {
struct nft_payload {
enum nft_payload_bases base:8;
- u8 offset;
+ u16 offset;
u8 len;
u8 dreg;
};
@@ -109,17 +109,11 @@ nft_hash_lookup_fast(const struct net *net, const struct nft_set *set,
const struct nft_set_ext *
nft_hash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
+#endif
+
const struct nft_set_ext *
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
-#else
-static inline const struct nft_set_ext *
-nft_set_do_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key)
-{
- return set->ops->lookup(net, set, key);
-}
-#endif
/* called from nft_pipapo_avx2.c */
const struct nft_set_ext *
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 6373e3f17da8..34eb3aecb3f2 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -148,6 +148,8 @@ struct netns_ipv4 {
struct local_ports ip_local_ports;
u8 sysctl_tcp_ecn;
+ u8 sysctl_tcp_ecn_option;
+ u8 sysctl_tcp_ecn_option_beacon;
u8 sysctl_tcp_ecn_fallback;
u8 sysctl_ip_default_ttl;
@@ -251,6 +253,7 @@ struct netns_ipv4 {
int sysctl_igmp_qrv;
struct ping_group_range ping_group_range;
+ u16 ping_port_rover;
atomic_t dev_addr_genid;
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index cc8060c017d5..99dd166c5d07 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -3,6 +3,7 @@
#define _NETNS_NFTABLES_H_
struct netns_nftables {
+ unsigned int base_seq;
u8 gencursor;
};
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index d25cd7a9c5ff..c0f97f36389e 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -75,8 +75,8 @@ struct netns_sctp {
/* Whether Cookie Preservative is enabled(1) or not(0) */
int cookie_preserve_enable;
- /* The namespace default hmac alg */
- char *sctp_hmac_alg;
+ /* Whether cookie authentication is enabled(1) or not(0) */
+ int cookie_auth_enable;
/* Valid.Cookie.Life - 60 seconds */
unsigned int valid_cookie_life;
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index e180bdf2f82b..664d5058e66e 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -52,7 +52,7 @@ enum nci_state {
#define NCI_RF_DISC_SELECT_TIMEOUT 5000
#define NCI_RF_DEACTIVATE_TIMEOUT 30000
#define NCI_CMD_TIMEOUT 5000
-#define NCI_DATA_TIMEOUT 700
+#define NCI_DATA_TIMEOUT 3000
struct nci_dev;
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index db180626be06..3247026e096a 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -489,6 +489,11 @@ page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool,
offset, dma_sync_size);
}
+static inline void page_pool_get(struct page_pool *pool)
+{
+ refcount_inc(&pool->user_cnt);
+}
+
static inline bool page_pool_put(struct page_pool *pool)
{
return refcount_dec_and_test(&pool->user_cnt);
@@ -500,6 +505,18 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
page_pool_update_nid(pool, new_nid);
}
+/**
+ * page_pool_is_unreadable() - will allocated buffers be unreadable for the CPU
+ * @pool: queried page pool
+ *
+ * Check if page pool will return buffers which are unreadable to the CPU /
+ * kernel. This will only be the case if user space bound a memory provider (mp)
+ * which returns unreadable memory to the queue served by the page pool.
+ * If %PP_FLAG_ALLOW_UNREADABLE_NETMEM was set but there is no mp bound
+ * this helper will return false. See also netif_rxq_has_unreadable_mp().
+ *
+ * Return: true if memory allocated by the page pool may be unreadable
+ */
static inline bool page_pool_is_unreadable(struct page_pool *pool)
{
return !!pool->mp_ops;
diff --git a/include/net/ping.h b/include/net/ping.h
index bc7779262e60..9634b8800814 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -54,7 +54,6 @@ struct pingfakehdr {
};
int ping_get_port(struct sock *sk, unsigned short ident);
-int ping_hash(struct sock *sk);
void ping_unhash(struct sock *sk);
int ping_init_sock(struct sock *sk);
diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h
index a6ab2f4f5e28..8e91a8fa31b5 100644
--- a/include/net/proto_memory.h
+++ b/include/net/proto_memory.h
@@ -31,8 +31,8 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
if (!sk->sk_prot->memory_pressure)
return false;
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ if (mem_cgroup_sk_enabled(sk) &&
+ mem_cgroup_sk_under_memory_pressure(sk))
return true;
return !!READ_ONCE(*sk->sk_prot->memory_pressure);
diff --git a/include/net/psp.h b/include/net/psp.h
new file mode 100644
index 000000000000..33bb4d1dc46e
--- /dev/null
+++ b/include/net/psp.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __NET_PSP_ALL_H
+#define __NET_PSP_ALL_H
+
+#include <uapi/linux/psp.h>
+#include <net/psp/functions.h>
+#include <net/psp/types.h>
+
+/* Do not add any code here. Put it in the sub-headers instead. */
+
+#endif /* __NET_PSP_ALL_H */
diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h
new file mode 100644
index 000000000000..ef7743664da3
--- /dev/null
+++ b/include/net/psp/functions.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __NET_PSP_HELPERS_H
+#define __NET_PSP_HELPERS_H
+
+#include <linux/skbuff.h>
+#include <linux/rcupdate.h>
+#include <linux/udp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/psp/types.h>
+
+struct inet_timewait_sock;
+
+/* Driver-facing API */
+struct psp_dev *
+psp_dev_create(struct net_device *netdev, struct psp_dev_ops *psd_ops,
+ struct psp_dev_caps *psd_caps, void *priv_ptr);
+void psp_dev_unregister(struct psp_dev *psd);
+bool psp_dev_encapsulate(struct net *net, struct sk_buff *skb, __be32 spi,
+ u8 ver, __be16 sport);
+int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv);
+
+/* Kernel-facing API */
+void psp_assoc_put(struct psp_assoc *pas);
+
+static inline void *psp_assoc_drv_data(struct psp_assoc *pas)
+{
+ return pas->drv_data;
+}
+
+#if IS_ENABLED(CONFIG_INET_PSP)
+unsigned int psp_key_size(u32 version);
+void psp_sk_assoc_free(struct sock *sk);
+void psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk);
+void psp_twsk_assoc_free(struct inet_timewait_sock *tw);
+void psp_reply_set_decrypted(struct sk_buff *skb);
+
+static inline struct psp_assoc *psp_sk_assoc(const struct sock *sk)
+{
+ return rcu_dereference_check(sk->psp_assoc, lockdep_sock_is_held(sk));
+}
+
+static inline void
+psp_enqueue_set_decrypted(struct sock *sk, struct sk_buff *skb)
+{
+ struct psp_assoc *pas;
+
+ pas = psp_sk_assoc(sk);
+ if (pas && pas->tx.spi)
+ skb->decrypted = 1;
+}
+
+static inline unsigned long
+__psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two,
+ unsigned long diffs)
+{
+ struct psp_skb_ext *a, *b;
+
+ a = skb_ext_find(one, SKB_EXT_PSP);
+ b = skb_ext_find(two, SKB_EXT_PSP);
+
+ diffs |= (!!a) ^ (!!b);
+ if (!diffs && unlikely(a))
+ diffs |= memcmp(a, b, sizeof(*a));
+ return diffs;
+}
+
+static inline bool
+psp_is_allowed_nondata(struct sk_buff *skb, struct psp_assoc *pas)
+{
+ bool fin = !!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN);
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+ u32 seq = TCP_SKB_CB(skb)->seq;
+ bool pure_fin;
+
+ pure_fin = fin && end_seq - seq == 1;
+
+ return seq == end_seq || (pure_fin && seq == pas->upgrade_seq);
+}
+
+static inline bool
+psp_pse_matches_pas(struct psp_skb_ext *pse, struct psp_assoc *pas)
+{
+ return pse && pas->rx.spi == pse->spi &&
+ pas->generation == pse->generation &&
+ pas->version == pse->version &&
+ pas->dev_id == pse->dev_id;
+}
+
+static inline enum skb_drop_reason
+__psp_sk_rx_policy_check(struct sk_buff *skb, struct psp_assoc *pas)
+{
+ struct psp_skb_ext *pse = skb_ext_find(skb, SKB_EXT_PSP);
+
+ if (!pas)
+ return pse ? SKB_DROP_REASON_PSP_INPUT : 0;
+
+ if (likely(psp_pse_matches_pas(pse, pas))) {
+ if (unlikely(!pas->peer_tx))
+ pas->peer_tx = 1;
+
+ return 0;
+ }
+
+ if (!pse) {
+ if (!pas->tx.spi ||
+ (!pas->peer_tx && psp_is_allowed_nondata(skb, pas)))
+ return 0;
+ }
+
+ return SKB_DROP_REASON_PSP_INPUT;
+}
+
+static inline enum skb_drop_reason
+psp_sk_rx_policy_check(struct sock *sk, struct sk_buff *skb)
+{
+ return __psp_sk_rx_policy_check(skb, psp_sk_assoc(sk));
+}
+
+static inline enum skb_drop_reason
+psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb)
+{
+ return __psp_sk_rx_policy_check(skb, rcu_dereference(tw->psp_assoc));
+}
+
+static inline struct psp_assoc *psp_sk_get_assoc_rcu(const struct sock *sk)
+{
+ struct psp_assoc *pas;
+ int state;
+
+ state = READ_ONCE(sk->sk_state);
+ if (!sk_is_inet(sk) || state == TCP_NEW_SYN_RECV)
+ return NULL;
+
+ pas = state == TCP_TIME_WAIT ?
+ rcu_dereference(inet_twsk(sk)->psp_assoc) :
+ rcu_dereference(sk->psp_assoc);
+ return pas;
+}
+
+static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb)
+{
+ if (!skb->decrypted || !skb->sk)
+ return NULL;
+
+ return psp_sk_get_assoc_rcu(skb->sk);
+}
+
+static inline unsigned int psp_sk_overhead(const struct sock *sk)
+{
+ int psp_encap = sizeof(struct udphdr) + PSP_HDR_SIZE + PSP_TRL_SIZE;
+ bool has_psp = rcu_access_pointer(sk->psp_assoc);
+
+ return has_psp ? psp_encap : 0;
+}
+#else
+static inline void psp_sk_assoc_free(struct sock *sk) { }
+static inline void
+psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk) { }
+static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { }
+static inline void
+psp_reply_set_decrypted(struct sk_buff *skb) { }
+
+static inline struct psp_assoc *psp_sk_assoc(const struct sock *sk)
+{
+ return NULL;
+}
+
+static inline void
+psp_enqueue_set_decrypted(struct sock *sk, struct sk_buff *skb) { }
+
+static inline unsigned long
+__psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two,
+ unsigned long diffs)
+{
+ return diffs;
+}
+
+static inline enum skb_drop_reason
+psp_sk_rx_policy_check(struct sock *sk, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline enum skb_drop_reason
+psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb)
+{
+ return NULL;
+}
+
+static inline unsigned int psp_sk_overhead(const struct sock *sk)
+{
+ return 0;
+}
+#endif
+
+static inline unsigned long
+psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two)
+{
+ return __psp_skb_coalesce_diff(one, two, 0);
+}
+
+#endif /* __NET_PSP_HELPERS_H */
diff --git a/include/net/psp/types.h b/include/net/psp/types.h
new file mode 100644
index 000000000000..31cee64b7c86
--- /dev/null
+++ b/include/net/psp/types.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __NET_PSP_H
+#define __NET_PSP_H
+
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+
+struct netlink_ext_ack;
+
+#define PSP_DEFAULT_UDP_PORT 1000
+
+struct psphdr {
+ u8 nexthdr;
+ u8 hdrlen;
+ u8 crypt_offset;
+ u8 verfl;
+ __be32 spi;
+ __be64 iv;
+ __be64 vc[]; /* optional */
+};
+
+#define PSP_ENCAP_HLEN (sizeof(struct udphdr) + sizeof(struct psphdr))
+
+#define PSP_SPI_KEY_ID GENMASK(30, 0)
+#define PSP_SPI_KEY_PHASE BIT(31)
+
+#define PSPHDR_CRYPT_OFFSET GENMASK(5, 0)
+
+#define PSPHDR_VERFL_SAMPLE BIT(7)
+#define PSPHDR_VERFL_DROP BIT(6)
+#define PSPHDR_VERFL_VERSION GENMASK(5, 2)
+#define PSPHDR_VERFL_VIRT BIT(1)
+#define PSPHDR_VERFL_ONE BIT(0)
+
+#define PSP_HDRLEN_NOOPT ((sizeof(struct psphdr) - 8) / 8)
+
+/**
+ * struct psp_dev_config - PSP device configuration
+ * @versions: PSP versions enabled on the device
+ */
+struct psp_dev_config {
+ u32 versions;
+};
+
+/**
+ * struct psp_dev - PSP device struct
+ * @main_netdev: original netdevice of this PSP device
+ * @ops: driver callbacks
+ * @caps: device capabilities
+ * @drv_priv: driver priv pointer
+ * @lock: instance lock, protects all fields
+ * @refcnt: reference count for the instance
+ * @id: instance id
+ * @generation: current generation of the device key
+ * @config: current device configuration
+ * @active_assocs: list of registered associations
+ * @prev_assocs: associations which use old (but still usable)
+ * device key
+ * @stale_assocs: associations which use a rotated out key
+ *
+ * @rcu: RCU head for freeing the structure
+ */
+struct psp_dev {
+ struct net_device *main_netdev;
+
+ struct psp_dev_ops *ops;
+ struct psp_dev_caps *caps;
+ void *drv_priv;
+
+ struct mutex lock;
+ refcount_t refcnt;
+
+ u32 id;
+
+ u8 generation;
+
+ struct psp_dev_config config;
+
+ struct list_head active_assocs;
+ struct list_head prev_assocs;
+ struct list_head stale_assocs;
+
+ struct rcu_head rcu;
+};
+
+#define PSP_GEN_VALID_MASK 0x7f
+
+/**
+ * struct psp_dev_caps - PSP device capabilities
+ */
+struct psp_dev_caps {
+ /**
+ * @versions: mask of supported PSP versions
+ * Set this field to 0 to indicate PSP is not supported at all.
+ */
+ u32 versions;
+
+ /**
+ * @assoc_drv_spc: size of driver-specific state in Tx assoc
+ * Determines the size of struct psp_assoc::drv_data
+ */
+ u32 assoc_drv_spc;
+};
+
+#define PSP_MAX_KEY 32
+
+#define PSP_HDR_SIZE 16 /* We don't support optional fields, yet */
+#define PSP_TRL_SIZE 16 /* AES-GCM/GMAC trailer size */
+
+struct psp_skb_ext {
+ __be32 spi;
+ u16 dev_id;
+ u8 generation;
+ u8 version;
+};
+
+struct psp_key_parsed {
+ __be32 spi;
+ u8 key[PSP_MAX_KEY];
+};
+
+struct psp_assoc {
+ struct psp_dev *psd;
+
+ u16 dev_id;
+ u8 generation;
+ u8 version;
+ u8 peer_tx;
+
+ u32 upgrade_seq;
+
+ struct psp_key_parsed tx;
+ struct psp_key_parsed rx;
+
+ refcount_t refcnt;
+ struct rcu_head rcu;
+ struct work_struct work;
+ struct list_head assocs_list;
+
+ u8 drv_data[] __aligned(8);
+};
+
+/**
+ * struct psp_dev_ops - netdev driver facing PSP callbacks
+ */
+struct psp_dev_ops {
+ /**
+ * @set_config: set configuration of a PSP device
+ * Driver can inspect @psd->config for the previous configuration.
+ * Core will update @psd->config with @config on success.
+ */
+ int (*set_config)(struct psp_dev *psd, struct psp_dev_config *conf,
+ struct netlink_ext_ack *extack);
+
+ /**
+ * @key_rotate: rotate the device key
+ */
+ int (*key_rotate)(struct psp_dev *psd, struct netlink_ext_ack *extack);
+
+ /**
+ * @rx_spi_alloc: allocate an Rx SPI+key pair
+ * Allocate an Rx SPI and resulting derived key.
+ * This key should remain valid until key rotation.
+ */
+ int (*rx_spi_alloc)(struct psp_dev *psd, u32 version,
+ struct psp_key_parsed *assoc,
+ struct netlink_ext_ack *extack);
+
+ /**
+ * @tx_key_add: add a Tx key to the device
+ * Install an association in the device. Core will allocate space
+ * for the driver to use at drv_data.
+ */
+ int (*tx_key_add)(struct psp_dev *psd, struct psp_assoc *pas,
+ struct netlink_ext_ack *extack);
+ /**
+ * @tx_key_del: remove a Tx key from the device
+ * Remove an association from the device.
+ */
+ void (*tx_key_del)(struct psp_dev *psd, struct psp_assoc *pas);
+};
+
+#endif /* __NET_PSP_H */
diff --git a/include/net/raw.h b/include/net/raw.h
index 32a61481a253..66c0ffeada2e 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -81,6 +81,7 @@ struct raw_sock {
struct inet_sock inet;
struct icmp_filter filter;
u32 ipmr_table;
+ struct numa_drop_counters drop_counters;
};
#define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 6a5ec1418e85..cd4d4cf71d0d 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -185,8 +185,8 @@ struct fastopen_queue {
struct request_sock_queue {
spinlock_t rskq_lock;
u8 rskq_defer_accept;
+ u8 synflood_warned;
- u32 synflood_warned;
atomic_t qlen;
atomic_t young;
diff --git a/include/net/rose.h b/include/net/rose.h
index 23267b4efcfa..2b5491bbf39a 100644
--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -8,6 +8,7 @@
#ifndef _ROSE_H
#define _ROSE_H
+#include <linux/refcount.h>
#include <linux/rose.h>
#include <net/ax25.h>
#include <net/sock.h>
@@ -96,7 +97,7 @@ struct rose_neigh {
ax25_cb *ax25;
struct net_device *dev;
unsigned short count;
- unsigned short use;
+ refcount_t use;
unsigned int number;
char restarted;
char dce_mode;
@@ -151,6 +152,21 @@ struct rose_sock {
#define rose_sk(sk) ((struct rose_sock *)(sk))
+static inline void rose_neigh_hold(struct rose_neigh *rose_neigh)
+{
+ refcount_inc(&rose_neigh->use);
+}
+
+static inline void rose_neigh_put(struct rose_neigh *rose_neigh)
+{
+ if (refcount_dec_and_test(&rose_neigh->use)) {
+ if (rose_neigh->ax25)
+ ax25_cb_put(rose_neigh->ax25);
+ kfree(rose_neigh->digipeat);
+ kfree(rose_neigh);
+ }
+}
+
/* af_rose.c */
extern ax25_address rose_callsign;
extern int sysctl_rose_restart_request_timeout;
diff --git a/include/net/route.h b/include/net/route.h
index 7ea840daa775..f90106f383c5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -189,7 +189,7 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
{
struct flowi4 fl4 = {
.flowi4_oif = oif,
- .flowi4_tos = inet_dscp_to_dsfield(dscp),
+ .flowi4_dscp = dscp,
.flowi4_scope = scope,
.daddr = daddr,
.saddr = saddr,
@@ -390,7 +390,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
const struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
rcu_read_unlock();
}
diff --git a/include/net/rps.h b/include/net/rps.h
index d8ab3a08bcc4..f1794cd2e7fb 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -25,13 +25,16 @@ struct rps_map {
/*
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
- * tail pointer for that CPU's input queue at the time of last enqueue, and
- * a hardware filter index.
+ * tail pointer for that CPU's input queue at the time of last enqueue, a
+ * hardware filter index, and the hash of the flow if aRFS is enabled.
*/
struct rps_dev_flow {
u16 cpu;
u16 filter;
unsigned int last_qtail;
+#ifdef CONFIG_RFS_ACCEL
+ u32 hash;
+#endif
};
#define RPS_NO_FILTER 0xffff
@@ -82,11 +85,8 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
WRITE_ONCE(table->ents[index], val);
}
-#endif /* CONFIG_RPS */
-
-static inline void sock_rps_record_flow_hash(__u32 hash)
+static inline void _sock_rps_record_flow_hash(__u32 hash)
{
-#ifdef CONFIG_RPS
struct rps_sock_flow_table *sock_flow_table;
if (!hash)
@@ -96,42 +96,33 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
if (sock_flow_table)
rps_record_sock_flow(sock_flow_table, hash);
rcu_read_unlock();
-#endif
}
-static inline void sock_rps_record_flow(const struct sock *sk)
+static inline void _sock_rps_record_flow(const struct sock *sk)
{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
*/
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
+ _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
}
-#endif
}
-static inline void sock_rps_delete_flow(const struct sock *sk)
+static inline void _sock_rps_delete_flow(const struct sock *sk)
{
-#ifdef CONFIG_RPS
struct rps_sock_flow_table *table;
u32 hash, index;
- if (!static_branch_unlikely(&rfs_needed))
- return;
-
hash = READ_ONCE(sk->sk_rxhash);
if (!hash)
return;
@@ -144,6 +135,45 @@ static inline void sock_rps_delete_flow(const struct sock *sk)
WRITE_ONCE(table->ents[index], RPS_NO_CPU);
}
rcu_read_unlock();
+}
+#endif /* CONFIG_RPS */
+
+static inline bool rfs_is_needed(void)
+{
+#ifdef CONFIG_RPS
+ return static_branch_unlikely(&rfs_needed);
+#else
+ return false;
+#endif
+}
+
+static inline void sock_rps_record_flow_hash(__u32 hash)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_record_flow_hash(hash);
+#endif
+}
+
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_record_flow(sk);
+#endif
+}
+
+static inline void sock_rps_delete_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_delete_flow(sk);
#endif
}
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index d4b3b2dcd15b..3d5879e08e78 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -22,16 +22,11 @@ struct sctp_endpoint;
struct sctp_association;
struct sctp_authkey;
struct sctp_hmacalgo;
-struct crypto_shash;
-/*
- * Define a generic struct that will hold all the info
- * necessary for an HMAC transform
- */
+/* Defines an HMAC algorithm supported by SCTP chunk authentication */
struct sctp_hmac {
- __u16 hmac_id; /* one of the above ids */
- char *hmac_name; /* name for loading */
- __u16 hmac_len; /* length of the signature */
+ __u16 hmac_id; /* one of SCTP_AUTH_HMAC_ID_* */
+ __u16 hmac_len; /* length of the HMAC value in bytes */
};
/* This is generic structure that containst authentication bytes used
@@ -78,9 +73,9 @@ int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep,
struct sctp_association *asoc,
gfp_t gfp);
int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp);
-void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[]);
-struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id);
-struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc);
+const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id);
+const struct sctp_hmac *
+sctp_auth_asoc_get_hmac(const struct sctp_association *asoc);
void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
struct sctp_hmac_algo_param *hmacs);
int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 5859e0a16a58..ae3376ba0b99 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -296,9 +296,8 @@ enum { SCTP_MAX_GABS = 16 };
*/
#define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */
-#define SCTP_SECRET_SIZE 32 /* Number of octets in a 256 bits. */
-
-#define SCTP_SIGNATURE_SIZE 20 /* size of a SLA-1 signature */
+#define SCTP_COOKIE_KEY_SIZE 32 /* size of cookie HMAC key */
+#define SCTP_COOKIE_MAC_SIZE 32 /* size of HMAC field in cookies */
#define SCTP_COOKIE_MULTIPLE 32 /* Pad out our cookie to make our hash
* functions simpler to write.
@@ -417,16 +416,12 @@ enum {
SCTP_AUTH_HMAC_ID_RESERVED_0,
SCTP_AUTH_HMAC_ID_SHA1,
SCTP_AUTH_HMAC_ID_RESERVED_2,
-#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE)
SCTP_AUTH_HMAC_ID_SHA256,
-#endif
__SCTP_AUTH_HMAC_MAX
};
#define SCTP_AUTH_HMAC_ID_MAX __SCTP_AUTH_HMAC_MAX - 1
#define SCTP_AUTH_NUM_HMACS __SCTP_AUTH_HMAC_MAX
-#define SCTP_SHA1_SIG_SIZE 20
-#define SCTP_SHA256_SIG_SIZE 32
/* SCTP-AUTH, Section 3.2
* The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH chunks
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 8a540ad9b509..2ae390219efd 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -32,6 +32,7 @@
#ifndef __sctp_structs_h__
#define __sctp_structs_h__
+#include <crypto/sha2.h>
#include <linux/ktime.h>
#include <linux/generic-radix-tree.h>
#include <linux/rhashtable-types.h>
@@ -68,7 +69,6 @@ struct sctp_outq;
struct sctp_bind_addr;
struct sctp_ulpq;
struct sctp_ep_common;
-struct crypto_shash;
struct sctp_stream;
@@ -155,10 +155,6 @@ struct sctp_sock {
/* PF_ family specific functions. */
struct sctp_pf *pf;
- /* Access to HMAC transform. */
- struct crypto_shash *hmac;
- char *sctp_hmac_alg;
-
/* What is our base endpointer? */
struct sctp_endpoint *ep;
@@ -227,7 +223,8 @@ struct sctp_sock {
frag_interleave:1,
recvrcvinfo:1,
recvnxtinfo:1,
- data_ready_signalled:1;
+ data_ready_signalled:1,
+ cookie_auth_enable:1;
atomic_t pd_mode;
@@ -335,7 +332,7 @@ struct sctp_cookie {
/* The format of our cookie that we send to our peer. */
struct sctp_signed_cookie {
- __u8 signature[SCTP_SECRET_SIZE];
+ __u8 mac[SCTP_COOKIE_MAC_SIZE];
__u32 __pad; /* force sctp_cookie alignment to 64 bits */
struct sctp_cookie c;
} __packed;
@@ -1307,33 +1304,15 @@ struct sctp_endpoint {
/* This is really a list of struct sctp_association entries. */
struct list_head asocs;
- /* Secret Key: A secret key used by this endpoint to compute
- * the MAC. This SHOULD be a cryptographic quality
- * random number with a sufficient length.
- * Discussion in [RFC1750] can be helpful in
- * selection of the key.
- */
- __u8 secret_key[SCTP_SECRET_SIZE];
-
- /* digest: This is a digest of the sctp cookie. This field is
- * only used on the receive path when we try to validate
- * that the cookie has not been tampered with. We put
- * this here so we pre-allocate this once and can re-use
- * on every receive.
- */
- __u8 *digest;
-
+ /* Cookie authentication key used by this endpoint */
+ struct hmac_sha256_key cookie_auth_key;
+
/* sendbuf acct. policy. */
__u32 sndbuf_policy;
/* rcvbuf acct. policy. */
__u32 rcvbuf_policy;
- /* SCTP AUTH: array of the HMACs that will be allocated
- * we need this per association so that we don't serialize
- */
- struct crypto_shash **auth_hmacs;
-
/* SCTP-AUTH: hmacs for the endpoint encoded into parameter */
struct sctp_hmac_algo_param *auth_hmacs_list;
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
index 24f733b3e3fe..e9f41725933e 100644
--- a/include/net/seg6_hmac.h
+++ b/include/net/seg6_hmac.h
@@ -9,6 +9,8 @@
#ifndef _NET_SEG6_HMAC_H
#define _NET_SEG6_HMAC_H
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
#include <net/flow.h>
#include <net/ip6_fib.h>
#include <net/sock.h>
@@ -19,7 +21,6 @@
#include <linux/seg6_hmac.h>
#include <linux/rhashtable-types.h>
-#define SEG6_HMAC_MAX_DIGESTSIZE 160
#define SEG6_HMAC_RING_SIZE 256
struct seg6_hmac_info {
@@ -27,16 +28,15 @@ struct seg6_hmac_info {
struct rcu_head rcu;
u32 hmackeyid;
+ /* The raw key, kept only so it can be returned back to userspace */
char secret[SEG6_HMAC_SECRET_LEN];
u8 slen;
u8 alg_id;
-};
-
-struct seg6_hmac_algo {
- u8 alg_id;
- char name[64];
- struct crypto_shash * __percpu *tfms;
- struct shash_desc * __percpu *shashs;
+ /* The prepared key, which the calculations actually use */
+ union {
+ struct hmac_sha1_key sha1;
+ struct hmac_sha256_key sha256;
+ } key;
};
extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo,
@@ -50,13 +50,9 @@ extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
struct ipv6_sr_hdr *srh);
extern bool seg6_hmac_validate_skb(struct sk_buff *skb);
#ifdef CONFIG_IPV6_SEG6_HMAC
-extern int seg6_hmac_init(void);
-extern void seg6_hmac_exit(void);
extern int seg6_hmac_net_init(struct net *net);
extern void seg6_hmac_net_exit(struct net *net);
#else
-static inline int seg6_hmac_init(void) { return 0; }
-static inline void seg6_hmac_exit(void) {}
static inline int seg6_hmac_net_init(struct net *net) { return 0; }
static inline void seg6_hmac_net_exit(struct net *net) {}
#endif
diff --git a/include/net/smc.h b/include/net/smc.h
index db84e4e35080..08bee529ed8d 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -15,7 +15,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
-#include "linux/ism.h"
+#include <linux/dibs.h>
struct sock;
@@ -27,62 +27,15 @@ struct smc_hashinfo {
};
/* SMCD/ISM device driver interface */
-struct smcd_dmb {
- u64 dmb_tok;
- u64 rgid;
- u32 dmb_len;
- u32 sba_idx;
- u32 vlan_valid;
- u32 vlan_id;
- void *cpu_addr;
- dma_addr_t dma_addr;
-};
-
-#define ISM_EVENT_DMB 0
-#define ISM_EVENT_GID 1
-#define ISM_EVENT_SWR 2
-
#define ISM_RESERVED_VLANID 0x1FFF
-#define ISM_ERROR 0xFFFF
-
-struct smcd_dev;
-
struct smcd_gid {
u64 gid;
u64 gid_ext;
};
-struct smcd_ops {
- int (*query_remote_gid)(struct smcd_dev *dev, struct smcd_gid *rgid,
- u32 vid_valid, u32 vid);
- int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb,
- void *client);
- int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
- int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
- bool sf, unsigned int offset, void *data,
- unsigned int size);
- int (*supports_v2)(void);
- void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid);
- u16 (*get_chid)(struct smcd_dev *dev);
- struct device* (*get_dev)(struct smcd_dev *dev);
-
- /* optional operations */
- int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
- int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
- int (*set_vlan_required)(struct smcd_dev *dev);
- int (*reset_vlan_required)(struct smcd_dev *dev);
- int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid,
- u32 trigger_irq, u32 event_code, u64 info);
- int (*support_dmb_nocopy)(struct smcd_dev *dev);
- int (*attach_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
- int (*detach_dmb)(struct smcd_dev *dev, u64 token);
-};
-
struct smcd_dev {
- const struct smcd_ops *ops;
- void *priv;
- void *client;
+ struct dibs_dev *dibs;
struct list_head list;
spinlock_t lock;
struct smc_connection **conn;
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 4cb4326dfebe..584e70742e9b 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -36,11 +36,6 @@ struct snmp_mib {
.entry = _entry, \
}
-#define SNMP_MIB_SENTINEL { \
- .name = NULL, \
- .entry = 0, \
-}
-
/*
* We use unsigned longs for most mibs but u64 for ipstats.
*/
diff --git a/include/net/sock.h b/include/net/sock.h
index c8a4b283df6f..60bcb13f045c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -249,6 +249,7 @@ struct sk_filter;
* @sk_dst_cache: destination cache
* @sk_dst_pending_confirm: need to confirm neighbour
* @sk_policy: flow policy
+ * @psp_assoc: PSP association, if socket is PSP-secured
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
* @sk_tsq_flags: TCP Small Queues flags
@@ -282,9 +283,11 @@ struct sk_filter;
* @sk_err_soft: errors that don't cause failure but are the cause of a
* persistent failure not just 'timed out'
* @sk_drops: raw/udp drops counter
+ * @sk_drop_counters: optional pointer to numa_drop_counters
* @sk_ack_backlog: current listen backlog
* @sk_max_ack_backlog: listen backlog set in listen()
* @sk_uid: user id of owner
+ * @sk_ino: inode number (zero if orphaned)
* @sk_prefer_busy_poll: prefer busypolling over softirq processing
* @sk_busy_poll_budget: napi processing budget when busypolling
* @sk_priority: %SO_PRIORITY setting
@@ -443,10 +446,15 @@ struct sock {
__cacheline_group_begin(sock_read_rxtx);
int sk_err;
struct socket *sk_socket;
+#ifdef CONFIG_MEMCG
struct mem_cgroup *sk_memcg;
+#endif
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
+#if IS_ENABLED(CONFIG_INET_PSP)
+ struct psp_assoc __rcu *psp_assoc;
+#endif
__cacheline_group_end(sock_read_rxtx);
__cacheline_group_begin(sock_write_rxtx);
@@ -459,7 +467,7 @@ struct sock {
__cacheline_group_begin(sock_write_tx);
int sk_write_pending;
atomic_t sk_omem_alloc;
- int sk_sndbuf;
+ int sk_err_soft;
int sk_wmem_queued;
refcount_t sk_wmem_alloc;
@@ -484,6 +492,9 @@ struct sock {
long sk_sndtimeo;
u32 sk_priority;
u32 sk_mark;
+ kuid_t sk_uid;
+ u16 sk_protocol;
+ u16 sk_type;
struct dst_entry __rcu *sk_dst_cache;
netdev_features_t sk_route_caps;
#ifdef CONFIG_SOCK_VALIDATE_XMIT
@@ -496,6 +507,7 @@ struct sock {
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
u32 sk_txhash;
+ int sk_sndbuf;
u8 sk_pacing_shift;
bool sk_use_task_frag;
__cacheline_group_end(sock_read_tx);
@@ -509,15 +521,12 @@ struct sock {
sk_no_check_tx : 1,
sk_no_check_rx : 1;
u8 sk_shutdown;
- u16 sk_type;
- u16 sk_protocol;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
- int sk_err_soft;
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
- kuid_t sk_uid;
+ unsigned long sk_ino;
spinlock_t sk_peer_lock;
int sk_bind_phc;
struct pid *sk_peer_pid;
@@ -562,6 +571,7 @@ struct sock {
#ifdef CONFIG_BPF_SYSCALL
struct bpf_local_storage __rcu *sk_bpf_storage;
#endif
+ struct numa_drop_counters *sk_drop_counters;
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
struct xarray sk_user_frags;
@@ -1344,8 +1354,6 @@ struct proto {
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
- unsigned int __percpu *orphan_count;
-
struct request_sock_ops *rsk_prot;
struct timewait_sock_ops *twsk_prot;
@@ -1486,6 +1494,10 @@ static inline int __sk_prot_rehash(struct sock *sk)
#define SOCK_BINDADDR_LOCK 4
#define SOCK_BINDPORT_LOCK 8
+/**
+ * define SOCK_CONNECT_BIND - &sock->sk_userlocks flag for auto-bind at connect() time
+ */
+#define SOCK_CONNECT_BIND 16
struct socket_alloc {
struct socket socket;
@@ -2056,6 +2068,13 @@ static inline int sk_rx_queue_get(const struct sock *sk)
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
sk->sk_socket = sock;
+ if (sock) {
+ WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
+ WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
+ } else {
+ /* Note: sk_uid is unchanged. */
+ WRITE_ONCE(sk->sk_ino, 0);
+ }
}
static inline wait_queue_head_t *sk_sleep(struct sock *sk)
@@ -2076,7 +2095,6 @@ static inline void sock_orphan(struct sock *sk)
sock_set_flag(sk, SOCK_DEAD);
sk_set_socket(sk, NULL);
sk->sk_wq = NULL;
- /* Note: sk_uid is unchanged. */
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -2087,20 +2105,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
rcu_assign_pointer(sk->sk_wq, &parent->wq);
parent->sk = sk;
sk_set_socket(sk, parent);
- WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
security_sock_graft(sk, parent);
write_unlock_bh(&sk->sk_callback_lock);
}
+static inline unsigned long sock_i_ino(const struct sock *sk)
+{
+ /* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */
+ return READ_ONCE(sk->sk_ino);
+}
+
static inline kuid_t sk_uid(const struct sock *sk)
{
/* Paired with WRITE_ONCE() in sockfs_setattr() */
return READ_ONCE(sk->sk_uid);
}
-unsigned long __sock_i_ino(struct sock *sk);
-unsigned long sock_i_ino(struct sock *sk);
-
static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
{
return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0);
@@ -2594,6 +2614,50 @@ static inline gfp_t gfp_memcg_charge(void)
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
}
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
+{
+ return sk->sk_memcg;
+}
+
+static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
+{
+ return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk);
+}
+
+static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
+
+#ifdef CONFIG_MEMCG_V1
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return !!memcg->tcpmem_pressure;
+#endif /* CONFIG_MEMCG_V1 */
+
+ do {
+ if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
+ return true;
+ } while ((memcg = parent_mem_cgroup(memcg)));
+
+ return false;
+}
+#else
+static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
+{
+ return NULL;
+}
+
+static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
+{
+ return false;
+}
+
+static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
+{
+ return false;
+}
+#endif
+
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
{
return noblock ? 0 : READ_ONCE(sk->sk_rcvtimeo);
@@ -2636,18 +2700,53 @@ struct sock_skb_cb {
#define sock_skb_cb_check_size(size) \
BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
+static inline void sk_drops_add(struct sock *sk, int segs)
+{
+ struct numa_drop_counters *ndc = sk->sk_drop_counters;
+
+ if (ndc)
+ numa_drop_add(ndc, segs);
+ else
+ atomic_add(segs, &sk->sk_drops);
+}
+
+static inline void sk_drops_inc(struct sock *sk)
+{
+ sk_drops_add(sk, 1);
+}
+
+static inline int sk_drops_read(const struct sock *sk)
+{
+ const struct numa_drop_counters *ndc = sk->sk_drop_counters;
+
+ if (ndc) {
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
+ return numa_drop_read(ndc);
+ }
+ return atomic_read(&sk->sk_drops);
+}
+
+static inline void sk_drops_reset(struct sock *sk)
+{
+ struct numa_drop_counters *ndc = sk->sk_drop_counters;
+
+ if (ndc)
+ numa_drop_reset(ndc);
+ atomic_set(&sk->sk_drops, 0);
+}
+
static inline void
sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
{
SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ?
- atomic_read(&sk->sk_drops) : 0;
+ sk_drops_read(sk) : 0;
}
-static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
+static inline void sk_drops_skbadd(struct sock *sk, const struct sk_buff *skb)
{
int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
- atomic_add(segs, &sk->sk_drops);
+ sk_drops_add(sk, segs);
}
static inline ktime_t sock_read_timestamp(struct sock *sk)
@@ -2866,28 +2965,6 @@ sk_requests_wifi_status(struct sock *sk)
return sk && sk_fullsock(sk) && sock_flag(sk, SOCK_WIFI_STATUS);
}
-/* Checks if this SKB belongs to an HW offloaded socket
- * and whether any SW fallbacks are required based on dev.
- * Check decrypted mark in case skb_orphan() cleared socket.
- */
-static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
- struct net_device *dev)
-{
-#ifdef CONFIG_SOCK_VALIDATE_XMIT
- struct sock *sk = skb->sk;
-
- if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) {
- skb = sk->sk_validate_xmit_skb(sk, dev, skb);
- } else if (unlikely(skb_is_decrypted(skb))) {
- pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n");
- kfree_skb(skb);
- skb = NULL;
- }
-#endif
-
- return skb;
-}
-
/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV
* SYNACK messages can be attached to either ones (depending on SYNCOOKIE)
*/
@@ -2924,8 +3001,8 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
*/
#define _SK_MEM_PACKETS 256
#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
-#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_WMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_RMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h
index 7c240d2fed4e..626704cd6241 100644
--- a/include/net/tc_act/tc_skbmod.h
+++ b/include/net/tc_act/tc_skbmod.h
@@ -12,6 +12,7 @@
struct tcf_skbmod_params {
struct rcu_head rcu;
u64 flags; /*up to 64 types of operations; extend if needed */
+ int action;
u8 eth_dst[ETH_ALEN];
u16 eth_type;
u8 eth_src[ETH_ALEN];
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index 879fe8cff581..0f1925f97520 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -14,6 +14,7 @@
struct tcf_tunnel_key_params {
struct rcu_head rcu;
int tcft_action;
+ int action;
struct metadata_dst *tcft_enc_metadata;
};
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index 3f5e9242b5e8..beadee41669a 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -10,6 +10,7 @@
#include <linux/tc_act/tc_vlan.h>
struct tcf_vlan_params {
+ int action;
int tcfv_action;
unsigned char tcfv_push_dst[ETH_ALEN];
unsigned char tcfv_push_src[ETH_ALEN];
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 526a26e7a150..5ca230ed526a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -54,6 +54,16 @@ extern struct inet_hashinfo tcp_hashinfo;
DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
int tcp_orphan_count_sum(void);
+static inline void tcp_orphan_count_inc(void)
+{
+ this_cpu_inc(tcp_orphan_count);
+}
+
+static inline void tcp_orphan_count_dec(void)
+{
+ this_cpu_dec(tcp_orphan_count);
+}
+
DECLARE_PER_CPU(u32, tcp_tw_isn);
void tcp_time_wait(struct sock *sk, int state, int timeo);
@@ -90,6 +100,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
/* Maximal number of window scale according to RFC1323 */
#define TCP_MAX_WSCALE 14U
+/* Default sending frequency of accurate ECN option per RTT */
+#define TCP_ACCECN_OPTION_BEACON 3
+
/* urg_data states */
#define TCP_URG_VALID 0x0100
#define TCP_URG_NOTYET 0x0200
@@ -203,6 +216,8 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
#define TCPOPT_AO 29 /* Authentication Option (RFC5925) */
#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
+#define TCPOPT_ACCECN0 172 /* 0xAC: Accurate ECN Order 0 */
+#define TCPOPT_ACCECN1 174 /* 0xAE: Accurate ECN Order 1 */
#define TCPOPT_EXP 254 /* Experimental */
/* Magic number to be after the option value for sharing TCP
* experimental options. See draft-ietf-tcpm-experimental-options-00.txt
@@ -220,6 +235,7 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_MD5SIG 18
#define TCPOLEN_FASTOPEN_BASE 2
+#define TCPOLEN_ACCECN_BASE 2
#define TCPOLEN_EXP_FASTOPEN_BASE 4
#define TCPOLEN_EXP_SMC_BASE 6
@@ -233,6 +249,14 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
#define TCPOLEN_MD5SIG_ALIGNED 20
#define TCPOLEN_MSS_ALIGNED 4
#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8
+#define TCPOLEN_ACCECN_PERFIELD 3
+
+/* Maximum number of byte counters in AccECN option + size */
+#define TCP_ACCECN_NUMFIELDS 3
+#define TCP_ACCECN_MAXSIZE (TCPOLEN_ACCECN_BASE + \
+ TCPOLEN_ACCECN_PERFIELD * \
+ TCP_ACCECN_NUMFIELDS)
+#define TCP_ACCECN_SAFETY_SHIFT 1 /* SAFETY_FACTOR in accecn draft */
/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
@@ -275,8 +299,8 @@ extern unsigned long tcp_memory_pressure;
/* optimized version of sk_under_memory_pressure() for TCP sockets */
static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ if (mem_cgroup_sk_enabled(sk) &&
+ mem_cgroup_sk_under_memory_pressure(sk))
return true;
return READ_ONCE(tcp_memory_pressure);
@@ -346,6 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
+void tcp_rcvbuf_grow(struct sock *sk);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
@@ -811,33 +836,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
}
-static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
-{
- /* mptcp hooks are only on the slow path */
- if (sk_is_mptcp((struct sock *)tp))
- return;
-
- tp->pred_flags = htonl((tp->tcp_header_len << 26) |
- ntohl(TCP_FLAG_ACK) |
- snd_wnd);
-}
-
-static inline void tcp_fast_path_on(struct tcp_sock *tp)
-{
- __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
-}
-
-static inline void tcp_fast_path_check(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
- tp->rcv_wnd &&
- atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
- !tp->urg_data)
- tcp_fast_path_on(tp);
-}
-
u32 tcp_delack_max(const struct sock *sk);
/* Compute the actual rto_min value */
@@ -989,6 +987,18 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
#define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
+#define TCPHDR_SYNACK_ACCECN (TCPHDR_SYN | TCPHDR_ACK | TCPHDR_CWR)
+
+#define TCP_ACCECN_CEP_ACE_MASK 0x7
+#define TCP_ACCECN_ACE_MAX_DELTA 6
+
+/* To avoid/detect middlebox interference, not all counters start at 0.
+ * See draft-ietf-tcpm-accurate-ecn for the latest values.
+ */
+#define TCP_ACCECN_CEP_INIT_OFFSET 5
+#define TCP_ACCECN_E1B_INIT_OFFSET 1
+#define TCP_ACCECN_E0B_INIT_OFFSET 1
+#define TCP_ACCECN_CEB_INIT_OFFSET 0
/* State flags for sacked in struct tcp_skb_cb */
enum tcp_skb_cb_sacked_flags {
@@ -1797,6 +1807,40 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
return true;
}
+static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
+{
+ u32 ace;
+
+ /* mptcp hooks are only on the slow path */
+ if (sk_is_mptcp((struct sock *)tp))
+ return;
+
+ ace = tcp_ecn_mode_accecn(tp) ?
+ ((tp->delivered_ce + TCP_ACCECN_CEP_INIT_OFFSET) &
+ TCP_ACCECN_CEP_ACE_MASK) : 0;
+
+ tp->pred_flags = htonl((tp->tcp_header_len << 26) |
+ (ace << 22) |
+ ntohl(TCP_FLAG_ACK) |
+ snd_wnd);
+}
+
+static inline void tcp_fast_path_on(struct tcp_sock *tp)
+{
+ __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
+}
+
+static inline void tcp_fast_path_check(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
+ tp->rcv_wnd &&
+ atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
+ !tp->urg_data)
+ tcp_fast_path_on(tp);
+}
+
bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
int mib_idx, u32 *last_oow_ack_time);
@@ -1931,6 +1975,7 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk,
}
#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
+void tcp_md5_destruct_sock(struct sock *sk);
#else
static inline struct tcp_md5sig_key *
tcp_md5_do_lookup(const struct sock *sk, int l3index,
@@ -1947,6 +1992,9 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk,
}
#define tcp_twsk_md5_key(twsk) NULL
+static inline void tcp_md5_destruct_sock(struct sock *sk)
+{
+}
#endif
int tcp_md5_alloc_sigpool(void);
@@ -2612,7 +2660,7 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
*/
static inline void tcp_listendrop(const struct sock *sk)
{
- atomic_inc(&((struct sock *)sk)->sk_drops);
+ sk_drops_inc((struct sock *)sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
}
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index df655ce6987d..1e9e27d6e06b 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -130,7 +130,6 @@ struct tcp_ao_info {
u32 snd_sne;
u32 rcv_sne;
refcount_t refcnt; /* Protects twsk destruction */
- struct rcu_head rcu;
};
#ifdef CONFIG_TCP_MD5SIG
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
new file mode 100644
index 000000000000..f13e5cd2b1ac
--- /dev/null
+++ b/include/net/tcp_ecn.h
@@ -0,0 +1,642 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _TCP_ECN_H
+#define _TCP_ECN_H
+
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <linux/bitfield.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/inet_ecn.h>
+
+/* The highest ECN variant (Accurate ECN, ECN, or no ECN) that is
+ * attemped to be negotiated and requested for incoming connection
+ * and outgoing connection, respectively.
+ */
+enum tcp_ecn_mode {
+ TCP_ECN_IN_NOECN_OUT_NOECN = 0,
+ TCP_ECN_IN_ECN_OUT_ECN = 1,
+ TCP_ECN_IN_ECN_OUT_NOECN = 2,
+ TCP_ECN_IN_ACCECN_OUT_ACCECN = 3,
+ TCP_ECN_IN_ACCECN_OUT_ECN = 4,
+ TCP_ECN_IN_ACCECN_OUT_NOECN = 5,
+};
+
+/* AccECN option sending when AccECN has been successfully negotiated */
+enum tcp_accecn_option {
+ TCP_ACCECN_OPTION_DISABLED = 0,
+ TCP_ACCECN_OPTION_MINIMUM = 1,
+ TCP_ACCECN_OPTION_FULL = 2,
+};
+
+static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
+{
+ /* Do not set CWR if in AccECN mode! */
+ if (tcp_ecn_mode_rfc3168(tp))
+ tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
+}
+
+static inline void tcp_ecn_accept_cwr(struct sock *sk,
+ const struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tcp_ecn_mode_rfc3168(tp) && tcp_hdr(skb)->cwr) {
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+
+ /* If the sender is telling us it has entered CWR, then its
+ * cwnd may be very low (even just 1 packet), so we should ACK
+ * immediately.
+ */
+ if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ }
+}
+
+static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
+{
+ tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
+}
+
+/* tp->accecn_fail_mode */
+#define TCP_ACCECN_ACE_FAIL_SEND BIT(0)
+#define TCP_ACCECN_ACE_FAIL_RECV BIT(1)
+#define TCP_ACCECN_OPT_FAIL_SEND BIT(2)
+#define TCP_ACCECN_OPT_FAIL_RECV BIT(3)
+
+static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp)
+{
+ return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND;
+}
+
+static inline bool tcp_accecn_ace_fail_recv(const struct tcp_sock *tp)
+{
+ return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV;
+}
+
+static inline bool tcp_accecn_opt_fail_send(const struct tcp_sock *tp)
+{
+ return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_SEND;
+}
+
+static inline bool tcp_accecn_opt_fail_recv(const struct tcp_sock *tp)
+{
+ return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_RECV;
+}
+
+static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode)
+{
+ tp->accecn_fail_mode |= mode;
+}
+
+#define TCP_ACCECN_OPT_NOT_SEEN 0x0
+#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1
+#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2
+#define TCP_ACCECN_OPT_FAIL_SEEN 0x3
+
+static inline u8 tcp_accecn_ace(const struct tcphdr *th)
+{
+ return (th->ae << 2) | (th->cwr << 1) | th->ece;
+}
+
+/* Infer the ECT value our SYN arrived with from the echoed ACE field */
+static inline int tcp_accecn_extract_syn_ect(u8 ace)
+{
+ /* Below is an excerpt from the 1st block of Table 2 of AccECN spec */
+ static const int ace_to_ecn[8] = {
+ INET_ECN_ECT_0, /* 0b000 (Undefined) */
+ INET_ECN_ECT_1, /* 0b001 (Undefined) */
+ INET_ECN_NOT_ECT, /* 0b010 (Not-ECT is received) */
+ INET_ECN_ECT_1, /* 0b011 (ECT-1 is received) */
+ INET_ECN_ECT_0, /* 0b100 (ECT-0 is received) */
+ INET_ECN_ECT_1, /* 0b101 (Reserved) */
+ INET_ECN_CE, /* 0b110 (CE is received) */
+ INET_ECN_ECT_1 /* 0b111 (Undefined) */
+ };
+
+ return ace_to_ecn[ace & 0x7];
+}
+
+/* Check ECN field transition to detect invalid transitions */
+static inline bool tcp_ect_transition_valid(u8 snt, u8 rcv)
+{
+ if (rcv == snt)
+ return true;
+
+ /* Non-ECT altered to something or something became non-ECT */
+ if (snt == INET_ECN_NOT_ECT || rcv == INET_ECN_NOT_ECT)
+ return false;
+ /* CE -> ECT(0/1)? */
+ if (snt == INET_ECN_CE)
+ return false;
+ return true;
+}
+
+static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace,
+ u8 sent_ect)
+{
+ u8 ect = tcp_accecn_extract_syn_ect(ace);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
+ return true;
+
+ if (!tcp_ect_transition_valid(sent_ect, ect)) {
+ tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
+ return false;
+ }
+
+ return true;
+}
+
+static inline void tcp_accecn_saw_opt_fail_recv(struct tcp_sock *tp,
+ u8 saw_opt)
+{
+ tp->saw_accecn_opt = saw_opt;
+ if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN)
+ tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV);
+}
+
+/* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */
+static inline void tcp_accecn_third_ack(struct sock *sk,
+ const struct sk_buff *skb, u8 sent_ect)
+{
+ u8 ace = tcp_accecn_ace(tcp_hdr(skb));
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ switch (ace) {
+ case 0x0:
+ /* Invalid value */
+ tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
+ break;
+ case 0x7:
+ case 0x5:
+ case 0x1:
+ /* Unused but legal values */
+ break;
+ default:
+ /* Validation only applies to first non-data packet */
+ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
+ !TCP_SKB_CB(skb)->sacked &&
+ tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) {
+ if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) &&
+ !tp->delivered_ce)
+ tp->delivered_ce++;
+ }
+ break;
+ }
+}
+
+/* Demand the minimum # to send AccECN optnio */
+static inline void tcp_accecn_opt_demand_min(struct sock *sk,
+ u8 opt_demand_min)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u8 opt_demand;
+
+ opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand);
+ tp->accecn_opt_demand = opt_demand;
+}
+
+/* Maps IP ECN field ECT/CE code point to AccECN option field number, given
+ * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0).
+ */
+static inline u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield)
+{
+ switch (ecnfield & INET_ECN_MASK) {
+ case INET_ECN_NOT_ECT:
+ return 0; /* AccECN does not send counts of NOT_ECT */
+ case INET_ECN_ECT_1:
+ return 1;
+ case INET_ECN_CE:
+ return 2;
+ case INET_ECN_ECT_0:
+ return 3;
+ }
+ return 0;
+}
+
+/* Maps IP ECN field ECT/CE code point to AccECN option field value offset.
+ * Some fields do not start from zero, to detect zeroing by middleboxes.
+ */
+static inline u32 tcp_accecn_field_init_offset(u8 ecnfield)
+{
+ switch (ecnfield & INET_ECN_MASK) {
+ case INET_ECN_NOT_ECT:
+ return 0; /* AccECN does not send counts of NOT_ECT */
+ case INET_ECN_ECT_1:
+ return TCP_ACCECN_E1B_INIT_OFFSET;
+ case INET_ECN_CE:
+ return TCP_ACCECN_CEB_INIT_OFFSET;
+ case INET_ECN_ECT_0:
+ return TCP_ACCECN_E0B_INIT_OFFSET;
+ }
+ return 0;
+}
+
+/* Maps AccECN option field #nr to IP ECN field ECT/CE bits */
+static inline unsigned int tcp_accecn_optfield_to_ecnfield(unsigned int option,
+ bool order)
+{
+ /* Based on Table 5 of the AccECN spec to map (option, order) to
+ * the corresponding ECN conuters (ECT-1, ECT-0, or CE).
+ */
+ static const u8 optfield_lookup[2][3] = {
+ /* order = 0: 1st field ECT-0, 2nd field CE, 3rd field ECT-1 */
+ { INET_ECN_ECT_0, INET_ECN_CE, INET_ECN_ECT_1 },
+ /* order = 1: 1st field ECT-1, 2nd field CE, 3rd field ECT-0 */
+ { INET_ECN_ECT_1, INET_ECN_CE, INET_ECN_ECT_0 }
+ };
+
+ return optfield_lookup[order][option % 3];
+}
+
+/* Handles AccECN option ECT and CE 24-bit byte counters update into
+ * the u32 value in tcp_sock. As we're processing TCP options, it is
+ * safe to access from - 1.
+ */
+static inline s32 tcp_update_ecn_bytes(u32 *cnt, const char *from,
+ u32 init_offset)
+{
+ u32 truncated = (get_unaligned_be32(from - 1) - init_offset) &
+ 0xFFFFFFU;
+ u32 delta = (truncated - *cnt) & 0xFFFFFFU;
+
+ /* If delta has the highest bit set (24th bit) indicating
+ * negative, sign extend to correct an estimation using
+ * sign_extend32(delta, 24 - 1)
+ */
+ delta = sign_extend32(delta, 23);
+ *cnt += delta;
+ return (s32)delta;
+}
+
+/* Updates Accurate ECN received counters from the received IP ECN field */
+static inline void tcp_ecn_received_counters(struct sock *sk,
+ const struct sk_buff *skb, u32 len)
+{
+ u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
+ u8 is_ce = INET_ECN_is_ce(ecnfield);
+ struct tcp_sock *tp = tcp_sk(sk);
+ bool ecn_edge;
+
+ if (!INET_ECN_is_not_ect(ecnfield)) {
+ u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+
+ /* As for accurate ECN, the TCP_ECN_SEEN flag is set by
+ * tcp_ecn_received_counters() when the ECN codepoint of
+ * received TCP data or ACK contains ECT(0), ECT(1), or CE.
+ */
+ if (!tcp_ecn_mode_rfc3168(tp))
+ tp->ecn_flags |= TCP_ECN_SEEN;
+
+ /* ACE counter tracks *all* segments including pure ACKs */
+ tp->received_ce += pcount;
+ tp->received_ce_pending = min(tp->received_ce_pending + pcount,
+ 0xfU);
+
+ if (len > 0) {
+ u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield);
+ u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1];
+ u32 bytes_mask = GENMASK_U32(31, 22);
+
+ tp->received_ecn_bytes[ecnfield - 1] += len;
+ tp->accecn_minlen = max_t(u8, tp->accecn_minlen,
+ minlen);
+
+ /* Send AccECN option at least once per 2^22-byte
+ * increase in any ECN byte counter.
+ */
+ if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) &
+ bytes_mask) {
+ tcp_accecn_opt_demand_min(sk, 1);
+ }
+ }
+ }
+
+ ecn_edge = tp->prev_ecnfield != ecnfield;
+ if (ecn_edge || is_ce) {
+ tp->prev_ecnfield = ecnfield;
+ /* Demand Accurate ECN change-triggered ACKs. Two ACK are
+ * demanded to indicate unambiguously the ecnfield value
+ * in the latter ACK.
+ */
+ if (tcp_ecn_mode_accecn(tp)) {
+ if (ecn_edge)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ tp->accecn_opt_demand = 2;
+ }
+ }
+}
+
+/* AccECN specification, 2.2: [...] A Data Receiver maintains four counters
+ * initialized at the start of the half-connection. [...] These byte counters
+ * reflect only the TCP payload length, excluding TCP header and TCP options.
+ */
+static inline void tcp_ecn_received_counters_payload(struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct tcphdr *th = (const struct tcphdr *)skb->data;
+
+ tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
+}
+
+/* AccECN specification, 5.1: [...] a server can determine that it
+ * negotiated AccECN as [...] if the ACK contains an ACE field with
+ * the value 0b010 to 0b111 (decimal 2 to 7).
+ */
+static inline bool cookie_accecn_ok(const struct tcphdr *th)
+{
+ return tcp_accecn_ace(th) > 0x1;
+}
+
+/* Used to form the ACE flags for SYN/ACK */
+static inline u16 tcp_accecn_reflector_flags(u8 ect)
+{
+ /* TCP ACE flags of SYN/ACK are set based on IP-ECN received from SYN.
+ * Below is an excerpt from the 1st block of Table 2 of AccECN spec,
+ * in which TCP ACE flags are encoded as: (AE << 2) | (CWR << 1) | ECE
+ */
+ static const u8 ecn_to_ace_flags[4] = {
+ 0b010, /* Not-ECT is received */
+ 0b011, /* ECT(1) is received */
+ 0b100, /* ECT(0) is received */
+ 0b110 /* CE is received */
+ };
+
+ return FIELD_PREP(TCPHDR_ACE, ecn_to_ace_flags[ect & 0x3]);
+}
+
+/* AccECN specification, 3.1.2: If a TCP server that implements AccECN
+ * receives a SYN with the three TCP header flags (AE, CWR and ECE) set
+ * to any combination other than 000, 011 or 111, it MUST negotiate the
+ * use of AccECN as if they had been set to 111.
+ */
+static inline bool tcp_accecn_syn_requested(const struct tcphdr *th)
+{
+ u8 ace = tcp_accecn_ace(th);
+
+ return ace && ace != 0x3;
+}
+
+static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
+{
+ BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
+ BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
+ BUILD_BUG_ON(INET_ECN_CE != 0x3);
+
+ counter_array[INET_ECN_ECT_1 - 1] = 0;
+ counter_array[INET_ECN_ECT_0 - 1] = 0;
+ counter_array[INET_ECN_CE - 1] = 0;
+}
+
+static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
+{
+ tp->received_ce = 0;
+ tp->received_ce_pending = 0;
+ __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
+ __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
+ tp->accecn_minlen = 0;
+ tp->accecn_opt_demand = 0;
+ tp->est_ecnfield = 0;
+}
+
+/* Used for make_synack to form the ACE flags */
+static inline void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect)
+{
+ /* TCP ACE flags of SYN/ACK are set based on IP-ECN codepoint received
+ * from SYN. Below is an excerpt from Table 2 of the AccECN spec:
+ * +====================+====================================+
+ * | IP-ECN codepoint | Respective ACE falgs on SYN/ACK |
+ * | received on SYN | AE CWR ECE |
+ * +====================+====================================+
+ * | Not-ECT | 0 1 0 |
+ * | ECT(1) | 0 1 1 |
+ * | ECT(0) | 1 0 0 |
+ * | CE | 1 1 0 |
+ * +====================+====================================+
+ */
+ th->ae = !!(ect & INET_ECN_ECT_0);
+ th->cwr = ect != INET_ECN_ECT_0;
+ th->ece = ect == INET_ECN_ECT_1;
+}
+
+static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ u32 wire_ace;
+
+ /* The final packet of the 3WHS or anything like it must reflect
+ * the SYN/ACK ECT instead of putting CEP into ACE field, such
+ * case show up in tcp_flags.
+ */
+ if (likely(!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACE))) {
+ wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET;
+ th->ece = !!(wire_ace & 0x1);
+ th->cwr = !!(wire_ace & 0x2);
+ th->ae = !!(wire_ace & 0x4);
+ tp->received_ce_pending = 0;
+ }
+}
+
+static inline u8 tcp_accecn_option_init(const struct sk_buff *skb,
+ u8 opt_offset)
+{
+ u8 *ptr = skb_transport_header(skb) + opt_offset;
+ unsigned int optlen = ptr[1] - 2;
+
+ if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1))
+ return TCP_ACCECN_OPT_FAIL_SEEN;
+ ptr += 2;
+
+ /* Detect option zeroing: an AccECN connection "MAY check that the
+ * initial value of the EE0B field or the EE1B field is non-zero"
+ */
+ if (optlen < TCPOLEN_ACCECN_PERFIELD)
+ return TCP_ACCECN_OPT_EMPTY_SEEN;
+ if (get_unaligned_be24(ptr) == 0)
+ return TCP_ACCECN_OPT_FAIL_SEEN;
+ if (optlen < TCPOLEN_ACCECN_PERFIELD * 3)
+ return TCP_ACCECN_OPT_COUNTER_SEEN;
+ ptr += TCPOLEN_ACCECN_PERFIELD * 2;
+ if (get_unaligned_be24(ptr) == 0)
+ return TCP_ACCECN_OPT_FAIL_SEEN;
+
+ return TCP_ACCECN_OPT_COUNTER_SEEN;
+}
+
+/* See Table 2 of the AccECN draft */
+static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
+ const struct tcphdr *th, u8 ip_dsfield)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u8 ace = tcp_accecn_ace(th);
+
+ switch (ace) {
+ case 0x0:
+ case 0x7:
+ /* +========+========+============+=============+
+ * | A | B | SYN/ACK | Feedback |
+ * | | | B->A | Mode of A |
+ * | | | AE CWR ECE | |
+ * +========+========+============+=============+
+ * | AccECN | No ECN | 0 0 0 | Not ECN |
+ * | AccECN | Broken | 1 1 1 | Not ECN |
+ * +========+========+============+=============+
+ */
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+ break;
+ case 0x1:
+ case 0x5:
+ /* +========+========+============+=============+
+ * | A | B | SYN/ACK | Feedback |
+ * | | | B->A | Mode of A |
+ * | | | AE CWR ECE | |
+ * +========+========+============+=============+
+ * | AccECN | Nonce | 1 0 1 | (Reserved) |
+ * | AccECN | ECN | 0 0 1 | Classic ECN |
+ * | Nonce | AccECN | 0 0 1 | Classic ECN |
+ * | ECN | AccECN | 0 0 1 | Classic ECN |
+ * +========+========+============+=============+
+ */
+ if (tcp_ecn_mode_pending(tp))
+ /* Downgrade from AccECN, or requested initially */
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
+ break;
+ default:
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
+ tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
+ if (tp->rx_opt.accecn &&
+ tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
+ u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
+
+ tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
+ tp->accecn_opt_demand = 2;
+ }
+ if (INET_ECN_is_ce(ip_dsfield) &&
+ tcp_accecn_validate_syn_feedback(sk, ace,
+ tp->syn_ect_snt)) {
+ tp->received_ce++;
+ tp->received_ce_pending++;
+ }
+ break;
+ }
+}
+
+static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
+ const struct sk_buff *skb)
+{
+ if (tcp_ecn_mode_pending(tp)) {
+ if (!tcp_accecn_syn_requested(th)) {
+ /* Downgrade to classic ECN feedback */
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
+ } else {
+ tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &
+ INET_ECN_MASK;
+ tp->prev_ecnfield = tp->syn_ect_rcv;
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
+ }
+ }
+ if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+}
+
+static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp,
+ const struct tcphdr *th)
+{
+ if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
+ return true;
+ return false;
+}
+
+/* Packet ECN state for a SYN-ACK */
+static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
+ if (tcp_ecn_disabled(tp))
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
+ else if (tcp_ca_needs_ecn(sk) ||
+ tcp_bpf_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
+
+ if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
+ TCP_SKB_CB(skb)->tcp_flags |=
+ tcp_accecn_reflector_flags(tp->syn_ect_rcv);
+ tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
+ }
+}
+
+/* Packet ECN state for a SYN. */
+static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
+ bool use_ecn, use_accecn;
+ u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
+
+ use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN;
+ use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
+ tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
+ tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
+
+ if (!use_ecn) {
+ const struct dst_entry *dst = __sk_dst_get(sk);
+
+ if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
+ use_ecn = true;
+ }
+
+ tp->ecn_flags = 0;
+
+ if (use_ecn) {
+ if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
+ INET_ECN_xmit(sk);
+
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
+ if (use_accecn) {
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_AE;
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_PENDING);
+ tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
+ } else {
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
+ }
+ }
+}
+
+static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
+{
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) {
+ /* tp->ecn_flags are cleared at a later point in time when
+ * SYN ACK is ultimatively being received.
+ */
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
+ }
+}
+
+static inline void
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
+{
+ if (tcp_rsk(req)->accecn_ok)
+ tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
+ else if (inet_rsk(req)->ecn_ok)
+ th->ece = 1;
+}
+
+static inline bool tcp_accecn_option_beacon_check(const struct sock *sk)
+{
+ u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon);
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!ecn_beacon)
+ return false;
+
+ return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * ecn_beacon >=
+ (tp->srtt_us >> 3);
+}
+
+#endif /* _LINUX_TCP_ECN_H */
diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h
index 62b3e9f2aed4..0a85ac64a66d 100644
--- a/include/net/timewait_sock.h
+++ b/include/net/timewait_sock.h
@@ -15,13 +15,6 @@ struct timewait_sock_ops {
struct kmem_cache *twsk_slab;
char *twsk_slab_name;
unsigned int twsk_obj_size;
- void (*twsk_destructor)(struct sock *sk);
};
-static inline void twsk_destructor(struct sock *sk)
-{
- if (sk->sk_prot->twsk_prot->twsk_destructor != NULL)
- sk->sk_prot->twsk_prot->twsk_destructor(sk);
-}
-
#endif /* _TIMEWAIT_SOCK_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index e2af3bda90c9..cffedb3e40f2 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -284,14 +284,28 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
netdev_features_t features, bool is_ipv6);
-static inline void udp_lib_init_sock(struct sock *sk)
+static inline int udp_lib_init_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
+ sk->sk_drop_counters = &up->drop_counters;
skb_queue_head_init(&up->reader_queue);
INIT_HLIST_NODE(&up->tunnel_list);
up->forward_threshold = sk->sk_rcvbuf >> 2;
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
+
+ up->udp_prod_queue = kcalloc(nr_node_ids, sizeof(*up->udp_prod_queue),
+ GFP_KERNEL);
+ if (!up->udp_prod_queue)
+ return -ENOMEM;
+ for (int i = 0; i < nr_node_ids; i++)
+ init_llist_head(&up->udp_prod_queue[i].ll_root);
+ return 0;
+}
+
+static inline void udp_drops_inc(struct sock *sk)
+{
+ numa_drop_add(&udp_sk(sk)->drop_counters, 1);
}
/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
@@ -397,7 +411,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
return __skb_recv_udp(sk, flags, &off, err);
}
-int udp_v4_early_demux(struct sk_buff *skb);
+enum skb_drop_reason udp_v4_early_demux(struct sk_buff *skb);
bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
int udp_err(struct sk_buff *, u32);
int udp_abort(struct sock *sk, int err);
@@ -627,7 +641,7 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs;
drop:
- atomic_add(drop_count, &sk->sk_drops);
+ sk_drops_add(sk, drop_count);
SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, drop_count);
kfree_skb(skb);
return NULL;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index b40f1f96cb11..aa742f413c35 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -76,6 +76,11 @@ enum xdp_buff_flags {
XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under
* pressure
*/
+ /* frags have unreadable mem, this can't be true for real XDP packets,
+ * but drivers may use XDP helpers to construct Rx pkt state even when
+ * XDP program is not attached.
+ */
+ XDP_FLAGS_FRAGS_UNREADABLE = BIT(2),
};
struct xdp_buff {
@@ -85,8 +90,20 @@ struct xdp_buff {
void *data_hard_start;
struct xdp_rxq_info *rxq;
struct xdp_txq_info *txq;
- u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
- u32 flags; /* supported values defined in xdp_buff_flags */
+
+ union {
+ struct {
+ /* frame size to deduce data_hard_end/tailroom */
+ u32 frame_sz;
+ /* supported values defined in xdp_buff_flags */
+ u32 flags;
+ };
+
+#ifdef __LITTLE_ENDIAN
+ /* Used to micro-optimize xdp_init_buff(), don't use directly */
+ u64 frame_sz_flags_init;
+#endif
+ };
};
static __always_inline bool xdp_buff_has_frags(const struct xdp_buff *xdp)
@@ -104,23 +121,42 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
}
-static __always_inline bool
-xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp)
+static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
{
- return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+ xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
}
-static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
+static __always_inline void xdp_buff_set_frag_unreadable(struct xdp_buff *xdp)
{
- xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
+ xdp->flags |= XDP_FLAGS_FRAGS_UNREADABLE;
+}
+
+static __always_inline u32 xdp_buff_get_skb_flags(const struct xdp_buff *xdp)
+{
+ return xdp->flags;
+}
+
+static __always_inline void xdp_buff_clear_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+ xdp->flags &= ~XDP_FLAGS_FRAGS_PF_MEMALLOC;
}
static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{
- xdp->frame_sz = frame_sz;
xdp->rxq = rxq;
+
+#ifdef __LITTLE_ENDIAN
+ /*
+ * Force the compilers to initialize ::flags and assign ::frame_sz with
+ * one write on 64-bit LE architectures as they're often unable to do
+ * it themselves.
+ */
+ xdp->frame_sz_flags_init = frame_sz;
+#else
+ xdp->frame_sz = frame_sz;
xdp->flags = 0;
+#endif
}
static __always_inline void
@@ -249,6 +285,8 @@ static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem,
if (unlikely(netmem_is_pfmemalloc(netmem)))
xdp_buff_set_frag_pfmemalloc(xdp);
+ if (unlikely(netmem_is_net_iov(netmem)))
+ xdp_buff_set_frag_unreadable(xdp);
return true;
}
@@ -272,10 +310,10 @@ static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame)
return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
}
-static __always_inline bool
-xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame)
+static __always_inline u32
+xdp_frame_get_skb_flags(const struct xdp_frame *frame)
{
- return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+ return frame->flags;
}
#define XDP_BULK_QUEUE_SIZE 16
@@ -312,9 +350,9 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
}
static inline void
-xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
- unsigned int size, unsigned int truesize,
- bool pfmemalloc)
+xdp_update_skb_frags_info(struct sk_buff *skb, u8 nr_frags,
+ unsigned int size, unsigned int truesize,
+ u32 xdp_flags)
{
struct skb_shared_info *sinfo = skb_shinfo(skb);
@@ -328,7 +366,8 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
skb->len += size;
skb->data_len += size;
skb->truesize += truesize;
- skb->pfmemalloc |= pfmemalloc;
+ skb->pfmemalloc |= !!(xdp_flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+ skb->unreadable |= !!(xdp_flags & XDP_FLAGS_FRAGS_UNREADABLE);
}
/* Avoids inlining WARN macro in fast-path */
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 513c8e9704f6..4f2d3268a676 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
return ret;
}
-static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{
- struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
list_del(&xskb->list_node);
}
+static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
+{
+ struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+ struct xdp_buff_xsk *frag;
+
+ frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
+ list_node);
+ return &frag->xdp;
+}
+
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
{
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
@@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
return NULL;
}
-static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
+{
+}
+
+static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
{
+ return NULL;
}
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)