author    Linus Torvalds <torvalds@linux-foundation.org>  2026-02-11 19:31:52 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2026-02-11 19:31:52 -0800
commit    37a93dd5c49b5fda807fd204edf2547c3493319c
tree      ce1ef5a642b9ea3d7242156438eb96dc5607a752  /net/sched
parent    098b6e44cbaa2d526d06af90c862d13fb414a0ec
parent    83310d613382f74070fc8b402f3f6c2af8439ead
Merge tag 'net-next-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni:

 "Core & protocols:

   - A significant effort all around the stack to guide the compiler to make the right choice when inlining code, to avoid unneeded calls for small helpers and stack canary overhead in the fast path. This generates better and faster code with very small or no text size increases, as in many cases the call generated more code than the actual inlined helper.

   - Extend the AccECN implementation so that it is now functionally complete; also allow user-space to enable it on a per network namespace basis.

   - Add support for memory providers with large (above 4K) rx buffers. Paired with hw-gro, larger rx buffer sizes reduce the number of buffers traversing the stack, decreasing single stream CPU usage by up to ~30%.

   - Do not add the HBH header to Big TCP GSO packets. This simplifies the RX path, the TX path and the NIC drivers, and is possible because user-space taps can now interpret such packets correctly without the HBH hint.

   - Allow IPv6 routes to be configured with a gateway address that is resolved out of a different interface than the one specified, aligning IPv6 to IPv4 behavior.

   - Multi-queue aware sch_cake. This makes it possible to scale the rate shaper of sch_cake across multiple CPUs, while still enforcing a single global rate on the interface (a sketch of the sharing rule follows the shortlog below).

   - Add support for the nbcon (new buffer console) infrastructure to netconsole, enabling lock-free, priority-based console operations that are safer in crash scenarios.

   - Improve the TCP IPv6 output path to cache the flow information, saving CPU cycles, reducing cache line misses and stack use.

   - Improve the netfilter packet tracker to resolve clashes for most protocols, avoiding unneeded drops on rare occasions.

   - Add IP6IP6 tunneling acceleration to the flowtable infrastructure.

   - Reduce the TCP socket size by one cache line.

   - Notify neighbour changes atomically, avoiding inconsistencies between the notification sequence and the actual state sequence.

   - Add vsock namespace support, allowing complete isolation of vsocks across different network namespaces.

   - Improve xsk generic performance with cache-alignment-oriented optimizations.

   - Support netconsole automatic target recovery, allowing netconsole to reestablish targets when the underlying low-level interface comes back online.

  Driver API:

   - Support for switching the working mode (automatic vs manual) of a DPLL device via netlink.

   - Introduce PHY ports representation to expose multiple front-facing media ports over a single MAC.

   - Introduce "rx-polarity" and "tx-polarity" device tree properties, to generalize polarity inversion requirements for differential signaling.

   - Add a helper to create, prepare and enable managed clocks.

  Device drivers:

   - Add Huawei hinic3 PF Ethernet driver.

   - Add DWMAC glue driver for the Motorcomm YT6801 PCIe Ethernet controller.

   - Add Ethernet driver for MaxLinear MxL862xx switches.

   - Remove the parallel-port Ethernet driver.

   - Convert existing driver timestamp configuration reporting to hwtstamp_get and remove the legacy ioctl().

   - Convert existing drivers to .get_rx_ring_count(), simplifying the RX ring count retrieval. Also remove the legacy fallback path.

   - Ethernet high-speed NICs:
      - Broadcom (bnxt, bng):
         - bnxt: add FW interface update to support FEC stats histogram and NVRAM defragmentation
         - bng: add TSO and H/W GRO support
      - nVidia/Mellanox (mlx5):
         - improve latency of channel restart operations, reducing the used H/W resources
         - add TSO support for UDP over GRE over VLAN
         - add flow counters support for hardware steering (HWS) rules
         - use a static memory area to store headers for H/W GRO, leading to a 12% RX throughput improvement
      - Intel (100G, ice, idpf):
         - ice: reorganize the layout of the Tx and Rx rings for cacheline locality and use the __cacheline_group* macros on the new layouts
         - ice: introduce Synchronous Ethernet (SyncE) support
      - Meta (fbnic):
         - add debugfs for the firmware mailbox and tx/rx ring vectors

   - Ethernet virtual:
      - geneve: introduce GRO/GSO support for double UDP encapsulation

   - Ethernet NICs consumer, and embedded:
      - Synopsys (stmmac):
         - some code refactoring and cleanups
      - RealTek (r8169):
         - add support for RTL8127ATF (10G Fiber SFP)
         - add dash and LTR support
      - Airoha:
         - AN8811HB 2.5 Gbps phy support
      - Freescale (fec):
         - add XDP zero-copy support
      - Thunderbolt:
         - add get link setting support to allow bonding
      - Renesas:
         - add support for the RZ/G3L GBETH SoC

   - Ethernet switches:
      - Maxlinear:
         - support R(G)MII slow rate configuration
         - add support for the Intel GSW150
      - Motorcomm (yt921x):
         - add DCB/QoS support
      - TI:
         - icssm-prueth: support bridging (STP/RSTP) via the switchdev framework

   - Ethernet PHYs:
      - Realtek:
         - enable SGMII and 2500Base-X in-band auto-negotiation
         - simplify and reunify the C22/C45 drivers
      - Micrel: convert bindings to DT schema

   - CAN:
      - move skb headroom content into skb extensions, making CAN metadata access more robust

   - CAN drivers:
      - rcar_canfd:
         - add support for FD-only mode
         - add support for the RZ/T2H SoC
      - sja1000: clean up the CAN state handling

   - WiFi:
      - implement EPPKE/802.1X over auth frames support
      - split up drop reasons better, removing the generic RX_DROP
      - additional FTM capabilities: 6 GHz support, supported number of spatial streams and supported number of LTF repetitions
      - better mac80211 iterators to enumerate resources
      - initial UHR (Wi-Fi 8) support for cfg80211/mac80211

   - WiFi drivers:
      - Qualcomm/Atheros:
         - ath11k: support for Channel Frequency Response measurement
         - ath12k: a significant driver refactor to support multi-wiphy devices and pave the way for future device support in the same driver (rather than splitting off to ath13k)
         - ath12k: support for the QCC2072 chipset
      - Intel:
         - iwlwifi: partial Neighbor Awareness Networking (NAN) support
         - iwlwifi: initial support for U-NII-9 and IEEE 802.11bn
      - RealTek (rtw89):
         - preparations for RTL8922DE support

   - Bluetooth:
      - implement setsockopt(BT_PHY) to set the connection packet type/PHY
      - set link_policy on incoming ACL connections

   - Bluetooth drivers:
      - btusb: add support for MediaTek7920, Realtek RTL8761BU and 8851BE
      - btqca: add WCN6855 firmware priority selection feature"

* tag 'net-next-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1254 commits)
  bnge/bng_re: Add a new HSI
  net: macb: Fix tx/rx malfunction after phy link down and up
  af_unix: Fix memleak of newsk in unix_stream_connect().
  net: ti: icssg-prueth: Add optional dependency on HSR
  net: dsa: add basic initial driver for MxL862xx switches
  net: mdio: add unlocked mdiodev C45 bus accessors
  net: dsa: add tag format for MxL862xx switches
  dt-bindings: net: dsa: add MaxLinear MxL862xx
  selftests: drivers: net: hw: Modify toeplitz.c to poll for packets
  octeontx2-pf: Unregister devlink on probe failure
  net: renesas: rswitch: fix forwarding offload statemachine
  ionic: Rate limit unknown xcvr type messages
  tcp: inet6_csk_xmit() optimization
  tcp: populate inet->cork.fl.u.ip6 in tcp_v6_syn_recv_sock()
  tcp: populate inet->cork.fl.u.ip6 in tcp_v6_connect()
  ipv6: inet6_csk_xmit() and inet6_csk_update_pmtu() use inet->cork.fl.u.ip6
  ipv6: use inet->cork.fl.u.ip6 and np->final in ip6_datagram_dst_update()
  ipv6: use np->final in inet6_sk_rebuild_header()
  ipv6: add daddr/final storage in struct ipv6_pinfo
  net: stmmac: qcom-ethqos: fix qcom_ethqos_serdes_powerup()
  ...
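The multi-queue cake entry above lands in the sch_cake.c diff below. Per the new sync block in cake_dequeue(), each per-queue instance periodically counts itself plus every sibling queue that is either backlogged or has dequeued since the last check, then shapes to the global rate divided by that count. A minimal user-space sketch of that sharing rule follows; the types and names here are illustrative, not the kernel's.

/*
 * Sketch of the cake_mq rate-sharing rule (not the kernel code).
 * A queue always counts itself as active; a sibling counts if it
 * holds packets or dequeued something since we last checked.
 */
#include <stdint.h>

struct queue_state {
	uint64_t qlen;		/* packets currently queued */
	uint64_t last_active;	/* timestamp of the last dequeue */
};

static uint64_t fair_share_bps(uint64_t global_rate_bps,
			       const struct queue_state *queues,
			       unsigned int nqueues, unsigned int self,
			       uint64_t last_checked)
{
	unsigned int active = 1;	/* this queue is active by definition */
	unsigned int i;

	for (i = 0; i < nqueues; i++) {
		if (i == self)
			continue;
		if (queues[i].qlen || queues[i].last_active > last_checked)
			active++;
	}
	/* With a single active queue the full configured rate applies. */
	return active > 1 ? global_rate_bps / active : global_rate_bps;
}

Because each queue recomputes its share only once per sync_time and siblings publish activity with plain READ_ONCE()/WRITE_ONCE(), the scheme trades a little short-term accuracy for lock-free operation on the dequeue fast path.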
Diffstat (limited to 'net/sched')
 net/sched/act_ct.c      |   2
 net/sched/act_ctinfo.c  |   1
 net/sched/sch_api.c     |   2
 net/sched/sch_cake.c    | 495
 net/sched/sch_fq.c      |  28
 net/sched/sch_generic.c |   8
 net/sched/sch_mq.c      |  71

 7 files changed, 429 insertions, 178 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 2b6ac7069dc1..81d488655793 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -13,9 +13,11 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_cls.h>
+#include <linux/if_tunnel.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/rhashtable.h>
+#include <net/gre.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 71efe04d00b5..d2c750bab1d3 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -16,6 +16,7 @@
#include <net/pkt_sched.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/inet_ecn.h>
#include <uapi/linux/tc_act/tc_ctinfo.h>
#include <net/tc_act/tc_ctinfo.h>
#include <net/tc_wrapper.h>
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f56b18c8aebf..443c116e8663 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1353,7 +1353,7 @@ err_out4:
ops->destroy(sch);
qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
- lockdep_unregister_key(&sch->root_lock_key);
+ qdisc_lock_uninit(sch, ops);
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
err_out2:
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 4a64d6397b6f..d2bbd5654d5b 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -67,6 +67,7 @@
#include <linux/if_vlan.h>
#include <net/gso.h>
#include <net/pkt_sched.h>
+#include <net/sch_priv.h>
#include <net/pkt_cls.h>
#include <net/tcp.h>
#include <net/flow_dissector.h>
@@ -197,40 +198,45 @@ struct cake_tin_data {
u32 way_collisions;
}; /* number of tins is small, so size of this struct doesn't matter much */
+struct cake_sched_config {
+ u64 rate_bps;
+ u64 interval;
+ u64 target;
+ u64 sync_time;
+ u32 buffer_config_limit;
+ u32 fwmark_mask;
+ u16 fwmark_shft;
+ s16 rate_overhead;
+ u16 rate_mpu;
+ u16 rate_flags;
+ u8 tin_mode;
+ u8 flow_mode;
+ u8 atm_mode;
+ u8 ack_filter;
+ u8 is_shared;
+};
+
struct cake_sched_data {
struct tcf_proto __rcu *filter_list; /* optional external classifier */
struct tcf_block *block;
struct cake_tin_data *tins;
+ struct cake_sched_config *config;
+ struct cake_sched_config initial_config;
struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
- u16 overflow_timeout;
-
- u16 tin_cnt;
- u8 tin_mode;
- u8 flow_mode;
- u8 ack_filter;
- u8 atm_mode;
-
- u32 fwmark_mask;
- u16 fwmark_shft;
/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
- u16 rate_shft;
ktime_t time_next_packet;
ktime_t failsafe_next_packet;
u64 rate_ns;
- u64 rate_bps;
- u16 rate_flags;
- s16 rate_overhead;
- u16 rate_mpu;
- u64 interval;
- u64 target;
+ u16 rate_shft;
+ u16 overflow_timeout;
+ u16 tin_cnt;
/* resource tracking */
u32 buffer_used;
u32 buffer_max_used;
u32 buffer_limit;
- u32 buffer_config_limit;
/* indices for dequeue */
u16 cur_tin;
@@ -254,6 +260,11 @@ struct cake_sched_data {
u16 max_adjlen;
u16 min_netlen;
u16 min_adjlen;
+
+ /* mq sync state */
+ u64 last_checked_active;
+ u64 last_active;
+ u32 active_queues;
};
enum {
@@ -380,6 +391,8 @@ static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
1239850263, 1191209601, 1147878294, 1108955788
};
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+ u64 target_ns, u64 rtt_est_ns);
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
*
@@ -1198,7 +1211,7 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
struct cake_flow *flow)
{
- bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
+ bool aggressive = q->config->ack_filter == CAKE_ACK_AGGRESSIVE;
struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
struct sk_buff *skb_check, *skb_prev = NULL;
const struct ipv6hdr *ipv6h, *ipv6h_check;
@@ -1266,7 +1279,7 @@ static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr))
continue;
- seglen = ntohs(ipv6h_check->payload_len);
+ seglen = ipv6_payload_len(skb, ipv6h_check);
} else {
WARN_ON(1); /* shouldn't happen */
continue;
@@ -1358,15 +1371,17 @@ static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
return avg;
}
-static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off)
{
+ struct cake_sched_config *q = qd->config;
+
if (q->rate_flags & CAKE_FLAG_OVERHEAD)
len -= off;
- if (q->max_netlen < len)
- q->max_netlen = len;
- if (q->min_netlen > len)
- q->min_netlen = len;
+ if (qd->max_netlen < len)
+ qd->max_netlen = len;
+ if (qd->min_netlen > len)
+ qd->min_netlen = len;
len += q->rate_overhead;
@@ -1385,10 +1400,10 @@ static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
len += (len + 63) / 64;
}
- if (q->max_adjlen < len)
- q->max_adjlen = len;
- if (q->min_adjlen > len)
- q->min_adjlen = len;
+ if (qd->max_adjlen < len)
+ qd->max_adjlen = len;
+ if (qd->min_adjlen > len)
+ qd->min_adjlen = len;
return len;
}
@@ -1586,7 +1601,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
flow->dropped++;
b->tin_dropped++;
- if (q->rate_flags & CAKE_FLAG_INGRESS)
+ if (q->config->rate_flags & CAKE_FLAG_INGRESS)
cake_advance_shaper(q, b, skb, now, true);
qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT);
@@ -1656,7 +1671,8 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
struct sk_buff *skb)
{
- struct cake_sched_data *q = qdisc_priv(sch);
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
u32 tin, mark;
bool wash;
u8 dscp;
@@ -1673,24 +1689,24 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
- else if (mark && mark <= q->tin_cnt)
- tin = q->tin_order[mark - 1];
+ else if (mark && mark <= qd->tin_cnt)
+ tin = qd->tin_order[mark - 1];
else if (TC_H_MAJ(skb->priority) == sch->handle &&
TC_H_MIN(skb->priority) > 0 &&
- TC_H_MIN(skb->priority) <= q->tin_cnt)
- tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
+ TC_H_MIN(skb->priority) <= qd->tin_cnt)
+ tin = qd->tin_order[TC_H_MIN(skb->priority) - 1];
else {
if (!wash)
dscp = cake_handle_diffserv(skb, wash);
- tin = q->tin_index[dscp];
+ tin = qd->tin_index[dscp];
- if (unlikely(tin >= q->tin_cnt))
+ if (unlikely(tin >= qd->tin_cnt))
tin = 0;
}
- return &q->tins[tin];
+ return &qd->tins[tin];
}
static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
@@ -1746,7 +1762,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
bool same_flow = false;
/* choose flow to insert into */
- idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
+ idx = cake_classify(sch, &b, skb, q->config->flow_mode, &ret);
if (idx == 0) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
@@ -1781,7 +1797,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(len > b->max_skblen))
b->max_skblen = len;
- if (qdisc_pkt_segs(skb) > 1 && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
+ if (qdisc_pkt_segs(skb) > 1 && q->config->rate_flags & CAKE_FLAG_SPLIT_GSO) {
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
unsigned int slen = 0, numsegs = 0;
@@ -1823,7 +1839,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
flow_queue_add(flow, skb);
- if (q->ack_filter)
+ if (q->config->ack_filter)
ack = cake_ack_filter(q, flow);
if (ack) {
@@ -1832,7 +1848,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
ack_pkt_len = qdisc_pkt_len(ack);
b->bytes += ack_pkt_len;
q->buffer_used += skb->truesize - ack->truesize;
- if (q->rate_flags & CAKE_FLAG_INGRESS)
+ if (q->config->rate_flags & CAKE_FLAG_INGRESS)
cake_advance_shaper(q, b, ack, now, true);
qdisc_tree_reduce_backlog(sch, 1, ack_pkt_len);
@@ -1855,7 +1871,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cake_heapify_up(q, b->overflow_idx[idx]);
/* incoming bandwidth capacity estimate */
- if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
+ if (q->config->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
u64 packet_interval = \
ktime_to_ns(ktime_sub(now, q->last_packet_time));
@@ -1887,7 +1903,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (ktime_after(now,
ktime_add_ms(q->last_reconfig_time,
250))) {
- q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
+ q->config->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
cake_reconfigure(sch);
}
}
@@ -1907,7 +1923,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
flow->set = CAKE_SET_SPARSE;
b->sparse_flow_count++;
- flow->deficit = cake_get_flow_quantum(b, flow, q->flow_mode);
+ flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode);
} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
/* this flow was empty, accounted as a sparse flow, but actually
* in the bulk rotation.
@@ -1916,8 +1932,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
b->sparse_flow_count--;
b->bulk_flow_count++;
- cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode);
- cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
+ cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
}
if (q->buffer_used > q->buffer_max_used)
@@ -1997,6 +2013,40 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
u64 delay;
u32 len;
+ if (q->config->is_shared && now - q->last_checked_active >= q->config->sync_time) {
+ struct net_device *dev = qdisc_dev(sch);
+ struct cake_sched_data *other_priv;
+ u64 new_rate = q->config->rate_bps;
+ u64 other_qlen, other_last_active;
+ struct Qdisc *other_sch;
+ u32 num_active_qs = 1;
+ unsigned int ntx;
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ other_sch = rcu_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
+ other_priv = qdisc_priv(other_sch);
+
+ if (other_priv == q)
+ continue;
+
+ other_qlen = READ_ONCE(other_sch->q.qlen);
+ other_last_active = READ_ONCE(other_priv->last_active);
+
+ if (other_qlen || other_last_active > q->last_checked_active)
+ num_active_qs++;
+ }
+
+ if (num_active_qs > 1)
+ new_rate = div64_u64(q->config->rate_bps, num_active_qs);
+
+ /* mtu = 0 is used to only update the rate and not mess with cobalt params */
+ cake_set_rate(b, new_rate, 0, 0, 0);
+ q->last_checked_active = now;
+ q->active_queues = num_active_qs;
+ q->rate_ns = b->tin_rate_ns;
+ q->rate_shft = b->tin_rate_shft;
+ }
+
begin:
if (!sch->q.qlen)
return NULL;
@@ -2104,8 +2154,8 @@ retry:
b->sparse_flow_count--;
b->bulk_flow_count++;
- cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode);
- cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
+ cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
flow->set = CAKE_SET_BULK;
} else {
@@ -2117,7 +2167,7 @@ retry:
}
}
- flow->deficit += cake_get_flow_quantum(b, flow, q->flow_mode);
+ flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode);
list_move_tail(&flow->flowchain, &b->old_flows);
goto retry;
@@ -2141,8 +2191,8 @@ retry:
if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode);
- cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
+ cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
b->decaying_flow_count++;
} else if (flow->set == CAKE_SET_SPARSE ||
@@ -2160,8 +2210,8 @@ retry:
else if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode);
- cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
+ cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
} else
b->decaying_flow_count--;
@@ -2172,14 +2222,14 @@ retry:
reason = cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
(b->bulk_flow_count *
- !!(q->rate_flags &
+ !!(q->config->rate_flags &
CAKE_FLAG_INGRESS)));
/* Last packet in queue may be marked, shouldn't be dropped */
if (reason == SKB_NOT_DROPPED_YET || !flow->head)
break;
/* drop this packet, get another one */
- if (q->rate_flags & CAKE_FLAG_INGRESS) {
+ if (q->config->rate_flags & CAKE_FLAG_INGRESS) {
len = cake_advance_shaper(q, b, skb,
now, true);
flow->deficit -= len;
@@ -2190,12 +2240,13 @@ retry:
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_qstats_drop(sch);
qdisc_dequeue_drop(sch, skb, reason);
- if (q->rate_flags & CAKE_FLAG_INGRESS)
+ if (q->config->rate_flags & CAKE_FLAG_INGRESS)
goto retry;
}
b->tin_ecn_mark += !!flow->cvars.ecn_marked;
qdisc_bstats_update(sch, skb);
+ WRITE_ONCE(q->last_active, now);
/* collect delay stats */
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
@@ -2296,6 +2347,9 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
b->tin_rate_ns = rate_ns;
b->tin_rate_shft = rate_shft;
+ if (mtu == 0)
+ return;
+
byte_target_ns = (byte_target * rate_ns) >> rate_shft;
b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
@@ -2312,7 +2366,7 @@ static int cake_config_besteffort(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
struct cake_tin_data *b = &q->tins[0];
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
q->tin_cnt = 1;
@@ -2320,7 +2374,7 @@ static int cake_config_besteffort(struct Qdisc *sch)
q->tin_order = normal_order;
cake_set_rate(b, rate, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
b->tin_quantum = 65535;
return 0;
@@ -2331,7 +2385,7 @@ static int cake_config_precedence(struct Qdisc *sch)
/* convert high-level (user visible) parameters into internal format */
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
u32 quantum = 256;
u32 i;
@@ -2342,8 +2396,8 @@ static int cake_config_precedence(struct Qdisc *sch)
for (i = 0; i < q->tin_cnt; i++) {
struct cake_tin_data *b = &q->tins[i];
- cake_set_rate(b, rate, mtu, us_to_ns(q->target),
- us_to_ns(q->interval));
+ cake_set_rate(b, rate, mtu, us_to_ns(q->config->target),
+ us_to_ns(q->config->interval));
b->tin_quantum = max_t(u16, 1U, quantum);
@@ -2420,7 +2474,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
u32 quantum = 256;
u32 i;
@@ -2434,8 +2488,8 @@ static int cake_config_diffserv8(struct Qdisc *sch)
for (i = 0; i < q->tin_cnt; i++) {
struct cake_tin_data *b = &q->tins[i];
- cake_set_rate(b, rate, mtu, us_to_ns(q->target),
- us_to_ns(q->interval));
+ cake_set_rate(b, rate, mtu, us_to_ns(q->config->target),
+ us_to_ns(q->config->interval));
b->tin_quantum = max_t(u16, 1U, quantum);
@@ -2464,7 +2518,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
u32 quantum = 1024;
q->tin_cnt = 4;
@@ -2475,13 +2529,13 @@ static int cake_config_diffserv4(struct Qdisc *sch)
/* class characteristics */
cake_set_rate(&q->tins[0], rate, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[1], rate >> 4, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[2], rate >> 1, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[3], rate >> 2, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
/* bandwidth-sharing weights */
q->tins[0].tin_quantum = quantum;
@@ -2501,7 +2555,7 @@ static int cake_config_diffserv3(struct Qdisc *sch)
*/
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
u32 quantum = 1024;
q->tin_cnt = 3;
@@ -2512,11 +2566,11 @@ static int cake_config_diffserv3(struct Qdisc *sch)
/* class characteristics */
cake_set_rate(&q->tins[0], rate, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[1], rate >> 4, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[2], rate >> 2, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
/* bandwidth-sharing weights */
q->tins[0].tin_quantum = quantum;
@@ -2528,7 +2582,8 @@ static int cake_config_diffserv3(struct Qdisc *sch)
static void cake_reconfigure(struct Qdisc *sch)
{
- struct cake_sched_data *q = qdisc_priv(sch);
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
int c, ft;
switch (q->tin_mode) {
@@ -2554,39 +2609,38 @@ static void cake_reconfigure(struct Qdisc *sch)
break;
}
- for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
+ for (c = qd->tin_cnt; c < CAKE_MAX_TINS; c++) {
cake_clear_tin(sch, c);
- q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
+ qd->tins[c].cparams.mtu_time = qd->tins[ft].cparams.mtu_time;
}
- q->rate_ns = q->tins[ft].tin_rate_ns;
- q->rate_shft = q->tins[ft].tin_rate_shft;
+ qd->rate_ns = qd->tins[ft].tin_rate_ns;
+ qd->rate_shft = qd->tins[ft].tin_rate_shft;
if (q->buffer_config_limit) {
- q->buffer_limit = q->buffer_config_limit;
+ qd->buffer_limit = q->buffer_config_limit;
} else if (q->rate_bps) {
u64 t = q->rate_bps * q->interval;
do_div(t, USEC_PER_SEC / 4);
- q->buffer_limit = max_t(u32, t, 4U << 20);
+ qd->buffer_limit = max_t(u32, t, 4U << 20);
} else {
- q->buffer_limit = ~0;
+ qd->buffer_limit = ~0;
}
sch->flags &= ~TCQ_F_CAN_BYPASS;
- q->buffer_limit = min(q->buffer_limit,
- max(sch->limit * psched_mtu(qdisc_dev(sch)),
- q->buffer_config_limit));
+ qd->buffer_limit = min(qd->buffer_limit,
+ max(sch->limit * psched_mtu(qdisc_dev(sch)),
+ q->buffer_config_limit));
}
-static int cake_change(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
+static int cake_config_change(struct cake_sched_config *q, struct nlattr *opt,
+ struct netlink_ext_ack *extack, bool *overhead_changed)
{
- struct cake_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CAKE_MAX + 1];
- u16 rate_flags;
- u8 flow_mode;
+ u16 rate_flags = q->rate_flags;
+ u8 flow_mode = q->flow_mode;
int err;
err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
@@ -2594,7 +2648,6 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
- flow_mode = q->flow_mode;
if (tb[TCA_CAKE_NAT]) {
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
flow_mode &= ~CAKE_FLOW_NAT_FLAG;
@@ -2607,6 +2660,19 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
#endif
}
+ if (tb[TCA_CAKE_AUTORATE]) {
+ if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE])) {
+ if (q->is_shared) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_AUTORATE],
+ "Can't use autorate-ingress with cake_mq");
+ return -EOPNOTSUPP;
+ }
+ rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+ } else {
+ rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+ }
+ }
+
if (tb[TCA_CAKE_BASE_RATE64])
WRITE_ONCE(q->rate_bps,
nla_get_u64(tb[TCA_CAKE_BASE_RATE64]));
@@ -2615,7 +2681,6 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->tin_mode,
nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]));
- rate_flags = q->rate_flags;
if (tb[TCA_CAKE_WASH]) {
if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
rate_flags |= CAKE_FLAG_WASH;
@@ -2636,20 +2701,12 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->rate_overhead,
nla_get_s32(tb[TCA_CAKE_OVERHEAD]));
rate_flags |= CAKE_FLAG_OVERHEAD;
-
- q->max_netlen = 0;
- q->max_adjlen = 0;
- q->min_netlen = ~0;
- q->min_adjlen = ~0;
+ *overhead_changed = true;
}
if (tb[TCA_CAKE_RAW]) {
rate_flags &= ~CAKE_FLAG_OVERHEAD;
-
- q->max_netlen = 0;
- q->max_adjlen = 0;
- q->min_netlen = ~0;
- q->min_adjlen = ~0;
+ *overhead_changed = true;
}
if (tb[TCA_CAKE_MPU])
@@ -2668,13 +2725,6 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->target, max(target, 1U));
}
- if (tb[TCA_CAKE_AUTORATE]) {
- if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
- rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
- else
- rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
- }
-
if (tb[TCA_CAKE_INGRESS]) {
if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
rate_flags |= CAKE_FLAG_INGRESS;
@@ -2705,7 +2755,35 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->rate_flags, rate_flags);
WRITE_ONCE(q->flow_mode, flow_mode);
- if (q->tins) {
+
+ return 0;
+}
+
+static int cake_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
+ bool overhead_changed = false;
+ int ret;
+
+ if (q->is_shared) {
+ NL_SET_ERR_MSG(extack, "can't reconfigure cake_mq sub-qdiscs");
+ return -EOPNOTSUPP;
+ }
+
+ ret = cake_config_change(q, opt, extack, &overhead_changed);
+ if (ret)
+ return ret;
+
+ if (overhead_changed) {
+ qd->max_netlen = 0;
+ qd->max_adjlen = 0;
+ qd->min_netlen = ~0;
+ qd->min_adjlen = ~0;
+ }
+
+ if (qd->tins) {
sch_tree_lock(sch);
cake_reconfigure(sch);
sch_tree_unlock(sch);
@@ -2723,15 +2801,8 @@ static void cake_destroy(struct Qdisc *sch)
kvfree(q->tins);
}
-static int cake_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
+static void cake_config_init(struct cake_sched_config *q, bool is_shared)
{
- struct cake_sched_data *q = qdisc_priv(sch);
- int i, j, err;
-
- sch->limit = 10240;
- sch->flags |= TCQ_F_DEQUEUE_DROPS;
-
q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
q->flow_mode = CAKE_FLOW_TRIPLE;
@@ -2742,19 +2813,35 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
* for 5 to 10% of interval
*/
q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
- q->cur_tin = 0;
- q->cur_flow = 0;
+ q->is_shared = is_shared;
+ q->sync_time = 200 * NSEC_PER_USEC;
+}
+
+static int cake_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = &qd->initial_config;
+ int i, j, err;
+
+ cake_config_init(q, false);
+
+ sch->limit = 10240;
+ sch->flags |= TCQ_F_DEQUEUE_DROPS;
+
+ qd->cur_tin = 0;
+ qd->cur_flow = 0;
+ qd->config = q;
- qdisc_watchdog_init(&q->watchdog, sch);
+ qdisc_watchdog_init(&qd->watchdog, sch);
if (opt) {
err = cake_change(sch, opt, extack);
-
if (err)
return err;
}
- err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+ err = tcf_block_get(&qd->block, &qd->filter_list, sch, extack);
if (err)
return err;
@@ -2762,13 +2849,13 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
for (i = 1; i <= CAKE_QUEUES; i++)
quantum_div[i] = 65535 / i;
- q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
- GFP_KERNEL);
- if (!q->tins)
+ qd->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
+ GFP_KERNEL);
+ if (!qd->tins)
return -ENOMEM;
for (i = 0; i < CAKE_MAX_TINS; i++) {
- struct cake_tin_data *b = q->tins + i;
+ struct cake_tin_data *b = qd->tins + i;
INIT_LIST_HEAD(&b->new_flows);
INIT_LIST_HEAD(&b->old_flows);
@@ -2784,22 +2871,32 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
INIT_LIST_HEAD(&flow->flowchain);
cobalt_vars_init(&flow->cvars);
- q->overflow_heap[k].t = i;
- q->overflow_heap[k].b = j;
+ qd->overflow_heap[k].t = i;
+ qd->overflow_heap[k].b = j;
b->overflow_idx[j] = k;
}
}
cake_reconfigure(sch);
- q->avg_peak_bandwidth = q->rate_bps;
- q->min_netlen = ~0;
- q->min_adjlen = ~0;
+ qd->avg_peak_bandwidth = q->rate_bps;
+ qd->min_netlen = ~0;
+ qd->min_adjlen = ~0;
+ qd->active_queues = 0;
+ qd->last_checked_active = 0;
+
return 0;
}
-static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+static void cake_config_replace(struct Qdisc *sch, struct cake_sched_config *cfg)
+{
+ struct cake_sched_data *qd = qdisc_priv(sch);
+
+ qd->config = cfg;
+ cake_reconfigure(sch);
+}
+
+static int cake_config_dump(struct cake_sched_config *q, struct sk_buff *skb)
{
- struct cake_sched_data *q = qdisc_priv(sch);
struct nlattr *opts;
u16 rate_flags;
u8 flow_mode;
@@ -2875,6 +2972,13 @@ nla_put_failure:
return -1;
}
+static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct cake_sched_data *qd = qdisc_priv(sch);
+
+ return cake_config_dump(qd->config, skb);
+}
+
static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
@@ -2903,6 +3007,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+ PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues);
#undef PUT_STAT_U32
#undef PUT_STAT_U64
@@ -3136,14 +3241,133 @@ static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
};
MODULE_ALIAS_NET_SCH("cake");
+struct cake_mq_sched {
+ struct mq_sched mq_priv; /* must be first */
+ struct cake_sched_config cake_config;
+};
+
+static void cake_mq_destroy(struct Qdisc *sch)
+{
+ mq_destroy_common(sch);
+}
+
+static int cake_mq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct cake_mq_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int ret, ntx;
+ bool _unused;
+
+ cake_config_init(&priv->cake_config, true);
+ if (opt) {
+ ret = cake_config_change(&priv->cake_config, opt, extack, &_unused);
+ if (ret)
+ return ret;
+ }
+
+ ret = mq_init_common(sch, opt, extack, &cake_qdisc_ops);
+ if (ret)
+ return ret;
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++)
+ cake_config_replace(priv->mq_priv.qdiscs[ntx], &priv->cake_config);
+
+ return 0;
+}
+
+static int cake_mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct cake_mq_sched *priv = qdisc_priv(sch);
+
+ mq_dump_common(sch, skb);
+ return cake_config_dump(&priv->cake_config, skb);
+}
+
+static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct cake_mq_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ bool overhead_changed = false;
+ unsigned int ntx;
+ int ret;
+
+ ret = cake_config_change(&priv->cake_config, opt, extack, &overhead_changed);
+ if (ret)
+ return ret;
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ struct Qdisc *chld = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
+ struct cake_sched_data *qd = qdisc_priv(chld);
+
+ if (overhead_changed) {
+ qd->max_netlen = 0;
+ qd->max_adjlen = 0;
+ qd->min_netlen = ~0;
+ qd->min_adjlen = ~0;
+ }
+
+ if (qd->tins) {
+ sch_tree_lock(chld);
+ cake_reconfigure(chld);
+ sch_tree_unlock(chld);
+ }
+ }
+
+ return 0;
+}
+
+static int cake_mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+ struct Qdisc **old, struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "can't replace cake_mq sub-qdiscs");
+ return -EOPNOTSUPP;
+}
+
+static const struct Qdisc_class_ops cake_mq_class_ops = {
+ .select_queue = mq_select_queue,
+ .graft = cake_mq_graft,
+ .leaf = mq_leaf,
+ .find = mq_find,
+ .walk = mq_walk,
+ .dump = mq_dump_class,
+ .dump_stats = mq_dump_class_stats,
+};
+
+static struct Qdisc_ops cake_mq_qdisc_ops __read_mostly = {
+ .cl_ops = &cake_mq_class_ops,
+ .id = "cake_mq",
+ .priv_size = sizeof(struct cake_mq_sched),
+ .init = cake_mq_init,
+ .destroy = cake_mq_destroy,
+ .attach = mq_attach,
+ .change = cake_mq_change,
+ .change_real_num_tx = mq_change_real_num_tx,
+ .dump = cake_mq_dump,
+ .owner = THIS_MODULE,
+};
+MODULE_ALIAS_NET_SCH("cake_mq");
+
static int __init cake_module_init(void)
{
- return register_qdisc(&cake_qdisc_ops);
+ int ret;
+
+ ret = register_qdisc(&cake_qdisc_ops);
+ if (ret)
+ return ret;
+
+ ret = register_qdisc(&cake_mq_qdisc_ops);
+ if (ret)
+ unregister_qdisc(&cake_qdisc_ops);
+
+ return ret;
}
static void __exit cake_module_exit(void)
{
unregister_qdisc(&cake_qdisc_ops);
+ unregister_qdisc(&cake_mq_qdisc_ops);
}
module_init(cake_module_init)
@@ -3151,3 +3375,4 @@ module_exit(cake_module_exit)
MODULE_AUTHOR("Jonathan Morton");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("The CAKE shaper.");
+MODULE_IMPORT_NS("NET_SCHED_INTERNAL");
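One detail worth flagging in the cake_mq code above: struct cake_mq_sched places struct mq_sched first ("must be first") because the shared mq_*_common() helpers fetch qdisc_priv(sch) and interpret it as a bare struct mq_sched, which is only valid when the embedded member sits at offset zero. A stand-alone sketch of this C embedding idiom, with hypothetical names:

#include <assert.h>

struct base { int x; };

struct derived {
	struct base base;	/* must be first */
	int extra;
};

/* A helper that only knows about struct base, like the mq helpers
 * only know about struct mq_sched. */
static int base_get_x(void *priv)
{
	return ((struct base *)priv)->x;
}

int main(void)
{
	struct derived d = { .base = { .x = 42 }, .extra = 7 };

	/* Valid because the embedded base sits at offset zero, so a
	 * pointer to d is also a pointer to d.base. */
	assert(base_get_x(&d) == 42);
	return 0;
}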
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 6e5f2f4f2415..80235e85f844 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -245,8 +245,6 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
static struct kmem_cache *fq_flow_cachep __read_mostly;
-/* limit number of collected flows per round */
-#define FQ_GC_MAX 8
#define FQ_GC_AGE (3*HZ)
static bool fq_gc_candidate(const struct fq_flow *f)
@@ -259,10 +257,9 @@ static void fq_gc(struct fq_sched_data *q,
struct rb_root *root,
struct sock *sk)
{
+ struct fq_flow *f, *tofree = NULL;
struct rb_node **p, *parent;
- void *tofree[FQ_GC_MAX];
- struct fq_flow *f;
- int i, fcnt = 0;
+ int fcnt;
p = &root->rb_node;
parent = NULL;
@@ -274,9 +271,8 @@ static void fq_gc(struct fq_sched_data *q,
break;
if (fq_gc_candidate(f)) {
- tofree[fcnt++] = f;
- if (fcnt == FQ_GC_MAX)
- break;
+ f->next = tofree;
+ tofree = f;
}
if (f->sk > sk)
@@ -285,18 +281,20 @@ static void fq_gc(struct fq_sched_data *q,
p = &parent->rb_left;
}
- if (!fcnt)
+ if (!tofree)
return;
- for (i = fcnt; i > 0; ) {
- f = tofree[--i];
+ fcnt = 0;
+ while (tofree) {
+ f = tofree;
+ tofree = f->next;
rb_erase(&f->fq_node, root);
+ kmem_cache_free(fq_flow_cachep, f);
+ fcnt++;
}
q->flows -= fcnt;
q->inactive_flows -= fcnt;
q->stat_gc_flows += fcnt;
-
- kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
}
/* Fast path can be used if :
@@ -665,7 +663,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
return NULL;
skb = fq_peek(&q->internal);
- if (unlikely(skb)) {
+ if (skb) {
q->internal.qlen--;
fq_dequeue_skb(sch, &q->internal, skb);
goto out;
@@ -716,7 +714,7 @@ begin:
}
prefetch(&skb->end);
fq_dequeue_skb(sch, f, skb);
- if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
+ if (unlikely((s64)(now - time_next_packet - q->ce_threshold) > 0)) {
INET_ECN_set_ce(skb);
q->stat_ce_mark++;
}
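The fq_gc() rework above drops the fixed tofree[FQ_GC_MAX] array, and with it the per-round cap of eight collected flows, by chaining every garbage candidate through the flow's own next pointer and freeing the chain after the tree walk. A user-space sketch of the pattern, using a flat array in place of the rbtree; the node type and is_garbage() predicate are assumptions:

#include <stdlib.h>

struct node {
	struct node *next;	/* reused as the to-free link */
	int dead;
};

static int is_garbage(const struct node *n) { return n->dead; }

/* Collect and free garbage nodes; returns the number freed. */
static int gc(struct node **slots, int nslots)
{
	struct node *tofree = NULL;
	int i, freed = 0;

	for (i = 0; i < nslots; i++) {
		struct node *n = slots[i];

		if (n && is_garbage(n)) {
			slots[i] = NULL;	/* unlink from the structure */
			n->next = tofree;	/* push onto the free chain */
			tofree = n;
		}
	}
	while (tofree) {
		struct node *n = tofree;

		tofree = n->next;
		free(n);
		freed++;
	}
	return freed;
}

The upside, as in the kernel change, is that one pass can now reclaim an unbounded number of dead entries without any auxiliary storage; the cost is giving up the batched kmem_cache_free_bulk() call in favor of freeing one node at a time.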
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 852e603c1755..98ffe64de51f 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -955,9 +955,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
gnet_stats_basic_sync_init(&sch->bstats);
- lockdep_register_key(&sch->root_lock_key);
- spin_lock_init(&sch->q.lock);
- lockdep_set_class(&sch->q.lock, &sch->root_lock_key);
+ qdisc_lock_init(sch, ops);
if (ops->static_flags & TCQ_F_CPUSTATS) {
sch->cpu_bstats =
@@ -987,7 +985,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
return sch;
errout1:
- lockdep_unregister_key(&sch->root_lock_key);
+ qdisc_lock_uninit(sch, ops);
kfree(sch);
errout:
return ERR_PTR(err);
@@ -1076,7 +1074,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
if (ops->destroy)
ops->destroy(qdisc);
- lockdep_unregister_key(&qdisc->root_lock_key);
+ qdisc_lock_uninit(qdisc, ops);
bpf_module_put(ops, ops->owner);
netdev_put(dev, &qdisc->dev_tracker);
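The qdisc_lock_init()/qdisc_lock_uninit() pair replaces the open-coded lockdep key handling in qdisc_alloc() and the matching unregister calls on the error and destroy paths (here and in sch_api.c above). A plausible shape for these helpers, assuming they wrap exactly the sequence the hunks remove; the real definitions live behind <net/sch_priv.h>, and the ops argument hints at per-qdisc-ops behavior not visible in this diff:

static inline void qdisc_lock_init(struct Qdisc *sch,
				   const struct Qdisc_ops *ops)
{
	/* Give each qdisc its own lockdep class for the queue lock. */
	lockdep_register_key(&sch->root_lock_key);
	spin_lock_init(&sch->q.lock);
	lockdep_set_class(&sch->q.lock, &sch->root_lock_key);
}

static inline void qdisc_lock_uninit(struct Qdisc *sch,
				     const struct Qdisc_ops *ops)
{
	lockdep_unregister_key(&sch->root_lock_key);
}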
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index c860119a8f09..bb94cd577943 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -15,11 +15,7 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
-#include <net/sch_generic.h>
-
-struct mq_sched {
- struct Qdisc **qdiscs;
-};
+#include <net/sch_priv.h>
static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd)
{
@@ -49,23 +45,29 @@ static int mq_offload_stats(struct Qdisc *sch)
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_MQ, &opt);
}
-static void mq_destroy(struct Qdisc *sch)
+void mq_destroy_common(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
unsigned int ntx;
- mq_offload(sch, TC_MQ_DESTROY);
-
if (!priv->qdiscs)
return;
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
qdisc_put(priv->qdiscs[ntx]);
kfree(priv->qdiscs);
}
+EXPORT_SYMBOL_NS_GPL(mq_destroy_common, "NET_SCHED_INTERNAL");
-static int mq_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
+static void mq_destroy(struct Qdisc *sch)
+{
+ mq_offload(sch, TC_MQ_DESTROY);
+ mq_destroy_common(sch);
+}
+
+int mq_init_common(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack,
+ const struct Qdisc_ops *qdisc_ops)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
@@ -87,7 +89,8 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt,
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
dev_queue = netdev_get_tx_queue(dev, ntx);
- qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
+ qdisc = qdisc_create_dflt(dev_queue,
+ qdisc_ops ?: get_default_qdisc_ops(dev, ntx),
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)),
extack);
@@ -98,12 +101,24 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt,
}
sch->flags |= TCQ_F_MQROOT;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(mq_init_common, "NET_SCHED_INTERNAL");
+
+static int mq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ int ret;
+
+ ret = mq_init_common(sch, opt, extack, NULL);
+ if (ret)
+ return ret;
mq_offload(sch, TC_MQ_CREATE);
return 0;
}
-static void mq_attach(struct Qdisc *sch)
+void mq_attach(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
@@ -124,8 +139,9 @@ static void mq_attach(struct Qdisc *sch)
kfree(priv->qdiscs);
priv->qdiscs = NULL;
}
+EXPORT_SYMBOL_NS_GPL(mq_attach, "NET_SCHED_INTERNAL");
-static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb)
{
struct net_device *dev = qdisc_dev(sch);
struct Qdisc *qdisc;
@@ -152,7 +168,12 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
spin_unlock_bh(qdisc_lock(qdisc));
}
+}
+EXPORT_SYMBOL_NS_GPL(mq_dump_common, "NET_SCHED_INTERNAL");
+static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ mq_dump_common(sch, skb);
return mq_offload_stats(sch);
}
@@ -166,11 +187,12 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
return netdev_get_tx_queue(dev, ntx);
}
-static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
- struct tcmsg *tcm)
+struct netdev_queue *mq_select_queue(struct Qdisc *sch,
+ struct tcmsg *tcm)
{
return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}
+EXPORT_SYMBOL_NS_GPL(mq_select_queue, "NET_SCHED_INTERNAL");
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
@@ -198,14 +220,15 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
return 0;
}
-static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
+struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
return rtnl_dereference(dev_queue->qdisc_sleeping);
}
+EXPORT_SYMBOL_NS_GPL(mq_leaf, "NET_SCHED_INTERNAL");
-static unsigned long mq_find(struct Qdisc *sch, u32 classid)
+unsigned long mq_find(struct Qdisc *sch, u32 classid)
{
unsigned int ntx = TC_H_MIN(classid);
@@ -213,9 +236,10 @@ static unsigned long mq_find(struct Qdisc *sch, u32 classid)
return 0;
return ntx;
}
+EXPORT_SYMBOL_NS_GPL(mq_find, "NET_SCHED_INTERNAL");
-static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
+int mq_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
@@ -224,9 +248,10 @@ static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
return 0;
}
+EXPORT_SYMBOL_NS_GPL(mq_dump_class, "NET_SCHED_INTERNAL");
-static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
- struct gnet_dump *d)
+int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
@@ -236,8 +261,9 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
return -1;
return 0;
}
+EXPORT_SYMBOL_NS_GPL(mq_dump_class_stats, "NET_SCHED_INTERNAL");
-static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
struct net_device *dev = qdisc_dev(sch);
unsigned int ntx;
@@ -251,6 +277,7 @@ static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
break;
}
}
+EXPORT_SYMBOL_NS_GPL(mq_walk, "NET_SCHED_INTERNAL");
static const struct Qdisc_class_ops mq_class_ops = {
.select_queue = mq_select_queue,
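The EXPORT_SYMBOL_NS_GPL() calls through this file place the mq helpers in the "NET_SCHED_INTERNAL" symbol namespace; the counterpart is the MODULE_IMPORT_NS("NET_SCHED_INTERNAL") line added at the end of sch_cake.c. In sketch form, with the provider and consumer sides reduced to the two relevant statements:

/* provider (sch_mq.c): export the symbol into the namespace */
EXPORT_SYMBOL_NS_GPL(mq_walk, "NET_SCHED_INTERNAL");

/* consumer (sch_cake.c): must import the namespace explicitly,
 * otherwise modpost rejects the reference at module link time */
MODULE_IMPORT_NS("NET_SCHED_INTERNAL");

This keeps the helpers out of the general kernel symbol space, so only modules that deliberately opt in (here, the cake_mq work) can reuse mq's internals.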