summaryrefslogtreecommitdiff
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h99
1 files changed, 63 insertions, 36 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 60bcb13f045c..02253c6a578b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -118,6 +118,7 @@ typedef __u64 __bitwise __addrpair;
* @skc_reuseport: %SO_REUSEPORT setting
* @skc_ipv6only: socket is IPV6 only
* @skc_net_refcnt: socket is using net ref counting
+ * @skc_bypass_prot_mem: bypass the per-protocol memory accounting for skb
* @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
@@ -174,6 +175,7 @@ struct sock_common {
unsigned char skc_reuseport:1;
unsigned char skc_ipv6only:1;
unsigned char skc_net_refcnt:1;
+ unsigned char skc_bypass_prot_mem:1;
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
@@ -303,6 +305,8 @@ struct sk_filter;
* @sk_txrehash: enable TX hash rethink
* @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer
+ * @tcp_retransmit_timer: tcp retransmit timer
+ * @mptcp_retransmit_timer: mptcp retransmit timer
* @sk_stamp: time stamp of last packet received
* @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
* @sk_tsflags: SO_TIMESTAMPING flags
@@ -313,6 +317,7 @@ struct sk_filter;
* @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock
* for timestamping
* @sk_tskey: counter to disambiguate concurrent tstamp requests
+ * @sk_tx_queue_mapping_jiffies: time in jiffies of last @sk_tx_queue_mapping refresh.
* @sk_zckey: counter to order MSG_ZEROCOPY notifications
* @sk_socket: Identd and reporting IO signals
* @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock.
@@ -380,6 +385,7 @@ struct sock {
#define sk_reuseport __sk_common.skc_reuseport
#define sk_ipv6only __sk_common.skc_ipv6only
#define sk_net_refcnt __sk_common.skc_net_refcnt
+#define sk_bypass_prot_mem __sk_common.skc_bypass_prot_mem
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
@@ -477,17 +483,21 @@ struct sock {
struct rb_root tcp_rtx_queue;
};
struct sk_buff_head sk_write_queue;
- u32 sk_dst_pending_confirm;
- u32 sk_pacing_status; /* see enum sk_pacing */
struct page_frag sk_frag;
- struct timer_list sk_timer;
-
+ union {
+ struct timer_list sk_timer;
+ struct timer_list tcp_retransmit_timer;
+ struct timer_list mptcp_retransmit_timer;
+ };
unsigned long sk_pacing_rate; /* bytes per second */
atomic_t sk_zckey;
atomic_t sk_tskey;
+ unsigned long sk_tx_queue_mapping_jiffies;
__cacheline_group_end(sock_write_tx);
__cacheline_group_begin(sock_read_tx);
+ u32 sk_dst_pending_confirm;
+ u32 sk_pacing_status; /* see enum sk_pacing */
unsigned long sk_max_pacing_rate;
long sk_sndtimeo;
u32 sk_priority;
@@ -828,11 +838,9 @@ static inline bool sk_del_node_init(struct sock *sk)
{
bool rc = __sk_del_node_init(sk);
- if (rc) {
- /* paranoid for a while -acme */
- WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
+ if (rc)
__sock_put(sk);
- }
+
return rc;
}
#define sk_del_node_init_rcu(sk) sk_del_node_init(sk)
@@ -850,14 +858,25 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
{
bool rc = __sk_nulls_del_node_init_rcu(sk);
- if (rc) {
- /* paranoid for a while -acme */
- WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
+ if (rc)
__sock_put(sk);
- }
+
return rc;
}
+static inline bool sk_nulls_replace_node_init_rcu(struct sock *old,
+ struct sock *new)
+{
+ if (sk_hashed(old)) {
+ hlist_nulls_replace_init_rcu(&old->sk_nulls_node,
+ &new->sk_nulls_node);
+ __sock_put(old);
+ return true;
+ }
+
+ return false;
+}
+
static inline void __sk_add_node(struct sock *sk, struct hlist_head *list)
{
hlist_add_head(&sk->sk_node, list);
@@ -1260,10 +1279,10 @@ struct proto {
void (*close)(struct sock *sk,
long timeout);
int (*pre_connect)(struct sock *sk,
- struct sockaddr *uaddr,
+ struct sockaddr_unsized *uaddr,
int addr_len);
int (*connect)(struct sock *sk,
- struct sockaddr *uaddr,
+ struct sockaddr_unsized *uaddr,
int addr_len);
int (*disconnect)(struct sock *sk, int flags);
@@ -1292,9 +1311,9 @@ struct proto {
size_t len, int flags, int *addr_len);
void (*splice_eof)(struct socket *sock);
int (*bind)(struct sock *sk,
- struct sockaddr *addr, int addr_len);
+ struct sockaddr_unsized *addr, int addr_len);
int (*bind_add)(struct sock *sk,
- struct sockaddr *addr, int addr_len);
+ struct sockaddr_unsized *addr, int addr_len);
int (*backlog_rcv) (struct sock *sk,
struct sk_buff *skb);
@@ -1617,6 +1636,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
sk_mem_reclaim(sk);
}
+void __sk_charge(struct sock *sk, gfp_t gfp);
+
#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
static inline void sk_owner_set(struct sock *sk, struct module *owner)
{
@@ -1808,7 +1829,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
void sk_free(struct sock *sk);
void sk_net_refcnt_upgrade(struct sock *sk);
void sk_destruct(struct sock *sk);
-struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
+struct sock *sk_clone(const struct sock *sk, const gfp_t priority, bool lock);
+
+static inline struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
+{
+ return sk_clone(sk, priority, true);
+}
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority);
@@ -1901,8 +1927,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
* Functions to fill in entries in struct proto_ops when a protocol
* does not implement a particular function.
*/
-int sock_no_bind(struct socket *, struct sockaddr *, int);
-int sock_no_connect(struct socket *, struct sockaddr *, int, int);
+int sock_no_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len);
+int sock_no_connect(struct socket *sock, struct sockaddr_unsized *saddr, int len, int flags);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *);
int sock_no_getname(struct socket *, struct sockaddr *, int);
@@ -1992,7 +2018,15 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
/* Paired with READ_ONCE() in sk_tx_queue_get() and
* other WRITE_ONCE() because socket lock might be not held.
*/
- WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue);
+ if (READ_ONCE(sk->sk_tx_queue_mapping) != tx_queue) {
+ WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue);
+ WRITE_ONCE(sk->sk_tx_queue_mapping_jiffies, jiffies);
+ return;
+ }
+
+ /* Refresh sk_tx_queue_mapping_jiffies if too old. */
+ if (time_is_before_jiffies(READ_ONCE(sk->sk_tx_queue_mapping_jiffies) + HZ))
+ WRITE_ONCE(sk->sk_tx_queue_mapping_jiffies, jiffies);
}
#define NO_QUEUE_MAPPING USHRT_MAX
@@ -2005,19 +2039,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING);
}
-static inline int sk_tx_queue_get(const struct sock *sk)
-{
- if (sk) {
- /* Paired with WRITE_ONCE() in sk_tx_queue_clear()
- * and sk_tx_queue_set().
- */
- int val = READ_ONCE(sk->sk_tx_queue_mapping);
-
- if (val != NO_QUEUE_MAPPING)
- return val;
- }
- return -1;
-}
+int sk_tx_queue_get(const struct sock *sk);
static inline void __sk_rx_queue_set(struct sock *sk,
const struct sk_buff *skb,
@@ -2303,6 +2325,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
return 0;
}
+#define SK_WMEM_ALLOC_BIAS 1
/**
* sk_wmem_alloc_get - returns write allocations
* @sk: socket
@@ -2311,7 +2334,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
*/
static inline int sk_wmem_alloc_get(const struct sock *sk)
{
- return refcount_read(&sk->sk_wmem_alloc) - 1;
+ return refcount_read(&sk->sk_wmem_alloc) - SK_WMEM_ALLOC_BIAS;
}
/**
@@ -2596,12 +2619,16 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
+static inline bool __sock_writeable(const struct sock *sk, int wmem_alloc)
+{
+ return wmem_alloc < (READ_ONCE(sk->sk_sndbuf) >> 1);
+}
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
static inline bool sock_writeable(const struct sock *sk)
{
- return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
+ return __sock_writeable(sk, refcount_read(&sk->sk_wmem_alloc));
}
static inline gfp_t gfp_any(void)
@@ -3085,7 +3112,7 @@ void sock_set_reuseaddr(struct sock *sk);
void sock_set_reuseport(struct sock *sk);
void sock_set_sndtimeo(struct sock *sk, s64 secs);
-int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len);
+int sock_bind_add(struct sock *sk, struct sockaddr_unsized *addr, int addr_len);
int sock_get_timeout(long timeo, void *optval, bool old_timeval);
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,