From 25a91d8d91911f84a03a039339b29537e7f1970d Mon Sep 17 00:00:00 2001 From: Fan Du Date: Sat, 18 Jan 2014 09:54:23 +0800 Subject: skbuff: Introduce skb_to_sgvec_nomark to map skb without mark new end As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given sglist without mark the sg which contain last skb data as the end. So the caller can mannipulate sg list as will when padding new data after the first call without calling sg_unmark_end to expend sg list. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f589c9af8cbf..c574bf3bd6f6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -691,6 +691,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); +int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); -- cgit v1.2.3 From 363ec392352e55c61ce2799c3f15f89f9429bba7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Feb 2014 14:02:11 -0800 Subject: net: add skb_mstamp infrastructure ktime_get() is too expensive on some cases, and we'd like to get usec resolution timestamps in TCP stack. This patch adds a light weight facility using a combination of local_clock() and jiffies samples. Instead of : u64 t0, t1; t0 = ktime_get(); // stuff t1 = ktime_get(); delta_us = ktime_us_delta(t1, t0); use : struct skb_mstamp t0, t1; skb_mstamp_get(&t0); // stuff skb_mstamp_get(&t1); delta_us = skb_mstamp_us_delta(&t1, &t0); Note : local_clock() might have a (bounded) drift between cpus. Do not use this infra in place of ktime_get() without understanding the issues. Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Larry Brakmo Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 11b6925f0e96..6d8ed22accb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,6 +32,7 @@ #include #include #include +#include #include /* A. Checksumming of received packets by device. @@ -356,11 +357,62 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +/** + * struct skb_mstamp - multi resolution time stamps + * @stamp_us: timestamp in us resolution + * @stamp_jiffies: timestamp in jiffies + */ +struct skb_mstamp { + union { + u64 v64; + struct { + u32 stamp_us; + u32 stamp_jiffies; + }; + }; +}; + +/** + * skb_mstamp_get - get current timestamp + * @cl: place to store timestamps + */ +static inline void skb_mstamp_get(struct skb_mstamp *cl) +{ + u64 val = local_clock(); + + do_div(val, NSEC_PER_USEC); + cl->stamp_us = (u32)val; + cl->stamp_jiffies = (u32)jiffies; +} + +/** + * skb_mstamp_delta - compute the difference in usec between two skb_mstamp + * @t1: pointer to newest sample + * @t0: pointer to oldest sample + */ +static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, + const struct skb_mstamp *t0) +{ + s32 delta_us = t1->stamp_us - t0->stamp_us; + u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies; + + /* If delta_us is negative, this might be because interval is too big, + * or local_clock() drift is too big : fallback using jiffies. + */ + if (delta_us <= 0 || + delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ))) + + delta_us = jiffies_to_usecs(delta_jiffies); + + return delta_us; +} + + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list - * @tstamp: Time we arrived + * @tstamp: Time we arrived/left * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -429,7 +481,10 @@ struct sk_buff { struct sk_buff *next; struct sk_buff *prev; - ktime_t tstamp; + union { + ktime_t tstamp; + struct skb_mstamp skb_mstamp; + }; struct sock *sk; struct net_device *dev; -- cgit v1.2.3 From 61b905da33ae25edb6b9d2a5de21e34c3a77efe3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 24 Mar 2014 15:34:47 -0700 Subject: net: Rename skb->rxhash to skb->hash The packet hash can be considered a property of the packet, not just on RX path. This patch changes name of rxhash and l4_rxhash skbuff fields to be hash and l4_hash respectively. This includes changing uses of the field in the code which don't call the access functions. Signed-off-by: Tom Herbert Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 4 ++-- arch/powerpc/net/bpf_jit_comp.c | 4 ++-- arch/s390/net/bpf_jit_comp.c | 8 ++++---- arch/sparc/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 8 ++++---- include/linux/skbuff.h | 28 ++++++++++++++-------------- include/net/sock.h | 4 ++-- include/trace/events/net.h | 12 ++++++------ net/core/dev.c | 13 +++++++------ net/core/filter.c | 2 +- net/core/flow_dissector.c | 10 +++++----- net/packet/af_packet.c | 3 +-- 12 files changed, 49 insertions(+), 49 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 271b5e971568..7ddb9c83cdfc 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -825,8 +825,8 @@ b_epilogue: break; case BPF_S_ANC_RXHASH: ctx->seen |= SEEN_SKB; - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - off = offsetof(struct sk_buff, rxhash); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + off = offsetof(struct sk_buff, hash); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; case BPF_S_ANC_VLAN_TAG: diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 555034f8505e..4afad6c17d50 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -390,9 +390,9 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, mark)); break; case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - rxhash)); + hash)); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 708d60e40066..153f8f2cfd56 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -737,10 +737,10 @@ call_fn: /* lg %r1,(%r13) */ /* icm %r5,3,(%r1) */ EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); break; - case BPF_S_ANC_RXHASH: /* A = skb->rxhash */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - /* l %r5,(%r2) */ - EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash)); + case BPF_S_ANC_RXHASH: /* A = skb->hash */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + /* l %r5,(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 01fe9946d388..d96d2a7c78ee 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -618,7 +618,7 @@ void bpf_jit_compile(struct sk_filter *fp) emit_load16(r_A, struct net_device, type, r_A); break; case BPF_S_ANC_RXHASH: - emit_skb_load32(rxhash, r_A); + emit_skb_load32(hash, r_A); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4ed75dd81d05..293c57b74edc 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -553,13 +553,13 @@ void bpf_jit_compile(struct sk_filter *fp) } break; case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - if (is_imm8(offsetof(struct sk_buff, rxhash))) { + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + if (is_imm8(offsetof(struct sk_buff, hash))) { /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash)); } else { EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, rxhash), 4); + EMIT(offsetof(struct sk_buff, hash), 4); } break; case BPF_S_ANC_QUEUE: diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 03db95ab8a8c..aa2c22cb8158 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -444,11 +444,11 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @tc_verd: traffic control verdict - * @rxhash: the packet hash computed on receive + * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed - * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport + * @l4_hash: indicate hash is a canonical 4-tuple hash over transport * ports. * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not @@ -537,7 +537,7 @@ struct sk_buff { int skb_iif; - __u32 rxhash; + __u32 hash; __be16 vlan_proto; __u16 vlan_tci; @@ -556,7 +556,7 @@ struct sk_buff { #endif __u8 pfmemalloc:1; __u8 ooo_okay:1; - __u8 l4_rxhash:1; + __u8 l4_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; __u8 no_fcs:1; @@ -815,40 +815,40 @@ enum pkt_hash_types { static inline void skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) { - skb->l4_rxhash = (type == PKT_HASH_TYPE_L4); - skb->rxhash = hash; + skb->l4_hash = (type == PKT_HASH_TYPE_L4); + skb->hash = hash; } void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { - if (!skb->l4_rxhash) + if (!skb->l4_hash) __skb_get_hash(skb); - return skb->rxhash; + return skb->hash; } static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { - return skb->rxhash; + return skb->hash; } static inline void skb_clear_hash(struct sk_buff *skb) { - skb->rxhash = 0; - skb->l4_rxhash = 0; + skb->hash = 0; + skb->l4_hash = 0; } static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) { - if (!skb->l4_rxhash) + if (!skb->l4_hash) skb_clear_hash(skb); } static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) { - to->rxhash = from->rxhash; - to->l4_rxhash = from->l4_rxhash; + to->hash = from->hash; + to->l4_hash = from->l4_hash; }; #ifdef NET_SKBUFF_DATA_USES_OFFSET diff --git a/include/net/sock.h b/include/net/sock.h index 625e65b12366..8d7c431a0660 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -862,9 +862,9 @@ static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != skb->rxhash)) { + if (unlikely(sk->sk_rxhash != skb->hash)) { sock_rps_reset_flow(sk); - sk->sk_rxhash = skb->rxhash; + sk->sk_rxhash = skb->hash; } #endif } diff --git a/include/trace/events/net.h b/include/trace/events/net.h index a34f27b2e394..1de256b35807 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -153,8 +153,8 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __field( u16, vlan_tci ) __field( u16, protocol ) __field( u8, ip_summed ) - __field( u32, rxhash ) - __field( bool, l4_rxhash ) + __field( u32, hash ) + __field( bool, l4_hash ) __field( unsigned int, len ) __field( unsigned int, data_len ) __field( unsigned int, truesize ) @@ -179,8 +179,8 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->vlan_tci = vlan_tx_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; - __entry->rxhash = skb->rxhash; - __entry->l4_rxhash = skb->l4_rxhash; + __entry->hash = skb->hash; + __entry->l4_hash = skb->l4_hash; __entry->len = skb->len; __entry->data_len = skb->data_len; __entry->truesize = skb->truesize; @@ -191,11 +191,11 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d rxhash=0x%08x l4_rxhash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, - __entry->rxhash, __entry->l4_rxhash, __entry->len, + __entry->hash, __entry->l4_hash, __entry->len, __entry->data_len, __entry->truesize, __entry->mac_header_valid, __entry->mac_header, __entry->nr_frags, __entry->gso_size, __entry->gso_type) diff --git a/net/core/dev.c b/net/core/dev.c index 55f8e64c03a2..48dd323d5918 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2952,7 +2952,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = skb->rxhash & flow_table->mask; + flow_id = skb_get_hash(skb) & flow_table->mask; rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) @@ -2986,6 +2986,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_sock_flow_table *sock_flow_table; int cpu = -1; u16 tcpu; + u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); @@ -3014,7 +3015,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_hash(skb)) + hash = skb_get_hash(skb); + if (!hash) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3023,11 +3025,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, u16 next_cpu; struct rps_dev_flow *rflow; - rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; + rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[skb->rxhash & - sock_flow_table->mask]; + next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; /* * If the desired CPU (where last recvmsg was done) is @@ -3056,7 +3057,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } if (map) { - tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + tcpu = map->cpus[((u64) hash * map->len) >> 32]; if (cpu_online(tcpu)) { cpu = tcpu; diff --git a/net/core/filter.c b/net/core/filter.c index ad30d626a5bd..65b75966e206 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -336,7 +336,7 @@ load_b: A = skb->dev->type; continue; case BPF_S_ANC_RXHASH: - A = skb->rxhash; + A = skb->hash; continue; case BPF_S_ANC_CPU: A = raw_smp_processor_id(); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 80201bf69d59..107ed12a5323 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -203,8 +203,8 @@ static __always_inline u32 __flow_hash_1word(u32 a) /* * __skb_get_hash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Sets rxhash in skb to non-zero hash value - * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * and src/dst port numbers. Sets hash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ void __skb_get_hash(struct sk_buff *skb) @@ -216,7 +216,7 @@ void __skb_get_hash(struct sk_buff *skb) return; if (keys.ports) - skb->l4_rxhash = 1; + skb->l4_hash = 1; /* get a consistent hash (same value on both flow directions) */ if (((__force u32)keys.dst < (__force u32)keys.src) || @@ -232,7 +232,7 @@ void __skb_get_hash(struct sk_buff *skb) if (!hash) hash = 1; - skb->rxhash = hash; + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); @@ -344,7 +344,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol ^ - skb->rxhash; + skb->hash; hash = __flow_hash_1word(hash); queue_index = map->queues[ ((u64)hash * map->len) >> 32]; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 292304404fda..097a354ec8cd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1277,7 +1277,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb->rxhash, num); + return reciprocal_scale(skb_get_hash(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1362,7 +1362,6 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; } - skb_get_hash(skb); idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: -- cgit v1.2.3 From 408eccce32044ee3285a7f6a812723ba3540c3e7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 1 Apr 2014 16:20:23 +0200 Subject: net: ptp: move PTP classifier in its own file This commit fixes a build error reported by Fengguang, that is triggered when CONFIG_NETWORK_PHY_TIMESTAMPING is not set: ERROR: "ptp_classify_raw" [drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.ko] undefined! The fix is to introduce its own file for the PTP BPF classifier, so that PTP_1588_CLOCK and/or NETWORK_PHY_TIMESTAMPING can select it independently from each other. IXP4xx driver on ARM needs to select it as well since it does not seem to select PTP_1588_CLOCK or similar that would pull it in automatically. This also allows for hiding all of the internals of the BPF PTP program inside that file, and only exporting relevant API bits to drivers. This patch also adds a kdoc documentation of ptp_classify_raw() API to make it clear that it can return PTP_CLASS_* defines. Also, the BPF program has been translated into bpf_asm code, so that it can be more easily read and altered (extensively documented in [1]). In the kernel tree under tools/net/ we have bpf_asm and bpf_dbg tools, so the commented program can simply be translated via `./bpf_asm -c prog` where prog is a file that contains the commented code. This makes it easily readable/verifiable and when there's a need to change something, jump offsets etc do not need to be replaced manually which can be very error prone. Instead, a newly translated version via bpf_asm can simply replace the old code. I have checked opcode diffs before/after and it's the very same filter. [1] Documentation/networking/filter.txt Fixes: 164d8c666521 ("net: ptp: do not reimplement PTP/BPF classifier") Reported-by: Fengguang Wu Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Richard Cochran Cc: Jiri Benc Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/Kconfig | 1 + drivers/net/phy/dp83640.c | 1 + drivers/ptp/Kconfig | 1 + include/linux/ptp_classify.h | 95 ++++++------------------ include/linux/skbuff.h | 2 - net/Kconfig | 4 + net/core/Makefile | 1 + net/core/ptp_classifier.c | 141 ++++++++++++++++++++++++++++++++++++ net/core/timestamping.c | 18 ----- net/socket.c | 5 +- 10 files changed, 173 insertions(+), 96 deletions(-) create mode 100644 net/core/ptp_classifier.c (limited to 'include/linux/skbuff.h') diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig index 3f431019e615..b81bc9fca378 100644 --- a/drivers/net/ethernet/xscale/Kconfig +++ b/drivers/net/ethernet/xscale/Kconfig @@ -23,6 +23,7 @@ config IXP4XX_ETH tristate "Intel IXP4xx Ethernet support" depends on ARM && ARCH_IXP4XX && IXP4XX_NPE && IXP4XX_QMGR select PHYLIB + select NET_PTP_CLASSIFY ---help--- Say Y here if you want to use built-in Ethernet ports on IXP4xx processor. diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 352c5e45fe9c..6a999e6814a0 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 5a7910e61e17..6963bdf54175 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -7,6 +7,7 @@ menu "PTP clock support" config PTP_1588_CLOCK tristate "PTP clock support" select PPS + select NET_PTP_CLASSIFY help The IEEE 1588 standard defines a method to precisely synchronize distributed clocks over Ethernet networks. The diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 6d3b0a2ef9ce..7dfed71d76a6 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -23,11 +23,8 @@ #ifndef _PTP_CLASSIFY_H_ #define _PTP_CLASSIFY_H_ -#include -#include #include -#include -#include +#include #define PTP_CLASS_NONE 0x00 /* not a PTP event message */ #define PTP_CLASS_V1 0x01 /* protocol version 1 */ @@ -40,7 +37,7 @@ #define PTP_CLASS_PMASK 0xf0 /* mask for the packet type field */ #define PTP_CLASS_V1_IPV4 (PTP_CLASS_V1 | PTP_CLASS_IPV4) -#define PTP_CLASS_V1_IPV6 (PTP_CLASS_V1 | PTP_CLASS_IPV6) /*probably DNE*/ +#define PTP_CLASS_V1_IPV6 (PTP_CLASS_V1 | PTP_CLASS_IPV6) /* probably DNE */ #define PTP_CLASS_V2_IPV4 (PTP_CLASS_V2 | PTP_CLASS_IPV4) #define PTP_CLASS_V2_IPV6 (PTP_CLASS_V2 | PTP_CLASS_IPV6) #define PTP_CLASS_V2_L2 (PTP_CLASS_V2 | PTP_CLASS_L2) @@ -49,82 +46,34 @@ #define PTP_EV_PORT 319 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ -#define OFF_ETYPE 12 -#define OFF_IHL 14 -#define OFF_FRAG 20 -#define OFF_PROTO4 23 -#define OFF_NEXT 6 -#define OFF_UDP_DST 2 - #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ #define OFF_PTP_SEQUENCE_ID 30 #define OFF_PTP_CONTROL 32 /* PTPv1 only */ -#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2) - +/* Below defines should actually be removed at some point in time. */ #define IP6_HLEN 40 #define UDP_HLEN 8 - -#define RELOFF_DST4 (ETH_HLEN + OFF_UDP_DST) -#define OFF_DST6 (ETH_HLEN + IP6_HLEN + OFF_UDP_DST) +#define OFF_IHL 14 #define OFF_PTP6 (ETH_HLEN + IP6_HLEN + UDP_HLEN) +#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2) -#define OP_AND (BPF_ALU | BPF_AND | BPF_K) -#define OP_JEQ (BPF_JMP | BPF_JEQ | BPF_K) -#define OP_JSET (BPF_JMP | BPF_JSET | BPF_K) -#define OP_LDB (BPF_LD | BPF_B | BPF_ABS) -#define OP_LDH (BPF_LD | BPF_H | BPF_ABS) -#define OP_LDHI (BPF_LD | BPF_H | BPF_IND) -#define OP_LDX (BPF_LDX | BPF_B | BPF_MSH) -#define OP_OR (BPF_ALU | BPF_OR | BPF_K) -#define OP_RETA (BPF_RET | BPF_A) -#define OP_RETK (BPF_RET | BPF_K) - -#define PTP_FILTER \ - {OP_LDH, 0, 0, OFF_ETYPE }, /* */ \ - {OP_JEQ, 0, 12, ETH_P_IP }, /* f goto L20 */ \ - {OP_LDB, 0, 0, OFF_PROTO4 }, /* */ \ - {OP_JEQ, 0, 9, IPPROTO_UDP }, /* f goto L10 */ \ - {OP_LDH, 0, 0, OFF_FRAG }, /* */ \ - {OP_JSET, 7, 0, 0x1fff }, /* t goto L11 */ \ - {OP_LDX, 0, 0, OFF_IHL }, /* */ \ - {OP_LDHI, 0, 0, RELOFF_DST4 }, /* */ \ - {OP_JEQ, 0, 4, PTP_EV_PORT }, /* f goto L12 */ \ - {OP_LDHI, 0, 0, ETH_HLEN + UDP_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ - {OP_OR, 0, 0, PTP_CLASS_IPV4 }, /* */ \ - {OP_RETA, 0, 0, 0 }, /* */ \ -/*L1x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, /* */ \ -/*L20*/ {OP_JEQ, 0, 9, ETH_P_IPV6 }, /* f goto L40 */ \ - {OP_LDB, 0, 0, ETH_HLEN + OFF_NEXT }, /* */ \ - {OP_JEQ, 0, 6, IPPROTO_UDP }, /* f goto L30 */ \ - {OP_LDH, 0, 0, OFF_DST6 }, /* */ \ - {OP_JEQ, 0, 4, PTP_EV_PORT }, /* f goto L31 */ \ - {OP_LDH, 0, 0, OFF_PTP6 }, /* */ \ - {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ - {OP_OR, 0, 0, PTP_CLASS_IPV6 }, /* */ \ - {OP_RETA, 0, 0, 0 }, /* */ \ -/*L3x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, /* */ \ -/*L40*/ {OP_JEQ, 0, 9, ETH_P_8021Q }, /* f goto L50 */ \ - {OP_LDH, 0, 0, OFF_ETYPE + 4 }, /* */ \ - {OP_JEQ, 0, 15, ETH_P_1588 }, /* f goto L60 */ \ - {OP_LDB, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ - {OP_JEQ, 0, 12, 0 }, /* f goto L6x */ \ - {OP_LDH, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ - {OP_OR, 0, 0, PTP_CLASS_VLAN }, /* */ \ - {OP_RETA, 0, 0, 0 }, /* */ \ -/*L50*/ {OP_JEQ, 0, 7, ETH_P_1588 }, /* f goto L61 */ \ - {OP_LDB, 0, 0, ETH_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ - {OP_JEQ, 0, 4, 0 }, /* f goto L6x */ \ - {OP_LDH, 0, 0, ETH_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ - {OP_OR, 0, 0, PTP_CLASS_L2 }, /* */ \ - {OP_RETA, 0, 0, 0 }, /* */ \ -/*L6x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, - +#if defined(CONFIG_NET_PTP_CLASSIFY) +/** + * ptp_classify_raw - classify a PTP packet + * @skb: buffer + * + * Runs a minimal BPF dissector to classify a network packet to + * determine the PTP class. In case the skb does not contain any + * PTP protocol data, PTP_CLASS_NONE will be returned, otherwise + * PTP_CLASS_V1_IPV{4,6}, PTP_CLASS_V2_IPV{4,6} or + * PTP_CLASS_V2_{L2,VLAN}, depending on the packet content. + */ unsigned int ptp_classify_raw(const struct sk_buff *skb); +void __init ptp_classifier_init(void); +#else +static inline void ptp_classifier_init(void) +{ +} #endif +#endif /* _PTP_CLASSIFY_H_ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 18ef0224fb6a..31edf63937a1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2630,8 +2630,6 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } -void skb_timestamping_init(void); - #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); diff --git a/net/Kconfig b/net/Kconfig index e411046a62e3..d1f6f968fc09 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -89,8 +89,12 @@ config NETWORK_SECMARK to nfmark, but designated for security purposes. If you are unsure how to answer this question, answer N. +config NET_PTP_CLASSIFY + def_bool n + config NETWORK_PHY_TIMESTAMPING bool "Timestamping in PHY devices" + select NET_PTP_CLASSIFY help This allows timestamping of network packets by PHYs with hardware timestamping capabilities. This option adds some diff --git a/net/core/Makefile b/net/core/Makefile index 9628c20acff6..826b925aa453 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -21,5 +21,6 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o +obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c new file mode 100644 index 000000000000..eaba0f68f860 --- /dev/null +++ b/net/core/ptp_classifier.c @@ -0,0 +1,141 @@ +/* PTP classifier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* The below program is the bpf_asm (tools/net/) representation of + * the opcode array in the ptp_filter structure. + * + * For convenience, this can easily be altered and reviewed with + * bpf_asm and bpf_dbg, e.g. `./bpf_asm -c prog` where prog is a + * simple file containing the below program: + * + * ldh [12] ; load ethertype + * + * ; PTP over UDP over IPv4 over Ethernet + * test_ipv4: + * jneq #0x800, test_ipv6 ; ETH_P_IP ? + * ldb [23] ; load proto + * jneq #17, drop_ipv4 ; IPPROTO_UDP ? + * ldh [20] ; load frag offset field + * jset #0x1fff, drop_ipv4 ; don't allow fragments + * ldxb 4*([14]&0xf) ; load IP header len + * ldh [x + 16] ; load UDP dst port + * jneq #319, drop_ipv4 ; is port PTP_EV_PORT ? + * ldh [x + 22] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x10 ; PTP_CLASS_IPV4 + * ret a ; return PTP class + * drop_ipv4: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over UDP over IPv6 over Ethernet + * test_ipv6: + * jneq #0x86dd, test_8021q ; ETH_P_IPV6 ? + * ldb [20] ; load proto + * jneq #17, drop_ipv6 ; IPPROTO_UDP ? + * ldh [56] ; load UDP dst port + * jneq #319, drop_ipv6 ; is port PTP_EV_PORT ? + * ldh [62] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x20 ; PTP_CLASS_IPV6 + * ret a ; return PTP class + * drop_ipv6: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over 802.1Q over Ethernet + * test_8021q: + * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ? + * ldh [16] ; load inner type + * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * ldb [18] ; load payload + * and #0x8 ; as we don't have ports here, test + * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these + * ldh [18] ; reload payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x40 ; PTP_CLASS_V2_VLAN + * ret a ; return PTP class + * + * ; PTP over Ethernet + * test_ieee1588: + * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * ldb [14] ; load payload + * and #0x8 ; as we don't have ports here, test + * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these + * ldh [14] ; reload payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x30 ; PTP_CLASS_L2 + * ret a ; return PTP class + * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE + */ + +#include +#include +#include + +static struct sk_filter *ptp_insns __read_mostly; + +unsigned int ptp_classify_raw(const struct sk_buff *skb) +{ + return SK_RUN_FILTER(ptp_insns, skb); +} +EXPORT_SYMBOL_GPL(ptp_classify_raw); + +void __init ptp_classifier_init(void) +{ + static struct sock_filter ptp_filter[] = { + { 0x28, 0, 0, 0x0000000c }, + { 0x15, 0, 12, 0x00000800 }, + { 0x30, 0, 0, 0x00000017 }, + { 0x15, 0, 9, 0x00000011 }, + { 0x28, 0, 0, 0x00000014 }, + { 0x45, 7, 0, 0x00001fff }, + { 0xb1, 0, 0, 0x0000000e }, + { 0x48, 0, 0, 0x00000010 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x48, 0, 0, 0x00000016 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000010 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 9, 0x000086dd }, + { 0x30, 0, 0, 0x00000014 }, + { 0x15, 0, 6, 0x00000011 }, + { 0x28, 0, 0, 0x00000038 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x28, 0, 0, 0x0000003e }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000020 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 9, 0x00008100 }, + { 0x28, 0, 0, 0x00000010 }, + { 0x15, 0, 15, 0x000088f7 }, + { 0x30, 0, 0, 0x00000012 }, + { 0x54, 0, 0, 0x00000008 }, + { 0x15, 0, 12, 0x00000000 }, + { 0x28, 0, 0, 0x00000012 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000040 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x15, 0, 7, 0x000088f7 }, + { 0x30, 0, 0, 0x0000000e }, + { 0x54, 0, 0, 0x00000008 }, + { 0x15, 0, 4, 0x00000000 }, + { 0x28, 0, 0, 0x0000000e }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000030 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + }; + struct sock_fprog ptp_prog = { + .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, + }; + + BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); +} diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 9ff26b3cc021..6521dfd8b7c8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -23,14 +23,6 @@ #include #include -static struct sk_filter *ptp_insns __read_mostly; - -unsigned int ptp_classify_raw(const struct sk_buff *skb) -{ - return SK_RUN_FILTER(ptp_insns, skb); -} -EXPORT_SYMBOL_GPL(ptp_classify_raw); - static unsigned int classify(const struct sk_buff *skb) { if (likely(skb->dev && skb->dev->phydev && @@ -140,13 +132,3 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) return false; } EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp); - -void __init skb_timestamping_init(void) -{ - static struct sock_filter ptp_filter[] = { PTP_FILTER }; - struct sock_fprog ptp_prog = { - .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, - }; - - BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); -} diff --git a/net/socket.c b/net/socket.c index f25eaa30b690..1b1e7e6a960f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -2685,9 +2686,7 @@ static int __init sock_init(void) goto out; #endif -#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING - skb_timestamping_init(); -#endif + ptp_classifier_init(); out: return err; -- cgit v1.2.3 From 574f7194f693cd80de96a39f0c43dbb346c38a15 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Apr 2014 12:20:24 -0700 Subject: net: Add a test to see if a skb is freeable in irq context Currently netpoll and skb_release_head_state assume that a skb is freeable in hard irq context except when skb->destructor is set. The reality is far from this. So add a function skb_irq_freeable to compute the full test and in the process be the living documentation of what the requirements are of actually freeing a skb in hard irq context. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 31edf63937a1..350eebe770d9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2831,6 +2831,19 @@ static inline void skb_init_secmark(struct sk_buff *skb) { } #endif +static inline bool skb_irq_freeable(const struct sk_buff *skb) +{ + return !skb->destructor && +#if IS_ENABLED(CONFIG_XFRM) + !skb->sp && +#endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + !skb->nfct && +#endif + !skb->_skb_refdst && + !skb_has_frag_list(skb); +} + static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) { skb->queue_mapping = queue_mapping; -- cgit v1.2.3