Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--	net/core/skbuff.c	740
1 file changed, 717 insertions, 23 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c9b760caea30..e776e520b6b9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,7 +4,7 @@
  *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
  *			Florian La Roche <rzsfl@rz.uni-sb.de>
  *
- *	Version:	$Id: skbuff.c,v 1.75 2000/12/08 17:15:53 davem Exp $
+ *	Version:	$Id: skbuff.c,v 1.87 2001/03/06 22:09:50 davem Exp $
  *
  *	Fixes:
  *		Alan Cox	:	Fixed the worst of the load balancer bugs.
@@ -50,6 +50,7 @@
 #include <linux/skbuff.h>
 #include <linux/cache.h>
 #include <linux/init.h>
+#include <linux/highmem.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -185,8 +186,8 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
 	}
 
 	/* Get the DATA. Size must match skb_add_mtu(). */
-	size = ((size + 15) & ~15);
-	data = kmalloc(size + sizeof(atomic_t), gfp_mask);
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (data == NULL)
 		goto nodata;
 
@@ -202,9 +203,12 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
 	/* Set up other state */
 	skb->len = 0;
 	skb->cloned = 0;
+	skb->data_len = 0;
 
 	atomic_set(&skb->users, 1);
-	atomic_set(skb_datarefp(skb), 1);
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->frag_list = NULL;
 
 	return skb;
 
 nodata:
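The data area and its struct skb_shared_info now come from a single kmalloc(): the requested size is rounded up to a cache line and the shared info sits at skb->end. A minimal user-space sketch of that arithmetic (the SKB_DATA_ALIGN definition is paraphrased from the matching skbuff.h change; the 32-byte cache line and the model struct are assumptions):

#include <stdio.h>

#define SMP_CACHE_BYTES 32
#define SKB_DATA_ALIGN(x) (((x) + (SMP_CACHE_BYTES - 1)) & \
			   ~(SMP_CACHE_BYTES - 1))

struct shared_info_model {	/* stand-in for struct skb_shared_info */
	int dataref;
	int nr_frags;
	void *frag_list;
};

int main(void)
{
	unsigned int size = 1500;	/* requested data size */
	unsigned int aligned = SKB_DATA_ALIGN(size);

	/* One allocation holds the packet data followed by the shared
	 * fragment bookkeeping that clones reference-count through.
	 */
	printf("kmalloc(%zu) for %u data bytes\n",
	       aligned + sizeof(struct shared_info_model), size);
	return 0;
}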
@@ -248,14 +252,50 @@ static inline void skb_headerinit(void *p, kmem_cache_t *cache,
 #endif
 }
 
+static void skb_drop_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+	skb_shinfo(skb)->frag_list = NULL;
+
+	do {
+		struct sk_buff *this = list;
+		list = list->next;
+		kfree_skb(this);
+	} while (list);
+}
+
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list;
+
+	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
+		skb_get(list);
+}
+
+static void skb_release_data(struct sk_buff *skb)
+{
+	if (!skb->cloned ||
+	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
+		if (skb_shinfo(skb)->nr_frags) {
+			int i;
+			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+				put_page(skb_shinfo(skb)->frags[i].page);
+		}
+
+		if (skb_shinfo(skb)->frag_list)
+			skb_drop_fraglist(skb);
+
+		kfree(skb->head);
+	}
+}
+
 /*
  *	Free an skbuff by memory without cleaning the state.
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
-	if (!skb->cloned || atomic_dec_and_test(skb_datarefp(skb)))
-		kfree(skb->head);
-
+	skb_release_data(skb);
 	skb_head_to_pool(skb);
 }
 
@@ -316,18 +356,53 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
 		return NULL;
 	}
 
-	memcpy(n, skb, sizeof(*n));
-	atomic_inc(skb_datarefp(skb));
-	skb->cloned = 1;
-
-	dst_clone(n->dst);
-	n->cloned = 1;
+#define C(x) n->x = skb->x
 
+	n->next = n->prev = NULL;
+	n->list = NULL;
+	n->sk = NULL;
+	C(stamp);
+	C(dev);
+	C(h);
+	C(nh);
+	C(mac);
+	C(dst);
+	dst_clone(n->dst);
+	memcpy(n->cb, skb->cb, sizeof(skb->cb));
+	C(len);
+	C(data_len);
+	C(csum);
+	n->cloned = 1;
+	C(pkt_type);
+	C(ip_summed);
+	C(priority);
 	atomic_set(&n->users, 1);
+	C(protocol);
+	C(security);
+	C(truesize);
+	C(head);
+	C(data);
+	C(tail);
+	C(end);
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
+	C(nfmark);
+	C(nfcache);
+	C(nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+	C(nf_debug);
+#endif
+#endif /*CONFIG_NETFILTER*/
+#if defined(CONFIG_HIPPI)
+	C(private);
+#endif
+#ifdef CONFIG_NET_SCHED
+	C(tc_index);
+#endif
+
+	atomic_inc(&(skb_shinfo(skb)->dataref));
+	skb->cloned = 1;
+#ifdef CONFIG_NETFILTER
 	nf_conntrack_get(skb->nfct);
 #endif
 	return n;
@@ -350,7 +425,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->nh.raw=old->nh.raw+offset;
 	new->mac.raw=old->mac.raw+offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
-	new->used=old->used;
 	atomic_set(&new->users, 1);
 	new->pkt_type=old->pkt_type;
 	new->stamp=old->stamp;
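The field-by-field clone above preserves the key invariant of the new scheme: a clone gets a private struct sk_buff but shares the data area, page fragments and frag_list through skb_shinfo(skb)->dataref. A hedged kernel-style sketch of what that means for a caller (the consumer function is hypothetical):

#include <linux/skbuff.h>

static void deliver_to_tap(struct sk_buff *skb);	/* hypothetical */

static void tee_packet(struct sk_buff *skb)
{
	/* Cheap: only the sk_buff header is duplicated; the payload,
	 * page frags and frag_list stay shared and dataref is bumped.
	 */
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (clone == NULL)
		return;

	/* Reading headers through the clone is fine; writing the
	 * payload through either skb is not, until it is made private.
	 */
	deliver_to_tap(clone);
}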
@@ -371,7 +445,7 @@
 }
 
 /**
- *	skb_copy	-	copy an sk_buff
+ *	skb_copy	-	create private copy of an sk_buff
  *	@skb: buffer to copy
  *	@gfp_mask: allocation priority
@@ -380,18 +454,115 @@
  *	data to alter. Returns %NULL on failure or the pointer to the buffer
  *	on success. The returned buffer has a reference count of 1.
  *
- *	You must pass %GFP_ATOMIC as the allocation priority if this function
- *	is called from an interrupt.
+ *	As a by-product, this function converts a non-linear &sk_buff to a
+ *	linear one, so the &sk_buff becomes completely private and the
+ *	caller is allowed to modify all the data of the returned buffer.
+ *	It is therefore not recommended when only the header is going to
+ *	be modified. Use pskb_copy() instead.
  */
 
 struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
 {
 	struct sk_buff *n;
+	int headerlen = skb->data-skb->head;
+
+	/*
+	 *	Allocate the copy buffer
+	 */
+	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+	if(n==NULL)
+		return NULL;
+
+	/* Set the data pointer */
+	skb_reserve(n,headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n,skb->len);
+	n->csum = skb->csum;
+	n->ip_summed = skb->ip_summed;
+
+	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
+		BUG();
+
+	copy_skb_header(n, skb);
+
+	return n;
+}
+
+/* Keep head the same: replace data */
+int skb_linearize(struct sk_buff *skb, int gfp_mask)
+{
+	unsigned int size;
+	u8 *data;
+	long offset;
+	int headerlen = skb->data - skb->head;
+	int expand = (skb->tail+skb->data_len) - skb->end;
+
+	if (skb_shared(skb))
+		BUG();
+
+	if (expand <= 0)
+		expand = 0;
+
+	size = (skb->end - skb->head + expand);
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (data == NULL)
+		return -ENOMEM;
+
+	/* Copy entire thing */
+	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
+		BUG();
+
+	/* Offset between the two in bytes */
+	offset = data - skb->head;
+
+	/* Free old data. */
+	skb_release_data(skb);
+
+	skb->head = data;
+	skb->end = data + size;
+
+	/* Set up new pointers */
+	skb->h.raw += offset;
+	skb->nh.raw += offset;
+	skb->mac.raw += offset;
+	skb->tail += offset;
+	skb->data += offset;
+
+	/* Set up shinfo */
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+
+	/* We are no longer a clone, even if we were. */
+	skb->cloned = 0;
+
+	skb->tail += skb->data_len;
+	skb->data_len = 0;
+	return 0;
+}
+
+
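skb_copy() and skb_linearize() both produce a fully linear data area; the difference is that skb_linearize() rewrites the skb in place. A hedged sketch of the pattern legacy code would use before indexing skb->data directly (parse_header is hypothetical; the skb_is_nonlinear() helper is assumed from the matching skbuff.h change):

#include <linux/skbuff.h>

static int parse_header(unsigned char *data, unsigned int len);	/* hypothetical */

static int inspect_packet(struct sk_buff *skb)
{
	/* Code that walks skb->data directly cannot see bytes living in
	 * page fragments, so pull everything into the linear area first.
	 */
	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC))
		return -ENOMEM;

	return parse_header(skb->data, skb->len);
}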
+/**
+ *	pskb_copy	-	create copy of an sk_buff with private head.
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and part of its data, located
+ *	in the header. Fragmented data remains shared. This is used when
+ *	the caller wishes to modify only the header of the &sk_buff and
+ *	needs a private copy of the header to alter. Returns %NULL on
+ *	failure or the pointer to the buffer on success.
+ *	The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *n;
 
 	/*
 	 *	Allocate the copy buffer
 	 */
-	n=alloc_skb(skb->end - skb->head, gfp_mask);
 	if(n==NULL)
 		return NULL;
 
@@ -399,16 +570,120 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
 	/* Set the data pointer */
 	skb_reserve(n,skb->data-skb->head);
 	/* Set the tail pointer and length */
-	skb_put(n,skb->len);
+	skb_put(n,skb_headlen(skb));
 	/* Copy the bytes */
-	memcpy(n->head,skb->head,skb->end-skb->head);
+	memcpy(n->data, skb->data, n->len);
 	n->csum = skb->csum;
+	n->ip_summed = skb->ip_summed;
+
+	n->data_len = skb->data_len;
+	n->len = skb->len;
+
+	if (skb_shinfo(skb)->nr_frags) {
+		int i;
+
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+			get_page(skb_shinfo(n)->frags[i].page);
+		}
+		skb_shinfo(n)->nr_frags = i;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
+		skb_clone_fraglist(n);
+	}
+
 	copy_skb_header(n, skb);
 
 	return n;
 }
 
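A sketch of the header-rewrite pattern pskb_copy() is meant for (a hypothetical NAT-style helper; only the now-private linear header is written):

#include <linux/skbuff.h>
#include <linux/ip.h>

static struct sk_buff *rewrite_saddr(struct sk_buff *skb, u32 new_saddr)
{
	struct sk_buff *n = pskb_copy(skb, GFP_ATOMIC);

	if (n == NULL)
		return NULL;

	/* The linear header is private now; the page fragments are still
	 * shared with the original skb and must be treated as read-only.
	 */
	n->nh.iph->saddr = new_saddr;
	kfree_skb(skb);
	return n;
}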
 /**
+ *	pskb_expand_head - reallocate header of &sk_buff
+ *	@skb: buffer to reallocate
+ *	@nhead: room to add at head
+ *	@ntail: room to add at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Expands (or creates an identical copy, if &nhead and &ntail are
+ *	zero) the header of the skb. The &sk_buff itself is not changed
+ *	and MUST have a reference count of 1. Returns zero on success or
+ *	a negative error if expansion failed; in the latter case the
+ *	&sk_buff is not changed.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+{
+	int i;
+	u8 *data;
+	int size = nhead + (skb->end - skb->head) + ntail;
+	long off;
+
+	if (skb_shared(skb))
+		BUG();
+
+	size = SKB_DATA_ALIGN(size);
+
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (data == NULL)
+		goto nodata;
+
+	/* Copy only real data... and, alas, header. This should be
+	 * optimized for the cases when header is void.
+	 */
+	memcpy(data+nhead, skb->head, skb->tail-skb->head);
+	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
+
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
+		get_page(skb_shinfo(skb)->frags[i].page);
+
+	if (skb_shinfo(skb)->frag_list)
+		skb_clone_fraglist(skb);
+
+	skb_release_data(skb);
+
+	off = (data+nhead) - skb->head;
+
+	skb->head = data;
+	skb->end = data+size;
+
+	skb->data += off;
+	skb->tail += off;
+	skb->mac.raw += off;
+	skb->h.raw += off;
+	skb->nh.raw += off;
+	skb->cloned = 0;
+	atomic_set(&skb_shinfo(skb)->dataref, 1);
+	return 0;
+
+nodata:
+	return -ENOMEM;
+}
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *
+skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+	struct sk_buff *skb2;
+	int delta = headroom - skb_headroom(skb);
+
+	if (delta <= 0)
+		return pskb_copy(skb, GFP_ATOMIC);
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (skb2 == NULL ||
+	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
+		return skb2;
+
+	kfree_skb(skb2);
+	return NULL;
+}
+
+
+/**
  *	skb_copy_expand	-	copy and expand sk_buff
  *	@skb: buffer to copy
  *	@newheadroom: new free bytes at head
  *	@newtailroom: new free bytes at tail
@@ -439,7 +714,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	 *	Allocate the copy buffer
 	 */
 
-	n=alloc_skb(newheadroom + (skb->tail - skb->data) + newtailroom,
+	n=alloc_skb(newheadroom + skb->len + newtailroom,
 		    gfp_mask);
 	if(n==NULL)
 		return NULL;
@@ -450,12 +725,431 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	skb_put(n,skb->len);
 
 	/* Copy the data only. */
-	memcpy(n->data, skb->data, skb->len);
+	if (skb_copy_bits(skb, 0, n->data, skb->len))
+		BUG();
 
 	copy_skb_header(n, skb);
 	return n;
 }
 
+/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without change of data,
+ * it is BUG().
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+	int offset = skb_headlen(skb);
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int i;
+
+	for (i=0; i<nfrags; i++) {
+		int end = offset + skb_shinfo(skb)->frags[i].size;
+		if (end > len) {
+			if (skb_cloned(skb)) {
+				if (!realloc)
+					BUG();
+				if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+					return -ENOMEM;
+			}
+			if (len <= offset) {
+				put_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb)->nr_frags--;
+			} else {
+				skb_shinfo(skb)->frags[i].size = len-offset;
+			}
+		}
+		offset = end;
+	}
+
+	if (offset < len) {
+		skb->data_len -= skb->len - len;
+		skb->len = len;
+	} else {
+		if (len <= skb_headlen(skb)) {
+			skb->len = len;
+			skb->data_len = 0;
+			skb->tail = skb->data + len;
+			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+				skb_drop_fraglist(skb);
+		} else {
+			skb->data_len -= skb->len - len;
+			skb->len = len;
+		}
+	}
+
+	return 0;
+}
+
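skb_realloc_headroom() above packages the common "need more headroom" dance: clone, then let pskb_expand_head() rebuild the head with room to spare. A hedged sketch of a typical caller (the encapsulation header and transmit function are hypothetical):

#include <linux/skbuff.h>
#include <linux/string.h>

static int queue_xmit(struct sk_buff *skb);	/* hypothetical */

static int encap_xmit(struct sk_buff *skb, const u8 *hdr, int hdr_len)
{
	if (skb_headroom(skb) < hdr_len || skb_cloned(skb)) {
		struct sk_buff *nskb = skb_realloc_headroom(skb, hdr_len);

		if (nskb == NULL)
			return -ENOMEM;
		kfree_skb(skb);
		skb = nskb;
	}

	/* Headroom and a writable head are now guaranteed. */
	memcpy(skb_push(skb, hdr_len), hdr, hdr_len);
	return queue_xmit(skb);
}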
+/**
+ *	__pskb_pull_tail - advance tail of skb header
+ *	@skb: buffer to reallocate
+ *	@delta: number of bytes to advance tail
+ *
+ *	The function makes sense only on a fragmented &sk_buff:
+ *	it expands the header, moving its tail forward and copying the
+ *	necessary data from the fragmented part.
+ *
+ *	&sk_buff MUST have reference count of 1.
+ *
+ *	Returns %NULL (and &sk_buff does not change) if pull failed
+ *	or value of new tail of skb in the case of success.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+/* Moves tail of skb head forward, copying data from fragmented part,
+ * when it is necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+	int i, k, eat;
+
+	/* If skb has not enough free space at tail, get new one
+	 * plus 128 bytes for future expansions. If we have enough
+	 * room at tail, reallocate without expansion only if skb is cloned.
+	 */
+	eat = (skb->tail+delta) - skb->end;
+
+	if (eat > 0 || skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
+			return NULL;
+	}
+
+	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+		BUG();
+
+	/* Optimization: no fragments, no reasons to preestimate
+	 * size of pulled pages. Superb.
+	 */
+	if (skb_shinfo(skb)->frag_list == NULL)
+		goto pull_pages;
+
+	/* Estimate size of pulled pages. */
+	eat = delta;
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size >= eat)
+			goto pull_pages;
+		eat -= skb_shinfo(skb)->frags[i].size;
+	}
+
+	/* If we need to update the frag list, we are in trouble.
+	 * Certainly, it is possible to add an offset to skb data,
+	 * but taking into account that pulling is expected to
+	 * be a very rare operation, it is worth fighting against
+	 * further bloating of the skb head and crucifying ourselves
+	 * here instead. Pure masochism, indeed. 8)8)
+	 */
+	if (eat) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+		struct sk_buff *clone = NULL;
+		struct sk_buff *insp = NULL;
+
+		do {
+			if (list == NULL)
+				BUG();
+
+			if (list->len <= eat) {
+				/* Eaten as whole. */
+				eat -= list->len;
+				list = list->next;
+				insp = list;
+			} else {
+				/* Eaten partially. */
+
+				if (skb_shared(list)) {
+					/* Sucks! We need to fork list. :-( */
+					clone = skb_clone(list, GFP_ATOMIC);
+					if (clone == NULL)
+						return NULL;
+					insp = list->next;
+					list = clone;
+				} else {
+					/* This may be pulled without
+					 * problems. */
+					insp = list;
+				}
+				if (pskb_pull(list, eat) == NULL) {
+					if (clone)
+						kfree_skb(clone);
+					return NULL;
+				}
+				break;
+			}
+		} while (eat);
+
+		/* Free pulled out fragments. */
+		while ((list = skb_shinfo(skb)->frag_list) != insp) {
+			skb_shinfo(skb)->frag_list = list->next;
+			kfree_skb(list);
+		}
+		/* And insert new clone at head. */
+		if (clone) {
+			clone->next = list;
+			skb_shinfo(skb)->frag_list = clone;
+		}
+	}
+	/* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+	eat = delta;
+	k = 0;
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size <= eat) {
+			put_page(skb_shinfo(skb)->frags[i].page);
+			eat -= skb_shinfo(skb)->frags[i].size;
+		} else {
+			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			if (eat) {
+				skb_shinfo(skb)->frags[k].page_offset += eat;
+				skb_shinfo(skb)->frags[k].size -= eat;
+				eat = 0;
+			}
+			k++;
+		}
+	}
+	skb_shinfo(skb)->nr_frags = k;
+
+	skb->tail += delta;
+	skb->data_len -= delta;
+
+	return skb->tail;
+}
+
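The expected consumer of __pskb_pull_tail() is the pskb_may_pull() helper from the matching skbuff.h change, which calls it only when the requested header bytes are not already linear. A hedged sketch:

#include <linux/skbuff.h>
#include <linux/tcp.h>

static int grab_tcp_header(struct sk_buff *skb)
{
	/* Ensure at least the TCP header sits in the linear area,
	 * copying it out of the fragments only if necessary.
	 */
	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		return -EINVAL;		/* shorter than a TCP header */

	skb->h.th = (struct tcphdr *) skb->data;
	return 0;
}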
+/* Copy some data bits from skb to kernel buffer. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+	int i, copy;
+	int start = skb->len - skb->data_len;
+
+	if (offset > (int)skb->len-len)
+		goto fault;
+
+	/* Copy header. */
+	if ((copy = start-offset) > 0) {
+		if (copy > len)
+			copy = len;
+		memcpy(to, skb->data + offset, copy);
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset+len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end-offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
+			       offset-start, copy);
+			kunmap_skb_frag(vaddr);
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list;
+
+		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset+len);
+
+			end = start + list->len;
+			if ((copy = end-offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_bits(list, offset-start, to, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to += copy;
+			}
+			start = end;
+		}
+	}
+	if (len == 0)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+/* Checksum skb data. */
+
+unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
+{
+	int i, copy;
+	int start = skb->len - skb->data_len;
+	int pos = 0;
+
+	/* Checksum header. */
+	if ((copy = start-offset) > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial(skb->data+offset, copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		pos = copy;
+	}
+
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset+len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end-offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial(vaddr + frag->page_offset +
+					     offset-start, copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list;
+
+		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset+len);
+
+			end = start + list->len;
+			if ((copy = end-offset) > 0) {
+				unsigned int csum2;
+				if (copy > len)
+					copy = len;
+				csum2 = skb_checksum(list, offset-start, copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				pos += copy;
+			}
+			start = end;
+		}
+	}
+	if (len == 0)
+		return csum;
+
+	BUG();
+	return csum;
+}
+
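skb_copy_bits(), skb_checksum() and the combined walker below all share one traversal scheme: a running 'start' tracks where the current region (linear header, each page frag, each frag_list skb) begins in the logical byte stream, and 'copy = end - offset' decides how much of the request falls into it. A user-space model of that bookkeeping:

#include <string.h>

struct region {
	const char *data;	/* backing bytes of this region */
	int len;
};

/* Model of the walk: copy 'len' bytes starting at logical 'offset'
 * from a sequence of regions into 'to'. Returns 0 on success.
 */
static int copy_bits(const struct region *r, int nregions,
		     int offset, char *to, int len)
{
	int i, start = 0;

	for (i = 0; i < nregions; i++) {
		int end = start + r[i].len;
		int copy = end - offset;

		if (copy > 0) {
			if (copy > len)
				copy = len;
			memcpy(to, r[i].data + (offset - start), copy);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}
	return -1;		/* request ran past the last region */
}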
+/* Both of above in one bottle. */
+
+unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
+{
+	int i, copy;
+	int start = skb->len - skb->data_len;
+	int pos = 0;
+
+	/* Copy header. */
+	if ((copy = start-offset) > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		to += copy;
+		pos = copy;
+	}
+
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset+len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end-offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
+						      offset-start, to, copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			to += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list;
+
+		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+			unsigned int csum2;
+			int end;
+
+			BUG_TRAP(start <= offset+len);
+
+			end = start + list->len;
+			if ((copy = end-offset) > 0) {
+				if (copy > len)
+					copy = len;
+				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				to += copy;
+				pos += copy;
+			}
+			start = end;
+		}
+	}
+	if (len == 0)
+		return csum;
+
+	BUG();
+	return csum;
+}
+
+
 #if 0
 /*
  * Tune the memory allocator for a new MTU size.
@@ -463,7 +1157,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 void skb_add_mtu(int mtu)
 {
 	/* Must match allocation in alloc_skb */
-	mtu = ((mtu + 15) & ~15) + sizeof(atomic_t);
+	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
 
 	kmem_add_cache_size(mtu);
 }
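The 'pos' threaded through the two checksum walkers above feeds csum_block_add(): the 16-bit one's-complement sum of a block that starts at an odd byte offset must be byte-swapped before it can be folded into the running total. A user-space model of that step (paraphrasing the csum_block_add()/csum_add() helpers from the architecture headers):

#include <stdint.h>

/* Add two one's-complement partial sums with end-around carry. */
static uint32_t csum_add_model(uint32_t csum, uint32_t addend)
{
	csum += addend;
	return csum + (csum < addend);
}

/* Fold 'csum2', computed over a block beginning at byte offset 'pos',
 * into the running sum 'csum'.
 */
static uint32_t csum_block_add_model(uint32_t csum, uint32_t csum2, int pos)
{
	if (pos & 1)	/* odd start: swap bytes of the partial sum */
		csum2 = ((csum2 & 0x00ff00ffUL) << 8) |
			((csum2 >> 8) & 0x00ff00ffUL);
	return csum_add_model(csum, csum2);
}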
