From 6ac4b57fa1525c9e923a96f3b69fa8f2248b7ec2 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 4 Oct 2004 22:29:45 -0700 Subject: [NET]: Fix fddi_statistics for 64-bit There is a problem with "struct fddi_statistics" for 64-bit systems. The starting members of the struct are expected to correspond to the respective members of "struct net_device_stats" (drivers for FDDI devices return "struct fddi_statistics" in the response to the get_stats() call of "struct net_device"). Unfortunately, due to using different types (u32 vs ulong) they do not. "struct net_device_stats" is a public interface and as a result, bogus results are retrieved, e.g. for /proc/net/dev. Here is my proposal to address the problem. I think there is no point in duplicating the layout of "struct net_device_stats" in "struct fddi_statistics" as the former can simply be included as a member avoiding this problem and actually any possible discrepancy in the future. This also preserves the layout of the structure for 32-bit systems. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- include/linux/if_fddi.h | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index 221796362253..a912818e6361 100644 --- a/include/linux/if_fddi.h +++ b/include/linux/if_fddi.h @@ -5,7 +5,7 @@ * * Global definitions for the ANSI FDDI interface. * - * Version: @(#)if_fddi.h 1.0.1 09/16/96 + * Version: @(#)if_fddi.h 1.0.2 Sep 29 2004 * * Author: Lawrence V. Stefani, * @@ -103,38 +103,12 @@ struct fddihdr } __attribute__ ((packed)); /* Define FDDI statistics structure */ -struct fddi_statistics - { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ - __u32 transmit_collision; /* always 0 for FDDI */ +struct fddi_statistics { + + /* Generic statistics. */ - /* detailed rx_errors */ - __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ + struct net_device_stats gen; - /* detailed tx_errors */ - __u32 tx_aborted_errors; - __u32 tx_carrier_errors; - __u32 tx_fifo_errors; - __u32 tx_heartbeat_errors; - __u32 tx_window_errors; - - /* for cslip etc */ - __u32 rx_compressed; - __u32 tx_compressed; - /* Detailed FDDI statistics. Adopted from RFC 1512 */ __u8 smt_station_id[8]; -- cgit v1.2.3 From 05529e0e46056e6bc4c43324f8d4e357334956b1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 4 Oct 2004 23:38:47 -0700 Subject: [NET]: Generic network statistics/estimator Work done by Thomas Graf and Jamal Hadi Salim The following patchset introduces generic network statistics for netlink users. It uses nested TLV which prevents further compatibility problems when introducing new statistics. Backward compatibility to existing TLV types TCA_STATS and TCA_XSTATS is ensured but can be easly removed once it is no longer needed. Therefore prior users of struct tc_stats can be converted to this API and existing userspace applications will not notice a difference while converted applications can use the new extendable statistic interface. Changes: - Add generic network statistics API for netlink users. - Introduces a generic rate estimator based on timers. Patch is based on Jamals patch and adapted to the new generic network statistics API. - Add documentation of generic network statistics and estimator API. Signed-off-by: Thomas Graf Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- Documentation/networking/gen_stats.txt | 117 +++++++++++++++++++ include/linux/gen_stats.h | 62 ++++++++++ include/net/gen_stats.h | 45 ++++++++ net/core/Makefile | 2 +- net/core/gen_estimator.c | 204 +++++++++++++++++++++++++++++++++ net/core/gen_stats.c | 132 +++++++++++++++++++++ 6 files changed, 561 insertions(+), 1 deletion(-) create mode 100644 Documentation/networking/gen_stats.txt create mode 100644 include/linux/gen_stats.h create mode 100644 include/net/gen_stats.h create mode 100644 net/core/gen_estimator.c create mode 100644 net/core/gen_stats.c (limited to 'include/linux') diff --git a/Documentation/networking/gen_stats.txt b/Documentation/networking/gen_stats.txt new file mode 100644 index 000000000000..c3297f79c137 --- /dev/null +++ b/Documentation/networking/gen_stats.txt @@ -0,0 +1,117 @@ +Generic networking statistics for netlink users +====================================================================== + +Statistic counters are grouped into structs: + +Struct TLV type Description +---------------------------------------------------------------------- +gnet_stats_basic TCA_STATS_BASIC Basic statistics +gnet_stats_rate_est TCA_STATS_RATE_EST Rate estimator +gnet_stats_queue TCA_STATS_QUEUE Queue statistics +none TCA_STATS_APP Application specific + + +Collecting: +----------- + +Declare the statistic structs you need: +struct mystruct { + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + ... +}; + +Update statistics: +mystruct->tstats.packet++; +mystruct->qstats.backlog += skb->pkt_len; + + +Export to userspace (Dump): +--------------------------- + +my_dumping_routine(struct sk_buff *skb, ...) +{ + struct gnet_dump dump; + + if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump) < 0) + goto rtattr_failure; + + if (gnet_stats_copy_basic(&dump, &mystruct->bstats) < 0 || + gnet_stats_copy_queue(&dump, &mystruct->qstats) < 0 || + gnet_stats_copy_app(&dump, &xstats, sizeof(xstats)) < 0) + goto rtattr_failure; + + if (gnet_stats_finish_copy(&dump) < 0) + goto rtattr_failure; + ... +} + +TCA_STATS/TCA_XSTATS backward compatibility: +-------------------------------------------- + +Prior users of struct tc_stats and xstats can maintain backward +compatibility by calling the compat wrappers to keep providing the +existing TLV types. + +my_dumping_routine(struct sk_buff *skb, ...) +{ + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, + TCA_XSTATS, &mystruct->lock, &dump) < 0) + goto rtattr_failure; + ... +} + +A struct tc_stats will be filled out during gnet_stats_copy_* calls +and appended to the skb. TCA_XSTATS is provided if gnet_stats_copy_app +was called. + + +Locking: +-------- + +Locks are taken before writing and released once all statistics have +been written. Locks are always released in case of an error. You +are responsible for making sure that the lock is initialized. + + +Rate Estimator: +-------------- + +0) Prepare an estimator attribute. Most likely this would be in user + space. The value of this TLV should contain a tc_estimator structure. + As usual, such a TLV nees to be 32 bit aligned and therefore the + length needs to be appropriately set etc. The estimator interval + and ewma log need to be converted to the appropriate values. + tc_estimator.c::tc_setup_estimator() is advisable to be used as the + conversion routine. It does a few clever things. It takes a time + interval in microsecs, a time constant also in microsecs and a struct + tc_estimator to be populated. The returned tc_estimator can be + transported to the kernel. Transfer such a structure in a TLV of type + TCA_RATE to your code in the kernel. + +In the kernel when setting up: +1) make sure you have basic stats and rate stats setup first. +2) make sure you have initialized stats lock that is used to setup such + stats. +3) Now initialize a new estimator: + + int ret = gen_new_estimator(my_basicstats,my_rate_est_stats, + mystats_lock, attr_with_tcestimator_struct); + + if ret == 0 + success + else + failed + +From now on, everytime you dump my_rate_est_stats it will contain +uptodate info. + +Once you are done, call gen_kill_estimator(my_basicstats, +my_rate_est_stats) Make sure that my_basicstats and my_rate_est_stats +are still valid (i.e still exist) at the time of making this call. + + +Authors: +-------- +Thomas Graf +Jamal Hadi Salim diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h new file mode 100644 index 000000000000..ab631c3571ce --- /dev/null +++ b/include/linux/gen_stats.h @@ -0,0 +1,62 @@ +#ifndef __LINUX_GEN_STATS_H +#define __LINUX_GEN_STATS_H + +#include + +enum { + TCA_STATS_UNSPEC, + TCA_STATS_BASIC, + TCA_STATS_RATE_EST, + TCA_STATS_QUEUE, + TCA_STATS_APP, + __TCA_STATS_MAX, +}; +#define TCA_STATS_MAX (__TCA_STATS_MAX - 1) + +/** + * @bytes: number of seen bytes + * @packets: number of seen packets + */ +struct gnet_stats_basic +{ + __u64 bytes; + __u32 packets; +}; + +/** + * @bps: current byte rate + * @pps: current packet rate + */ +struct gnet_stats_rate_est +{ + __u32 bps; + __u32 pps; +}; + +/** + * @qlen: queue length + * @backlog: backlog size of queue + * @drops: number of dropped packets + * @requeues: number of requeues + */ +struct gnet_stats_queue +{ + __u32 qlen; + __u32 backlog; + __u32 drops; + __u32 requeues; + __u32 overlimits; +}; + +/** + * @interval: sampling period + * @ewma_log: the log of measurement window weight + */ +struct gnet_estimator +{ + signed char interval; + unsigned char ewma_log; +}; + + +#endif /* __LINUX_GEN_STATS_H */ diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h new file mode 100644 index 000000000000..e3ec2ebdd62e --- /dev/null +++ b/include/net/gen_stats.h @@ -0,0 +1,45 @@ +#ifndef __NET_GEN_STATS_H +#define __NET_GEN_STATS_H + +#include +#include +#include +#include + +struct gnet_dump +{ + spinlock_t * lock; + struct sk_buff * skb; + struct rtattr * tail; + + /* Backward compatability */ + int compat_tc_stats; + int compat_xstats; + struct rtattr * xstats; + struct tc_stats tc_stats; +}; + +extern int gnet_stats_start_copy(struct sk_buff *skb, int type, + spinlock_t *lock, struct gnet_dump *d); + +extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, + int tc_stats_type,int xstats_type, + spinlock_t *lock, struct gnet_dump *d); + +extern int gnet_stats_copy_basic(struct gnet_dump *d, + struct gnet_stats_basic *b); +extern int gnet_stats_copy_rate_est(struct gnet_dump *d, + struct gnet_stats_rate_est *r); +extern int gnet_stats_copy_queue(struct gnet_dump *d, + struct gnet_stats_queue *q); +extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); + +extern int gnet_stats_finish_copy(struct gnet_dump *d); + +extern int gen_new_estimator(struct gnet_stats_basic *bstats, + struct gnet_stats_rate_est *rate_est, + spinlock_t *stats_lock, struct rtattr *opt); +extern void gen_kill_estimator(struct gnet_stats_basic *bstats, + struct gnet_stats_rate_est *rate_est); + +#endif diff --git a/net/core/Makefile b/net/core/Makefile index e9b21b9418c4..81f03243fe2f 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -2,7 +2,7 @@ # Makefile for the Linux networking core. # -obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o +obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c new file mode 100644 index 000000000000..4d65f937ece8 --- /dev/null +++ b/net/core/gen_estimator.c @@ -0,0 +1,204 @@ +/* + * net/sched/gen_estimator.c Simple rate estimator. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + * + * Changes: + * Jamal Hadi Salim - moved it to net/core and reshulfed + * names to make it usable in general net subsystem. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + This code is NOT intended to be used for statistics collection, + its purpose is to provide a base for statistical multiplexing + for controlled load service. + If you need only statistics, run a user level daemon which + periodically reads byte counters. + + Unfortunately, rate estimation is not a very easy task. + F.e. I did not find a simple way to estimate the current peak rate + and even failed to formulate the problem 8)8) + + So I preferred not to built an estimator into the scheduler, + but run this task separately. + Ideally, it should be kernel thread(s), but for now it runs + from timers, which puts apparent top bounds on the number of rated + flows, has minimal overhead on small, but is enough + to handle controlled load service, sets of aggregates. + + We measure rate over A=(1<next) { + u64 nbytes; + u32 npackets; + u32 rate; + + spin_lock(e->stats_lock); + nbytes = e->bstats->bytes; + npackets = e->bstats->packets; + rate = (nbytes - e->last_bytes)<<(7 - idx); + e->last_bytes = nbytes; + e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; + e->rate_est->bps = (e->avbps+0xF)>>5; + + rate = (npackets - e->last_packets)<<(12 - idx); + e->last_packets = npackets; + e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; + e->rate_est->pps = (e->avpps+0x1FF)>>10; + spin_unlock(e->stats_lock); + } + + mod_timer(&elist[idx].timer, jiffies + ((HZ<interval < -2 || parm->interval > 3) + return -EINVAL; + + est = kmalloc(sizeof(*est), GFP_KERNEL); + if (est == NULL) + return -ENOBUFS; + + memset(est, 0, sizeof(*est)); + est->interval = parm->interval + 2; + est->bstats = bstats; + est->rate_est = rate_est; + est->stats_lock = stats_lock; + est->ewma_log = parm->ewma_log; + est->last_bytes = bstats->bytes; + est->avbps = rate_est->bps<<5; + est->last_packets = bstats->packets; + est->avpps = rate_est->pps<<10; + + est->next = elist[est->interval].list; + if (est->next == NULL) { + init_timer(&elist[est->interval].timer); + elist[est->interval].timer.data = est->interval; + elist[est->interval].timer.expires = jiffies + ((HZ<interval)/4); + elist[est->interval].timer.function = est_timer; + add_timer(&elist[est->interval].timer); + } + write_lock_bh(&est_lock); + elist[est->interval].list = est; + write_unlock_bh(&est_lock); + return 0; +} + +void gen_kill_estimator(struct gnet_stats_basic *bstats, + struct gnet_stats_rate_est *rate_est) +{ + int idx; + struct gen_estimator *est, **pest; + + for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { + int killed = 0; + pest = &elist[idx].list; + while ((est=*pest) != NULL) { + if (est->rate_est != rate_est || est->bstats != bstats) { + pest = &est->next; + continue; + } + + write_lock_bh(&est_lock); + *pest = est->next; + write_unlock_bh(&est_lock); + + kfree(est); + killed++; + } + if (killed && elist[idx].list == NULL) + del_timer(&elist[idx].timer); + } +} + +EXPORT_SYMBOL(gen_kill_estimator); +EXPORT_SYMBOL(gen_new_estimator); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c new file mode 100644 index 000000000000..2b2ba6570b8b --- /dev/null +++ b/net/core/gen_stats.c @@ -0,0 +1,132 @@ +/* + * net/core/gen_stats.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf + * Jamal Hadi Salim + * Alexey Kuznetsov, + * + * See Documentation/networking/gen_stats.txt + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +static inline int +gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) +{ + RTA_PUT(d->skb, type, size, buf); + return 0; + +rtattr_failure: + spin_unlock_bh(d->lock); + return -1; +} + +int +gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, + int xstats_type, spinlock_t *lock, struct gnet_dump *d) +{ + spin_lock_bh(lock); + d->lock = lock; + d->tail = (struct rtattr *) skb->tail; + d->skb = skb; + d->compat_tc_stats = tc_stats_type; + d->compat_xstats = xstats_type; + d->xstats = NULL; + + if (d->compat_tc_stats) + memset(&d->tc_stats, 0, sizeof(d->tc_stats)); + + return gnet_stats_copy(d, type, NULL, 0); +} + +int +gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, + struct gnet_dump *d) +{ + return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); +} + + +int +gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b) +{ + if (d->compat_tc_stats) { + d->tc_stats.bytes = b->bytes; + d->tc_stats.packets = b->packets; + } + + return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b)); +} + +int +gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r) +{ + if (d->compat_tc_stats) { + d->tc_stats.bps = r->bps; + d->tc_stats.pps = r->pps; + } + + return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r)); +} + +int +gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q) +{ + if (d->compat_tc_stats) { + d->tc_stats.drops = q->drops; + d->tc_stats.qlen = q->qlen; + d->tc_stats.backlog = q->backlog; + d->tc_stats.overlimits = q->overlimits; + } + + return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q)); +} + +int +gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) +{ + if (d->compat_xstats) + d->xstats = (struct rtattr *) d->skb->tail; + return gnet_stats_copy(d, TCA_STATS_APP, st, len); +} + +int +gnet_stats_finish_copy(struct gnet_dump *d) +{ + d->tail->rta_len = d->skb->tail - (u8 *) d->tail; + + if (d->compat_tc_stats) + if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, + sizeof(d->tc_stats)) < 0) + return -1; + + if (d->compat_xstats && d->xstats) { + if (gnet_stats_copy(d, d->compat_xstats, RTA_DATA(d->xstats), + RTA_PAYLOAD(d->xstats)) < 0) + return -1; + } + + spin_unlock_bh(d->lock); + return 0; +} + + +EXPORT_SYMBOL(gnet_stats_start_copy); +EXPORT_SYMBOL(gnet_stats_copy_basic); +EXPORT_SYMBOL(gnet_stats_copy_rate_est); +EXPORT_SYMBOL(gnet_stats_copy_queue); +EXPORT_SYMBOL(gnet_stats_copy_app); +EXPORT_SYMBOL(gnet_stats_finish_copy); -- cgit v1.2.3