diff options
Diffstat (limited to 'drivers/net/bonding.c')
| -rw-r--r-- | drivers/net/bonding.c | 2030 |
1 files changed, 1886 insertions, 144 deletions
diff --git a/drivers/net/bonding.c b/drivers/net/bonding.c index f67756b700af..dbc0cb4c7685 100644 --- a/drivers/net/bonding.c +++ b/drivers/net/bonding.c @@ -4,11 +4,14 @@ * Copyright 1999, Thomas Davis, tadavis@lbl.gov. * Licensed under the GPL. Based on dummy.c, and eql.c devices. * - * bond.c: a bonding/etherchannel/sun trunking net driver + * bonding.c: an Ethernet Bonding driver * - * This is useful to talk to a Cisco 5500, running Etherchannel, aka: - * Linux Channel Bonding + * This is useful to talk to a Cisco EtherChannel compatible equipment: + * Cisco 5500 * Sun Trunking (Solaris) + * Alteon AceDirector Trunks + * Linux Bonding + * and probably many L2 switches ... * * How it works: * ifconfig bond0 ipaddress netmask up @@ -21,280 +24,2019 @@ * will release all slaves, marking them as down. * * ifenslave bond0 eth0 - * will attache eth0 to bond0 as a slave. eth0 hw mac address will either + * will attach eth0 to bond0 as a slave. eth0 hw mac address will either * a: be used as initial mac address * b: if a hw mac address already is there, eth0's hw mac address - * will then be set from bond0. + * will then be set from bond0. * * v0.1 - first working version. * v0.2 - changed stats to be calculated by summing slaves stats. + * + * Changes: + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> + * - fix leaks on failure at bond_init + * + * 2000/09/30 - Willy Tarreau <willy at meta-x.org> + * - added trivial code to release a slave device. + * - fixed security bug (CAP_NET_ADMIN not checked) + * - implemented MII link monitoring to disable dead links : + * All MII capable slaves are checked every <miimon> milliseconds + * (100 ms seems good). This value can be changed by passing it to + * insmod. A value of zero disables the monitoring (default). + * - fixed an infinite loop in bond_xmit_roundrobin() when there's no + * good slave. + * - made the code hopefully SMP safe + * + * 2000/10/03 - Willy Tarreau <willy at meta-x.org> + * - optimized slave lists based on relevant suggestions from Thomas Davis + * - implemented active-backup method to obtain HA with two switches: + * stay as long as possible on the same active interface, while we + * also monitor the backup one (MII link status) because we want to know + * if we are able to switch at any time. ( pass "mode=1" to insmod ) + * - lots of stress testings because we need it to be more robust than the + * wires ! :-> + * + * 2000/10/09 - Willy Tarreau <willy at meta-x.org> + * - added up and down delays after link state change. + * - optimized the slaves chaining so that when we run forward, we never + * repass through the bond itself, but we can find it by searching + * backwards. Renders the deletion more difficult, but accelerates the + * scan. + * - smarter enslaving and releasing. + * - finer and more robust SMP locking + * + * 2000/10/17 - Willy Tarreau <willy at meta-x.org> + * - fixed two potential SMP race conditions + * + * 2000/10/18 - Willy Tarreau <willy at meta-x.org> + * - small fixes to the monitoring FSM in case of zero delays + * 2000/11/01 - Willy Tarreau <willy at meta-x.org> + * - fixed first slave not automatically used in trunk mode. + * 2000/11/10 : spelling of "EtherChannel" corrected. + * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl(). + * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait(). + * + * 2001/1/3 - Chad N. Tindel <ctindel at ieee dot org> + * - The bonding driver now simulates MII status monitoring, just like + * a normal network device. It will show that the link is down iff + * every slave in the bond shows that their links are down. If at least + * one slave is up, the bond's MII status will appear as up. + * + * 2001/2/7 - Chad N. Tindel <ctindel at ieee dot org> + * - Applications can now query the bond from user space to get + * information which may be useful. They do this by calling + * the BOND_INFO_QUERY ioctl. Once the app knows how many slaves + * are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to + * get slave specific information (# link failures, etc). See + * <linux/if_bonding.h> for more details. The structs of interest + * are ifbond and ifslave. + * + * 2001/4/5 - Chad N. Tindel <ctindel at ieee dot org> + * - Ported to 2.4 Kernel * + * 2001/5/2 - Jeffrey E. Mast <jeff at mastfamily dot com> + * - When a device is detached from a bond, the slave device is no longer + * left thinking that is has a master. + * + * 2001/5/16 - Jeffrey E. Mast <jeff at mastfamily dot com> + * - memset did not appropriately initialized the bond rw_locks. Used + * rwlock_init to initialize to unlocked state to prevent deadlock when + * first attempting a lock + * - Called SET_MODULE_OWNER for bond device + * + * 2001/5/17 - Tim Anderson <tsa at mvista.com> + * - 2 paths for releasing for slave release; 1 through ioctl + * and 2) through close. Both paths need to release the same way. + * - the free slave in bond release is changing slave status before + * the free. The netdev_set_master() is intended to change slave state + * so it should not be done as part of the release process. + * - Simple rule for slave state at release: only the active in A/B and + * only one in the trunked case. + * + * 2001/6/01 - Tim Anderson <tsa at mvista.com> + * - Now call dev_close when releasing a slave so it doesn't screw up + * out routing table. + * + * 2001/6/01 - Chad N. Tindel <ctindel at ieee dot org> + * - Added /proc support for getting bond and slave information. + * Information is in /proc/net/<bond device>/info. + * - Changed the locking when calling bond_close to prevent deadlock. + * + * 2001/8/05 - Janice Girouard <girouard at us.ibm.com> + * - correct problem where refcnt of slave is not incremented in bond_ioctl + * so the system hangs when halting. + * - correct locking problem when unable to malloc in bond_enslave. + * - adding bond_xmit_xor logic. + * - adding multiple bond device support. + * + * 2001/8/13 - Erik Habbinga <erik_habbinga at hp dot com> + * - correct locking problem with rtnl_exlock_nowait + * + * 2001/8/23 - Janice Girouard <girouard at us.ibm.com> + * - bzero initial dev_bonds, to correct oops + * - convert SIOCDEVPRIVATE to new MII ioctl calls + * + * 2001/9/13 - Takao Indoh <indou dot takao at jp dot fujitsu dot com> + * - Add the BOND_CHANGE_ACTIVE ioctl implementation + * + * 2001/9/14 - Mark Huth <mhuth at mvista dot com> + * - Change MII_LINK_READY to not check for end of auto-negotiation, + * but only for an up link. + * + * 2001/9/20 - Chad N. Tindel <ctindel at ieee dot org> + * - Add the device field to bonding_t. Previously the net_device + * corresponding to a bond wasn't available from the bonding_t + * structure. + * + * 2001/9/25 - Janice Girouard <girouard at us.ibm.com> + * - add arp_monitor for active backup mode + * + * 2001/10/23 - Takao Indoh <indou dot takao at jp dot fujitsu dot com> + * - Various memory leak fixes */ -#include <linux/module.h> +#include <linux/config.h> #include <linux/kernel.h> -#include <linux/netdevice.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/interrupt.h> +#include <linux/ptrace.h> +#include <linux/ioport.h> +#include <linux/in.h> +#include <linux/malloc.h> +#include <linux/string.h> #include <linux/init.h> +#include <linux/timer.h> +#include <linux/socket.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <asm/io.h> +#include <asm/dma.h> +#include <asm/uaccess.h> +#include <linux/errno.h> + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <linux/rtnetlink.h> + #include <linux/if_bonding.h> +#include <linux/smp.h> +#include <limits.h> +#include <linux/if_ether.h> +#include <linux/if_arp.h> + +/* monitor all links that often (in milliseconds). <=0 disables monitoring */ +#ifndef BOND_LINK_MON_INTERV +#define BOND_LINK_MON_INTERV 0 +#endif + +#undef MII_LINK_UP +#define MII_LINK_UP 0x04 + +#undef MII_ENDOF_NWAY +#define MII_ENDOF_NWAY 0x20 + +#undef MII_LINK_READY +/*#define MII_LINK_READY (MII_LINK_UP | MII_ENDOF_NWAY)*/ +#define MII_LINK_READY (MII_LINK_UP) + +#define MAX_BOND_ADDR 256 + +#ifndef BOND_LINK_ARP_INTERV +#define BOND_LINK_ARP_INTERV 0 +#endif + +static int arp_interval = BOND_LINK_ARP_INTERV; +static char *arp_ip_target = NULL; +static unsigned long arp_target = 0; +static u32 my_ip = 0; +char *arp_target_hw_addr = NULL; + +static int max_bonds = MAX_BONDS; +static int miimon = BOND_LINK_MON_INTERV; +static int mode = BOND_MODE_ROUNDROBIN; +static int updelay = 0; +static int downdelay = 0; + +static int first_pass = 1; +int bond_cnt; +static struct bonding *these_bonds = NULL; +static struct net_device *dev_bonds = NULL; + +MODULE_PARM(max_bonds, "1-" __MODULE_STRING(INT_MAX) "i"); +MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); +MODULE_PARM(miimon, "i"); +MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); +MODULE_PARM(mode, "i"); +MODULE_PARM(arp_interval, "i"); +MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); +MODULE_PARM(arp_ip_target, "1-12s"); +MODULE_PARM_DESC(arp_ip_target, "arp target in n.n.n.n form"); +MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor"); +MODULE_PARM(updelay, "i"); +MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); +MODULE_PARM(downdelay, "i"); +MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds"); + +extern void arp_send( int type, int ptype, u32 dest_ip, struct net_device *dev, + u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, + unsigned char *target_hw); + +static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev); +static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev); +static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev); +static struct net_device_stats *bond_get_stats(struct net_device *dev); +static void bond_mii_monitor(struct net_device *dev); +static void bond_arp_monitor(struct net_device *dev); +static int bond_event(struct notifier_block *this, unsigned long event, void *ptr); +static void bond_set_slave_inactive_flags(slave_t *slave); +static void bond_set_slave_active_flags(slave_t *slave); +static int bond_enslave(struct net_device *master, struct net_device *slave); +static int bond_release(struct net_device *master, struct net_device *slave); +static int bond_sethwaddr(struct net_device *master, struct net_device *slave); + +/* + * bond_get_info is the interface into the /proc filesystem. This is + * a different interface than the BOND_INFO_QUERY ioctl. That is done + * through the generic networking ioctl interface, and bond_info_query + * is the internal function which provides that information. + */ +static int bond_get_info(char *buf, char **start, off_t offset, int length); + +/* #define BONDING_DEBUG 1 */ + +/* several macros */ -typedef struct slave +#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \ + (netif_running(dev) && netif_carrier_ok(dev))) + +static void bond_set_slave_inactive_flags(slave_t *slave) { - struct slave *next; - struct slave *prev; - struct net_device *dev; -} slave_t; + slave->state = BOND_STATE_BACKUP; + slave->dev->flags |= IFF_NOARP; +} -typedef struct bonding +static void bond_set_slave_active_flags(slave_t *slave) { - slave_t *next; - slave_t *prev; - struct net_device *master; + slave->state = BOND_STATE_ACTIVE; + slave->dev->flags &= ~IFF_NOARP; +} - slave_t *current_slave; - struct net_device_stats stats; -} bonding_t; +/* + * This function detaches the slave <slave> from the list <bond>. + * WARNING: no check is made to verify if the slave effectively + * belongs to <bond>. It returns <slave> in case it's needed. + * Nothing is freed on return, structures are just unchained. + * If the bond->current_slave pointer was pointing to <slave>, + * it's replaced with slave->next, or <bond> if not applicable. + */ +static slave_t *bond_detach_slave(bonding_t *bond, slave_t *slave) +{ + if ((bond == NULL) || (slave == NULL) || + ((void *)bond == (void *)slave)) { + printk(KERN_ERR + "bond_detach_slave(): trying to detach " + "slave %p from bond %p\n", bond, slave); + return slave; + } + if (bond->next == slave) { /* is the slave at the head ? */ + if (bond->prev == slave) { /* is the slave alone ? */ + write_lock(&bond->ptrlock); + bond->current_slave = NULL; /* no slave anymore */ + write_unlock(&bond->ptrlock); + bond->prev = bond->next = (slave_t *)bond; + } else { /* not alone */ + bond->next = slave->next; + slave->next->prev = (slave_t *)bond; + bond->prev->next = slave->next; + + write_lock(&bond->ptrlock); + if (bond->current_slave == slave) { + bond->current_slave = slave->next; + } + write_unlock(&bond->ptrlock); + } + } + else { + slave->prev->next = slave->next; + if (bond->prev == slave) { /* is this slave the last one ? */ + bond->prev = slave->prev; + } else { + slave->next->prev = slave->prev; + } -static int bond_xmit(struct sk_buff *skb, struct net_device *dev); -static struct net_device_stats *bond_get_stats(struct net_device *dev); + write_lock(&bond->ptrlock); + if (bond->current_slave == slave) { + bond->current_slave = slave->next; + } + write_unlock(&bond->ptrlock); + } -static struct net_device *this_bond; + return slave; +} -static void release_one_slave(struct net_device *master, slave_t *slave) +/* + * if <dev> supports MII link status reporting, check its link + * and report it as a bit field in a short int : + * - 0x04 means link is up, + * - 0x20 means end of autonegociation + * If the device doesn't support MII, then we only report 0x24, + * meaning that the link is up and running since we can't check it. + */ +static u16 bond_check_dev_link(struct net_device *dev) { - bonding_t *bond = master->priv; + static int (* ioctl)(struct net_device *, struct ifreq *, int); + struct ifreq ifr; + u16 *data = (u16 *)&ifr.ifr_data; + + /* data[0] automagically filled by the ioctl */ + data[1] = 1; /* MII location 1 reports Link Status */ + + if (((ioctl = dev->do_ioctl) != NULL) && /* ioctl to access MII */ + (ioctl(dev, &ifr, SIOCGMIIPHY) == 0)) { + /* now, data[3] contains info about link status : + - data[3] & 0x04 means link up + - data[3] & 0x20 means end of auto-negociation + */ + return data[3]; + } else { + return MII_LINK_READY; /* spoof link up ( we can't check it) */ + } +} - spin_lock_bh(&master->xmit_lock); - if (bond->current_slave == slave) - bond->current_slave = slave->next; - slave->next->prev = slave->prev; - slave->prev->next = slave->next; - spin_unlock_bh(&master->xmit_lock); +static u16 bond_check_mii_link(bonding_t *bond) +{ + int has_active_interface = 0; - netdev_set_master(slave->dev, NULL); + read_lock(&bond->ptrlock); + has_active_interface = (bond->current_slave != NULL); + read_unlock(&bond->ptrlock); - dev_put(slave->dev); - kfree(slave); + return (has_active_interface ? MII_LINK_READY : 0); +} + +static int bond_open(struct net_device *dev) +{ + struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer; + struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer; + MOD_INC_USE_COUNT; + + if (miimon > 0) { /* link check interval, in milliseconds. */ + init_timer(timer); + timer->expires = jiffies + (miimon * HZ / 1000); + timer->data = (unsigned long)dev; + timer->function = (void *)&bond_mii_monitor; + add_timer(timer); + } + + if (arp_interval> 0) { /* arp interval, in milliseconds. */ + init_timer(arp_timer); + arp_timer->expires = jiffies + (arp_interval * HZ / 1000); + arp_timer->data = (unsigned long)dev; + arp_timer->function = (void *)&bond_arp_monitor; + add_timer(arp_timer); + } + return 0; } static int bond_close(struct net_device *master) { - bonding_t *bond = master->priv; + bonding_t *bond = (struct bonding *) master->priv; slave_t *slave; + unsigned long flags; + + write_lock_irqsave(&bond->lock, flags); + + if (miimon > 0) { /* link check interval, in milliseconds. */ + del_timer(&bond->mii_timer); + } + if (arp_interval> 0) { /* arp interval, in milliseconds. */ + del_timer(&bond->arp_timer); + } + /* We need to unlock this because bond_release will re-lock it */ + write_unlock_irqrestore(&bond->lock, flags); - while ((slave = bond->next) != (slave_t*)bond) - release_one_slave(master, slave); + /* Release the bonded slaves */ + while ((slave = bond->prev) != (slave_t *)bond) { + bond_release(master, slave->dev); + } + MOD_DEC_USE_COUNT; return 0; } -static void bond_set_multicast_list(struct net_device *master) +static void set_multicast_list(struct net_device *master) { +/* + bonding_t *bond = master->priv; + slave_t *slave; + + for (slave = bond->next; slave != (slave_t*)bond; slave = slave->next) { + slave->dev->mc_list = master->mc_list; + slave->dev->mc_count = master->mc_count; + slave->dev->flags = master->flags; + slave->dev->set_multicast_list(slave->dev); + } + */ } -static int bond_enslave(struct net_device *master, struct net_device *dev) +/* + * This function counts the the number of attached + * slaves for use by bond_xmit_xor. + */ +static void update_slave_cnt(bonding_t *bond) { - int err; - bonding_t *bond = master->priv; - slave_t *slave; + slave_t *slave = NULL; - if (dev->type != master->type) + bond->slave_cnt = 0; + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) { + bond->slave_cnt++; + } +} + +/* enslave device <slave> to bond device <master> */ +static int bond_enslave(struct net_device *master_dev, + struct net_device *slave_dev) +{ + bonding_t *bond = NULL; + slave_t *new_slave = NULL; + unsigned long flags = 0; + int ndx = 0; + int err = 0; + + if (master_dev == NULL || slave_dev == NULL) { return -ENODEV; + } + bond = (struct bonding *) master_dev->priv; - if ((slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) + if (slave_dev->do_ioctl == NULL) { + printk(KERN_DEBUG + "Warning : no link monitoring support for %s\n", + slave_dev->name); + } + write_lock_irqsave(&bond->lock, flags); + + /* not running. */ + if ((slave_dev->flags & IFF_UP) != IFF_UP) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, slave_dev is not running\n"); +#endif + write_unlock_irqrestore(&bond->lock, flags); + return -EINVAL; + } + + /* already enslaved */ + if (master_dev->flags & IFF_SLAVE || slave_dev->flags & IFF_SLAVE) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, Device was already enslaved\n"); +#endif + write_unlock_irqrestore(&bond->lock, flags); + return -EBUSY; + } + + if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) { + write_unlock_irqrestore(&bond->lock, flags); return -ENOMEM; + } + memset(new_slave, 0, sizeof(slave_t)); - memset(slave, 0, sizeof(slave_t)); + err = netdev_set_master(slave_dev, master_dev); - err = netdev_set_master(dev, master); if (err) { - kfree(slave); - return err; +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error %d calling netdev_set_master\n", err); +#endif + kfree(new_slave); + write_unlock_irqrestore(&bond->lock, flags); + return err; } - slave->dev = dev; - - spin_lock_bh(&master->xmit_lock); + new_slave->dev = slave_dev; + + /* + * queue to the end of the slaves list, make the first element its + * successor, the last one its predecessor, and make it the bond's + * predecessor. + */ + new_slave->prev = bond->prev; + new_slave->prev->next = new_slave; + bond->prev = new_slave; + new_slave->next = bond->next; + + new_slave->delay = 0; + new_slave->link_failure_count = 0; + + /* check for initial state */ + if ((miimon <= 0) || ((bond_check_dev_link(slave_dev) & MII_LINK_READY) + == MII_LINK_READY)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_UP\n"); +#endif + new_slave->link = BOND_LINK_UP; + } + else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_DOWN\n"); +#endif + new_slave->link = BOND_LINK_DOWN; + } - dev_hold(dev); + /* if we're in active-backup mode, we need one and only one active + * interface. The backup interfaces will have their NOARP flag set + * because we need them to be completely deaf and not to respond to + * any ARP request on the network to avoid fooling a switch. Thus, + * since we guarantee that current_slave always point to the last + * usable interface, we just have to verify this interface's flag. + */ + if (mode == BOND_MODE_ACTIVEBACKUP) { + if (((bond->current_slave == NULL) + || (bond->current_slave->dev->flags & IFF_NOARP)) + && (new_slave->link == BOND_LINK_UP)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This is the first active slave\n"); +#endif + /* first slave or no active slave yet, and this link + is OK, so make this interface the active one */ + bond->current_slave = new_slave; + bond_set_slave_active_flags(new_slave); + } + else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This is just a backup slave\n"); +#endif + bond_set_slave_inactive_flags(new_slave); + } + } else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This slave is always active in trunk mode\n"); +#endif + /* always active in trunk mode */ + new_slave->state = BOND_STATE_ACTIVE; + if (bond->current_slave == NULL) { + bond->current_slave = new_slave; + } + } - slave->prev = bond->prev; - slave->next = (slave_t*)bond; - slave->prev->next = slave; - slave->next->prev = slave; + update_slave_cnt(bond); + + write_unlock_irqrestore(&bond->lock, flags); + + /* + * !!! This is to support old versions of ifenslave. We can remove + * this in 2.5 because our ifenslave takes care of this for us. + * We check to see if the master has a mac address yet. If not, + * we'll give it the mac address of our slave device. + */ + for (ndx = 0; ndx < slave_dev->addr_len; ndx++) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Checking ndx=%d of master_dev->dev_addr\n", + ndx); +#endif + if (master_dev->dev_addr[ndx] != 0) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Found non-zero byte at ndx=%d\n", + ndx); +#endif + break; + } + } + if (ndx == slave_dev->addr_len) { + /* + * We got all the way through the address and it was + * all 0's. + */ +#ifdef BONDING_DEBUG + printk(KERN_CRIT "%s doesn't have a MAC address yet. ", + master_dev->name); + printk(KERN_CRIT "Going to give assign it from %s.\n", + slave_dev->name); +#endif + bond_sethwaddr(master_dev, slave_dev); + } - spin_unlock_bh(&master->xmit_lock); + printk (KERN_INFO "%s: enslaving %s as a%s interface with a%s link.\n", + master_dev->name, slave_dev->name, + new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", + new_slave->link == BOND_LINK_UP ? "n up" : " down"); return 0; } -static int bond_release(struct net_device *master, struct net_device *dev) +/* + * This function changes the active slave to slave <slave_dev>. + * It returns -EINVAL in the following cases. + * - <slave_dev> is not found in the list. + * - There is not active slave now. + * - <slave_dev> is already active. + * - The link state of <slave_dev> is not BOND_LINK_UP. + * - <slave_dev> is not running. + * In these cases, this fuction does nothing. + * In the other cases, currnt_slave pointer is changed and 0 is returned. + */ +static int bond_change_active(struct net_device *master_dev, struct net_device *slave_dev) { - bonding_t *bond = master->priv; + bonding_t *bond; slave_t *slave; + slave_t *oldactive = NULL; + slave_t *newactive = NULL; + unsigned long flags; + int ret = 0; + + if (master_dev == NULL || slave_dev == NULL) { + return -ENODEV; + } + + bond = (struct bonding *) master_dev->priv; + write_lock_irqsave(&bond->lock, flags); + slave = (slave_t *)bond; + oldactive = bond->current_slave; + + while ((slave = slave->prev) != (slave_t *)bond) { + if(slave_dev == slave->dev) { + newactive = slave; + break; + } + } + + if ((newactive != NULL)&& + (oldactive != NULL)&& + (newactive != oldactive)&& + (newactive->link == BOND_LINK_UP)&& + IS_UP(newactive->dev)) { + bond_set_slave_inactive_flags(oldactive); + bond_set_slave_active_flags(newactive); + bond->current_slave = newactive; + printk("%s : activate %s(old : %s)\n", + master_dev->name, newactive->dev->name, + oldactive->dev->name); + } + else { + ret = -EINVAL; + } + write_unlock_irqrestore(&bond->lock, flags); + return ret; +} + +/* Choose a new valid interface from the pool, set it active + * and make it the current slave. If no valid interface is + * found, the oldest slave in BACK state is choosen and + * activated. If none is found, it's considered as no + * interfaces left so the current slave is set to NULL. + * The result is a pointer to the current slave. + * + * Since this function sends messages tails through printk, the caller + * must have started something like `printk(KERN_INFO "xxxx ");'. + * + * Warning: must put locks around the call to this function if needed. + */ +slave_t *change_active_interface(bonding_t *bond) +{ + slave_t *newslave, *oldslave; + slave_t *bestslave = NULL; + int mintime; + + read_lock(&bond->ptrlock); + newslave = oldslave = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (newslave == NULL) { /* there were no active slaves left */ + if (bond->next != (slave_t *)bond) { /* found one slave */ + write_lock(&bond->ptrlock); + newslave = bond->current_slave = bond->next; + write_unlock(&bond->ptrlock); + } else { + printk (" but could not find any %s interface.\n", + (mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other"); + write_lock(&bond->ptrlock); + bond->current_slave = (slave_t *)NULL; + write_unlock(&bond->ptrlock); + return NULL; /* still no slave, return NULL */ + } + } + + mintime = updelay; + + do { + if (IS_UP(newslave->dev)) { + if (newslave->link == BOND_LINK_UP) { + /* this one is immediately usable */ + if (mode == BOND_MODE_ACTIVEBACKUP) { + bond_set_slave_active_flags(newslave); + printk (" and making interface %s the active one.\n", + newslave->dev->name); + } + else { + printk (" and setting pointer to interface %s.\n", + newslave->dev->name); + } + + write_lock(&bond->ptrlock); + bond->current_slave = newslave; + write_unlock(&bond->ptrlock); + return newslave; + } + else if (newslave->link == BOND_LINK_BACK) { + /* link up, but waiting for stabilization */ + if (newslave->delay < mintime) { + mintime = newslave->delay; + bestslave = newslave; + } + } + } + } while ((newslave = newslave->next) != oldslave); + + /* no usable backup found, we'll see if we at least got a link that was + coming back for a long time, and could possibly already be usable. + */ + + if (bestslave != NULL) { + /* early take-over. */ + printk (" and making interface %s the active one %d ms earlier.\n", + bestslave->dev->name, + (updelay - bestslave->delay)*miimon); + + bestslave->delay = 0; + bestslave->link = BOND_LINK_UP; + bond_set_slave_active_flags(bestslave); + + write_lock(&bond->ptrlock); + bond->current_slave = bestslave; + write_unlock(&bond->ptrlock); + return bestslave; + } + + printk (" but could not find any %s interface.\n", + (mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other"); + + /* absolutely nothing found. let's return NULL */ + write_lock(&bond->ptrlock); + bond->current_slave = (slave_t *)NULL; + write_unlock(&bond->ptrlock); + return NULL; +} + +/* + * Try to release the slave device <slave> from the bond device <master> + * It is legal to access current_slave without a lock because all the function + * is write-locked. + * + * The rules for slave state should be: + * for Active/Backup: + * Active stays on all backups go down + * for Bonded connections: + * The first up interface should be left on and all others downed. + */ +static int bond_release(struct net_device *master, struct net_device *slave) +{ + bonding_t *bond; + slave_t *our_slave, *old_current; + unsigned long flags; + + if (master == NULL || slave == NULL) { + return -ENODEV; + } - if (dev->master != master) + bond = (struct bonding *) master->priv; + + write_lock_irqsave(&bond->lock, flags); + + /* master already enslaved, or slave not enslaved, + or no slave for this master */ + if ((master->flags & IFF_SLAVE) || !(slave->flags & IFF_SLAVE)) { + printk (KERN_DEBUG "%s: cannot release %s.\n", master->name, slave->name); + write_unlock_irqrestore(&bond->lock, flags); return -EINVAL; + } - for (slave = bond->next; slave != (slave_t*)bond; slave = slave->next) { - if (slave->dev == dev) { - release_one_slave(master, slave); + our_slave = (slave_t *)bond; + old_current = bond->current_slave; + while ((our_slave = our_slave->prev) != (slave_t *)bond) { + if (our_slave->dev == slave) { + bond_detach_slave(bond, our_slave); + + printk (KERN_INFO "%s: releasing %s interface %s", + master->name, + (our_slave->state == BOND_STATE_ACTIVE) ? "active" : "backup", + slave->name); + + if (our_slave == old_current) { + /* find a new interface and be verbose */ + change_active_interface(bond); + } else { + printk(".\n"); + } + kfree(our_slave); + + /* release the slave from its bond */ + + netdev_set_master(slave, NULL); + + /* only restore its RUNNING flag if monitoring set it down */ + if (slave->flags & IFF_UP) { + slave->flags |= IFF_RUNNING; + } + + if (slave->flags & IFF_NOARP || + bond->current_slave != NULL) { + dev_close(slave); + } + + if (bond->current_slave == NULL) { + printk(KERN_INFO + "%s: now running without any active interface !\n", + master->name); + } + + update_slave_cnt(bond); + + write_unlock_irqrestore(&bond->lock, flags); + return 0; /* deletion OK */ + } + } + + /* if we get here, it's because the device was not found */ + write_unlock_irqrestore(&bond->lock, flags); + + printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name); + return -EINVAL; +} + +/* this function is called regularly to monitor each slave's link. */ +static void bond_mii_monitor(struct net_device *master) +{ + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave, *bestslave, *oldcurrent; + unsigned long flags; + int slave_died = 0; + + read_lock_irqsave(&bond->lock, flags); + + if (rtnl_shlock_nowait()) { + goto monitor_out; + } + + if (rtnl_exlock_nowait()) { + rtnl_shunlock(); + goto monitor_out; + } + /* we will try to read the link status of each of our slaves, and + * set their IFF_RUNNING flag appropriately. For each slave not + * supporting MII status, we won't do anything so that a user-space + * program could monitor the link itself if needed. + */ + + bestslave = NULL; + slave = (slave_t *)bond; + + read_lock(&bond->ptrlock); + oldcurrent = bond->current_slave; + read_unlock(&bond->ptrlock); + + while ((slave = slave->prev) != (slave_t *)bond) { + /* use updelay+1 to match an UP slave even when updelay is 0 */ + int mindelay = updelay + 1; + struct net_device *dev = slave->dev; + u16 link_state; + + link_state = bond_check_dev_link(dev); + + switch (slave->link) { + case BOND_LINK_UP: /* the link was up */ + if ((link_state & MII_LINK_UP) == MII_LINK_UP) { + /* link stays up, tell that this one + is immediately available */ + if (IS_UP(dev) && (mindelay > -2)) { + /* -2 is the best case : + this slave was already up */ + mindelay = -2; + bestslave = slave; + } + break; + } + else { /* link going down */ + slave->link = BOND_LINK_FAIL; + slave->delay = downdelay; + if (slave->link_failure_count < UINT_MAX) { + slave->link_failure_count++; + } + if (downdelay > 0) { + printk (KERN_INFO + "%s: link status down for %sinterface " + "%s, disabling it in %d ms.\n", + master->name, + IS_UP(dev) + ? ((mode == BOND_MODE_ACTIVEBACKUP) + ? ((slave == oldcurrent) + ? "active " : "backup ") + : "") + : "idle ", + dev->name, + downdelay * miimon); + } + } + /* no break ! fall through the BOND_LINK_FAIL test to + ensure proper action to be taken + */ + case BOND_LINK_FAIL: /* the link has just gone down */ + if ((link_state & MII_LINK_UP) == 0) { + /* link stays down */ + if (slave->delay <= 0) { + /* link down for too long time */ + slave->link = BOND_LINK_DOWN; + /* in active/backup mode, we must + completely disable this interface */ + if (mode == BOND_MODE_ACTIVEBACKUP) { + bond_set_slave_inactive_flags(slave); + } + printk(KERN_INFO + "%s: link status definitely down " + "for interface %s, disabling it", + master->name, + dev->name); + + read_lock(&bond->ptrlock); + if (slave == bond->current_slave) { + read_unlock(&bond->ptrlock); + /* find a new interface and be verbose */ + change_active_interface(bond); + } else { + read_unlock(&bond->ptrlock); + printk(".\n"); + } + slave_died = 1; + } else { + slave->delay--; + } + } else if ((link_state & MII_LINK_READY) == MII_LINK_READY) { + /* link up again */ + slave->link = BOND_LINK_UP; + printk(KERN_INFO + "%s: link status up again after %d ms " + "for interface %s.\n", + master->name, + (downdelay - slave->delay) * miimon, + dev->name); + + if (IS_UP(dev) && (mindelay > -1)) { + /* -1 is a good case : this slave went + down only for a short time */ + mindelay = -1; + bestslave = slave; + } + } break; + case BOND_LINK_DOWN: /* the link was down */ + if ((link_state & MII_LINK_READY) != MII_LINK_READY) { + /* the link stays down, nothing more to do */ + break; + } else { /* link going up */ + slave->link = BOND_LINK_BACK; + slave->delay = updelay; + + if (updelay > 0) { + /* if updelay == 0, no need to + advertise about a 0 ms delay */ + printk (KERN_INFO + "%s: link status up for interface" + " %s, enabling it in %d ms.\n", + master->name, + dev->name, + updelay * miimon); + } + } + /* no break ! fall through the BOND_LINK_BACK state in + case there's something to do. + */ + case BOND_LINK_BACK: /* the link has just come back */ + if ((link_state & MII_LINK_UP) == 0) { + /* link down again */ + slave->link = BOND_LINK_DOWN; + printk(KERN_INFO + "%s: link status down again after %d ms " + "for interface %s.\n", + master->name, + (updelay - slave->delay) * miimon, + dev->name); + } + else if ((link_state & MII_LINK_READY) == MII_LINK_READY) { + /* link stays up */ + if (slave->delay == 0) { + /* now the link has been up for long time enough */ + slave->link = BOND_LINK_UP; + + if (mode == BOND_MODE_ACTIVEBACKUP) { + /* prevent it from being the active one */ + slave->state = BOND_STATE_BACKUP; + } + else { + /* make it immediately active */ + slave->state = BOND_STATE_ACTIVE; + } + + printk(KERN_INFO + "%s: link status definitely up " + "for interface %s.\n", + master->name, + dev->name); + } + else + slave->delay--; + + /* we'll also look for the mostly eligible slave */ + if (IS_UP(dev) && (slave->delay < mindelay)) { + mindelay = slave->delay; + bestslave = slave; + } + } + break; + } /* end of switch */ + } /* end of while */ + + /* if there's no active interface and we discovered that one + of the slaves could be activated earlier, so we do it. + */ + read_lock(&bond->ptrlock); + oldcurrent = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (oldcurrent == NULL) { /* no active interface at the moment */ + if (bestslave != NULL) { /* last chance to find one ? */ + if (bestslave->link == BOND_LINK_UP) { + printk (KERN_INFO + "%s: making interface %s the new active one.\n", + master->name, bestslave->dev->name); + } else { + printk (KERN_INFO + "%s: making interface %s the new " + "active one %d ms earlier.\n", + master->name, bestslave->dev->name, + (updelay - bestslave->delay) * miimon); + + bestslave->delay= 0; + bestslave->link = BOND_LINK_UP; + } + + if (mode == BOND_MODE_ACTIVEBACKUP) { + bond_set_slave_active_flags(bestslave); + } else { + bestslave->state = BOND_STATE_ACTIVE; + } + write_lock(&bond->ptrlock); + bond->current_slave = bestslave; + write_unlock(&bond->ptrlock); + } else if (slave_died) { + /* print this message only once a slave has just died */ + printk(KERN_INFO + "%s: now running without any active interface !\n", + master->name); } } - return 0; + rtnl_exunlock(); + rtnl_shunlock(); +monitor_out: + read_unlock_irqrestore(&bond->lock, flags); + /* re-arm the timer */ + mod_timer(&bond->mii_timer, jiffies + (miimon * HZ / 1000)); } -/* It is pretty silly, SIOCSIFHWADDR exists to make this. */ +/* + * this function is called regularly to monitor each slave's link + * insuring that traffic is being sent and received. If the adapter + * has been dormant, then an arp is transmitted to generate traffic + */ +static void bond_arp_monitor(struct net_device *master) +{ + bonding_t *bond; + unsigned long flags; + slave_t *slave; + int the_delta_in_ticks = arp_interval * HZ / 1000; + int next_timer = jiffies + (arp_interval * HZ / 1000); + + bond = (struct bonding *) master->priv; + if (master->priv == NULL) { + mod_timer(&bond->arp_timer, next_timer); + return; + } + + read_lock_irqsave(&bond->lock, flags); + + if (!IS_UP(master)) { + mod_timer(&bond->arp_timer, next_timer); + goto monitor_out; + } + + + if (rtnl_shlock_nowait()) { + goto monitor_out; + } + + if (rtnl_exlock_nowait()) { + rtnl_shunlock(); + goto monitor_out; + } + + /* see if any of the previous devices are up now (i.e. they have seen a + * response from an arp request sent by another adapter, since they + * have the same hardware address). + */ + + slave = (slave_t *)bond; + while ((slave = slave->prev) != (slave_t *)bond) { + + read_lock(&bond->ptrlock); + if ( (!(slave->link == BOND_LINK_UP)) + && (slave!= bond->current_slave) ) { + + read_unlock(&bond->ptrlock); + + if ( ((jiffies - slave->dev->trans_start) <= + the_delta_in_ticks) && + ((jiffies - slave->dev->last_rx) <= + the_delta_in_ticks) ) { + + slave->link = BOND_LINK_UP; + write_lock(&bond->ptrlock); + if (bond->current_slave == NULL) { + slave->state = BOND_STATE_ACTIVE; + bond->current_slave = slave; + } + if (slave!=bond->current_slave) { + slave->dev->flags |= IFF_NOARP; + } + write_unlock(&bond->ptrlock); + } else { + if ((jiffies - slave->dev->last_rx) <= + the_delta_in_ticks) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, + arp_target, slave->dev, + my_ip, arp_target_hw_addr, + slave->dev->dev_addr, + arp_target_hw_addr); + } + } + } else + read_unlock(&bond->ptrlock); + } + + read_lock(&bond->ptrlock); + slave = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (slave != 0) { + + /* see if you need to take down the current_slave, since + * you haven't seen an arp in 2*arp_intervals + */ + + if ( ((jiffies - slave->dev->trans_start) >= + (2*the_delta_in_ticks)) || + ((jiffies - slave->dev->last_rx) >= + (2*the_delta_in_ticks)) ) { + + if (slave->link == BOND_LINK_UP) { + slave->link = BOND_LINK_DOWN; + slave->state = BOND_STATE_BACKUP; + /* + * we want to see arps, otherwise we couldn't + * bring the adapter back online... + */ + printk(KERN_INFO "%s: link status definitely " + "down for interface %s, " + "disabling it", + slave->dev->master->name, + slave->dev->name); + /* find a new interface and be verbose */ + change_active_interface(bond); + read_lock(&bond->ptrlock); + slave = bond->current_slave; + read_unlock(&bond->ptrlock); + } + } + + /* + * ok, we know up/down, so just send a arp out if there has + * been no activity for a while + */ + + if (slave != NULL ) { + if ( ((jiffies - slave->dev->trans_start) >= + the_delta_in_ticks) || + ((jiffies - slave->dev->last_rx) >= + the_delta_in_ticks) ) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, + arp_target, slave->dev, + my_ip, arp_target_hw_addr, + slave->dev->dev_addr, + arp_target_hw_addr); + } + } + + } + + /* if we have no current slave.. try sending + * an arp on all of the interfaces + */ + + if (bond->current_slave == NULL) { + slave = (slave_t *)bond; + while ((slave = slave->prev) != (slave_t *)bond) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, arp_target, + slave->dev, my_ip, arp_target_hw_addr, + slave->dev->dev_addr, arp_target_hw_addr); + } + } + + rtnl_exunlock(); + rtnl_shunlock(); + +monitor_out: + read_unlock_irqrestore(&bond->lock, flags); + + /* re-arm the timer */ + mod_timer(&bond->arp_timer, next_timer); +} + +#define isdigit(c) (c >= '0' && c <= '9') +__inline static int atoi( char **s) +{ +int i=0; +while (isdigit(**s)) + i = i*20 + *((*s)++) - '0'; +return i; +} + +#define isascii(c) (((unsigned char)(c))<=0x7f) +#define LF 0xA +#define isspace(c) (c==' ' || c==' '|| c==LF) +typedef uint32_t in_addr_t; + +int +my_inet_aton(char *cp, unsigned long *the_addr) { + static const in_addr_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff }; + in_addr_t val; + char c; + union iaddr { + uint8_t bytes[4]; + uint32_t word; + } res; + uint8_t *pp = res.bytes; + int digit,base; + + res.word = 0; + + c = *cp; + for (;;) { + /* + * Collect number up to ``.''. + * Values are specified as for C: + * 0x=hex, 0=octal, isdigit=decimal. + */ + if (!isdigit(c)) goto ret_0; + val = 0; base = 10; digit = 0; + for (;;) { + if (isdigit(c)) { + val = (val * base) + (c - '0'); + c = *++cp; + digit = 1; + } else { + break; + } + } + if (c == '.') { + /* + * Internet format: + * a.b.c.d + * a.b.c (with c treated as 16 bits) + * a.b (with b treated as 24 bits) + */ + if (pp > res.bytes + 2 || val > 0xff) { + goto ret_0; + } + *pp++ = val; + c = *++cp; + } else + break; + } + /* + * Check for trailing characters. + */ + if (c != '\0' && (!isascii(c) || !isspace(c))) { + goto ret_0; + } + /* + * Did we get a valid digit? + */ + if (!digit) { + goto ret_0; + } + + /* Check whether the last part is in its limits depending on + the number of parts in total. */ + if (val > max[pp - res.bytes]) { + goto ret_0; + } + + if (the_addr!= NULL) { + *the_addr = res.word | htonl (val); + } + + return (1); + +ret_0: + return (0); +} static int bond_sethwaddr(struct net_device *master, struct net_device *slave) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "bond_sethwaddr: master=%x\n", (unsigned int)master); + printk(KERN_CRIT "bond_sethwaddr: slave=%x\n", (unsigned int)slave); + printk(KERN_CRIT "bond_sethwaddr: slave->addr_len=%d\n", slave->addr_len); +#endif memcpy(master->dev_addr, slave->dev_addr, slave->addr_len); return 0; } -static int bond_ioctl(struct net_device *master, struct ifreq *ifr, int cmd) +static int bond_info_query(struct net_device *master, struct ifbond *info) +{ + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave; + + info->bond_mode = mode; + info->num_slaves = 0; + info->miimon = miimon; + + read_lock(&bond->ptrlock); + for (slave = bond->prev; slave!=(slave_t *)bond; slave = slave->prev) { + info->num_slaves++; + } + read_unlock(&bond->ptrlock); + + return 0; +} + +static int bond_slave_info_query(struct net_device *master, + struct ifslave *info) { - struct net_device *slave = __dev_get_by_name(ifr->ifr_slave); + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave; + int cur_ndx = 0; - if (slave == NULL) + if (info->slave_id < 0) { return -ENODEV; + } - switch (cmd) { - case BOND_ENSLAVE: - return bond_enslave(master, slave); - case BOND_RELEASE: - return bond_release(master, slave); - case BOND_SETHWADDR: - return bond_sethwaddr(master, slave); - default: - return -EOPNOTSUPP; + read_lock(&bond->ptrlock); + for (slave = bond->prev; + slave != (slave_t *)bond && cur_ndx < info->slave_id; + slave = slave->prev) { + cur_ndx++; } + if (cur_ndx == info->slave_id) { + strcpy(info->slave_name, slave->dev->name); + info->link = slave->link; + info->state = slave->state; + info->link_failure_count = slave->link_failure_count; + } else { + read_unlock(&bond->ptrlock); + return -ENODEV; + } + read_unlock(&bond->ptrlock); + + return 0; } -static int bond_event(struct notifier_block *this, unsigned long event, void *ptr) +static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd) { - struct net_device *slave = ptr; + struct net_device *slave_dev = NULL; + struct ifbond *u_binfo = NULL, k_binfo; + struct ifslave *u_sinfo = NULL, k_sinfo; + u16 *data = NULL; + int ret = 0; - if (this_bond == NULL || - this_bond == slave || - this_bond != slave->master) - return NOTIFY_DONE; +#ifdef BONDING_DEBUG + printk(KERN_INFO "bond_ioctl: master=%s, cmd=%d\n", + master_dev->name, cmd); +#endif - switch (event) { - case NETDEV_UNREGISTER: - bond_release(this_bond, slave); + switch (cmd) { + case SIOCGMIIPHY: + data = (u16 *)&ifr->ifr_data; + if (data == NULL) { + return -EINVAL; + } + data[0] = 0; + /* Fall Through */ + case SIOCGMIIREG: + /* + * We do this again just in case we were called by SIOCGMIIREG + * instead of SIOCGMIIPHY. + */ + data = (u16 *)&ifr->ifr_data; + if (data == NULL) { + return -EINVAL; + } + if (data[1] == 1) { + data[3] = bond_check_mii_link( + (struct bonding *)master_dev->priv); + } + return 0; + case BOND_INFO_QUERY_OLD: + case SIOCBONDINFOQUERY: + u_binfo = (struct ifbond *)ifr->ifr_data; + if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { + return -EFAULT; + } + ret = bond_info_query(master_dev, &k_binfo); + if (ret == 0) { + if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { + return -EFAULT; + } + } + return ret; + case BOND_SLAVE_INFO_QUERY_OLD: + case SIOCBONDSLAVEINFOQUERY: + u_sinfo = (struct ifslave *)ifr->ifr_data; + if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { + return -EFAULT; + } + ret = bond_slave_info_query(master_dev, &k_sinfo); + if (ret == 0) { + if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { + return -EFAULT; + } + } + return ret; + } + + if (!capable(CAP_NET_ADMIN)) { + return -EPERM; + } + + slave_dev = dev_get_by_name(ifr->ifr_slave); +#ifdef BONDING_DEBUG + printk(KERN_INFO "slave_dev=%x: \n", (unsigned int)slave_dev); + printk(KERN_INFO "slave_dev->name=%s: \n", slave_dev->name); +#endif + switch (cmd) { + case BOND_ENSLAVE_OLD: + case SIOCBONDENSLAVE: + ret = bond_enslave(master_dev, slave_dev); + break; + case BOND_RELEASE_OLD: + case SIOCBONDRELEASE: + ret = bond_release(master_dev, slave_dev); + break; + case BOND_SETHWADDR_OLD: + case SIOCBONDSETHWADDR: + ret = bond_sethwaddr(master_dev, slave_dev); + break; + case BOND_CHANGE_ACTIVE_OLD: + case SIOCBONDCHANGEACTIVE: + if (mode == BOND_MODE_ACTIVEBACKUP) { + ret = bond_change_active(master_dev, slave_dev); + } + else { + ret = -EINVAL; + } break; + default: + ret = -EOPNOTSUPP; } - return NOTIFY_DONE; + if (slave_dev) { + /* + * Clear the module reference that was added by dev_get_by_name + */ + dev_put(slave_dev); + } + return ret; } -static struct notifier_block bond_netdev_notifier={ - notifier_call: bond_event -}; +#ifdef CONFIG_NET_FASTROUTE +static int bond_accept_fastpath(struct net_device *dev, struct dst_entry *dst) +{ + return -1; +} +#endif -static int __init bond_init(struct net_device *dev) +static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev) { - bonding_t *bond; + slave_t *slave, *start_at; + struct bonding *bond = (struct bonding *) dev->priv; + unsigned long flags; - bond = kmalloc(sizeof(struct bonding), GFP_KERNEL); - if (bond == NULL) - return -ENOMEM; + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } - memset(bond, 0, sizeof(struct bonding)); - bond->next = (slave_t*)bond; - bond->prev = (slave_t*)bond; - bond->master = dev; - bond->current_slave = (slave_t*)bond; - dev->priv = bond; + read_lock_irqsave(&bond->lock, flags); - /* Initialize the device structure. */ - dev->hard_start_xmit = bond_xmit; - dev->get_stats = bond_get_stats; - dev->stop = bond_close; - dev->set_multicast_list = bond_set_multicast_list; - dev->do_ioctl = bond_ioctl; + read_lock(&bond->ptrlock); + slave = start_at = bond->current_slave; + read_unlock(&bond->ptrlock); - /* Fill in the fields of the device structure with ethernet-generic - values. */ - ether_setup(dev); - dev->tx_queue_len = 0; - dev->flags |= IFF_MASTER; + if (slave == NULL) { /* we're at the root, get the first slave */ + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); + return 0; + } - this_bond = dev; + do { + if (IS_UP(slave->dev) + && (slave->link == BOND_LINK_UP) + && (slave->state == BOND_STATE_ACTIVE)) { - register_netdevice_notifier(&bond_netdev_notifier); + skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); + write_lock(&bond->ptrlock); + bond->current_slave = slave->next; + write_unlock(&bond->ptrlock); + + read_unlock_irqrestore(&bond->lock, flags); + return 0; + } + } while ((slave = slave->next) != start_at); + + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); return 0; } -static int bond_xmit(struct sk_buff *skb, struct net_device *dev) +/* + * in XOR mode, we determine the output device by performing xor on + * the source and destination hw adresses. If this device is not + * enabled, find the next slave following this xor slave. + */ +static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev) { - bonding_t *bond = dev->priv; slave_t *slave, *start_at; - int pkt_len = skb->len; + struct bonding *bond = (struct bonding *) dev->priv; + unsigned long flags; + struct ethhdr *data = (struct ethhdr *)skb->data; + int slave_no; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } - slave = start_at = bond->current_slave; + read_lock_irqsave(&bond->lock, flags); + read_lock(&bond->ptrlock); + slave = bond->prev; + + /* we're at the root, get the first slave */ + if ((slave == NULL) || (slave->dev == NULL)) { + /* no suitable interface, frame not sent */ + read_unlock(&bond->ptrlock); + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); + return 0; + } + + slave_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % bond->slave_cnt; + + read_unlock(&bond->ptrlock); + + while ( (slave_no > 0) && (slave != (slave_t *)bond) ) { + slave = slave->prev; + slave_no--; + } + start_at = slave; do { - if (slave == (slave_t*)bond) - continue; + if (IS_UP(slave->dev) + && (slave->link == BOND_LINK_UP) + && (slave->state == BOND_STATE_ACTIVE)) { - if (netif_running(slave->dev) && netif_carrier_ok(slave->dev)) { - bond->current_slave = slave->next; skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); - if (dev_queue_xmit(skb)) { - bond->stats.tx_dropped++; - } else { - bond->stats.tx_packets++; - bond->stats.tx_bytes += pkt_len; - } + read_unlock_irqrestore(&bond->lock, flags); return 0; } } while ((slave = slave->next) != start_at); - bond->stats.tx_dropped++; - kfree_skb(skb); + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); + return 0; +} + +/* + * in active-backup mode, we know that bond->current_slave is always valid if + * the bond has a usable interface. + */ +static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) +{ + struct bonding *bond = (struct bonding *) dev->priv; + unsigned long flags; + int ret; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + /* if we are sending arp packets, try to at least + identify our own ip address */ + if ( (arp_interval > 0) && (my_ip==0) && + (skb->protocol == __constant_htons(ETH_P_ARP) ) ) { + char *the_ip = (((char *)skb->data)) + + sizeof(struct ethhdr) + + sizeof(struct arphdr) + + ETH_ALEN; + memcpy(&my_ip, the_ip, 4); + } + + /* if we are sending arp packets and don't know + the target hw address, save it so we don't need + to use a broadcast address */ + if ( (arp_interval > 0) && (arp_target_hw_addr==NULL) && + (skb->protocol == __constant_htons(ETH_P_IP) ) ) { + struct ethhdr *eth_hdr = + (struct ethhdr *) (((char *)skb->data)); + arp_target_hw_addr = kmalloc(ETH_ALEN, GFP_KERNEL); + memcpy(arp_target_hw_addr, eth_hdr->h_dest, ETH_ALEN); + } + + read_lock_irqsave(&bond->lock, flags); + + read_lock(&bond->ptrlock); + if (bond->current_slave != NULL) { /* one usable interface */ + skb->dev = bond->current_slave->dev; + read_unlock(&bond->ptrlock); + skb->priority = 1; + ret = dev_queue_xmit(skb); + read_unlock_irqrestore(&bond->lock, flags); + return 0; + } + else { + read_unlock(&bond->ptrlock); + } + + /* no suitable interface, frame not sent */ +#ifdef BONDING_DEBUG + printk(KERN_INFO "There was no suitable interface, so we don't transmit\n"); +#endif + dev_kfree_skb(skb); + read_unlock_irqrestore(&bond->lock, flags); return 0; } static struct net_device_stats *bond_get_stats(struct net_device *dev) { bonding_t *bond = dev->priv; + struct net_device_stats *stats = bond->stats, *sstats; + slave_t *slave; + unsigned long flags; + + memset(bond->stats, 0, sizeof(struct net_device_stats)); + + read_lock_irqsave(&bond->lock, flags); + + for (slave = bond->prev; slave!=(slave_t *)bond; slave = slave->prev) { + sstats = slave->dev->get_stats(slave->dev); + + stats->rx_packets += sstats->rx_packets; + stats->rx_bytes += sstats->rx_bytes; + stats->rx_errors += sstats->rx_errors; + stats->rx_dropped += sstats->rx_dropped; + + stats->tx_packets += sstats->tx_packets; + stats->tx_bytes += sstats->tx_bytes; + stats->tx_errors += sstats->tx_errors; + stats->tx_dropped += sstats->tx_dropped; + + stats->multicast += sstats->multicast; + stats->collisions += sstats->collisions; + + stats->rx_length_errors += sstats->rx_length_errors; + stats->rx_over_errors += sstats->rx_over_errors; + stats->rx_crc_errors += sstats->rx_crc_errors; + stats->rx_frame_errors += sstats->rx_frame_errors; + stats->rx_fifo_errors += sstats->rx_fifo_errors; + stats->rx_missed_errors += sstats->rx_missed_errors; + + stats->tx_aborted_errors += sstats->tx_aborted_errors; + stats->tx_carrier_errors += sstats->tx_carrier_errors; + stats->tx_fifo_errors += sstats->tx_fifo_errors; + stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; + stats->tx_window_errors += sstats->tx_window_errors; + + } + + read_unlock_irqrestore(&bond->lock, flags); + return stats; +} + +static int bond_get_info(char *buf, char **start, off_t offset, int length) +{ + bonding_t *bond = these_bonds; + int len = 0; + off_t begin = 0; + u16 link; + slave_t *slave = NULL; + + while (bond != NULL) { + /* + * This function locks the mutex, so we can't lock it until + * afterwards + */ + link = bond_check_mii_link(bond); + + read_lock(&bond->ptrlock); + + len += sprintf(buf + len, "Bonding Mode: "); + len += sprintf(buf + len, "%s\n", mode ? "active-backup" : "load balancing"); + + if (mode == BOND_MODE_ACTIVEBACKUP) { + if (bond->current_slave != NULL) { + len += sprintf(buf + len, + "Currently Active Slave: %s\n", + bond->current_slave->dev->name); + } + } + + len += sprintf(buf + len, "MII Status: "); + len += sprintf(buf + len, + link == MII_LINK_READY ? "up\n" : "down\n"); + len += sprintf(buf + len, "MII Polling Interval (ms): %d\n", + miimon); + len += sprintf(buf + len, "Up Delay (ms): %d\n", updelay); + len += sprintf(buf + len, "Down Delay (ms): %d\n", downdelay); + + for (slave = bond->prev; slave != (slave_t *)bond; + slave = slave->prev) { + len += sprintf(buf + len, "\nSlave Interface: %s\n", slave->dev->name); + + len += sprintf(buf + len, "MII Status: "); + + len += sprintf(buf + len, + slave->link == BOND_LINK_UP ? + "up\n" : "down\n"); + len += sprintf(buf + len, "Link Failure Count: %d\n", + slave->link_failure_count); + } + + /* + * Figure out the calcs for the /proc/net interface + */ + *start = buf + (offset - begin); + len -= (offset - begin); + if (len > length) { + len = length; + } + if (len < 0) { + len = 0; + } + + read_unlock(&bond->ptrlock); + + bond = bond->next_bond; + } + return len; +} + +static int bond_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct bonding *this_bond=(struct bonding *)these_bonds; + struct bonding *last_bond; + struct net_device *event_dev = (struct net_device *)ptr; + + /* while there are bonds configured */ + while (this_bond != NULL) { + if (this_bond == event_dev->priv ) { + switch (event) { + case NETDEV_UNREGISTER: + /* + * remove this bond from a linked list of + * bonds + */ + if (this_bond == these_bonds) { + these_bonds = this_bond->next_bond; + } else { + for (last_bond = these_bonds; + last_bond != NULL; + last_bond = last_bond->next_bond) { + if (last_bond->next_bond == + this_bond) { + last_bond->next_bond = + this_bond->next_bond; + } + } + } + return NOTIFY_DONE; + + default: + return NOTIFY_DONE; + } + } + this_bond = this_bond->next_bond; + } + return NOTIFY_DONE; +} + +static struct notifier_block bond_netdev_notifier={ + bond_event, + NULL, + 0 +}; + +static int __init bond_init(struct net_device *dev) +{ + bonding_t *bond, *this_bond, *last_bond; + +#ifdef BONDING_DEBUG + printk (KERN_INFO "Begin bond_init for %s\n", dev->name); +#endif + bond = kmalloc(sizeof(struct bonding), GFP_KERNEL); + if (bond == NULL) { + return -ENOMEM; + } + memset(bond, 0, sizeof(struct bonding)); + + /* initialize rwlocks */ + rwlock_init(&bond->lock); + rwlock_init(&bond->ptrlock); + + bond->stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); + if (bond->stats == NULL) { + kfree(bond); + return -ENOMEM; + } + memset(bond->stats, 0, sizeof(struct net_device_stats)); + + bond->next = bond->prev = (slave_t *)bond; + bond->current_slave = NULL; + bond->device = dev; + dev->priv = bond; + + /* Initialize the device structure. */ + if (mode == BOND_MODE_ACTIVEBACKUP) { + dev->hard_start_xmit = bond_xmit_activebackup; + } else if (mode == BOND_MODE_ROUNDROBIN) { + dev->hard_start_xmit = bond_xmit_roundrobin; + } else if (mode == BOND_MODE_XOR) { + dev->hard_start_xmit = bond_xmit_xor; + } else { + printk(KERN_ERR "Unknown bonding mode %d\n", mode); + kfree(bond->stats); + kfree(bond); + return -EINVAL; + } + + dev->get_stats = bond_get_stats; + dev->open = bond_open; + dev->stop = bond_close; + dev->set_multicast_list = set_multicast_list; + dev->do_ioctl = bond_ioctl; + + /* + * Fill in the fields of the device structure with ethernet-generic + * values. + */ - return &bond->stats; + ether_setup(dev); + + dev->tx_queue_len = 0; + dev->flags |= IFF_MASTER|IFF_MULTICAST; +#ifdef CONFIG_NET_FASTROUTE + dev->accept_fastpath = bond_accept_fastpath; +#endif + + printk(KERN_INFO "%s registered with", dev->name); + if (miimon > 0) { + printk(" MII link monitoring set to %d ms", miimon); + updelay /= miimon; + downdelay /= miimon; + } else { + printk("out MII link monitoring"); + } + printk(", in %s mode.\n",mode?"active-backup":"bonding"); + +#ifdef CONFIG_PROC_FS + bond->bond_proc_dir = proc_mkdir(dev->name, proc_net); + if (bond->bond_proc_dir == NULL) { + printk(KERN_ERR "%s: Cannot init /proc/net/%s/\n", + dev->name, dev->name); + kfree(bond->stats); + kfree(bond); + return -ENOMEM; + } + bond->bond_proc_info_file = + create_proc_info_entry("info", 0, bond->bond_proc_dir, + bond_get_info); + if (bond->bond_proc_info_file == NULL) { + printk(KERN_ERR "%s: Cannot init /proc/net/%s/info\n", + dev->name, dev->name); + remove_proc_entry(dev->name, proc_net); + kfree(bond->stats); + kfree(bond); + return -ENOMEM; + } +#endif /* CONFIG_PROC_FS */ + + if (first_pass == 1) { + these_bonds = bond; + register_netdevice_notifier(&bond_netdev_notifier); + first_pass = 0; + } else { + last_bond = these_bonds; + this_bond = these_bonds->next_bond; + while (this_bond != NULL) { + last_bond = this_bond; + this_bond = this_bond->next_bond; + } + last_bond->next_bond = bond; + } + + return 0; } -static struct net_device dev_bond; +/* +static int __init bond_probe(struct net_device *dev) +{ + bond_init(dev); + return 0; +} + */ static int __init bonding_init(void) { - /* Find a name for this unit */ + int no; int err; - - dev_bond.init = bond_init; - err = dev_alloc_name(&dev_bond,"bond%d"); - if (err<0) - return err; + /* Find a name for this unit */ + static struct net_device *dev_bond = NULL; - SET_MODULE_OWNER(&dev_bond); - if (register_netdev(&dev_bond) != 0) - return -EIO; + dev_bond = dev_bonds = kmalloc(max_bonds*sizeof(struct net_device), + GFP_KERNEL); + if (dev_bond == NULL) { + return -ENOMEM; + } + memset(dev_bonds, 0, max_bonds*sizeof(struct net_device)); + + if (arp_ip_target) { + if (my_inet_aton(arp_ip_target, &arp_target) == 0) { + arp_interval = 0; + } + } + for (no = 0; no < max_bonds; no++) { + dev_bond->init = bond_init; + + err = dev_alloc_name(dev_bond,"bond%d"); + if (err < 0) { + kfree(dev_bonds); + return err; + } + SET_MODULE_OWNER(dev_bond); + if (register_netdev(dev_bond) != 0) { + kfree(dev_bonds); + return -EIO; + } + dev_bond++; + } return 0; } static void __exit bonding_exit(void) { - unregister_netdevice_notifier(&bond_netdev_notifier); + struct net_device *dev_bond = dev_bonds; + struct bonding *bond; + int no; - unregister_netdev(&dev_bond); - - kfree(dev_bond.priv); + unregister_netdevice_notifier(&bond_netdev_notifier); + + for (no = 0; no < max_bonds; no++) { + +#ifdef CONFIG_PROC_FS + bond = (struct bonding *) dev_bond->priv; + remove_proc_entry("info", bond->bond_proc_dir); + remove_proc_entry(dev_bond->name, proc_net); +#endif + unregister_netdev(dev_bond); + kfree(bond->stats); + kfree(dev_bond->priv); + + dev_bond->priv = NULL; + dev_bond++; + } + kfree(dev_bonds); } module_init(bonding_init); |
