diff options
| author | Linus Torvalds <torvalds@home.transmeta.com> | 2003-05-25 08:17:28 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@home.transmeta.com> | 2003-05-25 08:17:28 -0700 |
| commit | bc6d45201d66a4040bab7c8ae88520e9d1f76924 (patch) | |
| tree | 0a009542622411c8cda6a8117cd6e11aa9c0d1c5 | |
| parent | 947fc42fd58707a66cd37277aae2fe9d347647cd (diff) | |
| parent | 4133231a3cf44901df8c50ec071c0f1b8c9e32f6 (diff) | |
Merge bk://kernel.bkbits.net/davem/net-2.5
into home.transmeta.com:/home/torvalds/v2.5/linux
41 files changed, 6177 insertions, 541 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2d87a0293cb5..dd844c9ff2f7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -652,6 +652,9 @@ running once the system is up. opl3sa2= [HW,OSS] Format: <io>,<irq>,<dma>,<dma2>,<mss_io>,<mpu_io>,<ymode>,<loopback>[,<isapnp>,<multiple] + oprofile.timer= [HW] + Use timer interrupt instead of performance counters + optcd= [HW,CD] Format: <io> diff --git a/arch/m68knommu/platform/5282/pit.c b/arch/m68knommu/platform/5282/pit.c new file mode 100644 index 000000000000..d3bbca3fe1e9 --- /dev/null +++ b/arch/m68knommu/platform/5282/pit.c @@ -0,0 +1,86 @@ +/***************************************************************************/ + +/* + * pit.c -- Motorola ColdFire PIT timer. Currently this type of + * hardware timer only exists in the Motorola ColdFire + * 5282 CPU. + * + * Copyright (C) 1999-2003, Greg Ungerer (gerg@snapgear.com) + * Copyright (C) 2001-2003, SnapGear Inc. 
(www.snapgear.com) + * + */ + +/***************************************************************************/ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/param.h> +#include <linux/init.h> +#include <asm/irq.h> +#include <asm/coldfire.h> +#include <asm/mcfpit.h> +#include <asm/mcfsim.h> + +/***************************************************************************/ + +void coldfire_pit_tick(void) +{ + volatile struct mcfpit *tp; + + /* Reset the ColdFire timer */ + tp = (volatile struct mcfpit *) (MCF_IPSBAR + MCFPIT_BASE1); + tp->pcsr |= MCFPIT_PCSR_PIF; +} + +/***************************************************************************/ + +void coldfire_pit_init(void (*handler)(int, void *, struct pt_regs *)) +{ + volatile unsigned char *icrp; + volatile unsigned long *imrp; + volatile struct mcfpit *tp; + + request_irq(64+55, handler, SA_INTERRUPT, "ColdFire Timer", NULL); + + icrp = (volatile unsigned char *) (MCF_IPSBAR + MCFICM_INTC0 + + MCFINTC_ICR0 + MCFINT_PIT1); + *icrp = 0x2b; /* PIT1 with level 5, priority 3 */ + + imrp = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); + *imrp &= ~(1 << (55 - 32)); + + /* Set up PIT timer 1 as poll clock */ + tp = (volatile struct mcfpit *) (MCF_IPSBAR + MCFPIT_BASE1); + tp->pcsr = MCFPIT_PCSR_DISABLE; + + tp->pmr = ((MCF_CLK / 2) / 64) / HZ; + tp->pcsr = MCFPIT_PCSR_EN | MCFPIT_PCSR_PIE | MCFPIT_PCSR_OVW | + MCFPIT_PCSR_RLD | MCFPIT_PCSR_CLK64; +} + +/***************************************************************************/ + +unsigned long coldfire_pit_offset(void) +{ + volatile struct mcfpit *tp; + volatile unsigned long *ipr; + unsigned long pmr, pcntr, offset; + + tp = (volatile struct mcfpit *) (MCF_IPSBAR + MCFPIT_BASE1); + ipr = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IPRH); + + pmr = tp->pmr; + pcntr = tp->pcntr; + + /* + * If we are still in the first half of the upcount and a + * timer interupt is pending, 
then add on a ticks worth of time. + */ + offset = ((pcntr * (1000000 / HZ)) / pmr); + if ((offset < (1000000 / HZ / 2)) && (*ipr & (1 << (55 - 32)))) + offset += 1000000 / HZ; + return offset; +} + +/***************************************************************************/ diff --git a/arch/m68knommu/platform/5307/Makefile b/arch/m68knommu/platform/5307/Makefile index cf63cb941ab5..b4d5e43e0fd2 100644 --- a/arch/m68knommu/platform/5307/Makefile +++ b/arch/m68knommu/platform/5307/Makefile @@ -16,8 +16,13 @@ ifdef CONFIG_FULLDEBUG AFLAGS += -DDEBUGGER_COMPATIBLE_CACHE=1 endif -obj-$(CONFIG_COLDFIRE) += entry.o timers.o vectors.o -obj-$(CONFIG_M5307) += config.o +obj-$(CONFIG_COLDFIRE) += entry.o vectors.o +obj-$(CONFIG_M5206) += timers.o +obj-$(CONFIG_M5206e) += timers.o +obj-$(CONFIG_M5249) += timers.o +obj-$(CONFIG_M5272) += timers.o +obj-$(CONFIG_M5307) += config.o timers.o +obj-$(CONFIG_M5407) += timers.o ifeq ($(CONFIG_M5307),y) extra-y := $(BOARD)/crt0_$(MODEL).o diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c index 3112d5deac88..de4ebe6d6504 100644 --- a/drivers/net/3c505.c +++ b/drivers/net/3c505.c @@ -312,7 +312,7 @@ inline static void adapter_reset(struct net_device *dev) outb_control(orig_hcr, dev); if (!start_receive(dev, &adapter->tx_pcb)) - printk("%s: start receive command failed \n", dev->name); + printk(KERN_ERR "%s: start receive command failed \n", dev->name); } /* Check to make sure that a DMA transfer hasn't timed out. This should @@ -324,7 +324,7 @@ static inline void check_3c505_dma(struct net_device *dev) elp_device *adapter = dev->priv; if (adapter->dmaing && time_after(jiffies, adapter->current_dma.start_time + 10)) { unsigned long flags, f; - printk("%s: DMA %s timed out, %d bytes left\n", dev->name, adapter->current_dma.direction ? "download" : "upload", get_dma_residue(dev->dma)); + printk(KERN_ERR "%s: DMA %s timed out, %d bytes left\n", dev->name, adapter->current_dma.direction ? 
"download" : "upload", get_dma_residue(dev->dma)); spin_lock_irqsave(&adapter->lock, flags); adapter->dmaing = 0; adapter->busy = 0; @@ -460,7 +460,7 @@ static int send_pcb(struct net_device *dev, pcb_struct * pcb) } if (elp_debug >= 1) - printk("%s: timeout waiting for PCB acknowledge (status %02x)\n", dev->name, inb_status(dev->base_addr)); + printk(KERN_DEBUG "%s: timeout waiting for PCB acknowledge (status %02x)\n", dev->name, inb_status(dev->base_addr)); sti_abort: spin_unlock_irqrestore(&adapter->lock, flags); @@ -509,7 +509,7 @@ static int receive_pcb(struct net_device *dev, pcb_struct * pcb) while (((stat = get_status(dev->base_addr)) & ACRF) == 0 && time_before(jiffies, timeout)); if (time_after_eq(jiffies, timeout)) { TIMEOUT_MSG(__LINE__); - printk("%s: status %02x\n", dev->name, stat); + printk(KERN_INFO "%s: status %02x\n", dev->name, stat); return FALSE; } pcb->length = inb_command(dev->base_addr); @@ -540,7 +540,7 @@ static int receive_pcb(struct net_device *dev, pcb_struct * pcb) /* safety check total length vs data length */ if (total_length != (pcb->length + 2)) { if (elp_debug >= 2) - printk("%s: mangled PCB received\n", dev->name); + printk(KERN_WARNING "%s: mangled PCB received\n", dev->name); set_hsf(dev, HSF_PCB_NAK); return FALSE; } @@ -549,7 +549,7 @@ static int receive_pcb(struct net_device *dev, pcb_struct * pcb) if (test_and_set_bit(0, (void *) &adapter->busy)) { if (backlog_next(adapter->rx_backlog.in) == adapter->rx_backlog.out) { set_hsf(dev, HSF_PCB_NAK); - printk("%s: PCB rejected, transfer in progress and backlog full\n", dev->name); + printk(KERN_WARNING "%s: PCB rejected, transfer in progress and backlog full\n", dev->name); pcb->command = 0; return TRUE; } else { @@ -574,7 +574,7 @@ static int start_receive(struct net_device *dev, pcb_struct * tx_pcb) elp_device *adapter = dev->priv; if (elp_debug >= 3) - printk("%s: restarting receiver\n", dev->name); + printk(KERN_DEBUG "%s: restarting receiver\n", dev->name); tx_pcb->command 
= CMD_RECEIVE_PACKET; tx_pcb->length = sizeof(struct Rcv_pkt); tx_pcb->data.rcv_pkt.buf_seg @@ -626,7 +626,7 @@ static void receive_packet(struct net_device *dev, int len) /* if this happens, we die */ if (test_and_set_bit(0, (void *) &adapter->dmaing)) - printk("%s: rx blocked, DMA in progress, dir %d\n", dev->name, adapter->current_dma.direction); + printk(KERN_ERR "%s: rx blocked, DMA in progress, dir %d\n", dev->name, adapter->current_dma.direction); skb->dev = dev; adapter->current_dma.direction = 0; @@ -646,7 +646,7 @@ static void receive_packet(struct net_device *dev, int len) release_dma_lock(flags); if (elp_debug >= 3) { - printk("%s: rx DMA transfer started\n", dev->name); + printk(KERN_DEBUG "%s: rx DMA transfer started\n", dev->name); } if (adapter->rx_active) @@ -682,10 +682,10 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) */ if (inb_status(dev->base_addr) & DONE) { if (!adapter->dmaing) { - printk("%s: phantom DMA completed\n", dev->name); + printk(KERN_WARNING "%s: phantom DMA completed\n", dev->name); } if (elp_debug >= 3) { - printk("%s: %s DMA complete, status %02x\n", dev->name, adapter->current_dma.direction ? "tx" : "rx", inb_status(dev->base_addr)); + printk(KERN_DEBUG "%s: %s DMA complete, status %02x\n", dev->name, adapter->current_dma.direction ? 
"tx" : "rx", inb_status(dev->base_addr)); } outb_control(adapter->hcr_val & ~(DMAE | TCEN | DIR), dev); @@ -709,7 +709,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) int t = adapter->rx_backlog.length[adapter->rx_backlog.out]; adapter->rx_backlog.out = backlog_next(adapter->rx_backlog.out); if (elp_debug >= 2) - printk("%s: receiving backlogged packet (%d)\n", dev->name, t); + printk(KERN_DEBUG "%s: receiving backlogged packet (%d)\n", dev->name, t); receive_packet(dev, t); } else { adapter->busy = 0; @@ -743,18 +743,18 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) printk(KERN_ERR "%s: interrupt - packet not received correctly\n", dev->name); } else { if (elp_debug >= 3) { - printk("%s: interrupt - packet received of length %i (%i)\n", dev->name, len, dlen); + printk(KERN_DEBUG "%s: interrupt - packet received of length %i (%i)\n", dev->name, len, dlen); } if (adapter->irx_pcb.command == 0xff) { if (elp_debug >= 2) - printk("%s: adding packet to backlog (len = %d)\n", dev->name, dlen); + printk(KERN_DEBUG "%s: adding packet to backlog (len = %d)\n", dev->name, dlen); adapter->rx_backlog.length[adapter->rx_backlog.in] = dlen; adapter->rx_backlog.in = backlog_next(adapter->rx_backlog.in); } else { receive_packet(dev, dlen); } if (elp_debug >= 3) - printk("%s: packet received\n", dev->name); + printk(KERN_DEBUG "%s: packet received\n", dev->name); } break; @@ -764,7 +764,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) case CMD_CONFIGURE_82586_RESPONSE: adapter->got[CMD_CONFIGURE_82586] = 1; if (elp_debug >= 3) - printk("%s: interrupt - configure response received\n", dev->name); + printk(KERN_DEBUG "%s: interrupt - configure response received\n", dev->name); break; /* @@ -773,7 +773,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) case CMD_CONFIGURE_ADAPTER_RESPONSE: adapter->got[CMD_CONFIGURE_ADAPTER_MEMORY] = 1; if 
(elp_debug >= 3) - printk("%s: Adapter memory configuration %s.\n", dev->name, + printk(KERN_DEBUG "%s: Adapter memory configuration %s.\n", dev->name, adapter->irx_pcb.data.failed ? "failed" : "succeeded"); break; @@ -783,7 +783,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) case CMD_LOAD_MULTICAST_RESPONSE: adapter->got[CMD_LOAD_MULTICAST_LIST] = 1; if (elp_debug >= 3) - printk("%s: Multicast address list loading %s.\n", dev->name, + printk(KERN_DEBUG "%s: Multicast address list loading %s.\n", dev->name, adapter->irx_pcb.data.failed ? "failed" : "succeeded"); break; @@ -793,7 +793,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) case CMD_SET_ADDRESS_RESPONSE: adapter->got[CMD_SET_STATION_ADDRESS] = 1; if (elp_debug >= 3) - printk("%s: Ethernet address setting %s.\n", dev->name, + printk(KERN_DEBUG "%s: Ethernet address setting %s.\n", dev->name, adapter->irx_pcb.data.failed ? "failed" : "succeeded"); break; @@ -810,7 +810,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) adapter->stats.rx_over_errors += adapter->irx_pcb.data.netstat.err_res; adapter->got[CMD_NETWORK_STATISTICS] = 1; if (elp_debug >= 3) - printk("%s: interrupt - statistics response received\n", dev->name); + printk(KERN_DEBUG "%s: interrupt - statistics response received\n", dev->name); break; /* @@ -818,7 +818,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) */ case CMD_TRANSMIT_PACKET_COMPLETE: if (elp_debug >= 3) - printk("%s: interrupt - packet sent\n", dev->name); + printk(KERN_DEBUG "%s: interrupt - packet sent\n", dev->name); if (!netif_running(dev)) break; switch (adapter->irx_pcb.data.xmit_resp.c_stat) { @@ -842,7 +842,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) break; } } else { - printk("%s: failed to read PCB on interrupt\n", dev->name); + printk(KERN_WARNING "%s: failed to read PCB on 
interrupt\n", dev->name); adapter_reset(dev); } } @@ -873,7 +873,7 @@ static int elp_open(struct net_device *dev) adapter = dev->priv; if (elp_debug >= 3) - printk("%s: request to open device\n", dev->name); + printk(KERN_DEBUG "%s: request to open device\n", dev->name); /* * make sure we actually found the device @@ -946,7 +946,7 @@ static int elp_open(struct net_device *dev) adapter->tx_pcb.length = sizeof(struct Memconf); adapter->got[CMD_CONFIGURE_ADAPTER_MEMORY] = 0; if (!send_pcb(dev, &adapter->tx_pcb)) - printk("%s: couldn't send memory configuration command\n", dev->name); + printk(KERN_ERR "%s: couldn't send memory configuration command\n", dev->name); else { unsigned long timeout = jiffies + TIMEOUT; while (adapter->got[CMD_CONFIGURE_ADAPTER_MEMORY] == 0 && time_before(jiffies, timeout)); @@ -959,13 +959,13 @@ static int elp_open(struct net_device *dev) * configure adapter to receive broadcast messages and wait for response */ if (elp_debug >= 3) - printk("%s: sending 82586 configure command\n", dev->name); + printk(KERN_DEBUG "%s: sending 82586 configure command\n", dev->name); adapter->tx_pcb.command = CMD_CONFIGURE_82586; adapter->tx_pcb.data.configure = NO_LOOPBACK | RECV_BROAD; adapter->tx_pcb.length = 2; adapter->got[CMD_CONFIGURE_82586] = 0; if (!send_pcb(dev, &adapter->tx_pcb)) - printk("%s: couldn't send 82586 configure command\n", dev->name); + printk(KERN_ERR "%s: couldn't send 82586 configure command\n", dev->name); else { unsigned long timeout = jiffies + TIMEOUT; while (adapter->got[CMD_CONFIGURE_82586] == 0 && time_before(jiffies, timeout)); @@ -981,7 +981,7 @@ static int elp_open(struct net_device *dev) */ prime_rx(dev); if (elp_debug >= 3) - printk("%s: %d receive PCBs active\n", dev->name, adapter->rx_active); + printk(KERN_DEBUG "%s: %d receive PCBs active\n", dev->name, adapter->rx_active); /* * device is now officially open! 
@@ -1011,7 +1011,7 @@ static int send_packet(struct net_device *dev, struct sk_buff *skb) if (test_and_set_bit(0, (void *) &adapter->busy)) { if (elp_debug >= 2) - printk("%s: transmit blocked\n", dev->name); + printk(KERN_DEBUG "%s: transmit blocked\n", dev->name); return FALSE; } @@ -1033,7 +1033,7 @@ static int send_packet(struct net_device *dev, struct sk_buff *skb) } /* if this happens, we die */ if (test_and_set_bit(0, (void *) &adapter->dmaing)) - printk("%s: tx: DMA %d in progress\n", dev->name, adapter->current_dma.direction); + printk(KERN_DEBUG "%s: tx: DMA %d in progress\n", dev->name, adapter->current_dma.direction); adapter->current_dma.direction = 1; adapter->current_dma.start_time = jiffies; @@ -1059,7 +1059,7 @@ static int send_packet(struct net_device *dev, struct sk_buff *skb) release_dma_lock(flags); if (elp_debug >= 3) - printk("%s: DMA transfer started\n", dev->name); + printk(KERN_DEBUG "%s: DMA transfer started\n", dev->name); return TRUE; } @@ -1076,7 +1076,7 @@ static void elp_timeout(struct net_device *dev) stat = inb_status(dev->base_addr); printk(KERN_WARNING "%s: transmit timed out, lost %s?\n", dev->name, (stat & ACRF) ? 
"interrupt" : "command"); if (elp_debug >= 1) - printk("%s: status %#02x\n", dev->name, stat); + printk(KERN_DEBUG "%s: status %#02x\n", dev->name, stat); dev->trans_start = jiffies; adapter->stats.tx_dropped++; netif_wake_queue(dev); @@ -1098,7 +1098,7 @@ static int elp_start_xmit(struct sk_buff *skb, struct net_device *dev) check_3c505_dma(dev); if (elp_debug >= 3) - printk("%s: request to send packet of length %d\n", dev->name, (int) skb->len); + printk(KERN_DEBUG "%s: request to send packet of length %d\n", dev->name, (int) skb->len); netif_stop_queue(dev); @@ -1107,13 +1107,13 @@ static int elp_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if (!send_packet(dev, skb)) { if (elp_debug >= 2) { - printk("%s: failed to transmit packet\n", dev->name); + printk(KERN_DEBUG "%s: failed to transmit packet\n", dev->name); } spin_unlock_irqrestore(&adapter->lock, flags); return 1; } if (elp_debug >= 3) - printk("%s: packet of length %d sent\n", dev->name, (int) skb->len); + printk(KERN_DEBUG "%s: packet of length %d sent\n", dev->name, (int) skb->len); /* * start the transmit timeout @@ -1137,7 +1137,7 @@ static struct net_device_stats *elp_get_stats(struct net_device *dev) elp_device *adapter = (elp_device *) dev->priv; if (elp_debug >= 3) - printk("%s: request for stats\n", dev->name); + printk(KERN_DEBUG "%s: request for stats\n", dev->name); /* If the device is closed, just return the latest stats we have, - we cannot ask from the adapter without interrupts */ @@ -1149,7 +1149,7 @@ static struct net_device_stats *elp_get_stats(struct net_device *dev) adapter->tx_pcb.length = 0; adapter->got[CMD_NETWORK_STATISTICS] = 0; if (!send_pcb(dev, &adapter->tx_pcb)) - printk("%s: couldn't send get statistics command\n", dev->name); + printk(KERN_ERR "%s: couldn't send get statistics command\n", dev->name); else { unsigned long timeout = jiffies + TIMEOUT; while (adapter->got[CMD_NETWORK_STATISTICS] == 0 && time_before(jiffies, timeout)); @@ -1257,7 +1257,7 @@ 
static int elp_close(struct net_device *dev) adapter = dev->priv; if (elp_debug >= 3) - printk("%s: request to close device\n", dev->name); + printk(KERN_DEBUG "%s: request to close device\n", dev->name); netif_stop_queue(dev); @@ -1301,7 +1301,7 @@ static void elp_set_mc_list(struct net_device *dev) unsigned long flags; if (elp_debug >= 3) - printk("%s: request to set multicast list\n", dev->name); + printk(KERN_DEBUG "%s: request to set multicast list\n", dev->name); spin_lock_irqsave(&adapter->lock, flags); @@ -1316,7 +1316,7 @@ static void elp_set_mc_list(struct net_device *dev) } adapter->got[CMD_LOAD_MULTICAST_LIST] = 0; if (!send_pcb(dev, &adapter->tx_pcb)) - printk("%s: couldn't send set_multicast command\n", dev->name); + printk(KERN_ERR "%s: couldn't send set_multicast command\n", dev->name); else { unsigned long timeout = jiffies + TIMEOUT; while (adapter->got[CMD_LOAD_MULTICAST_LIST] == 0 && time_before(jiffies, timeout)); @@ -1335,14 +1335,14 @@ static void elp_set_mc_list(struct net_device *dev) * and wait for response */ if (elp_debug >= 3) - printk("%s: sending 82586 configure command\n", dev->name); + printk(KERN_DEBUG "%s: sending 82586 configure command\n", dev->name); adapter->tx_pcb.command = CMD_CONFIGURE_82586; adapter->tx_pcb.length = 2; adapter->got[CMD_CONFIGURE_82586] = 0; if (!send_pcb(dev, &adapter->tx_pcb)) { spin_unlock_irqrestore(&adapter->lock, flags); - printk("%s: couldn't send 82586 configure command\n", dev->name); + printk(KERN_ERR "%s: couldn't send 82586 configure command\n", dev->name); } else { unsigned long timeout = jiffies + TIMEOUT; @@ -1524,9 +1524,9 @@ int __init elplus_probe(struct net_device *dev) */ adapter = (elp_device *) (dev->priv = kmalloc(sizeof(elp_device), GFP_KERNEL)); if (adapter == NULL) { - printk("%s: out of memory\n", dev->name); + printk(KERN_ERR "%s: out of memory\n", dev->name); return -ENODEV; - } + } adapter->send_pcb_semaphore = 0; @@ -1549,7 +1549,7 @@ int __init elplus_probe(struct net_device 
*dev) /* Nope, it's ignoring the command register. This means that * either it's still booting up, or it's died. */ - printk("%s: command register wouldn't drain, ", dev->name); + printk(KERN_ERR "%s: command register wouldn't drain, ", dev->name); if ((inb_status(dev->base_addr) & 7) == 3) { /* If the adapter status is 3, it *could* still be booting. * Give it the benefit of the doubt for 10 seconds. @@ -1558,7 +1558,7 @@ int __init elplus_probe(struct net_device *dev) timeout = jiffies + 10*HZ; while (time_before(jiffies, timeout) && (inb_status(dev->base_addr) & 7)); if (inb_status(dev->base_addr) & 7) { - printk("%s: 3c505 failed to start\n", dev->name); + printk(KERN_ERR "%s: 3c505 failed to start\n", dev->name); } else { okay = 1; /* It started */ } @@ -1579,18 +1579,18 @@ int __init elplus_probe(struct net_device *dev) adapter->tx_pcb.length = 0; cookie = probe_irq_on(); if (!send_pcb(dev, &adapter->tx_pcb)) { - printk("%s: could not send first PCB\n", dev->name); + printk(KERN_ERR "%s: could not send first PCB\n", dev->name); probe_irq_off(cookie); continue; } if (!receive_pcb(dev, &adapter->rx_pcb)) { - printk("%s: could not read first PCB\n", dev->name); + printk(KERN_ERR "%s: could not read first PCB\n", dev->name); probe_irq_off(cookie); continue; } if ((adapter->rx_pcb.command != CMD_ADDRESS_RESPONSE) || (adapter->rx_pcb.length != 6)) { - printk("%s: first PCB wrong (%d, %d)\n", dev->name, adapter->rx_pcb.command, adapter->rx_pcb.length); + printk(KERN_ERR "%s: first PCB wrong (%d, %d)\n", dev->name, adapter->rx_pcb.command, adapter->rx_pcb.length); probe_irq_off(cookie); continue; } @@ -1603,7 +1603,7 @@ int __init elplus_probe(struct net_device *dev) outb_control(adapter->hcr_val | FLSH | ATTN, dev); outb_control(adapter->hcr_val & ~(FLSH | ATTN), dev); } - printk("%s: failed to initialise 3c505\n", dev->name); + printk(KERN_ERR "%s: failed to initialise 3c505\n", dev->name); release_region(dev->base_addr, ELP_IO_EXTENT); return -ENODEV; @@ -1611,21 
+1611,21 @@ int __init elplus_probe(struct net_device *dev) if (dev->irq) { /* Is there a preset IRQ? */ int rpt = probe_irq_off(cookie); if (dev->irq != rpt) { - printk("%s: warning, irq %d configured but %d detected\n", dev->name, dev->irq, rpt); + printk(KERN_WARNING "%s: warning, irq %d configured but %d detected\n", dev->name, dev->irq, rpt); } /* if dev->irq == probe_irq_off(cookie), all is well */ } else /* No preset IRQ; just use what we can detect */ dev->irq = probe_irq_off(cookie); switch (dev->irq) { /* Legal, sane? */ case 0: - printk("%s: IRQ probe failed: check 3c505 jumpers.\n", + printk(KERN_ERR "%s: IRQ probe failed: check 3c505 jumpers.\n", dev->name); return -ENODEV; case 1: case 6: case 8: case 13: - printk("%s: Impossible IRQ %d reported by probe_irq_off().\n", + printk(KERN_ERR "%s: Impossible IRQ %d reported by probe_irq_off().\n", dev->name, dev->irq); return -ENODEV; } @@ -1655,7 +1655,7 @@ int __init elplus_probe(struct net_device *dev) /* * print remainder of startup message */ - printk("%s: 3c505 at %#lx, irq %d, dma %d, ", + printk(KERN_INFO "%s: 3c505 at %#lx, irq %d, dma %d, ", dev->name, dev->base_addr, dev->irq, dev->dma); printk("addr %02x:%02x:%02x:%02x:%02x:%02x, ", dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2], @@ -1690,10 +1690,10 @@ int __init elplus_probe(struct net_device *dev) !receive_pcb(dev, &adapter->rx_pcb) || (adapter->rx_pcb.command != CMD_CONFIGURE_ADAPTER_RESPONSE) || (adapter->rx_pcb.length != 2)) { - printk("%s: could not configure adapter memory\n", dev->name); + printk(KERN_ERR "%s: could not configure adapter memory\n", dev->name); } if (adapter->rx_pcb.data.configure) { - printk("%s: adapter configuration failed\n", dev->name); + printk(KERN_ERR "%s: adapter configuration failed\n", dev->name); } /* diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5dbd74649763..2af2547f61f5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1870,7 +1870,7 @@ config 68360_ENET config FEC bool 
"FEC ethernet controller (of ColdFire 5272)" - depends on M5272 + depends on M5272 || M5282 help Say Y here if you want to use the built-in 10/100 Fast ethernet controller on the Motorola ColdFire 5272 processor. diff --git a/drivers/net/Makefile b/drivers/net/Makefile index c38b92a4693f..cce3fa99684f 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -11,6 +11,7 @@ endif obj-$(CONFIG_E100) += e100/ obj-$(CONFIG_E1000) += e1000/ obj-$(CONFIG_IXGB) += ixgb/ +obj-$(CONFIG_BONDING) += bonding/ # # link order important here @@ -108,7 +109,6 @@ ifeq ($(CONFIG_SLIP_COMPRESSED),y) endif obj-$(CONFIG_DUMMY) += dummy.o -obj-$(CONFIG_BONDING) += bonding.o obj-$(CONFIG_DE600) += de600.o obj-$(CONFIG_DE620) += de620.o obj-$(CONFIG_AT1500) += lance.o diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index 07349d47c565..c8ae03f8563b 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -675,37 +675,24 @@ au1000_probe1(struct net_device *dev, long ioaddr, int irq, int port_num) char *pmac, *argptr; char ethaddr[6]; - if (!request_region(ioaddr, MAC_IOSIZE, "Au1000 ENET")) { + if (!request_region(ioaddr, MAC_IOSIZE, "Au1000 ENET")) return -ENODEV; - } if (version_printed++ == 0) printk(version); + if (!dev) + dev = init_etherdev(NULL, sizeof(struct au1000_private)); + if (!dev) { - dev = init_etherdev(0, sizeof(struct au1000_private)); - } - if (!dev) { - printk (KERN_ERR "au1000 eth: init_etherdev failed\n"); - return -ENODEV; + printk (KERN_ERR "au1000 eth: init_etherdev failed\n"); + release_region(ioaddr, MAC_IOSIZE); + return -ENODEV; } printk("%s: Au1xxx ethernet found at 0x%lx, irq %d\n", dev->name, ioaddr, irq); - /* Initialize our private structure */ - if (dev->priv == NULL) { - aup = (struct au1000_private *) - kmalloc(sizeof(*aup), GFP_KERNEL); - if (aup == NULL) { - retval = -ENOMEM; - goto free_region; - } - dev->priv = aup; - } - aup = dev->priv; - memset(aup, 0, sizeof(*aup)); - /* Allocate the data buffers */ 
aup->vaddr = (u32)dma_alloc(MAX_BUF_SIZE * @@ -834,8 +821,6 @@ free_region: if (aup->vaddr) dma_free((void *)aup->vaddr, MAX_BUF_SIZE * (NUM_TX_BUFFS+NUM_RX_BUFFS)); - if (dev->priv != NULL) - kfree(dev->priv); printk(KERN_ERR "%s: au1000_probe1 failed. Returns %d\n", dev->name, retval); kfree(dev); @@ -1003,15 +988,15 @@ static int au1000_close(struct net_device *dev) spin_lock_irqsave(&aup->lock, flags); /* stop the device */ - if (netif_device_present(dev)) { + if (netif_device_present(dev)) netif_stop_queue(dev); - } /* disable the interrupt */ free_irq(dev->irq, dev); spin_unlock_irqrestore(&aup->lock, flags); reset_mac(dev); + kfree(dev); MOD_DEC_USE_COUNT; return 0; } diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c index 6804c7e22020..20dec2a7defd 100644 --- a/drivers/net/bmac.c +++ b/drivers/net/bmac.c @@ -1407,7 +1407,6 @@ static void __init bmac_probe1(struct device_node *bmac, int is_bmac_plus) skb_queue_head_init(bp->queue); init_timer(&bp->tx_timeout); - /* bp->timeout_active = 0; */ ret = request_irq(dev->irq, bmac_misc_intr, 0, "BMAC-misc", dev); if (ret) { diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile new file mode 100644 index 000000000000..cf50384b469e --- /dev/null +++ b/drivers/net/bonding/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the Ethernet Bonding driver +# + +obj-$(CONFIG_BONDING) += bonding.o + +bonding-objs := bond_main.o bond_3ad.o bond_alb.o + diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c new file mode 100644 index 000000000000..c82910b8c382 --- /dev/null +++ b/drivers/net/bonding/bond_3ad.c @@ -0,0 +1,2476 @@ +/* + * Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * + * Changes: + * + * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Amir Noam <amir.noam at intel dot com> + * - Added support for lacp_rate module param. + * + * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> + * - Based on discussion on mailing list, changed locking scheme + * to use lock/unlock or lock_bh/unlock_bh appropriately instead + * of lock_irqsave/unlock_irqrestore. The new scheme helps exposing + * hidden bugs and solves system hangs that occurred due to the fact + * that holding lock_irqsave doesn't prevent softirqs from running. + * This also increases total throughput since interrupts are not + * blocked on each transmitted packets or monitor timeout. + * + * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> + * - Renamed bond_3ad_link_status_changed() to + * bond_3ad_handle_link_change() for compatibility with TLB. 
+ */ + +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <linux/netdevice.h> +#include <linux/spinlock.h> +#include <linux/ethtool.h> +#include <linux/if_bonding.h> +#include "bonding.h" +#include "bond_3ad.h" + +// General definitions +#define AD_SHORT_TIMEOUT 1 +#define AD_LONG_TIMEOUT 0 +#define AD_STANDBY 0x2 +#define AD_MAX_TX_IN_SECOND 3 +#define AD_COLLECTOR_MAX_DELAY 0 + +// Timer definitions(43.4.4 in the 802.3ad standard) +#define AD_FAST_PERIODIC_TIME 1 +#define AD_SLOW_PERIODIC_TIME 30 +#define AD_SHORT_TIMEOUT_TIME (3*AD_FAST_PERIODIC_TIME) +#define AD_LONG_TIMEOUT_TIME (3*AD_SLOW_PERIODIC_TIME) +#define AD_CHURN_DETECTION_TIME 60 +#define AD_AGGREGATE_WAIT_TIME 2 + +// Port state definitions(43.4.2.2 in the 802.3ad standard) +#define AD_STATE_LACP_ACTIVITY 0x1 +#define AD_STATE_LACP_TIMEOUT 0x2 +#define AD_STATE_AGGREGATION 0x4 +#define AD_STATE_SYNCHRONIZATION 0x8 +#define AD_STATE_COLLECTING 0x10 +#define AD_STATE_DISTRIBUTING 0x20 +#define AD_STATE_DEFAULTED 0x40 +#define AD_STATE_EXPIRED 0x80 + +// Port Variables definitions used by the State Machines(43.4.7 in the 802.3ad standard) +#define AD_PORT_BEGIN 0x1 +#define AD_PORT_LACP_ENABLED 0x2 +#define AD_PORT_ACTOR_CHURN 0x4 +#define AD_PORT_PARTNER_CHURN 0x8 +#define AD_PORT_READY 0x10 +#define AD_PORT_READY_N 0x20 +#define AD_PORT_MATCHED 0x40 +#define AD_PORT_STANDBY 0x80 +#define AD_PORT_SELECTED 0x100 +#define AD_PORT_MOVED 0x200 + +// Port Key definitions +// key is determined according to the link speed, duplex and +// user key(which is yet not supported) +// ------------------------------------------------------------ +// Port key : | User key | Speed |Duplex| +// ------------------------------------------------------------ +// 16 6 1 0 +#define AD_DUPLEX_KEY_BITS 0x1 +#define AD_SPEED_KEY_BITS 0x3E +#define AD_USER_KEY_BITS 0xFFC0 + +//dalloun +#define AD_LINK_SPEED_BITMASK_1MBPS 0x1 +#define AD_LINK_SPEED_BITMASK_10MBPS 0x2 +#define AD_LINK_SPEED_BITMASK_100MBPS 0x4 
+#define AD_LINK_SPEED_BITMASK_1000MBPS 0x8 +//endalloun + +// compare MAC addresses +#define MAC_ADDRESS_COMPARE(A, B) memcmp(A, B, ETH_ALEN) + +static struct mac_addr null_mac_addr = {{0, 0, 0, 0, 0, 0}}; +static u16 ad_ticks_per_sec; + +// ================= 3AD api to bonding and kernel code ================== +static u16 __get_link_speed(struct port *port); +static u8 __get_duplex(struct port *port); +static inline void __initialize_port_locks(struct port *port); +static inline void __deinitialize_port_locks(struct port *port); +//conversions +static void __ntohs_lacpdu(struct lacpdu *lacpdu); +static u16 __ad_timer_to_ticks(u16 timer_type, u16 Par); + + +// ================= ad code helper functions ================== +//needed by ad_rx_machine(...) +static void __record_pdu(struct lacpdu *lacpdu, struct port *port); +static void __record_default(struct port *port); +static void __update_selected(struct lacpdu *lacpdu, struct port *port); +static void __update_default_selected(struct port *port); +static void __choose_matched(struct lacpdu *lacpdu, struct port *port); +static void __update_ntt(struct lacpdu *lacpdu, struct port *port); + +//needed for ad_mux_machine(..) +static void __attach_bond_to_agg(struct port *port); +static void __detach_bond_from_agg(struct port *port); +static int __agg_ports_are_ready(struct aggregator *aggregator); +static void __set_agg_ports_ready(struct aggregator *aggregator, int val); + +//needed for ad_agg_selection_logic(...) 
+static u32 __get_agg_bandwidth(struct aggregator *aggregator); +static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + +// ================= main 802.3ad protocol functions ================== +static int ad_lacpdu_send(struct port *port); +static int ad_marker_send(struct port *port, struct marker *marker); +static void ad_mux_machine(struct port *port); +static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); +static void ad_tx_machine(struct port *port); +static void ad_periodic_machine(struct port *port); +static void ad_port_selection_logic(struct port *port); +static void ad_agg_selection_logic(struct aggregator *aggregator); +static void ad_clear_agg(struct aggregator *aggregator); +static void ad_initialize_agg(struct aggregator *aggregator); +static void ad_initialize_port(struct port *port, int lacp_fast); +static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); +static void ad_enable_collecting_distributing(struct port *port); +static void ad_disable_collecting_distributing(struct port *port); +static void ad_marker_info_received(struct marker *marker_info, struct port *port); +static void ad_marker_response_received(struct marker *marker, struct port *port); + + +///////////////////////////////////////////////////////////////////////////////// +// ================= api to bonding and kernel code ================== +///////////////////////////////////////////////////////////////////////////////// + +/** + * __get_bond_by_port - get the port's bonding struct + * @port: the port we're looking at + * + * Return @port's bonding struct, or %NULL if it can't be found. + */ +static inline struct bonding *__get_bond_by_port(struct port *port) +{ + if (port->slave == NULL) { + return NULL; + } + + return bond_get_bond_by_slave(port->slave); +} + +/** + * __get_first_port - get the first port in the bond + * @bond: the bond we're looking at + * + * Return the port of the first slave in @bond, or %NULL if it can't be found. 
+ */ +static inline struct port *__get_first_port(struct bonding *bond) +{ + struct slave *slave = bond->next; + + if (slave == (struct slave *)bond) { + return NULL; + } + + return &(SLAVE_AD_INFO(slave).port); +} + +/** + * __get_next_port - get the next port in the bond + * @port: the port we're looking at + * + * Return the port of the slave that is next in line of @port's slave in the + * bond, or %NULL if it can't be found. + */ +static inline struct port *__get_next_port(struct port *port) +{ + struct bonding *bond = __get_bond_by_port(port); + struct slave *slave = port->slave; + + // If there's no bond for this port, or this is the last slave + if ((bond == NULL) || (slave->next == bond->next)) { + return NULL; + } + + return &(SLAVE_AD_INFO(slave->next).port); +} + +/** + * __get_first_agg - get the first aggregator in the bond + * @bond: the bond we're looking at + * + * Return the aggregator of the first slave in @bond, or %NULL if it can't be + * found. + */ +static inline struct aggregator *__get_first_agg(struct port *port) +{ + struct bonding *bond = __get_bond_by_port(port); + + // If there's no bond for this port, or this is the last slave + if ((bond == NULL) || (bond->next == (struct slave *)bond)) { + return NULL; + } + + return &(SLAVE_AD_INFO(bond->next).aggregator); +} + +/** + * __get_next_agg - get the next aggregator in the bond + * @aggregator: the aggregator we're looking at + * + * Return the aggregator of the slave that is next in line of @aggregator's + * slave in the bond, or %NULL if it can't be found. 
+ */ +static inline struct aggregator *__get_next_agg(struct aggregator *aggregator) +{ + struct slave *slave = aggregator->slave; + struct bonding *bond = bond_get_bond_by_slave(slave); + + // If there's no bond for this aggregator, or this is the last slave + if ((bond == NULL) || (slave->next == bond->next)) { + return NULL; + } + + return &(SLAVE_AD_INFO(slave->next).aggregator); +} + +/** + * __disable_port - disable the port's slave + * @port: the port we're looking at + * + */ +static inline void __disable_port(struct port *port) +{ + bond_set_slave_inactive_flags(port->slave); +} + +/** + * __enable_port - enable the port's slave, if it's up + * @port: the port we're looking at + * + */ +static inline void __enable_port(struct port *port) +{ + struct slave *slave = port->slave; + + if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev)) { + bond_set_slave_active_flags(slave); + } +} + +/** + * __port_is_enabled - check if the port's slave is in active state + * @port: the port we're looking at + * + */ +static inline int __port_is_enabled(struct port *port) +{ + return(port->slave->state == BOND_STATE_ACTIVE); +} + +/** + * __get_agg_selection_mode - get the aggregator selection mode + * @port: the port we're looking at + * + * Get the aggregator selection mode. Can be %BANDWIDTH or %COUNT. + */ +static inline u32 __get_agg_selection_mode(struct port *port) +{ + struct bonding *bond = __get_bond_by_port(port); + + if (bond == NULL) { + return AD_BANDWIDTH; + } + + return BOND_AD_INFO(bond).agg_select_mode; +} + +/** + * __check_agg_selection_timer - check if the selection timer has expired + * @port: the port we're looking at + * + */ +static inline int __check_agg_selection_timer(struct port *port) +{ + struct bonding *bond = __get_bond_by_port(port); + + if (bond == NULL) { + return 0; + } + + return BOND_AD_INFO(bond).agg_select_timer ? 
1 : 0; +} + +/** + * __get_rx_machine_lock - lock the port's RX machine + * @port: the port we're looking at + * + */ +static inline void __get_rx_machine_lock(struct port *port) +{ + spin_lock(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +} + +/** + * __release_rx_machine_lock - unlock the port's RX machine + * @port: the port we're looking at + * + */ +static inline void __release_rx_machine_lock(struct port *port) +{ + spin_unlock(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +} + +/** + * __get_link_speed - get a port's speed + * @port: the port we're looking at + * + * Return @port's speed in 802.3ad bitmask format. i.e. one of: + * 0, + * %AD_LINK_SPEED_BITMASK_10MBPS, + * %AD_LINK_SPEED_BITMASK_100MBPS, + * %AD_LINK_SPEED_BITMASK_1000MBPS + */ +static u16 __get_link_speed(struct port *port) +{ + struct slave *slave = port->slave; + u16 speed; + + /* this if covers only a special case: when the configuration starts with + * link down, it sets the speed to 0. + * This is done in spite of the fact that the e100 driver reports 0 to be + * compatible with MVT in the future.*/ + if (slave->link != BOND_LINK_UP) { + speed=0; + } else { + switch (slave->speed) { + case SPEED_10: + speed = AD_LINK_SPEED_BITMASK_10MBPS; + break; + + case SPEED_100: + speed = AD_LINK_SPEED_BITMASK_100MBPS; + break; + + case SPEED_1000: + speed = AD_LINK_SPEED_BITMASK_1000MBPS; + break; + + default: + speed = 0; // unknown speed value from ethtool. shouldn't happen + break; + } + } + + BOND_PRINT_DBG(("Port %d Received link speed %d update from adapter", port->actor_port_number, speed)); + return speed; +} + +/** + * __get_duplex - get a port's duplex + * @port: the port we're looking at + * + * Return @port's duplex in 802.3ad bitmask format. 
i.e.: + * 0x01 if in full duplex + * 0x00 otherwise + */ +static u8 __get_duplex(struct port *port) +{ + struct slave *slave = port->slave; + + u8 retval; + + // handling a special case: when the configuration starts with + // link down, it sets the duplex to 0. + if (slave->link != BOND_LINK_UP) { + retval=0x0; + } else { + switch (slave->duplex) { + case DUPLEX_FULL: + retval=0x1; + BOND_PRINT_DBG(("Port %d Received status full duplex update from adapter", port->actor_port_number)); + break; + case DUPLEX_HALF: + default: + retval=0x0; + BOND_PRINT_DBG(("Port %d Received status NOT full duplex update from adapter", port->actor_port_number)); + break; + } + } + return retval; +} + +/** + * __initialize_port_locks - initialize a port's RX machine spinlock + * @port: the port we're looking at + * + */ +static inline void __initialize_port_locks(struct port *port) +{ + // make sure it isn't called twice + spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +} + +/** + * __deinitialize_port_locks - deinitialize a port's RX machine spinlock + * @port: the port we're looking at + * + */ +static inline void __deinitialize_port_locks(struct port *port) +{ +} + +//conversions +/** + * __ntohs_lacpdu - convert the contents of a LACPDU to host byte order + * @lacpdu: the speicifed lacpdu + * + * For each multi-byte field in the lacpdu, convert its content + */ +static void __ntohs_lacpdu(struct lacpdu *lacpdu) +{ + if (lacpdu) { + lacpdu->actor_system_priority = ntohs(lacpdu->actor_system_priority); + lacpdu->actor_key = ntohs(lacpdu->actor_key); + lacpdu->actor_port_priority = ntohs(lacpdu->actor_port_priority); + lacpdu->actor_port = ntohs(lacpdu->actor_port); + lacpdu->partner_system_priority = ntohs(lacpdu->partner_system_priority); + lacpdu->partner_key = ntohs(lacpdu->partner_key); + lacpdu->partner_port_priority = ntohs(lacpdu->partner_port_priority); + lacpdu->partner_port = ntohs(lacpdu->partner_port); + lacpdu->collector_max_delay = 
ntohs(lacpdu->collector_max_delay); + } +} + +/** + * __ad_timer_to_ticks - convert a given timer type to AD module ticks + * @timer_type: which timer to operate + * @par: timer parameter. see below + * + * If @timer_type is %current_while_timer, @par indicates long/short timer. + * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, + * %SLOW_PERIODIC_TIME. + */ +static u16 __ad_timer_to_ticks(u16 timer_type, u16 par) +{ + u16 retval=0; //to silence the compiler + + switch (timer_type) { + case AD_CURRENT_WHILE_TIMER: // for rx machine usage + if (par) { // for short or long timeout + retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); // short timeout + } else { + retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); // long timeout + } + break; + case AD_ACTOR_CHURN_TIMER: // for local churn machine + retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); + break; + case AD_PERIODIC_TIMER: // for periodic machine + retval = (par*ad_ticks_per_sec); // long timeout + break; + case AD_PARTNER_CHURN_TIMER: // for remote churn machine + retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); + break; + case AD_WAIT_WHILE_TIMER: // for selection machine + retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec); + break; + } + return retval; +} + + +///////////////////////////////////////////////////////////////////////////////// +// ================= ad_rx_machine helper functions ================== +///////////////////////////////////////////////////////////////////////////////// + +/** + * __record_pdu - record parameters from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Record the parameter values for the Actor carried in a received lacpdu as + * the current partner operational parameter values and sets + * actor_oper_port_state.defaulted to FALSE. 
+ */ +static void __record_pdu(struct lacpdu *lacpdu, struct port *port) +{ + // validate lacpdu and port + if (lacpdu && port) { + // record the new parameter values for the partner operational + port->partner_oper_port_number = lacpdu->actor_port; + port->partner_oper_port_priority = lacpdu->actor_port_priority; + port->partner_oper_system = lacpdu->actor_system; + port->partner_oper_system_priority = lacpdu->actor_system_priority; + port->partner_oper_key = lacpdu->actor_key; + // zero partener's lase states + port->partner_oper_port_state = 0; + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_LACP_ACTIVITY); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_LACP_TIMEOUT); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_AGGREGATION); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_COLLECTING); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_DISTRIBUTING); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_DEFAULTED); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_EXPIRED); + + // set actor_oper_port_state.defaulted to FALSE + port->actor_oper_port_state &= ~AD_STATE_DEFAULTED; + + // set the partner sync. to on if the partner is sync. and the port is matched + if ((port->sm_vars & AD_PORT_MATCHED) && (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) { + port->partner_oper_port_state |= AD_STATE_SYNCHRONIZATION; + } else { + port->partner_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + } + } +} + +/** + * __record_default - record default parameters + * @port: the port we're looking at + * + * This function records the default parameter values for the partner carried + * in the Partner Admin parameters as the current partner operational parameter + * values and sets actor_oper_port_state.defaulted to TRUE. 
+ */ +static void __record_default(struct port *port) +{ + // validate the port + if (port) { + // record the partner admin parameters + port->partner_oper_port_number = port->partner_admin_port_number; + port->partner_oper_port_priority = port->partner_admin_port_priority; + port->partner_oper_system = port->partner_admin_system; + port->partner_oper_system_priority = port->partner_admin_system_priority; + port->partner_oper_key = port->partner_admin_key; + port->partner_oper_port_state = port->partner_admin_port_state; + + // set actor_oper_port_state.defaulted to true + port->actor_oper_port_state |= AD_STATE_DEFAULTED; + } +} + +/** + * __update_selected - update a port's Selected variable from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Update the value of the selected variable, using parameter values from a + * newly received lacpdu. The parameter values for the Actor carried in the + * received PDU are compared with the corresponding operational parameter + * values for the ports partner. If one or more of the comparisons shows that + * the value(s) received in the PDU differ from the current operational values, + * then selected is set to FALSE and actor_oper_port_state.synchronization is + * set to out_of_sync. Otherwise, selected remains unchanged. 
+ */ +static void __update_selected(struct lacpdu *lacpdu, struct port *port) +{ + // validate lacpdu and port + if (lacpdu && port) { + // check if any parameter is different + if ((lacpdu->actor_port != port->partner_oper_port_number) || + (lacpdu->actor_port_priority != port->partner_oper_port_priority) || + MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->partner_oper_system)) || + (lacpdu->actor_system_priority != port->partner_oper_system_priority) || + (lacpdu->actor_key != port->partner_oper_key) || + ((lacpdu->actor_state & AD_STATE_AGGREGATION) != (port->partner_oper_port_state & AD_STATE_AGGREGATION)) + ) { + // update the state machine Selected variable + port->sm_vars &= ~AD_PORT_SELECTED; + } + } +} + +/** + * __update_default_selected - update a port's Selected variable from Partner + * @port: the port we're looking at + * + * This function updates the value of the selected variable, using the partner + * administrative parameter values. The administrative values are compared with + * the corresponding operational parameter values for the partner. If one or + * more of the comparisons shows that the administrative value(s) differ from + * the current operational values, then Selected is set to FALSE and + * actor_oper_port_state.synchronization is set to OUT_OF_SYNC. Otherwise, + * Selected remains unchanged. 
+ */ +static void __update_default_selected(struct port *port) +{ + // validate the port + if (port) { + // check if any parameter is different + if ((port->partner_admin_port_number != port->partner_oper_port_number) || + (port->partner_admin_port_priority != port->partner_oper_port_priority) || + MAC_ADDRESS_COMPARE(&(port->partner_admin_system), &(port->partner_oper_system)) || + (port->partner_admin_system_priority != port->partner_oper_system_priority) || + (port->partner_admin_key != port->partner_oper_key) || + ((port->partner_admin_port_state & AD_STATE_AGGREGATION) != (port->partner_oper_port_state & AD_STATE_AGGREGATION)) + ) { + // update the state machine Selected variable + port->sm_vars &= ~AD_PORT_SELECTED; + } + } +} + +/** + * __choose_matched - update a port's matched variable from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Update the value of the matched variable, using parameter values from a + * newly received lacpdu. Parameter values for the partner carried in the + * received PDU are compared with the corresponding operational parameter + * values for the actor. Matched is set to TRUE if all of these parameters + * match and the PDU parameter partner_state.aggregation has the same value as + * actor_oper_port_state.aggregation and lacp will actively maintain the link + * in the aggregation. Matched is also set to TRUE if the value of + * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates + * an individual link and lacp will actively maintain the link. Otherwise, + * matched is set to FALSE. LACP is considered to be actively maintaining the + * link if either the PDU's actor_state.lacp_activity variable is TRUE or both + * the actor's actor_oper_port_state.lacp_activity and the PDU's + * partner_state.lacp_activity variables are TRUE. 
+ */ +static void __choose_matched(struct lacpdu *lacpdu, struct port *port) +{ + // validate lacpdu and port + if (lacpdu && port) { + // check if all parameters are alike + if (((lacpdu->partner_port == port->actor_port_number) && + (lacpdu->partner_port_priority == port->actor_port_priority) && + !MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) && + (lacpdu->partner_system_priority == port->actor_system_priority) && + (lacpdu->partner_key == port->actor_oper_port_key) && + ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) || + // or this is individual link(aggregation == FALSE) + ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0) + ) { + // update the state machine Matched variable + port->sm_vars |= AD_PORT_MATCHED; + } else { + port->sm_vars &= ~AD_PORT_MATCHED; + } + } +} + +/** + * __update_ntt - update a port's ntt variable from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Updates the value of the ntt variable, using parameter values from a newly + * received lacpdu. The parameter values for the partner carried in the + * received PDU are compared with the corresponding operational parameter + * values for the Actor. If one or more of the comparisons shows that the + * value(s) received in the PDU differ from the current operational values, + * then ntt is set to TRUE. Otherwise, ntt remains unchanged. 
+ */ +static void __update_ntt(struct lacpdu *lacpdu, struct port *port) +{ + // validate lacpdu and port + if (lacpdu && port) { + // check if any parameter is different + if ((lacpdu->partner_port != port->actor_port_number) || + (lacpdu->partner_port_priority != port->actor_port_priority) || + MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) || + (lacpdu->partner_system_priority != port->actor_system_priority) || + (lacpdu->partner_key != port->actor_oper_port_key) || + ((lacpdu->partner_state & AD_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY)) || + ((lacpdu->partner_state & AD_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)) || + ((lacpdu->partner_state & AD_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) || + ((lacpdu->partner_state & AD_STATE_AGGREGATION) != (port->actor_oper_port_state & AD_STATE_AGGREGATION)) + ) { + // set ntt to be TRUE + port->ntt = 1; + } + } +} + +/** + * __attach_bond_to_agg + * @port: the port we're looking at + * + * Handle the attaching of the port's control parser/multiplexer and the + * aggregator. This function does nothing since the parser/multiplexer of the + * receive and the parser/multiplexer of the aggregator are already combined. + */ +static void __attach_bond_to_agg(struct port *port) +{ + port=NULL; // just to satisfy the compiler + // This function does nothing since the parser/multiplexer of the receive + // and the parser/multiplexer of the aggregator are already combined +} + +/** + * __detach_bond_to_agg + * @port: the port we're looking at + * + * Handle the detaching of the port's control parser/multiplexer from the + * aggregator. This function does nothing since the parser/multiplexer of the + * receive and the parser/multiplexer of the aggregator are already combined. 
+ */ +static void __detach_bond_from_agg(struct port *port) +{ + port=NULL; // just to satisfy the compiler + // This function does nothing sience the parser/multiplexer of the receive + // and the parser/multiplexer of the aggregator are already combined +} + +/** + * __agg_ports_are_ready - check if all ports in an aggregator are ready + * @aggregator: the aggregator we're looking at + * + */ +static int __agg_ports_are_ready(struct aggregator *aggregator) +{ + struct port *port; + int retval = 1; + + if (aggregator) { + // scan all ports in this aggregator to verfy if they are all ready + for (port=aggregator->lag_ports; port; port=port->next_port_in_aggregator) { + if (!(port->sm_vars & AD_PORT_READY_N)) { + retval = 0; + break; + } + } + } + + return retval; +} + +/** + * __set_agg_ports_ready - set value of Ready bit in all ports of an aggregator + * @aggregator: the aggregator we're looking at + * @val: Should the ports' ready bit be set on or off + * + */ +static void __set_agg_ports_ready(struct aggregator *aggregator, int val) +{ + struct port *port; + + for (port=aggregator->lag_ports; port; port=port->next_port_in_aggregator) { + if (val) { + port->sm_vars |= AD_PORT_READY; + } else { + port->sm_vars &= ~AD_PORT_READY; + } + } +} + +/** + * __get_agg_bandwidth - get the total bandwidth of an aggregator + * @aggregator: the aggregator we're looking at + * + */ +static u32 __get_agg_bandwidth(struct aggregator *aggregator) +{ + u32 bandwidth=0; + u32 basic_speed; + + if (aggregator->num_of_ports) { + basic_speed = __get_link_speed(aggregator->lag_ports); + switch (basic_speed) { + case AD_LINK_SPEED_BITMASK_1MBPS: + bandwidth = aggregator->num_of_ports; + break; + case AD_LINK_SPEED_BITMASK_10MBPS: + bandwidth = aggregator->num_of_ports * 10; + break; + case AD_LINK_SPEED_BITMASK_100MBPS: + bandwidth = aggregator->num_of_ports * 100; + break; + case AD_LINK_SPEED_BITMASK_1000MBPS: + bandwidth = aggregator->num_of_ports * 1000; + break; + default: + 
bandwidth=0; // to silent the compilor .... + } + } + return bandwidth; +} + +/** + * __get_active_agg - get the current active aggregator + * @aggregator: the aggregator we're looking at + * + */ +static struct aggregator *__get_active_agg(struct aggregator *aggregator) +{ + struct aggregator *retval = NULL; + + for (; aggregator; aggregator = __get_next_agg(aggregator)) { + if (aggregator->is_active) { + retval = aggregator; + break; + } + } + + return retval; +} + +////////////////////////////////////////////////////////////////////////////////////// +// ================= main 802.3ad protocol code ====================================== +////////////////////////////////////////////////////////////////////////////////////// + +/** + * ad_lacpdu_send - send out a lacpdu packet on a given port + * @port: the port we're looking at + * + * Returns: 0 on success + * < 0 on error + */ +static int ad_lacpdu_send(struct port *port) +{ + struct slave *slave = port->slave; + struct sk_buff *skb; + struct lacpdu_header *lacpdu_header; + int length = sizeof(struct lacpdu_header); + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length); + if (!skb) { + return -ENOMEM; + } + + skb->dev = slave->dev; + skb->mac.raw = skb->data; + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + + lacpdu_header = (struct lacpdu_header *)skb_put(skb, length); + + lacpdu_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it + to identify loopback lacpdus in receive. 
*/ + lacpdu_header->ad_header.source_address = *((struct mac_addr *)(slave->perm_hwaddr)); + lacpdu_header->ad_header.length_type = PKT_TYPE_LACPDU; + + lacpdu_header->lacpdu = port->lacpdu; // struct copy + + dev_queue_xmit(skb); + + return 0; +} + +/** + * ad_marker_send - send marker information/response on a given port + * @port: the port we're looking at + * @marker: marker data to send + * + * Returns: 0 on success + * < 0 on error + */ +static int ad_marker_send(struct port *port, struct marker *marker) +{ + struct slave *slave = port->slave; + struct sk_buff *skb; + struct marker_header *marker_header; + int length = sizeof(struct marker_header); + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); + if (!skb) { + return -ENOMEM; + } + + skb_reserve(skb, 16); + + skb->dev = slave->dev; + skb->mac.raw = skb->data; + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + + marker_header = (struct marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it + to identify loopback MARKERs in receive. 
*/ + marker_header->ad_header.source_address = *((struct mac_addr *)(slave->perm_hwaddr)); + marker_header->ad_header.length_type = PKT_TYPE_LACPDU; + + marker_header->marker = *marker; // struct copy + + dev_queue_xmit(skb); + + return 0; +} + +/** + * ad_mux_machine - handle a port's mux state machine + * @port: the port we're looking at + * + */ +static void ad_mux_machine(struct port *port) +{ + mux_states_t last_state; + + // keep current State Machine state to compare later if it was changed + last_state = port->sm_mux_state; + + if (port->sm_vars & AD_PORT_BEGIN) { + port->sm_mux_state = AD_MUX_DETACHED; // next state + } else { + switch (port->sm_mux_state) { + case AD_MUX_DETACHED: + if ((port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { // if SELECTED or STANDBY + port->sm_mux_state = AD_MUX_WAITING; // next state + } + break; + case AD_MUX_WAITING: + // if SELECTED == FALSE return to DETACH state + if (!(port->sm_vars & AD_PORT_SELECTED)) { // if UNSELECTED + port->sm_vars &= ~AD_PORT_READY_N; + // in order to withhold the Selection Logic to check all ports READY_N value + // every callback cycle to update ready variable, we check READY_N and update READY here + __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + port->sm_mux_state = AD_MUX_DETACHED; // next state + break; + } + + // check if the wait_while_timer expired + if (port->sm_mux_timer_counter && !(--port->sm_mux_timer_counter)) { + port->sm_vars |= AD_PORT_READY_N; + } + + // in order to withhold the selection logic to check all ports READY_N value + // every callback cycle to update ready variable, we check READY_N and update READY here + __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + + // if the wait_while_timer expired, and the port is in READY state, move to ATTACHED state + if ((port->sm_vars & AD_PORT_READY) && !port->sm_mux_timer_counter) { + port->sm_mux_state = AD_MUX_ATTACHED; // next state + 
} + break; + case AD_MUX_ATTACHED: + // check also if agg_select_timer expired(so the edable port will take place only after this timer) + if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper_port_state & AD_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { + port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING;// next state + } else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { // if UNSELECTED or STANDBY + port->sm_vars &= ~AD_PORT_READY_N; + // in order to withhold the selection logic to check all ports READY_N value + // every callback cycle to update ready variable, we check READY_N and update READY here + __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + port->sm_mux_state = AD_MUX_DETACHED;// next state + } + break; + case AD_MUX_COLLECTING_DISTRIBUTING: + if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || + !(port->partner_oper_port_state & AD_STATE_SYNCHRONIZATION) + ) { + port->sm_mux_state = AD_MUX_ATTACHED;// next state + + } else { + // if port state hasn't changed make + // sure that a collecting distributing + // port in an active aggregator is enabled + if (port->aggregator && + port->aggregator->is_active && + !__port_is_enabled(port)) { + + __enable_port(port); + } + } + break; + default: //to silence the compiler + break; + } + } + + // check if the state machine was changed + if (port->sm_mux_state != last_state) { + BOND_PRINT_DBG(("Mux Machine: Port=%d, Last State=%d, Curr State=%d", port->actor_port_number, last_state, port->sm_mux_state)); + switch (port->sm_mux_state) { + case AD_MUX_DETACHED: + __detach_bond_from_agg(port); + port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + ad_disable_collecting_distributing(port); + port->actor_oper_port_state &= ~AD_STATE_COLLECTING; + port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; + port->ntt = 1; + break; + case AD_MUX_WAITING: + port->sm_mux_timer_counter = 
__ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); + break; + case AD_MUX_ATTACHED: + __attach_bond_to_agg(port); + port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~AD_STATE_COLLECTING; + port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; + ad_disable_collecting_distributing(port); + port->ntt = 1; + break; + case AD_MUX_COLLECTING_DISTRIBUTING: + port->actor_oper_port_state |= AD_STATE_COLLECTING; + port->actor_oper_port_state |= AD_STATE_DISTRIBUTING; + ad_enable_collecting_distributing(port); + port->ntt = 1; + break; + default: //to silence the compiler + break; + } + } +} + +/** + * ad_rx_machine - handle a port's rx State Machine + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * If lacpdu arrived, stop previous timer (if exists) and set the next state as + * CURRENT. If timer expired set the state machine in the proper state. + * In other cases, this function checks if we need to switch to other state. + */ +static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) +{ + rx_states_t last_state; + + // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback) + __get_rx_machine_lock(port); + + // keep current State Machine state to compare later if it was changed + last_state = port->sm_rx_state; + + // check if state machine should change state + // first, check if port was reinitialized + if (port->sm_vars & AD_PORT_BEGIN) { + port->sm_rx_state = AD_RX_INITIALIZE; // next state + } + // check if port is not enabled + else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled && !(port->sm_vars & AD_PORT_MOVED)) { + port->sm_rx_state = AD_RX_PORT_DISABLED; // next state + } + // check if new lacpdu arrived + else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) { + port->sm_rx_timer_counter = 0; // zero timer + port->sm_rx_state = 
AD_RX_CURRENT; + } else { + // if timer is on, and if it is expired + if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) { + switch (port->sm_rx_state) { + case AD_RX_EXPIRED: + port->sm_rx_state = AD_RX_DEFAULTED; // next state + break; + case AD_RX_CURRENT: + port->sm_rx_state = AD_RX_EXPIRED; // next state + break; + default: //to silence the compiler + break; + } + } else { + // if no lacpdu arrived and no timer is on + switch (port->sm_rx_state) { + case AD_RX_PORT_DISABLED: + if (port->sm_vars & AD_PORT_MOVED) { + port->sm_rx_state = AD_RX_INITIALIZE; // next state + } else if (port->is_enabled && (port->sm_vars & AD_PORT_LACP_ENABLED)) { + port->sm_rx_state = AD_RX_EXPIRED; // next state + } else if (port->is_enabled && ((port->sm_vars & AD_PORT_LACP_ENABLED) == 0)) { + port->sm_rx_state = AD_RX_LACP_DISABLED; // next state + } + break; + default: //to silence the compiler + break; + + } + } + } + + // check if the State machine was changed or new lacpdu arrived + if ((port->sm_rx_state != last_state) || (lacpdu)) { + BOND_PRINT_DBG(("Rx Machine: Port=%d, Last State=%d, Curr State=%d", port->actor_port_number, last_state, port->sm_rx_state)); + switch (port->sm_rx_state) { + case AD_RX_INITIALIZE: + if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)) { + port->sm_vars &= ~AD_PORT_LACP_ENABLED; + } else { + port->sm_vars |= AD_PORT_LACP_ENABLED; + } + port->sm_vars &= ~AD_PORT_SELECTED; + __record_default(port); + port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + port->sm_vars &= ~AD_PORT_MOVED; + port->sm_rx_state = AD_RX_PORT_DISABLED; // next state + + /*- Fall Through -*/ + + case AD_RX_PORT_DISABLED: + port->sm_vars &= ~AD_PORT_MATCHED; + break; + case AD_RX_LACP_DISABLED: + port->sm_vars &= ~AD_PORT_SELECTED; + __record_default(port); + port->partner_oper_port_state &= ~AD_STATE_AGGREGATION; + port->sm_vars |= AD_PORT_MATCHED; + port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + break; + case AD_RX_EXPIRED: + //Reset of the 
Synchronization flag. (Standard 43.4.12) + //This reset cause to disable this port in the COLLECTING_DISTRIBUTING state of the + //mux machine in case of EXPIRED even if LINK_DOWN didn't arrive for the port. + port->partner_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + port->sm_vars &= ~AD_PORT_MATCHED; + port->partner_oper_port_state |= AD_SHORT_TIMEOUT; + port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); + port->actor_oper_port_state |= AD_STATE_EXPIRED; + break; + case AD_RX_DEFAULTED: + __update_default_selected(port); + __record_default(port); + port->sm_vars |= AD_PORT_MATCHED; + port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + break; + case AD_RX_CURRENT: + // detect loopback situation + if (!MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->actor_system))) { + // INFO_RECEIVED_LOOPBACK_FRAMES + printk(KERN_ERR "bonding: An illegal loopback occurred on adapter (%s)\n", + port->slave->dev->name); + printk(KERN_ERR "Check the configuration to verify that all Adapters " + "are connected to 802.3ad compliant switch ports\n"); + __release_rx_machine_lock(port); + return; + } + __update_selected(lacpdu, port); + __update_ntt(lacpdu, port); + __record_pdu(lacpdu, port); + __choose_matched(lacpdu, port); + port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)); + port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + // verify that if the aggregator is enabled, the port is enabled too. 
+ //(because if the link goes down for a short time, the 802.3ad will not + // catch it, and the port will continue to be disabled) + if (port->aggregator && port->aggregator->is_active && !__port_is_enabled(port)) { + __enable_port(port); + } + break; + default: //to silence the compiler + break; + } + } + __release_rx_machine_lock(port); +} + +/** + * ad_tx_machine - handle a port's tx state machine + * @port: the port we're looking at + * + */ +static void ad_tx_machine(struct port *port) +{ + struct lacpdu *lacpdu = &port->lacpdu; + + // check if tx timer expired, to verify that we do not send more than 3 packets per second + if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { + // check if there is something to send + if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) { + //update current actual Actor parameters + //lacpdu->subtype initialized + //lacpdu->version_number initialized + //lacpdu->tlv_type_actor_info initialized + //lacpdu->actor_information_length initialized + lacpdu->actor_system_priority = port->actor_system_priority; + lacpdu->actor_system = port->actor_system; + lacpdu->actor_key = port->actor_oper_port_key; + lacpdu->actor_port_priority = port->actor_port_priority; + lacpdu->actor_port = port->actor_port_number; + lacpdu->actor_state = port->actor_oper_port_state; + //lacpdu->reserved_3_1 initialized + //lacpdu->tlv_type_partner_info initialized + //lacpdu->partner_information_length initialized + lacpdu->partner_system_priority = port->partner_oper_system_priority; + lacpdu->partner_system = port->partner_oper_system; + lacpdu->partner_key = port->partner_oper_key; + lacpdu->partner_port_priority = port->partner_oper_port_priority; + lacpdu->partner_port = port->partner_oper_port_number; + lacpdu->partner_state = port->partner_oper_port_state; + //lacpdu->reserved_3_2 initialized + //lacpdu->tlv_type_collector_info initialized + //lacpdu->collector_information_length initialized + //collector_max_delay initialized + 
//reserved_12[12] initialized + //tlv_type_terminator initialized + //terminator_length initialized + //reserved_50[50] initialized + + // We need to convert all non u8 parameters to Big Endian for transmit + __ntohs_lacpdu(lacpdu); + // send the lacpdu + if (ad_lacpdu_send(port) >= 0) { + BOND_PRINT_DBG(("Sent LACPDU on port %d", port->actor_port_number)); + // mark ntt as false, so it will not be sent again until demanded + port->ntt = 0; + } + } + // restart tx timer (to verify that we will not exceed AD_MAX_TX_IN_SECOND) + port->sm_tx_timer_counter=ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; + } +} + +/** + * ad_periodic_machine - handle a port's periodic state machine + * @port: the port we're looking at + * + * Turn ntt flag on periodically to perform periodic transmission of lacpdu's. + */ +static void ad_periodic_machine(struct port *port) +{ + periodic_states_t last_state; + + // keep current state machine state to compare later if it was changed + last_state = port->sm_periodic_state; + + // check if port was reinitialized + if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || + (!(port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY) && !(port->partner_oper_port_state & AD_STATE_LACP_ACTIVITY)) + ) { + port->sm_periodic_state = AD_NO_PERIODIC; // next state + } + // check if state machine should change state + else if (port->sm_periodic_timer_counter) { + // check if periodic state machine expired + if (!(--port->sm_periodic_timer_counter)) { + // if expired then do tx + port->sm_periodic_state = AD_PERIODIC_TX; // next state + } else { + // If not expired, check if there is some new timeout parameter from the partner state + switch (port->sm_periodic_state) { + case AD_FAST_PERIODIC: + if (!(port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { + port->sm_periodic_state = AD_SLOW_PERIODIC; // next state + } + break; + case AD_SLOW_PERIODIC: + if ((port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { + 
// stop current timer + port->sm_periodic_timer_counter = 0; + port->sm_periodic_state = AD_PERIODIC_TX; // next state + } + break; + default: //to silence the compiler + break; + } + } + } else { + switch (port->sm_periodic_state) { + case AD_NO_PERIODIC: + port->sm_periodic_state = AD_FAST_PERIODIC; // next state + break; + case AD_PERIODIC_TX: + if (!(port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { + port->sm_periodic_state = AD_SLOW_PERIODIC; // next state + } else { + port->sm_periodic_state = AD_FAST_PERIODIC; // next state + } + break; + default: //to silence the compiler + break; + } + } + + // check if the state machine was changed + if (port->sm_periodic_state != last_state) { + BOND_PRINT_DBG(("Periodic Machine: Port=%d, Last State=%d, Curr State=%d", port->actor_port_number, last_state, port->sm_periodic_state)); + switch (port->sm_periodic_state) { + case AD_NO_PERIODIC: + port->sm_periodic_timer_counter = 0; // zero timer + break; + case AD_FAST_PERIODIC: + port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle + break; + case AD_SLOW_PERIODIC: + port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle + break; + case AD_PERIODIC_TX: + port->ntt = 1; + break; + default: //to silence the compiler + break; + } + } +} + +/** + * ad_port_selection_logic - select aggregation groups + * @port: the port we're looking at + * + * Select aggregation groups, and assign each port to its aggregator. The + * selection logic is called in the initialization (after all the handshakes), + * and after every lacpdu receive (if selected is off). 
+ */ +static void ad_port_selection_logic(struct port *port) +{ + struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; + struct port *last_port = NULL, *curr_port; + int found = 0; + + // if the port is already Selected, do nothing + if (port->sm_vars & AD_PORT_SELECTED) { + return; + } + + // if the port is connected to other aggregator, detach it + if (port->aggregator) { + // detach the port from its former aggregator + temp_aggregator=port->aggregator; + for (curr_port=temp_aggregator->lag_ports; curr_port; last_port=curr_port, curr_port=curr_port->next_port_in_aggregator) { + if (curr_port == port) { + temp_aggregator->num_of_ports--; + if (!last_port) {// if it is the first port attached to the aggregator + temp_aggregator->lag_ports=port->next_port_in_aggregator; + } else {// not the first port attached to the aggregator + last_port->next_port_in_aggregator=port->next_port_in_aggregator; + } + + // clear the port's relations to this aggregator + port->aggregator = NULL; + port->next_port_in_aggregator=NULL; + port->actor_port_aggregator_identifier=0; + + BOND_PRINT_DBG(("Port %d left LAG %d", port->actor_port_number, temp_aggregator->aggregator_identifier)); + // if the aggregator is empty, clear its parameters, and set it ready to be attached + if (!temp_aggregator->lag_ports) { + ad_clear_agg(temp_aggregator); + } + break; + } + } + if (!curr_port) { // meaning: the port was related to an aggregator but was not on the aggregator port list + printk(KERN_WARNING "bonding: Warning: Port %d (on %s) was " + "related to aggregator %d but was not on its port list\n", + port->actor_port_number, port->slave->dev->name, + port->aggregator->aggregator_identifier); + } + } + // search on all aggregators for a suitable aggregator for this port + for (aggregator = __get_first_agg(port); aggregator; + aggregator = __get_next_agg(aggregator)) { + + // keep a free aggregator for later use(if needed) + if (!aggregator->lag_ports) { + if 
(!free_aggregator) { + free_aggregator=aggregator; + } + continue; + } + // check if current aggregator suits us + if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && // if all parameters match AND + !MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(port->partner_oper_system)) && + (aggregator->partner_system_priority == port->partner_oper_system_priority) && + (aggregator->partner_oper_aggregator_key == port->partner_oper_key) + ) && + ((MAC_ADDRESS_COMPARE(&(port->partner_oper_system), &(null_mac_addr)) && // partner answers + !aggregator->is_individual) // but is not individual OR + ) + ) { + // attach to the founded aggregator + port->aggregator = aggregator; + port->actor_port_aggregator_identifier=port->aggregator->aggregator_identifier; + port->next_port_in_aggregator=aggregator->lag_ports; + port->aggregator->num_of_ports++; + aggregator->lag_ports=port; + BOND_PRINT_DBG(("Port %d joined LAG %d(existing LAG)", port->actor_port_number, port->aggregator->aggregator_identifier)); + + // mark this port as selected + port->sm_vars |= AD_PORT_SELECTED; + found = 1; + break; + } + } + + // the port couldn't find an aggregator - attach it to a new aggregator + if (!found) { + if (free_aggregator) { + // assign port a new aggregator + port->aggregator = free_aggregator; + port->actor_port_aggregator_identifier=port->aggregator->aggregator_identifier; + + // update the new aggregator's parameters + // if port was responsed from the end-user + if (port->actor_oper_port_key & AD_DUPLEX_KEY_BITS) {// if port is full duplex + port->aggregator->is_individual = 0; + } else { + port->aggregator->is_individual = 1; + } + + port->aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; + port->aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; + port->aggregator->partner_system=port->partner_oper_system; + port->aggregator->partner_system_priority = port->partner_oper_system_priority; + 
port->aggregator->partner_oper_aggregator_key = port->partner_oper_key; + port->aggregator->receive_state = 1; + port->aggregator->transmit_state = 1; + port->aggregator->lag_ports = port; + port->aggregator->num_of_ports++; + + // mark this port as selected + port->sm_vars |= AD_PORT_SELECTED; + + BOND_PRINT_DBG(("Port %d joined LAG %d(new LAG)", port->actor_port_number, port->aggregator->aggregator_identifier)); + } else { + printk(KERN_ERR "bonding: Port %d (on %s) did not find a suitable aggregator\n", + port->actor_port_number, port->slave->dev->name); + } + } + // if all aggregator's ports are READY_N == TRUE, set ready=TRUE in all aggregator's ports + // else set ready=FALSE in all aggregator's ports + __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + + if (!__check_agg_selection_timer(port) && (aggregator = __get_first_agg(port))) { + ad_agg_selection_logic(aggregator); + } +} + +/** + * ad_agg_selection_logic - select an aggregation group for a team + * @aggregator: the aggregator we're looking at + * + * It is assumed that only one aggregator may be selected for a team. + * The logic of this function is to select (at first time) the aggregator with + * the most ports attached to it, and to reselect the active aggregator only if + * the previous aggregator has no more ports related to it. + * + * FIXME: this function MUST be called with the first agg in the bond, or + * __get_active_agg() won't work correctly. This function should be better + * called with the bond itself, and retrieve the first agg from it. 
+ */ +static void ad_agg_selection_logic(struct aggregator *aggregator) +{ + struct aggregator *best_aggregator = NULL, *active_aggregator = NULL; + struct aggregator *last_active_aggregator = NULL, *origin_aggregator; + struct port *port; + u16 num_of_aggs=0; + + origin_aggregator = aggregator; + + //get current active aggregator + last_active_aggregator = __get_active_agg(aggregator); + + // search for the aggregator with the most ports attached to it. + do { + // count how many candidate lag's we have + if (aggregator->lag_ports) { + num_of_aggs++; + } + if (aggregator->is_active && !aggregator->is_individual && // if current aggregator is the active aggregator + MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr))) { // and partner answers to 802.3ad PDUs + if (aggregator->num_of_ports) { // if any ports attached to the current aggregator + best_aggregator=NULL; // disregard the best aggregator that was chosen by now + break; // stop the selection of other aggregator if there are any ports attached to this active aggregator + } else { // no ports attached to this active aggregator + aggregator->is_active = 0; // mark this aggregator as not active anymore + } + } + if (aggregator->num_of_ports) { // if any ports attached + if (best_aggregator) { // if there is a candidte aggregator + //The reasons for choosing new best aggregator: + // 1. if current agg is NOT individual and the best agg chosen so far is individual OR + // current and best aggs are both individual or both not individual, AND + // 2a. current agg partner reply but best agg partner do not reply OR + // 2b. 
current agg partner reply OR current agg partner do not reply AND best agg partner also do not reply AND + // current has more ports/bandwidth, or same amount of ports but current has faster ports, THEN + // current agg become best agg so far + + //if current agg is NOT individual and the best agg chosen so far is individual change best_aggregator + if (!aggregator->is_individual && best_aggregator->is_individual) { + best_aggregator=aggregator; + } + // current and best aggs are both individual or both not individual + else if ((aggregator->is_individual && best_aggregator->is_individual) || + (!aggregator->is_individual && !best_aggregator->is_individual)) { + // current and best aggs are both individual or both not individual AND + // current agg partner reply but best agg partner do not reply + if ((MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr)) && + !MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) { + best_aggregator=aggregator; + } + // current agg partner reply OR current agg partner do not reply AND best agg partner also do not reply + else if (! 
(!MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr)) && + MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) { + if ((__get_agg_selection_mode(aggregator->lag_ports) == AD_BANDWIDTH)&& + (__get_agg_bandwidth(aggregator) > __get_agg_bandwidth(best_aggregator))) { + best_aggregator=aggregator; + } else if (__get_agg_selection_mode(aggregator->lag_ports) == AD_COUNT) { + if (((aggregator->num_of_ports > best_aggregator->num_of_ports) && + (aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS))|| + ((aggregator->num_of_ports == best_aggregator->num_of_ports) && + ((u16)(aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS) > + (u16)(best_aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS)))) { + best_aggregator=aggregator; + } + } + } + } + } else { + best_aggregator=aggregator; + } + } + aggregator->is_active = 0; // mark all aggregators as not active anymore + } while ((aggregator = __get_next_agg(aggregator))); + + // if we have new aggregator selected, don't replace the old aggregator if it has an answering partner, + // or if both old aggregator and new aggregator don't have answering partner + if (best_aggregator) { + if (last_active_aggregator && last_active_aggregator->lag_ports && last_active_aggregator->lag_ports->is_enabled && + (MAC_ADDRESS_COMPARE(&(last_active_aggregator->partner_system), &(null_mac_addr)) || // partner answers OR + (!MAC_ADDRESS_COMPARE(&(last_active_aggregator->partner_system), &(null_mac_addr)) && // both old and new + !MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) // partner do not answer + ) { + // if new aggregator has link, and old aggregator does not, replace old aggregator.(do nothing) + // -> don't replace otherwise. 
+ if (!(!last_active_aggregator->actor_oper_aggregator_key && best_aggregator->actor_oper_aggregator_key)) { + best_aggregator=NULL; + last_active_aggregator->is_active = 1; // don't replace good old aggregator + + } + } + } + + // if there is new best aggregator, activate it + if (best_aggregator) { + for (aggregator = __get_first_agg(best_aggregator->lag_ports); + aggregator; + aggregator = __get_next_agg(aggregator)) { + + BOND_PRINT_DBG(("Agg=%d; Ports=%d; a key=%d; p key=%d; Indiv=%d; Active=%d", + aggregator->aggregator_identifier, aggregator->num_of_ports, + aggregator->actor_oper_aggregator_key, aggregator->partner_oper_aggregator_key, + aggregator->is_individual, aggregator->is_active)); + } + + // check if any partner replys + if (best_aggregator->is_individual) { + printk(KERN_WARNING "bonding: Warning: No 802.3ad response from the link partner " + "for any adapters in the bond\n"); + } + + // check if there are more than one aggregator + if (num_of_aggs > 1) { + BOND_PRINT_DBG(("Warning: More than one Link Aggregation Group was " + "found in the bond. 
Only one group will function in the bond")); + } + + best_aggregator->is_active = 1; + BOND_PRINT_DBG(("LAG %d choosed as the active LAG", best_aggregator->aggregator_identifier)); + BOND_PRINT_DBG(("Agg=%d; Ports=%d; a key=%d; p key=%d; Indiv=%d; Active=%d", + best_aggregator->aggregator_identifier, best_aggregator->num_of_ports, + best_aggregator->actor_oper_aggregator_key, best_aggregator->partner_oper_aggregator_key, + best_aggregator->is_individual, best_aggregator->is_active)); + + // disable the ports that were related to the former active_aggregator + if (last_active_aggregator) { + for (port=last_active_aggregator->lag_ports; port; port=port->next_port_in_aggregator) { + __disable_port(port); + } + } + } + + // if the selected aggregator is of join individuals(partner_system is NULL), enable their ports + active_aggregator = __get_active_agg(origin_aggregator); + + if (active_aggregator) { + if (!MAC_ADDRESS_COMPARE(&(active_aggregator->partner_system), &(null_mac_addr))) { + for (port=active_aggregator->lag_ports; port; port=port->next_port_in_aggregator) { + __enable_port(port); + } + } + } +} + +/** + * ad_clear_agg - clear a given aggregator's parameters + * @aggregator: the aggregator we're looking at + * + */ +static void ad_clear_agg(struct aggregator *aggregator) +{ + if (aggregator) { + aggregator->is_individual = 0; + aggregator->actor_admin_aggregator_key = 0; + aggregator->actor_oper_aggregator_key = 0; + aggregator->partner_system = null_mac_addr; + aggregator->partner_system_priority = 0; + aggregator->partner_oper_aggregator_key = 0; + aggregator->receive_state = 0; + aggregator->transmit_state = 0; + aggregator->lag_ports = NULL; + aggregator->is_active = 0; + aggregator->num_of_ports = 0; + BOND_PRINT_DBG(("LAG %d was cleared", aggregator->aggregator_identifier)); + } +} + +/** + * ad_initialize_agg - initialize a given aggregator's parameters + * @aggregator: the aggregator we're looking at + * + */ +static void ad_initialize_agg(struct 
aggregator *aggregator) +{ + if (aggregator) { + ad_clear_agg(aggregator); + + aggregator->aggregator_mac_address = null_mac_addr; + aggregator->aggregator_identifier = 0; + aggregator->slave = NULL; + } +} + +/** + * ad_initialize_port - initialize a given port's parameters + * @port: the port we're looking at + * @lacp_fast: boolean. whether fast periodic should be used + * + */ +static void ad_initialize_port(struct port *port, int lacp_fast) +{ + if (port) { + port->actor_port_number = 1; + port->actor_port_priority = 0xff; + port->actor_system = null_mac_addr; + port->actor_system_priority = 0xffff; + port->actor_port_aggregator_identifier = 0; + port->ntt = 0; + port->actor_admin_port_key = 1; + port->actor_oper_port_key = 1; + port->actor_admin_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; + port->actor_oper_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; + + if (lacp_fast) { + port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; + } + + port->partner_admin_system = null_mac_addr; + port->partner_oper_system = null_mac_addr; + port->partner_admin_system_priority = 0xffff; + port->partner_oper_system_priority = 0xffff; + port->partner_admin_key = 1; + port->partner_oper_key = 1; + port->partner_admin_port_number = 1; + port->partner_oper_port_number = 1; + port->partner_admin_port_priority = 0xff; + port->partner_oper_port_priority = 0xff; + port->partner_admin_port_state = 1; + port->partner_oper_port_state = 1; + port->is_enabled = 1; + // ****** private parameters ****** + port->sm_vars = 0x3; + port->sm_rx_state = 0; + port->sm_rx_timer_counter = 0; + port->sm_periodic_state = 0; + port->sm_periodic_timer_counter = 0; + port->sm_mux_state = 0; + port->sm_mux_timer_counter = 0; + port->sm_tx_state = 0; + port->sm_tx_timer_counter = 0; + port->slave = NULL; + port->aggregator = NULL; + port->next_port_in_aggregator = NULL; + port->transaction_id = 0; + + ad_initialize_lacpdu(&(port->lacpdu)); + } +} + +/** + * 
ad_enable_collecting_distributing - enable a port's transmit/receive + * @port: the port we're looking at + * + * Enable @port if it's in an active aggregator + */ +static void ad_enable_collecting_distributing(struct port *port) +{ + if (port->aggregator->is_active) { + BOND_PRINT_DBG(("Enabling port %d(LAG %d)", port->actor_port_number, port->aggregator->aggregator_identifier)); + __enable_port(port); + } +} + +/** + * ad_disable_collecting_distributing - disable a port's transmit/receive + * @port: the port we're looking at + * + */ +static void ad_disable_collecting_distributing(struct port *port) +{ + if (port->aggregator && MAC_ADDRESS_COMPARE(&(port->aggregator->partner_system), &(null_mac_addr))) { + BOND_PRINT_DBG(("Disabling port %d(LAG %d)", port->actor_port_number, port->aggregator->aggregator_identifier)); + __disable_port(port); + } +} + +#if 0 +/** + * ad_marker_info_send - send a marker information frame + * @port: the port we're looking at + * + * This function does nothing since we decided not to implement send and handle + * response for marker PDU's, in this stage, but only to respond to marker + * information. 
+ */ +static void ad_marker_info_send(struct port *port) +{ + struct marker marker; + u16 index; + + // fill the marker PDU with the appropriate values + marker.subtype = 0x02; + marker.version_number = 0x01; + marker.tlv_type = AD_MARKER_INFORMATION_SUBTYPE; + marker.marker_length = 0x16; + // convert requester_port to Big Endian + marker.requester_port = (((port->actor_port_number & 0xFF) << 8) |((u16)(port->actor_port_number & 0xFF00) >> 8)); + marker.requester_system = port->actor_system; + // convert requester_port(u32) to Big Endian + marker.requester_transaction_id = (((++port->transaction_id & 0xFF) << 24) |((port->transaction_id & 0xFF00) << 8) |((port->transaction_id & 0xFF0000) >> 8) |((port->transaction_id & 0xFF000000) >> 24)); + marker.pad = 0; + marker.tlv_type_terminator = 0x00; + marker.terminator_length = 0x00; + for (index=0; index<90; index++) { + marker.reserved_90[index]=0; + } + + // send the marker information + if (ad_marker_send(port, &marker) >= 0) { + BOND_PRINT_DBG(("Sent Marker Information on port %d", port->actor_port_number)); + } +} +#endif + +/** + * ad_marker_info_received - handle receive of a Marker information frame + * @marker_info: Marker info received + * @port: the port we're looking at + * + */ +static void ad_marker_info_received(struct marker *marker_info,struct port *port) +{ + struct marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; + memcpy(&marker, marker_info, sizeof(struct marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response + + if (ad_marker_send(port, &marker) >= 0) { + BOND_PRINT_DBG(("Sent Marker Response on port %d", port->actor_port_number)); + } +} + +/** + * ad_marker_response_received - handle receive of a marker response frame + * @marker: marker PDU received + * @port: the port we're looking at + * + * This function does nothing since we decided not to implement send and 
handle + * response for marker PDU's, in this stage, but only to respond to marker + * information. + */ +static void ad_marker_response_received(struct marker *marker, struct port *port) +{ + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler + // DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW +} + +/** + * ad_initialize_lacpdu - initialize a given lacpdu structure + * @lacpdu: lacpdu structure to initialize + * + */ +static void ad_initialize_lacpdu(struct lacpdu *lacpdu) +{ + u16 index; + + // initialize lacpdu data + lacpdu->subtype = 0x01; + lacpdu->version_number = 0x01; + lacpdu->tlv_type_actor_info = 0x01; + lacpdu->actor_information_length = 0x14; + // lacpdu->actor_system_priority updated on send + // lacpdu->actor_system updated on send + // lacpdu->actor_key updated on send + // lacpdu->actor_port_priority updated on send + // lacpdu->actor_port updated on send + // lacpdu->actor_state updated on send + lacpdu->tlv_type_partner_info = 0x02; + lacpdu->partner_information_length = 0x14; + for (index=0; index<=2; index++) { + lacpdu->reserved_3_1[index]=0; + } + // lacpdu->partner_system_priority updated on send + // lacpdu->partner_system updated on send + // lacpdu->partner_key updated on send + // lacpdu->partner_port_priority updated on send + // lacpdu->partner_port updated on send + // lacpdu->partner_state updated on send + for (index=0; index<=2; index++) { + lacpdu->reserved_3_2[index]=0; + } + lacpdu->tlv_type_collector_info = 0x03; + lacpdu->collector_information_length= 0x10; + lacpdu->collector_max_delay = AD_COLLECTOR_MAX_DELAY; + for (index=0; index<=11; index++) { + lacpdu->reserved_12[index]=0; + } + lacpdu->tlv_type_terminator = 0x00; + lacpdu->terminator_length = 0; + for (index=0; index<=49; index++) { + lacpdu->reserved_50[index]=0; + } +} + +////////////////////////////////////////////////////////////////////////////////////// +// ================= AD exported functions to the 
main bonding code ================== +////////////////////////////////////////////////////////////////////////////////////// + +// Check aggregators status in team every T seconds +#define AD_AGGREGATOR_SELECTION_TIMER 8 + +static u16 aggregator_identifier; + +/** + * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures + * @bond: bonding struct to work on + * @tick_resolution: tick duration (millisecond resolution) + * @lacp_fast: boolean. whether fast periodic should be used + * + * Can be called only after the mac address of the bond is set. + */ +void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast) +{ + // check that the bond is not initialized yet + if (MAC_ADDRESS_COMPARE(&(BOND_AD_INFO(bond).system.sys_mac_addr), &(bond->device->dev_addr))) { + + aggregator_identifier = 0; + + BOND_AD_INFO(bond).lacp_fast = lacp_fast; + BOND_AD_INFO(bond).system.sys_priority = 0xFFFF; + BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->device->dev_addr); + + // initialize how many times this module is called in one second(should be about every 100ms) + ad_ticks_per_sec = tick_resolution; + + // initialize the aggregator selection timer(to activate an aggregation selection after initialize) + BOND_AD_INFO(bond).agg_select_timer = (AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); + BOND_AD_INFO(bond).agg_select_mode = AD_BANDWIDTH; + } +} + +/** + * bond_3ad_bind_slave - initialize a slave's port + * @slave: slave struct to work on + * + * Returns: 0 on success + * < 0 on error + */ +int bond_3ad_bind_slave(struct slave *slave) +{ + struct bonding *bond = bond_get_bond_by_slave(slave); + struct port *port; + struct aggregator *aggregator; + + if (bond == NULL) { + printk(KERN_CRIT "The slave %s is not attached to its bond\n", slave->dev->name); + return -1; + } + + //check that the slave has not been intialized yet. 
+ if (SLAVE_AD_INFO(slave).port.slave != slave) { + + // port initialization + port = &(SLAVE_AD_INFO(slave).port); + + ad_initialize_port(port, BOND_AD_INFO(bond).lacp_fast); + + port->slave = slave; + port->actor_port_number = SLAVE_AD_INFO(slave).id; + // key is determined according to the link speed, duplex and user key(which is yet not supported) + // ------------------------------------------------------------ + // Port key : | User key | Speed |Duplex| + // ------------------------------------------------------------ + // 16 6 1 0 + port->actor_admin_port_key = 0; // initialize this parameter + port->actor_admin_port_key |= __get_duplex(port); + port->actor_admin_port_key |= (__get_link_speed(port) << 1); + port->actor_oper_port_key = port->actor_admin_port_key; + // if the port is not full duplex, then the port should be not lacp Enabled + if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)) { + port->sm_vars &= ~AD_PORT_LACP_ENABLED; + } + // actor system is the bond's system + port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; + // tx timer(to verify that no more than MAX_TX_IN_SECOND lacpdu's are sent in one second) + port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; + port->aggregator = NULL; + port->next_port_in_aggregator = NULL; + + __disable_port(port); + __initialize_port_locks(port); + + + // aggregator initialization + aggregator = &(SLAVE_AD_INFO(slave).aggregator); + + ad_initialize_agg(aggregator); + + aggregator->aggregator_mac_address = *((struct mac_addr *)bond->device->dev_addr); + aggregator->aggregator_identifier = (++aggregator_identifier); + aggregator->slave = slave; + aggregator->is_active = 0; + aggregator->num_of_ports = 0; + } + + return 0; +} + +/** + * bond_3ad_unbind_slave - deinitialize a slave's port + * @slave: slave struct to work on + * + * Search for the aggregator that is related to this port, remove the + * aggregator and assign another aggregator for other port related to it + * (if any), 
and remove the port. + */ +void bond_3ad_unbind_slave(struct slave *slave) +{ + struct port *port, *prev_port, *temp_port; + struct aggregator *aggregator, *new_aggregator, *temp_aggregator; + int select_new_active_agg = 0; + + // find the aggregator related to this slave + aggregator = &(SLAVE_AD_INFO(slave).aggregator); + + // find the port related to this slave + port = &(SLAVE_AD_INFO(slave).port); + + // if slave is null, the whole port is not initialized + if (!port->slave) { + printk(KERN_WARNING "bonding: Trying to unbind an uninitialized port on %s\n", slave->dev->name); + return; + } + + // disable the port + ad_disable_collecting_distributing(port); + + // deinitialize port's locks if necessary(os-specific) + __deinitialize_port_locks(port); + + BOND_PRINT_DBG(("Unbinding Link Aggregation Group %d", aggregator->aggregator_identifier)); + // check if this aggregator is occupied + if (aggregator->lag_ports) { + // check if there are other ports related to this aggregator except + // the port related to this slave(thats ensure us that there is a + // reason to search for new aggregator, and that we will find one + if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { + // find new aggregator for the related port(s) + new_aggregator = __get_first_agg(port); + for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) { + // if the new aggregator is empty, or it connected to to our port only + if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) { + break; + } + } + // if new aggregator found, copy the aggregator's parameters + // and connect the related lag_ports to the new aggregator + if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { + BOND_PRINT_DBG(("Some port(s) related to LAG %d - replaceing with LAG %d", 
aggregator->aggregator_identifier, new_aggregator->aggregator_identifier)); + + if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) { + printk(KERN_INFO "bonding: Removing an active aggregator\n"); + // select new active aggregator + select_new_active_agg = 1; + } + + new_aggregator->is_individual = aggregator->is_individual; + new_aggregator->actor_admin_aggregator_key = aggregator->actor_admin_aggregator_key; + new_aggregator->actor_oper_aggregator_key = aggregator->actor_oper_aggregator_key; + new_aggregator->partner_system = aggregator->partner_system; + new_aggregator->partner_system_priority = aggregator->partner_system_priority; + new_aggregator->partner_oper_aggregator_key = aggregator->partner_oper_aggregator_key; + new_aggregator->receive_state = aggregator->receive_state; + new_aggregator->transmit_state = aggregator->transmit_state; + new_aggregator->lag_ports = aggregator->lag_ports; + new_aggregator->is_active = aggregator->is_active; + new_aggregator->num_of_ports = aggregator->num_of_ports; + + // update the information that is written on the ports about the aggregator + for (temp_port=aggregator->lag_ports; temp_port; temp_port=temp_port->next_port_in_aggregator) { + temp_port->aggregator=new_aggregator; + temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; + } + + // clear the aggregator + ad_clear_agg(aggregator); + + if (select_new_active_agg) { + ad_agg_selection_logic(__get_first_agg(port)); + } + } else { + printk(KERN_WARNING "bonding: Warning: unbinding aggregator, " + "and could not find a new aggregator for its ports\n"); + } + } else { // in case that the only port related to this aggregator is the one we want to remove + select_new_active_agg = aggregator->is_active; + // clear the aggregator + ad_clear_agg(aggregator); + if (select_new_active_agg) { + printk(KERN_INFO "Removing an active aggregator\n"); + // select new active aggregator + ad_agg_selection_logic(__get_first_agg(port)); 
+ } + } + } + + BOND_PRINT_DBG(("Unbinding port %d", port->actor_port_number)); + // find the aggregator that this port is connected to + temp_aggregator = __get_first_agg(port); + for (; temp_aggregator; temp_aggregator = __get_next_agg(temp_aggregator)) { + prev_port = NULL; + // search the port in the aggregator's related ports + for (temp_port=temp_aggregator->lag_ports; temp_port; prev_port=temp_port, temp_port=temp_port->next_port_in_aggregator) { + if (temp_port == port) { // the aggregator found - detach the port from this aggregator + if (prev_port) { + prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator; + } else { + temp_aggregator->lag_ports = temp_port->next_port_in_aggregator; + } + temp_aggregator->num_of_ports--; + if (temp_aggregator->num_of_ports==0) { + select_new_active_agg = temp_aggregator->is_active; + // clear the aggregator + ad_clear_agg(temp_aggregator); + if (select_new_active_agg) { + printk(KERN_INFO "Removing an active aggregator\n"); + // select new active aggregator + ad_agg_selection_logic(__get_first_agg(port)); + } + } + break; + } + } + } + port->slave=NULL; +} + +/** + * bond_3ad_state_machine_handler - handle state machines timeout + * @bond: bonding struct to work on + * + * The state machine handling concept in this module is to check every tick + * which state machine should operate any function. The execution order is + * round robin, so when we have an interaction between state machines, the + * reply of one to each other might be delayed until next tick. + * + * This function also complete the initialization when the agg_select_timer + * times out, and it selects an aggregator for the ports that are yet not + * related to any aggregator, and selects the active aggregator for a bond. 
+ */ +void bond_3ad_state_machine_handler(struct bonding *bond) +{ + struct port *port; + struct aggregator *aggregator; + + read_lock(&bond->lock); + + //check if there are any slaves + if (bond->next == (struct slave *)bond) { + goto end; + } + + if ((bond->device->flags & IFF_UP) != IFF_UP) { + goto end; + } + + // check if agg_select_timer timer after initialize is timed out + if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) { + // select the active aggregator for the bond + if ((port = __get_first_port(bond))) { + if (!port->slave) { + printk(KERN_WARNING "bonding: Warning: bond's first port is uninitialized\n"); + goto end; + } + + aggregator = __get_first_agg(port); + ad_agg_selection_logic(aggregator); + } + } + + // for each port run the state machines + for (port = __get_first_port(bond); port; port = __get_next_port(port)) { + if (!port->slave) { + printk(KERN_WARNING "bonding: Warning: Found an uninitialized port\n"); + goto end; + } + + ad_rx_machine(NULL, port); + ad_periodic_machine(port); + ad_port_selection_logic(port); + ad_mux_machine(port); + ad_tx_machine(port); + + // turn off the BEGIN bit, since we already handled it + if (port->sm_vars & AD_PORT_BEGIN) { + port->sm_vars &= ~AD_PORT_BEGIN; + } + } + +end: + read_unlock(&bond->lock); + + + if ((bond->device->flags & IFF_UP) == IFF_UP) { + /* re-arm the timer */ + mod_timer(&(BOND_AD_INFO(bond).ad_timer), jiffies + (AD_TIMER_INTERVAL * HZ / 1000)); + } +} + +/** + * bond_3ad_rx_indication - handle a received frame + * @lacpdu: received lacpdu + * @slave: slave struct to work on + * @length: length of the data received + * + * It is assumed that frames that were sent on this NIC don't returned as new + * received frames (loopback). Since only the payload is given to this + * function, it check for loopback. 
+ */ +void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length) +{ + struct port *port; + + if (length >= sizeof(struct lacpdu)) { + + port = &(SLAVE_AD_INFO(slave).port); + + if (!port->slave) { + printk(KERN_WARNING "bonding: Warning: port of slave %s is uninitialized\n", slave->dev->name); + return; + } + + switch (lacpdu->subtype) { + case AD_TYPE_LACPDU: + __ntohs_lacpdu(lacpdu); + BOND_PRINT_DBG(("Received LACPDU on port %d", port->actor_port_number)); + ad_rx_machine(lacpdu, port); + break; + + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. + + switch (((struct marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + BOND_PRINT_DBG(("Received Marker Information on port %d", port->actor_port_number)); + ad_marker_info_received((struct marker *)lacpdu, port); + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + BOND_PRINT_DBG(("Received Marker Response on port %d", port->actor_port_number)); + ad_marker_response_received((struct marker *)lacpdu, port); + break; + + default: + BOND_PRINT_DBG(("Received an unknown Marker subtype on slot %d", port->actor_port_number)); + } + } + } +} + +/** + * bond_3ad_adapter_speed_changed - handle a slave's speed change indication + * @slave: slave struct to work on + * + * Handle reselection of aggregator (if needed) for this port. 
+ */ +void bond_3ad_adapter_speed_changed(struct slave *slave) +{ + struct port *port; + + port = &(SLAVE_AD_INFO(slave).port); + + // if slave is null, the whole port is not initialized + if (!port->slave) { + printk(KERN_WARNING "bonding: Warning: speed changed for uninitialized port on %s\n", + slave->dev->name); + return; + } + + port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS; + port->actor_oper_port_key=port->actor_admin_port_key |= (__get_link_speed(port) << 1); + BOND_PRINT_DBG(("Port %d changed speed", port->actor_port_number)); + // there is no need to reselect a new aggregator, just signal the + // state machines to reinitialize + port->sm_vars |= AD_PORT_BEGIN; +} + +/** + * bond_3ad_adapter_duplex_changed - handle a slave's duplex change indication + * @slave: slave struct to work on + * + * Handle reselection of aggregator (if needed) for this port. + */ +void bond_3ad_adapter_duplex_changed(struct slave *slave) +{ + struct port *port; + + port=&(SLAVE_AD_INFO(slave).port); + + // if slave is null, the whole port is not initialized + if (!port->slave) { + printk(KERN_WARNING "bonding: Warning: duplex changed for uninitialized port on %s\n", + slave->dev->name); + return; + } + + port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; + port->actor_oper_port_key=port->actor_admin_port_key |= __get_duplex(port); + BOND_PRINT_DBG(("Port %d changed duplex", port->actor_port_number)); + // there is no need to reselect a new aggregator, just signal the + // state machines to reinitialize + port->sm_vars |= AD_PORT_BEGIN; +} + +/** + * bond_3ad_handle_link_change - handle a slave's link status change indication + * @slave: slave struct to work on + * @status: whether the link is now up or down + * + * Handle reselection of aggregator (if needed) for this port. 
+ */ +void bond_3ad_handle_link_change(struct slave *slave, char link) +{ + struct port *port; + + port = &(SLAVE_AD_INFO(slave).port); + + // if slave is null, the whole port is not initialized + if (!port->slave) { +#ifdef BONDING_DEBUG + printk(KERN_WARNING "bonding: Warning: link status changed for uninitialized port on %s\n", + slave->dev->name); +#endif + return; + } + + // on link down we are zeroing duplex and speed since some of the adaptors(ce1000.lan) report full duplex/speed instead of N/A(duplex) / 0(speed) + // on link up we are forcing recheck on the duplex and speed since some of he adaptors(ce1000.lan) report + if (link == BOND_LINK_UP) { + port->is_enabled = 1; + port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; + port->actor_oper_port_key=port->actor_admin_port_key |= __get_duplex(port); + port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS; + port->actor_oper_port_key=port->actor_admin_port_key |= (__get_link_speed(port) << 1); + } else { + /* link has failed */ + port->is_enabled = 0; + port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; + port->actor_oper_port_key= (port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS); + } + //BOND_PRINT_DBG(("Port %d changed link status to %s", port->actor_port_number, ((link == BOND_LINK_UP)?"UP":"DOWN"))); + // there is no need to reselect a new aggregator, just signal the + // state machines to reinitialize + port->sm_vars |= AD_PORT_BEGIN; +} + +/** + * bond_3ad_get_active_agg_info - get information of the active aggregator + * @bond: bonding struct to work on + * @ad_info: ad_info struct to fill with the bond's info + * + * Returns: 0 on success + * < 0 on error + */ +int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) +{ + struct aggregator *aggregator = NULL; + struct port *port; + + for (port = __get_first_port(bond); port; port = __get_next_port(port)) { + if (port->aggregator && port->aggregator->is_active) { + aggregator = port->aggregator; + break; + } + } + + if (aggregator) 
{ + ad_info->aggregator_id = aggregator->aggregator_identifier; + ad_info->ports = aggregator->num_of_ports; + ad_info->actor_key = aggregator->actor_oper_aggregator_key; + ad_info->partner_key = aggregator->partner_oper_aggregator_key; + memcpy(ad_info->partner_system, aggregator->partner_system.mac_addr_value, ETH_ALEN); + return 0; + } + + return -1; +} + +int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) +{ + slave_t *slave, *start_at; + struct bonding *bond = (struct bonding *) dev->priv; + struct ethhdr *data = (struct ethhdr *)skb->data; + int slave_agg_no; + int slaves_in_agg; + int agg_id; + struct ad_info ad_info; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + if (bond == NULL) { + printk(KERN_CRIT "bonding: Error: bond is NULL on device %s\n", dev->name); + dev_kfree_skb(skb); + return 0; + } + + read_lock(&bond->lock); + slave = bond->prev; + + /* check if bond is empty */ + if ((slave == (struct slave *) bond) || (bond->slave_cnt == 0)) { + printk(KERN_DEBUG "ERROR: bond is empty\n"); + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + if (bond_3ad_get_active_agg_info(bond, &ad_info)) { + printk(KERN_DEBUG "ERROR: bond_3ad_get_active_agg_info failed\n"); + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + slaves_in_agg = ad_info.ports; + agg_id = ad_info.aggregator_id; + + if (slaves_in_agg == 0) { + /*the aggregator is empty*/ + printk(KERN_DEBUG "ERROR: active aggregator is empty\n"); + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + /* we're at the root, get the first slave */ + if ((slave == NULL) || (slave->dev == NULL)) { + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + slave_agg_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % slaves_in_agg; + while (slave != (slave_t *)bond) { + struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; + + if (agg && 
(agg->aggregator_identifier == agg_id)) { + slave_agg_no--; + if (slave_agg_no < 0) { + break; + } + } + + slave = slave->prev; + if (slave == NULL) { + printk(KERN_ERR "bonding: Error: slave is NULL\n"); + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + } + + if (slave == (slave_t *)bond) { + printk(KERN_ERR "bonding: Error: Couldn't find a slave to tx on for aggregator ID %d\n", agg_id); + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + start_at = slave; + + do { + int slave_agg_id = 0; + struct aggregator *agg; + + if (slave == NULL) { + printk(KERN_ERR "bonding: Error: slave is NULL\n"); + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + agg = SLAVE_AD_INFO(slave).port.aggregator; + + if (agg) { + slave_agg_id = agg->aggregator_identifier; + } + + if (SLAVE_IS_OK(slave) && + agg && (slave_agg_id == agg_id)) { + skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); + read_unlock(&bond->lock); + return 0; + } + } while ((slave = slave->next) != start_at); + + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; +} + +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype) +{ + struct bonding *bond = (struct bonding *)dev->priv; + struct slave *slave = NULL; + int ret = NET_RX_DROP; + + if (!(dev->flags & IFF_MASTER)) { + goto out; + } + + read_lock(&bond->lock); + slave = bond_get_slave_by_dev((struct bonding *)dev->priv, + skb->real_dev); + if (slave == NULL) { + goto out_unlock; + } + + bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); + + ret = NET_RX_SUCCESS; + +out_unlock: + read_unlock(&bond->lock); +out: + dev_kfree_skb(skb); + + return ret; +} + diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h new file mode 100644 index 000000000000..58dde6673193 --- /dev/null +++ b/drivers/net/bonding/bond_3ad.h @@ -0,0 +1,298 @@ +/* + * Copyright(c) 1999 - 2003 
Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * + * Changes: + * + * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Amir Noam <amir.noam at intel dot com> + * - Added support for lacp_rate module param. + * + * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> + * - Renamed bond_3ad_link_status_changed() to + * bond_3ad_handle_link_change() for compatibility with TLB. 
+ */ + +#ifndef __BOND_3AD_H__ +#define __BOND_3AD_H__ + +#include <asm/byteorder.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> + +// General definitions +#define BOND_ETH_P_LACPDU 0x8809 +#define PKT_TYPE_LACPDU __constant_htons(BOND_ETH_P_LACPDU) +#define AD_TIMER_INTERVAL 100 /*msec*/ + +#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02} +#define AD_MULTICAST_LACPDU_ADDR {MULTICAST_LACPDU_ADDR} + +#define AD_LACP_SLOW 0 +#define AD_LACP_FAST 1 + +typedef struct mac_addr { + u8 mac_addr_value[ETH_ALEN]; +} mac_addr_t; + +typedef enum { + AD_BANDWIDTH = 0, + AD_COUNT +} agg_selection_t; + +// rx machine states(43.4.11 in the 802.3ad standard) +typedef enum { + AD_RX_DUMMY, + AD_RX_INITIALIZE, // rx Machine + AD_RX_PORT_DISABLED, // rx Machine + AD_RX_LACP_DISABLED, // rx Machine + AD_RX_EXPIRED, // rx Machine + AD_RX_DEFAULTED, // rx Machine + AD_RX_CURRENT // rx Machine +} rx_states_t; + +// periodic machine states(43.4.12 in the 802.3ad standard) +typedef enum { + AD_PERIODIC_DUMMY, + AD_NO_PERIODIC, // periodic machine + AD_FAST_PERIODIC, // periodic machine + AD_SLOW_PERIODIC, // periodic machine + AD_PERIODIC_TX // periodic machine +} periodic_states_t; + +// mux machine states(43.4.13 in the 802.3ad standard) +typedef enum { + AD_MUX_DUMMY, + AD_MUX_DETACHED, // mux machine + AD_MUX_WAITING, // mux machine + AD_MUX_ATTACHED, // mux machine + AD_MUX_COLLECTING_DISTRIBUTING // mux machine +} mux_states_t; + +// tx machine states(43.4.15 in the 802.3ad standard) +typedef enum { + AD_TX_DUMMY, + AD_TRANSMIT // tx Machine +} tx_states_t; + +// rx indication types +typedef enum { + AD_TYPE_LACPDU = 1, // type lacpdu + AD_TYPE_MARKER // type marker +} pdu_type_t; + +// rx marker indication types +typedef enum { + AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype + AD_MARKER_RESPONSE_SUBTYPE // marker response subtype +} marker_subtype_t; + +// timers types(43.4.9 in the 802.3ad standard) +typedef enum { + 
AD_CURRENT_WHILE_TIMER, + AD_ACTOR_CHURN_TIMER, + AD_PERIODIC_TIMER, + AD_PARTNER_CHURN_TIMER, + AD_WAIT_WHILE_TIMER +} ad_timers_t; + +#pragma pack(1) + +typedef struct ad_header { + struct mac_addr destination_address; + struct mac_addr source_address; + u16 length_type; +} ad_header_t; + +// Link Aggregation Control Protocol(LACP) data unit structure(43.4.2.2 in the 802.3ad standard) +typedef struct lacpdu { + u8 subtype; // = LACP(= 0x01) + u8 version_number; + u8 tlv_type_actor_info; // = actor information(type/length/value) + u8 actor_information_length; // = 20 + u16 actor_system_priority; + struct mac_addr actor_system; + u16 actor_key; + u16 actor_port_priority; + u16 actor_port; + u8 actor_state; + u8 reserved_3_1[3]; // = 0 + u8 tlv_type_partner_info; // = partner information + u8 partner_information_length; // = 20 + u16 partner_system_priority; + struct mac_addr partner_system; + u16 partner_key; + u16 partner_port_priority; + u16 partner_port; + u8 partner_state; + u8 reserved_3_2[3]; // = 0 + u8 tlv_type_collector_info; // = collector information + u8 collector_information_length; // = 16 + u16 collector_max_delay; + u8 reserved_12[12]; + u8 tlv_type_terminator; // = terminator + u8 terminator_length; // = 0 + u8 reserved_50[50]; // = 0 +} lacpdu_t; + +typedef struct lacpdu_header { + struct ad_header ad_header; + struct lacpdu lacpdu; +} lacpdu_header_t; + +// Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +typedef struct marker { + u8 subtype; // = 0x02 (marker PDU) + u8 version_number; // = 0x01 + u8 tlv_type; // = 0x01 (marker information) + // = 0x02 (marker response information) + u8 marker_length; // = 0x16 + u16 requester_port; // The number assigned to the port by the requester + struct mac_addr requester_system; // The requester’s system id + u32 requester_transaction_id; // The transaction id allocated by the requester, + u16 pad; // = 0 + u8 tlv_type_terminator; // = 0x00 + u8 terminator_length; // = 0x00 + u8 
reserved_90[90]; // = 0 +} marker_t; + +typedef struct marker_header { + struct ad_header ad_header; + struct marker marker; +} marker_header_t; + +#pragma pack() + +struct slave; +struct bonding; +struct ad_info; +struct port; + +#ifdef __ia64__ +#pragma pack(8) +#endif + +// aggregator structure(43.4.5 in the 802.3ad standard) +typedef struct aggregator { + struct mac_addr aggregator_mac_address; + u16 aggregator_identifier; + u16 is_individual; // BOOLEAN + u16 actor_admin_aggregator_key; + u16 actor_oper_aggregator_key; + struct mac_addr partner_system; + u16 partner_system_priority; + u16 partner_oper_aggregator_key; + u16 receive_state; // BOOLEAN + u16 transmit_state; // BOOLEAN + struct port *lag_ports; + // ****** PRIVATE PARAMETERS ****** + struct slave *slave; // pointer to the bond slave that this aggregator belongs to + u16 is_active; // BOOLEAN. Indicates if this aggregator is active + u16 num_of_ports; +} aggregator_t; + +// port structure(43.4.6 in the 802.3ad standard) +typedef struct port { + u16 actor_port_number; + u16 actor_port_priority; + struct mac_addr actor_system; // This parameter is added here although it is not specified in the standard, just for simplification + u16 actor_system_priority; // This parameter is added here although it is not specified in the standard, just for simplification + u16 actor_port_aggregator_identifier; + u16 ntt; // BOOLEAN + u16 actor_admin_port_key; + u16 actor_oper_port_key; + u8 actor_admin_port_state; + u8 actor_oper_port_state; + struct mac_addr partner_admin_system; + struct mac_addr partner_oper_system; + u16 partner_admin_system_priority; + u16 partner_oper_system_priority; + u16 partner_admin_key; + u16 partner_oper_key; + u16 partner_admin_port_number; + u16 partner_oper_port_number; + u16 partner_admin_port_priority; + u16 partner_oper_port_priority; + u8 partner_admin_port_state; + u8 partner_oper_port_state; + u16 is_enabled; // BOOLEAN + // ****** PRIVATE PARAMETERS ****** + u16 sm_vars; // all 
state machines variables for this port + rx_states_t sm_rx_state; // state machine rx state + u16 sm_rx_timer_counter; // state machine rx timer counter + periodic_states_t sm_periodic_state;// state machine periodic state + u16 sm_periodic_timer_counter; // state machine periodic timer counter + mux_states_t sm_mux_state; // state machine mux state + u16 sm_mux_timer_counter; // state machine mux timer counter + tx_states_t sm_tx_state; // state machine tx state + u16 sm_tx_timer_counter; // state machine tx timer counter(allways on - enter to transmit state 3 time per second) + struct slave *slave; // pointer to the bond slave that this port belongs to + struct aggregator *aggregator; // pointer to an aggregator that this port related to + struct port *next_port_in_aggregator; // Next port on the linked list of the parent aggregator + u32 transaction_id; // continuous number for identification of Marker PDU's; + struct lacpdu lacpdu; // the lacpdu that will be sent for this port +} port_t; + +// system structure +typedef struct ad_system { + u16 sys_priority; + struct mac_addr sys_mac_addr; +} ad_system_t; + +#ifdef __ia64__ +#pragma pack() +#endif + +// ================= AD Exported structures to the main bonding code ================== +#define BOND_AD_INFO(bond) ((bond)->ad_info) +#define SLAVE_AD_INFO(slave) ((slave)->ad_info) + +struct ad_bond_info { + ad_system_t system; // 802.3ad system structure + u32 agg_select_timer; // Timer to select aggregator after all adapter's hand shakes + u32 agg_select_mode; // Mode of selection of active aggregator(bandwidth/count) + int lacp_fast; /* whether fast periodic tx should be + * requested + */ + struct timer_list ad_timer; + struct packet_type ad_pkt_type; +}; + +struct ad_slave_info { + struct aggregator aggregator; // 802.3ad aggregator structure + struct port port; // 802.3ad port structure + spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt + u16 id; +}; + +// 
================= AD Exported functions to the main bonding code ================== +void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast); +int bond_3ad_bind_slave(struct slave *slave); +void bond_3ad_unbind_slave(struct slave *slave); +void bond_3ad_state_machine_handler(struct bonding *bond); +void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length); +void bond_3ad_adapter_speed_changed(struct slave *slave); +void bond_3ad_adapter_duplex_changed(struct slave *slave); +void bond_3ad_handle_link_change(struct slave *slave, char link); +int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); +int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype); +#endif //__BOND_3AD_H__ + diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c new file mode 100644 index 000000000000..4e0ec9b6361f --- /dev/null +++ b/drivers/net/bonding/bond_alb.c @@ -0,0 +1,1571 @@ +/* + * Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. 
+ */ + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/pkt_sched.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/if_bonding.h> +#include <net/ipx.h> +#include <net/arp.h> +#include <asm/byteorder.h> +#include "bonding.h" +#include "bond_alb.h" + + +#define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */ +#define BOND_TLB_REBALANCE_INTERVAL 10 /* in seconds, periodic re-balancing + * used for division - never set + * to zero !!! + */ +#define BOND_ALB_LP_INTERVAL 1 /* in seconds periodic send of + * learning packets to the switch + */ + +#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \ + * ALB_TIMER_TICKS_PER_SEC) + +#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \ + * ALB_TIMER_TICKS_PER_SEC) + +#define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table. + * Note that this value MUST NOT be smaller + * because the key hash table BYTE wide ! + */ + + +#define TLB_NULL_INDEX 0xffffffff +#define MAX_LP_RETRY 3 + +/* rlb defs */ +#define RLB_HASH_TABLE_SIZE 256 +#define RLB_NULL_INDEX 0xffffffff +#define RLB_UPDATE_DELAY 2*ALB_TIMER_TICKS_PER_SEC /* 2 seconds */ +#define RLB_ARP_BURST_SIZE 2 +#define RLB_UPDATE_RETRY 3 /* 3-ticks - must be smaller than the rlb + * rebalance interval (5 min). 
+ */ +/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is + * promiscuous after failover + */ +#define RLB_PROMISC_TIMEOUT 10*ALB_TIMER_TICKS_PER_SEC + +#pragma pack(1) +struct learning_pkt { + u8 mac_dst[ETH_ALEN]; + u8 mac_src[ETH_ALEN]; + u16 type; + u8 padding[ETH_ZLEN - (2*ETH_ALEN + 2)]; +}; + +struct arp_pkt { + u16 hw_addr_space; + u16 prot_addr_space; + u8 hw_addr_len; + u8 prot_addr_len; + u16 op_code; + u8 mac_src[ETH_ALEN]; /* sender hardware address */ + u32 ip_src; /* sender IP address */ + u8 mac_dst[ETH_ALEN]; /* target hardware address */ + u32 ip_dst; /* target IP address */ +}; +#pragma pack() + +/* Forward declaration */ +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); + +static inline u8 +_simple_hash(u8 *hash_start, int hash_size) +{ + int i; + u8 hash = 0; + + for (i=0; i<hash_size; i++) { + hash ^= hash_start[i]; + } + + return hash; +} + +/*********************** tlb specific functions ***************************/ + +static inline void +_lock_tx_hashtbl(struct bonding *bond) +{ + spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); +} + +static inline void +_unlock_tx_hashtbl(struct bonding *bond) +{ + spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); +} + +/* Caller must hold tx_hashtbl lock */ +static inline void +tlb_init_table_entry(struct bonding *bond, u8 index, u8 save_load) +{ + struct tlb_client_info *entry; + + if (BOND_ALB_INFO(bond).tx_hashtbl == NULL) { + return; + } + + entry = &(BOND_ALB_INFO(bond).tx_hashtbl[index]); + /* at end of cycle, save the load that was transmitted to the client + * during the cycle, and set the tx_bytes counter to 0 for counting + * the load during the next cycle + */ + if (save_load) { + entry->load_history = 1 + entry->tx_bytes / + BOND_TLB_REBALANCE_INTERVAL; + entry->tx_bytes = 0; + } + entry->tx_slave = NULL; + entry->next = TLB_NULL_INDEX; + entry->prev = TLB_NULL_INDEX; +} + +static inline void +tlb_init_slave(struct slave *slave) +{ + struct 
tlb_slave_info *slave_info = &(SLAVE_TLB_INFO(slave)); + + slave_info->load = 0; + slave_info->head = TLB_NULL_INDEX; +} + +/* Caller must hold bond lock for read */ +static inline void +tlb_clear_slave(struct bonding *bond, struct slave *slave, u8 save_load) +{ + struct tlb_client_info *tx_hash_table = NULL; + u32 index, next_index; + + /* clear slave from tx_hashtbl */ + _lock_tx_hashtbl(bond); + tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; + + if (tx_hash_table) { + index = SLAVE_TLB_INFO(slave).head; + while (index != TLB_NULL_INDEX) { + next_index = tx_hash_table[index].next; + tlb_init_table_entry(bond, index, save_load); + index = next_index; + } + } + _unlock_tx_hashtbl(bond); + + tlb_init_slave(slave); +} + +/* Must be called before starting the monitor timer */ +static int +tlb_initialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + int i; + size_t size; + +#if(TLB_HASH_TABLE_SIZE != 256) + /* Key to the hash table is byte wide. Check the size! */ + #error Hash Table size is wrong. 
+#endif + + spin_lock_init(&(bond_info->tx_hashtbl_lock)); + + _lock_tx_hashtbl(bond); + if (bond_info->tx_hashtbl != NULL) { + printk (KERN_ERR "%s: TLB hash table is not NULL\n", + bond->device->name); + _unlock_tx_hashtbl(bond); + return -1; + } + + size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); + bond_info->tx_hashtbl = kmalloc(size, GFP_KERNEL); + if (bond_info->tx_hashtbl == NULL) { + printk (KERN_ERR "%s: Failed to allocate TLB hash table\n", + bond->device->name); + _unlock_tx_hashtbl(bond); + return -1; + } + + memset(bond_info->tx_hashtbl, 0, size); + for (i=0; i<TLB_HASH_TABLE_SIZE; i++) { + tlb_init_table_entry(bond, i, 1); + } + _unlock_tx_hashtbl(bond); + + return 0; +} + +/* Must be called only after all slaves have been released */ +static void +tlb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + _lock_tx_hashtbl(bond); + if (bond_info->tx_hashtbl == NULL) { + _unlock_tx_hashtbl(bond); + return; + } + kfree(bond_info->tx_hashtbl); + bond_info->tx_hashtbl = NULL; + _unlock_tx_hashtbl(bond); +} + +/* Caller must hold bond lock for read */ +static struct slave* +tlb_get_least_loaded_slave(struct bonding *bond) +{ + struct slave *slave; + struct slave *least_loaded; + u32 curr_gap, max_gap; + + /* Find the first enabled slave */ + slave = bond_get_first_slave(bond); + while (slave) { + if (SLAVE_IS_OK(slave)) { + break; + } + slave = bond_get_next_slave(bond, slave); + } + + if (!slave) { + return NULL; + } + + least_loaded = slave; + max_gap = (slave->speed * 1000000) - + (SLAVE_TLB_INFO(slave).load * 8); + + /* Find the slave with the largest gap */ + slave = bond_get_next_slave(bond, slave); + while (slave) { + if (SLAVE_IS_OK(slave)) { + curr_gap = (slave->speed * 1000000) - + (SLAVE_TLB_INFO(slave).load * 8); + if (max_gap < curr_gap) { + least_loaded = slave; + max_gap = curr_gap; + } + } + slave = bond_get_next_slave(bond, slave); + } + + return least_loaded; +} + +/* Caller must 
hold bond lock for read */ +struct slave* +tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct tlb_client_info *hash_table = NULL; + struct slave *assigned_slave = NULL; + + _lock_tx_hashtbl(bond); + + hash_table = bond_info->tx_hashtbl; + if (hash_table == NULL) { + printk (KERN_ERR "%s: TLB hash table is NULL\n", + bond->device->name); + _unlock_tx_hashtbl(bond); + return NULL; + } + + assigned_slave = hash_table[hash_index].tx_slave; + if (!assigned_slave) { + assigned_slave = tlb_get_least_loaded_slave(bond); + + if (assigned_slave) { + struct tlb_slave_info *slave_info = + &(SLAVE_TLB_INFO(assigned_slave)); + u32 next_index = slave_info->head; + + hash_table[hash_index].tx_slave = assigned_slave; + hash_table[hash_index].next = next_index; + hash_table[hash_index].prev = TLB_NULL_INDEX; + + if (next_index != TLB_NULL_INDEX) { + hash_table[next_index].prev = hash_index; + } + + slave_info->head = hash_index; + slave_info->load += + hash_table[hash_index].load_history; + } + } + + if (assigned_slave) { + hash_table[hash_index].tx_bytes += skb_len; + } + + _unlock_tx_hashtbl(bond); + + return assigned_slave; +} + +/*********************** rlb specific functions ***************************/ +static inline void +_lock_rx_hashtbl(struct bonding *bond) +{ + spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + +static inline void +_unlock_rx_hashtbl(struct bonding *bond) +{ + spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + +/* when an ARP REPLY is received from a client update its info + * in the rx_hashtbl + */ +static void +rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) +{ + u32 hash_index; + struct rlb_client_info *client_info = NULL; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + hash_index = 
_simple_hash((u8*)&(arp->ip_src), 4); + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if ((client_info->assigned) && + (client_info->ip_src == arp->ip_dst) && + (client_info->ip_dst == arp->ip_src)) { + + /* update the clients MAC address */ + memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); + client_info->ntt = 1; + bond_info->rx_ntt = 1; + } + + _unlock_rx_hashtbl(bond); +} + +static int +rlb_arp_recv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type* ptype) +{ + struct bonding *bond = (struct bonding *)dev->priv; + int ret = NET_RX_DROP; + struct arp_pkt *arp = (struct arp_pkt *)skb->data; + + if (!(dev->flags & IFF_MASTER)) { + goto out; + } + + if (!arp) { + printk(KERN_ERR "Packet has no ARP data\n"); + goto out; + } + + if (skb->len < sizeof(struct arp_pkt)) { + printk(KERN_ERR "Packet is too small to be an ARP\n"); + goto out; + } + + if (arp->op_code == htons(ARPOP_REPLY)) { + /* update rx hash table for this ARP */ + rlb_update_entry_from_arp(bond, arp); + BOND_PRINT_DBG(("Server received an ARP Reply from client")); + } + + ret = NET_RX_SUCCESS; + +out: + dev_kfree_skb(skb); + + return ret; +} + +/* Caller must hold bond lock for read */ +static struct slave* +rlb_next_rx_slave(struct bonding *bond) +{ + struct slave *rx_slave = NULL, *slave = NULL; + unsigned int i = 0; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + slave = bond_info->next_rx_slave; + if (slave == NULL) { + slave = bond->next; + } + + /* this loop uses the circular linked list property of the + * slave's list to go through all slaves + */ + for (i = 0; i < bond->slave_cnt; i++, slave = slave->next) { + + if (SLAVE_IS_OK(slave)) { + if (!rx_slave) { + rx_slave = slave; + } + else if (slave->speed > rx_slave->speed) { + rx_slave = slave; + } + } + } + + if (rx_slave) { + bond_info->next_rx_slave = rx_slave->next; + } + + return rx_slave; +} + +/* teach the switch the mac of a disabled slave + * on the primary for fault tolerance + * + * 
Caller must hold bond->ptrlock for write or bond lock for write + */ +static void +rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) +{ + if (!bond->current_slave) { + return; + } + if (!bond->alb_info.primary_is_promisc) { + bond->alb_info.primary_is_promisc = 1; + dev_set_promiscuity(bond->current_slave->dev, 1); + } + bond->alb_info.rlb_promisc_timeout_counter = 0; + + alb_send_learning_packets(bond->current_slave, addr); +} + +/* slave being removed should not be active at this point + * + * Caller must hold bond lock for read + */ +static void +rlb_clear_slave(struct bonding *bond, struct slave *slave) +{ + struct rlb_client_info *rx_hash_table = NULL; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; + u32 index, next_index; + + /* clear slave from rx_hashtbl */ + _lock_rx_hashtbl(bond); + rx_hash_table = bond_info->rx_hashtbl; + + if (rx_hash_table == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + + index = bond_info->rx_hashtbl_head; + for (; index != RLB_NULL_INDEX; index = next_index) { + next_index = rx_hash_table[index].next; + + if (rx_hash_table[index].slave == slave) { + struct slave *assigned_slave = rlb_next_rx_slave(bond); + + if (assigned_slave) { + rx_hash_table[index].slave = assigned_slave; + if (memcmp(rx_hash_table[index].mac_dst, + mac_bcast, ETH_ALEN)) { + bond_info->rx_hashtbl[index].ntt = 1; + bond_info->rx_ntt = 1; + /* A slave has been removed from the + * table because it is either disabled + * or being released. 
We must retry the + * update to avoid clients from not + * being updated & disconnecting when + * there is stress + */ + bond_info->rlb_update_retry_counter = + RLB_UPDATE_RETRY; + } + } else { /* there is no active slave */ + rx_hash_table[index].slave = NULL; + } + } + } + + _unlock_rx_hashtbl(bond); + + write_lock(&bond->ptrlock); + if (slave != bond->current_slave) { + rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); + } + write_unlock(&bond->ptrlock); +} + +static void +rlb_update_client(struct rlb_client_info *client_info) +{ + int i = 0; + + if (client_info->slave == NULL) { + return; + } + + for (i=0; i<RLB_ARP_BURST_SIZE; i++) { + arp_send(ARPOP_REPLY, ETH_P_ARP, + client_info->ip_dst, + client_info->slave->dev, + client_info->ip_src, + client_info->mac_dst, + client_info->slave->dev->dev_addr, + client_info->mac_dst); + } +} + +/* sends ARP REPLIES that update the clients that need updating */ +static void +rlb_update_rx_clients(struct bonding *bond) +{ + u32 hash_index; + struct rlb_client_info *client_info = NULL; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + if (client_info->ntt) { + rlb_update_client(client_info); + if (bond_info->rlb_update_retry_counter == 0) { + client_info->ntt = 0; + } + } + } + + /* do not update the entries again untill this counter is zero so that + * not to confuse the clients. 
+ */ + bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; + + _unlock_rx_hashtbl(bond); +} + +/* The slave was assigned a new mac address - update the clients */ +static void +rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) +{ + u32 hash_index; + u8 ntt = 0; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; + struct rlb_client_info* client_info = NULL; + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if ((client_info->slave == slave) && + memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { + client_info->ntt = 1; + ntt = 1; + } + } + + // update the team's flag only after the whole iteration + if (ntt) { + bond_info->rx_ntt = 1; + //fasten the change + bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; + } + + _unlock_rx_hashtbl(bond); +} + +/* mark all clients using src_ip to be updated */ +static void +rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip) +{ + u32 hash_index; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; + struct rlb_client_info *client_info = NULL; + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if (!client_info->slave) { + printk(KERN_ERR "Bonding: Error: found a client with no" + " channel in the client's hash table\n"); + continue; + } + /*update all clients using this src_ip, that are not assigned + * to the team's address (current_slave) and have a known + * unicast mac address. 
+ */ + if ((client_info->ip_src == src_ip) && + memcmp(client_info->slave->dev->dev_addr, + bond->device->dev_addr, ETH_ALEN) && + memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { + client_info->ntt = 1; + bond_info->rx_ntt = 1; + } + } + + _unlock_rx_hashtbl(bond); +} + +/* Caller must hold both bond and ptr locks for read */ +struct slave* +rlb_choose_channel(struct bonding *bond, struct arp_pkt *arp) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info = NULL; + u32 hash_index = 0; + struct slave *assigned_slave = NULL; + u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return NULL; + } + + hash_index = _simple_hash((u8 *)&arp->ip_dst, 4); + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if (client_info->assigned == 1) { + if ((client_info->ip_src == arp->ip_src) && + (client_info->ip_dst == arp->ip_dst)) { + /* the entry is already assigned to this client */ + + if (memcmp(arp->mac_dst, mac_bcast, ETH_ALEN)) { + /* update mac address from arp */ + memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); + } + + assigned_slave = client_info->slave; + if (assigned_slave) { + _unlock_rx_hashtbl(bond); + return assigned_slave; + } + } else { + /* the entry is already assigned to some other client, + * move the old client to primary (current_slave) so + * that the new client can be assigned to this entry. + */ + if (bond->current_slave && + client_info->slave != bond->current_slave) { + client_info->slave = bond->current_slave; + rlb_update_client(client_info); + } + } + } + /* assign a new slave */ + assigned_slave = rlb_next_rx_slave(bond); + + if (assigned_slave) { + client_info->ip_src = arp->ip_src; + client_info->ip_dst = arp->ip_dst; + /* arp->mac_dst is broadcast for arp reqeusts. + * will be updated with clients actual unicast mac address + * upon receiving an arp reply. 
+ */ + memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); + client_info->slave = assigned_slave; + + if (memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { + client_info->ntt = 1; + bond->alb_info.rx_ntt = 1; + } + else { + client_info->ntt = 0; + } + + if (!client_info->assigned) { + u32 prev_tbl_head = bond_info->rx_hashtbl_head; + bond_info->rx_hashtbl_head = hash_index; + client_info->next = prev_tbl_head; + if (prev_tbl_head != RLB_NULL_INDEX) { + bond_info->rx_hashtbl[prev_tbl_head].prev = + hash_index; + } + client_info->assigned = 1; + } + } + + _unlock_rx_hashtbl(bond); + + return assigned_slave; +} + +/* chooses (and returns) transmit channel for arp reply + * does not choose channel for other arp types since they are + * sent on the current_slave + */ +static struct slave* +rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) +{ + struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw; + struct slave *tx_slave = NULL; + + if (arp->op_code == __constant_htons(ARPOP_REPLY)) { + /* the arp must be sent on the selected + * rx channel + */ + tx_slave = rlb_choose_channel(bond, arp); + if (tx_slave) { + memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN); + } + BOND_PRINT_DBG(("Server sent ARP Reply packet")); + } else if (arp->op_code == __constant_htons(ARPOP_REQUEST)) { + + /* Create an entry in the rx_hashtbl for this client as a + * place holder. + * When the arp reply is received the entry will be updated + * with the correct unicast address of the client. + */ + rlb_choose_channel(bond, arp); + + /* The ARP relpy packets must be delayed so that + * they can cancel out the influence of the ARP request. + */ + bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; + + /* arp requests are broadcast and are sent on the primary + * the arp request will collapse all clients on the subnet to + * the primary slave. We must register these clients to be + * updated with their assigned mac. 
+ */ + rlb_req_update_subnet_clients(bond, arp->ip_src); + BOND_PRINT_DBG(("Server sent ARP Request packet")); + } + + return tx_slave; +} + +/* Caller must hold bond lock for read */ +static void +rlb_rebalance(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *assigned_slave = NULL; + u32 hash_index; + struct rlb_client_info *client_info = NULL; + u8 ntt = 0; + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + assigned_slave = rlb_next_rx_slave(bond); + if (assigned_slave && (client_info->slave != assigned_slave)){ + client_info->slave = assigned_slave; + client_info->ntt = 1; + ntt = 1; + } + } + + /* update the team's flag only after the whole iteration */ + if (ntt) { + bond_info->rx_ntt = 1; + } + _unlock_rx_hashtbl(bond); +} + +/* Caller must hold rx_hashtbl lock */ +static inline void +rlb_init_table_entry(struct rlb_client_info *entry) +{ + entry->next = RLB_NULL_INDEX; + entry->prev = RLB_NULL_INDEX; + entry->assigned = 0; + entry->ntt = 0; +} + +static int +rlb_initialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type); + int i; + size_t size; + + spin_lock_init(&(bond_info->rx_hashtbl_lock)); + + _lock_rx_hashtbl(bond); + if (bond_info->rx_hashtbl != NULL) { + printk (KERN_ERR "%s: RLB hash table is not NULL\n", + bond->device->name); + _unlock_rx_hashtbl(bond); + return -1; + } + + size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); + bond_info->rx_hashtbl = kmalloc(size, GFP_KERNEL); + if (bond_info->rx_hashtbl == NULL) { + printk (KERN_ERR "%s: Failed to allocate" + " RLB hash table\n", bond->device->name); + _unlock_rx_hashtbl(bond); + return -1; + } + + 
bond_info->rx_hashtbl_head = RLB_NULL_INDEX; + + for (i=0; i<RLB_HASH_TABLE_SIZE; i++) { + rlb_init_table_entry(bond_info->rx_hashtbl + i); + } + _unlock_rx_hashtbl(bond); + + /* register to receive ARPs */ + + /*initialize packet type*/ + pk_type->type = __constant_htons(ETH_P_ARP); + pk_type->dev = bond->device; + pk_type->func = rlb_arp_recv; + pk_type->data = (void*)1; /* understand shared skbs */ + + dev_add_pack(pk_type); + + return 0; +} + +static void +rlb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + dev_remove_pack(&(bond_info->rlb_pkt_type)); + + _lock_rx_hashtbl(bond); + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + kfree(bond_info->rx_hashtbl); + bond_info->rx_hashtbl = NULL; + _unlock_rx_hashtbl(bond); +} + +/*********************** tlb/rlb shared functions *********************/ + +static void +alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +{ + struct sk_buff *skb = NULL; + struct learning_pkt pkt; + char *data = NULL; + int i; + unsigned int size = sizeof(struct learning_pkt); + + memset(&pkt, 0, size); + memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); + memcpy(pkt.mac_src, mac_addr, ETH_ALEN); + pkt.type = __constant_htons(ETH_P_LOOP); + + for (i=0; i < MAX_LP_RETRY; i++) { + skb = NULL; + skb = dev_alloc_skb(size); + if (!skb) { + return; + } + + data = skb_put(skb, size); + memcpy(data, &pkt, size); + skb->mac.raw = data; + skb->nh.raw = data + ETH_HLEN; + skb->protocol = pkt.type; + skb->priority = TC_PRIO_CONTROL; + skb->dev = slave->dev; + dev_queue_xmit(skb); + } + +} + +/* hw is a boolean parameter that determines whether we should try and + * set the hw address of the hw as well as the hw address of the net_device + */ +static int +alb_set_mac_addr(struct slave *slave, u8 addr[], int hw) +{ + struct net_device *dev = NULL; + struct sockaddr s_addr; + + dev = slave->dev; + + if (!hw) { + memcpy(dev->dev_addr, addr, ETH_ALEN); + return 0; + } + 
+ /* for rlb each slave must have a unique hw mac addresses so that */ + /* each slave will receive packets destined to a different mac */ + memcpy(s_addr.sa_data, addr, ETH_ALEN); + s_addr.sa_family = dev->type; + if (dev->set_mac_address(dev, &s_addr)) { + printk(KERN_DEBUG "bonding: Error: alb_set_mac_addr:" + " dev->set_mac_address of dev %s failed!" + " ALB mode requires that the base driver" + " support setting the hw address also when" + " the network device's interface is open\n", + dev->name); + return -EOPNOTSUPP; + } + return 0; +} + +/* Caller must hold bond lock for write or ptrlock for write*/ +static void +alb_swap_mac_addr(struct bonding *bond, + struct slave *slave1, + struct slave *slave2) +{ + u8 tmp_mac_addr[ETH_ALEN]; + struct slave *disabled_slave = NULL; + u8 slaves_state_differ; + + slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2)); + + memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); + alb_set_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled); + alb_set_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled); + + /* fasten the change in the switch */ + if (SLAVE_IS_OK(slave1)) { + alb_send_learning_packets(slave1, slave1->dev->dev_addr); + if (bond->alb_info.rlb_enabled) { + /* inform the clients that the mac address + * has changed + */ + rlb_req_update_slave_clients(bond, slave1); + } + } + else { + disabled_slave = slave1; + } + + if (SLAVE_IS_OK(slave2)) { + alb_send_learning_packets(slave2, slave2->dev->dev_addr); + if (bond->alb_info.rlb_enabled) { + /* inform the clients that the mac address + * has changed + */ + rlb_req_update_slave_clients(bond, slave2); + } + } + else { + disabled_slave = slave2; + } + + if (bond->alb_info.rlb_enabled && slaves_state_differ) { + /* A disabled slave was assigned an active mac addr */ + rlb_teach_disabled_mac_on_primary(bond, + disabled_slave->dev->dev_addr); + } +} + +/** + * alb_change_hw_addr_on_detach + * @bond: bonding we're working on + * @slave: 
the slave that was just detached + * + * We assume that @slave was already detached from the slave list. + * + * If @slave's permanent hw address is different both from its current + * address and from @bond's address, then somewhere in the bond there's + * a slave that has @slave's permanet address as its current address. + * We'll make sure that that slave no longer uses @slave's permanent address. + * + * Caller must hold bond lock + */ +static void +alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) +{ + struct slave *tmp_slave; + int perm_curr_diff; + int perm_bond_diff; + + perm_curr_diff = memcmp(slave->perm_hwaddr, + slave->dev->dev_addr, + ETH_ALEN); + perm_bond_diff = memcmp(slave->perm_hwaddr, + bond->device->dev_addr, + ETH_ALEN); + if (perm_curr_diff && perm_bond_diff) { + tmp_slave = bond_get_first_slave(bond); + while (tmp_slave) { + if (!memcmp(slave->perm_hwaddr, + tmp_slave->dev->dev_addr, + ETH_ALEN)) { + break; + } + tmp_slave = bond_get_next_slave(bond, tmp_slave); + } + + if (tmp_slave) { + alb_swap_mac_addr(bond, slave, tmp_slave); + } + } +} + +/** + * alb_handle_addr_collision_on_attach + * @bond: bonding we're working on + * @slave: the slave that was just attached + * + * checks uniqueness of slave's mac address and handles the case the + * new slave uses the bonds mac address. + * + * If the permanent hw address of @slave is @bond's hw address, we need to + * find a different hw address to give @slave, that isn't in use by any other + * slave in the bond. This address must be, of course, one of the premanent + * addresses of the other slaves. + * + * We go over the slave list, and for each slave there we compare its + * permanent hw address with the current address of all the other slaves. + * If no match was found, then we've found a slave with a permanent address + * that isn't used by any other slave in the bond, so we can assign it to + * @slave. 
+ * + * assumption: this function is called before @slave is attached to the + * bond slave list. + * + * caller must hold the bond lock for write since the mac addresses are compared + * and may be swapped. + */ +static int +alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) +{ + struct slave *tmp_slave1, *tmp_slave2; + + if (bond->slave_cnt == 0) { + /* this is the first slave */ + return 0; + } + + /* if slave's mac address differs from bond's mac address + * check uniqueness of slave's mac address against the other + * slaves in the bond. + */ + if (memcmp(slave->perm_hwaddr, bond->device->dev_addr, ETH_ALEN)) { + tmp_slave1 = bond_get_first_slave(bond); + for (; tmp_slave1; tmp_slave1 = bond_get_next_slave(bond, tmp_slave1)) { + if (!memcmp(tmp_slave1->dev->dev_addr, slave->dev->dev_addr, + ETH_ALEN)) { + break; + } + } + if (tmp_slave1) { + /* a slave was found that is using the mac address + * of the new slave + */ + printk(KERN_ERR "bonding: Warning: the hw address " + "of slave %s is not unique - cannot enslave it!" + , slave->dev->name); + return -EINVAL; + } + return 0; + } + + /* the slave's address is equal to the address of the bond + * search for a spare address in the bond for this slave. 
+ */ + tmp_slave1 = bond_get_first_slave(bond); + for (; tmp_slave1; tmp_slave1 = bond_get_next_slave(bond, tmp_slave1)) { + + tmp_slave2 = bond_get_first_slave(bond); + for (; tmp_slave2; tmp_slave2 = bond_get_next_slave(bond, tmp_slave2)) { + + if (!memcmp(tmp_slave1->perm_hwaddr, + tmp_slave2->dev->dev_addr, + ETH_ALEN)) { + + break; + } + } + + if (!tmp_slave2) { + /* no slave has tmp_slave1's perm addr + * as its curr addr + */ + break; + } + } + + if (tmp_slave1) { + alb_set_mac_addr(slave, tmp_slave1->perm_hwaddr, + bond->alb_info.rlb_enabled); + + printk(KERN_WARNING "bonding: Warning: the hw address " + "of slave %s is in use by the bond; " + "giving it the hw address of %s\n", + slave->dev->name, tmp_slave1->dev->name); + } else { + printk(KERN_CRIT "bonding: Error: the hw address " + "of slave %s is in use by the bond; " + "couldn't find a slave with a free hw " + "address to give it (this should not have " + "happened)\n", slave->dev->name); + return -EFAULT; + } + + return 0; +} + +/************************ exported alb funcions ************************/ + +int +bond_alb_initialize(struct bonding *bond, int rlb_enabled) +{ + int res; + + res = tlb_initialize(bond); + if (res) { + return res; + } + + if (rlb_enabled) { + bond->alb_info.rlb_enabled = 1; + /* initialize rlb */ + res = rlb_initialize(bond); + if (res) { + tlb_deinitialize(bond); + return res; + } + } + + return 0; +} + +void +bond_alb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + tlb_deinitialize(bond); + + if (bond_info->rlb_enabled) { + rlb_deinitialize(bond); + } +} + +int +bond_alb_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct bonding *bond = (struct bonding *) dev->priv; + struct ethhdr *eth_data = (struct ethhdr *)skb->data; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *tx_slave = NULL; + char do_tx_balance = 1; + int hash_size = 0; + u32 hash_index = 0; + u8 *hash_start = NULL; + u8 
mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + /* make sure that the current_slave and the slaves list do + * not change during tx + */ + read_lock(&bond->lock); + + if (bond->slave_cnt == 0) { + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + read_lock(&bond->ptrlock); + + switch (ntohs(skb->protocol)) { + case ETH_P_IP: + if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) || + (skb->nh.iph->daddr == 0xffffffff)) { + do_tx_balance = 0; + break; + } + hash_start = (char*)&(skb->nh.iph->daddr); + hash_size = 4; + break; + + case ETH_P_IPV6: + if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) { + do_tx_balance = 0; + break; + } + + hash_start = (char*)&(skb->nh.ipv6h->daddr); + hash_size = 16; + break; + +#ifdef FIXME + case ETH_P_IPX: + if (skb->nh.ipxh->ipx_checksum != + __constant_htons(IPX_NO_CHECKSUM)) { + /* something is wrong with this packet */ + do_tx_balance = 0; + break; + } + + if (skb->nh.ipxh->ipx_type != + __constant_htons(IPX_TYPE_NCP)) { + /* The only protocol worth balancing in + * this family since it has an "ARP" like + * mechanism + */ + do_tx_balance = 0; + break; + } + + hash_start = (char*)eth_data->h_dest; + hash_size = ETH_ALEN; + break; +#endif + + case ETH_P_ARP: + do_tx_balance = 0; + if (bond_info->rlb_enabled) { + tx_slave = rlb_arp_xmit(skb, bond); + } + break; + + default: + do_tx_balance = 0; + break; + } + + if (do_tx_balance) { + hash_index = _simple_hash(hash_start, hash_size); + tx_slave = tlb_choose_channel(bond, hash_index, skb->len); + } + + if (!tx_slave) { + /* unbalanced or unassigned, send through primary */ + tx_slave = bond->current_slave; + bond_info->unbalanced_load += skb->len; + } + + if (tx_slave && SLAVE_IS_OK(tx_slave)) { + skb->dev = tx_slave->dev; + if (tx_slave != bond->current_slave) { + memcpy(eth_data->h_source, + tx_slave->dev->dev_addr, + 
ETH_ALEN); + } + dev_queue_xmit(skb); + } else { + /* no suitable interface, frame not sent */ + if (tx_slave) { + tlb_clear_slave(bond, tx_slave, 0); + } + dev_kfree_skb(skb); + } + + read_unlock(&bond->ptrlock); + read_unlock(&bond->lock); + return 0; +} + +void +bond_alb_monitor(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *slave = NULL; + + read_lock(&bond->lock); + + if ((bond->slave_cnt == 0) || !(bond->device->flags & IFF_UP)) { + bond_info->tx_rebalance_counter = 0; + bond_info->lp_counter = 0; + goto out; + } + + bond_info->tx_rebalance_counter++; + bond_info->lp_counter++; + + /* send learning packets */ + if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) { + /* change of current_slave involves swapping of mac addresses. + * in order to avoid this swapping from happening while + * sending the learning packets, the ptrlock must be held for + * read. + */ + read_lock(&bond->ptrlock); + slave = bond_get_first_slave(bond); + while (slave) { + alb_send_learning_packets(slave,slave->dev->dev_addr); + slave = bond_get_next_slave(bond, slave); + } + read_unlock(&bond->ptrlock); + + bond_info->lp_counter = 0; + } + + /* rebalance tx traffic */ + if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { + read_lock(&bond->ptrlock); + slave = bond_get_first_slave(bond); + while (slave) { + tlb_clear_slave(bond, slave, 1); + if (slave == bond->current_slave) { + SLAVE_TLB_INFO(slave).load = + bond_info->unbalanced_load / + BOND_TLB_REBALANCE_INTERVAL; + bond_info->unbalanced_load = 0; + } + slave = bond_get_next_slave(bond, slave); + } + read_unlock(&bond->ptrlock); + bond_info->tx_rebalance_counter = 0; + } + + /* handle rlb stuff */ + if (bond_info->rlb_enabled) { + /* the following code changes the promiscuity of the + * the current_slave. It needs to be locked with a + * write lock to protect from other code that also + * sets the promiscuity. 
+ */ + write_lock(&bond->ptrlock); + if (bond_info->primary_is_promisc && + (++bond_info->rlb_promisc_timeout_counter >= + RLB_PROMISC_TIMEOUT)) { + + bond_info->rlb_promisc_timeout_counter = 0; + + /* If the primary was set to promiscuous mode + * because a slave was disabled then + * it can now leave promiscuous mode. + */ + dev_set_promiscuity(bond->current_slave->dev, -1); + bond_info->primary_is_promisc = 0; + } + write_unlock(&bond->ptrlock); + + if (bond_info->rlb_rebalance == 1) { + bond_info->rlb_rebalance = 0; + rlb_rebalance(bond); + } + + /* check if clients need updating */ + if (bond_info->rx_ntt) { + if (bond_info->rlb_update_delay_counter) { + --bond_info->rlb_update_delay_counter; + } else { + rlb_update_rx_clients(bond); + if (bond_info->rlb_update_retry_counter) { + --bond_info->rlb_update_retry_counter; + } else { + bond_info->rx_ntt = 0; + } + } + } + } + +out: + read_unlock(&bond->lock); + + if (bond->device->flags & IFF_UP) { + /* re-arm the timer */ + mod_timer(&(bond_info->alb_timer), + jiffies + (HZ/ALB_TIMER_TICKS_PER_SEC)); + } +} + +/* assumption: called before the slave is attched to the bond + * and not locked by the bond lock + */ +int +bond_alb_init_slave(struct bonding *bond, struct slave *slave) +{ + int err = 0; + + err = alb_set_mac_addr(slave, slave->perm_hwaddr, + bond->alb_info.rlb_enabled); + if (err) { + return err; + } + + /* caller must hold the bond lock for write since the mac addresses + * are compared and may be swapped. 
+ */ + write_lock_bh(&bond->lock); + + err = alb_handle_addr_collision_on_attach(bond, slave); + + write_unlock_bh(&bond->lock); + + if (err) { + return err; + } + + tlb_init_slave(slave); + + /* order a rebalance ASAP */ + bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + + if (bond->alb_info.rlb_enabled) { + bond->alb_info.rlb_rebalance = 1; + } + + return 0; +} + +/* Caller must hold bond lock for write */ +void +bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) +{ + if (bond->slave_cnt > 1) { + alb_change_hw_addr_on_detach(bond, slave); + } + + tlb_clear_slave(bond, slave, 0); + + if (bond->alb_info.rlb_enabled) { + bond->alb_info.next_rx_slave = NULL; + rlb_clear_slave(bond, slave); + } +} + +/* Caller must hold bond lock for read */ +void +bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, + char link) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + if (link == BOND_LINK_DOWN) { + tlb_clear_slave(bond, slave, 0); + if (bond->alb_info.rlb_enabled) { + rlb_clear_slave(bond, slave); + } + } else if (link == BOND_LINK_UP) { + /* order a rebalance ASAP */ + bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + if (bond->alb_info.rlb_enabled) { + bond->alb_info.rlb_rebalance = 1; + /* If the updelay module parameter is smaller than the + * forwarding delay of the switch the rebalance will + * not work because the rebalance arp replies will + * not be forwarded to the clients.. + */ + } + } +} + +/** + * bond_alb_assign_current_slave - assign new current_slave + * @bond: our bonding struct + * @new_slave: new slave to assign + * + * Set the bond->current_slave to @new_slave and handle + * mac address swapping and promiscuity changes as needed. 
+ * + * Caller must hold bond ptrlock for write (or bond lock for write) + */ +void +bond_alb_assign_current_slave(struct bonding *bond, struct slave *new_slave) +{ + struct slave *swap_slave = bond->current_slave; + + if (bond->current_slave == new_slave) { + return; + } + + if (bond->current_slave && bond->alb_info.primary_is_promisc) { + dev_set_promiscuity(bond->current_slave->dev, -1); + bond->alb_info.primary_is_promisc = 0; + bond->alb_info.rlb_promisc_timeout_counter = 0; + } + + bond->current_slave = new_slave; + + if (!new_slave || (bond->slave_cnt == 0)) { + return; + } + + /* set the new current_slave to the bonds mac address + * i.e. swap mac addresses of old current_slave and new current_slave + */ + if (!swap_slave) { + /* find slave that is holding the bond's mac address */ + swap_slave = bond_get_first_slave(bond); + while (swap_slave) { + if (!memcmp(swap_slave->dev->dev_addr, + bond->device->dev_addr, ETH_ALEN)) { + break; + } + swap_slave = bond_get_next_slave(bond, swap_slave); + } + } + + /* current_slave must be set before calling alb_swap_mac_addr */ + if (swap_slave) { + /* swap mac address */ + alb_swap_mac_addr(bond, swap_slave, new_slave); + } else { + /* set the new_slave to the bond mac address */ + alb_set_mac_addr(new_slave, bond->device->dev_addr, + bond->alb_info.rlb_enabled); + /* fasten bond mac on new current slave */ + alb_send_learning_packets(new_slave, bond->device->dev_addr); + } +} + diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h new file mode 100644 index 000000000000..8b69c8a8f4b9 --- /dev/null +++ b/drivers/net/bonding/bond_alb.h @@ -0,0 +1,127 @@ +/* + * Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + */ + +#ifndef __BOND_ALB_H__ +#define __BOND_ALB_H__ + +#include <linux/if_ether.h> + +struct bonding; +struct slave; + +#define BOND_ALB_INFO(bond) ((bond)->alb_info) +#define SLAVE_TLB_INFO(slave) ((slave)->tlb_info) + +struct tlb_client_info { + struct slave *tx_slave; /* A pointer to slave used for transmiting + * packets to a Client that the Hash function + * gave this entry index. + */ + u32 tx_bytes; /* Each Client acumulates the BytesTx that + * were tranmitted to it, and after each + * CallBack the LoadHistory is devided + * by the balance interval + */ + u32 load_history; /* This field contains the amount of Bytes + * that were transmitted to this client by + * the server on the previous balance + * interval in Bps. + */ + u32 next; /* The next Hash table entry index, assigned + * to use the same adapter for transmit. + */ + u32 prev; /* The previous Hash table entry index, + * assigned to use the same + */ +}; + +/* ------------------------------------------------------------------------- + * struct rlb_client_info contains all info related to a specific rx client + * connection. 
This is the Clients Hash Table entry struct + * ------------------------------------------------------------------------- + */ +struct rlb_client_info { + u32 ip_src; /* the server IP address */ + u32 ip_dst; /* the client IP address */ + u8 mac_dst[ETH_ALEN]; /* the client MAC address */ + u32 next; /* The next Hash table entry index */ + u32 prev; /* The previous Hash table entry index */ + u8 assigned; /* checking whether this entry is assigned */ + u8 ntt; /* flag - need to transmit client info */ + struct slave *slave; /* the slave assigned to this client */ +}; + +struct tlb_slave_info { + u32 head; /* Index to the head of the bi-directional clients + * hash table entries list. The entries in the list + * are the entries that were assigned to use this + * slave for transmit. + */ + u32 load; /* Each slave sums the loadHistory of all clients + * assigned to it + */ +}; + +struct alb_bond_info { + struct timer_list alb_timer; + struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ + spinlock_t tx_hashtbl_lock; + u32 unbalanced_load; + int tx_rebalance_counter; + int lp_counter; + /* -------- rlb parameters -------- */ + int rlb_enabled; + struct packet_type rlb_pkt_type; + struct rlb_client_info *rx_hashtbl; /* Receive hash table */ + spinlock_t rx_hashtbl_lock; + u32 rx_hashtbl_head; + u8 rx_ntt; /* flag - need to transmit + * to all rx clients + */ + struct slave *next_rx_slave;/* next slave to be assigned + * to a new rx client for + */ + u32 rlb_interval_counter; + u8 primary_is_promisc; /* boolean */ + u32 rlb_promisc_timeout_counter;/* counts primary + * promiscuity time + */ + u32 rlb_update_delay_counter; + u32 rlb_update_retry_counter;/* counter of retries + * of client update + */ + u8 rlb_rebalance; /* flag - indicates that the + * rx traffic should be + * rebalanced + */ +}; + +int bond_alb_initialize(struct bonding *bond, int rlb_enabled); +void bond_alb_deinitialize(struct bonding *bond); +int bond_alb_init_slave(struct bonding *bond, 
struct slave *slave); +void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave); +void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link); +void bond_alb_assign_current_slave(struct bonding *bond, struct slave *new_slave); +int bond_alb_xmit(struct sk_buff *skb, struct net_device *dev); +void bond_alb_monitor(struct bonding *bond); + +#endif /* __BOND_ALB_H__ */ + diff --git a/drivers/net/bonding.c b/drivers/net/bonding/bond_main.c index 4ec7f202cfa2..080cd55bf00b 100644 --- a/drivers/net/bonding.c +++ b/drivers/net/bonding/bond_main.c @@ -278,7 +278,7 @@ * bonding round-robin mode ignoring links after failover/recovery * * 2003/03/17 - Jay Vosburgh <fubar at us dot ibm dot com> - * - kmalloc fix (GFP_KERNEL to GFP_ATOMIC) reported by + * - kmalloc fix (GPF_KERNEL to GPF_ATOMIC) reported by * Shmulik dot Hen at intel.com. * - Based on discussion on mailing list, changed use of * update_slave_cnt(), created wrapper functions for adding/removing @@ -292,21 +292,95 @@ * - Make sure only bond_attach_slave() and bond_detach_slave() can * manipulate the slave list, including slave_cnt, even when in * bond_release_all(). - * - Fixed hang in bond_release() while traffic is running. + * - Fixed hang in bond_release() with traffic running: * netdev_set_master() must not be called from within the bond lock. * * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and * Shmulik Hen <shmulik.hen at intel dot com> - * - Fixed hang in bond_enslave(): netdev_set_master() must not be - * called from within the bond lock while traffic is running. + * - Fixed hang in bond_enslave() with traffic running: + * netdev_set_master() must not be called from within the bond lock. + * + * 2003/03/18 - Amir Noam <amir.noam at intel dot com> + * - Added support for getting slave's speed and duplex via ethtool. + * Needed for 802.3ad and other future modes. 
+ * + * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Shmulik Hen <shmulik.hen at intel dot com> + * - Enable support of modes that need to use the unique mac address of + * each slave. + * * bond_enslave(): Moved setting the slave's mac address, and + * openning it, from the application to the driver. This breaks + * backward comaptibility with old versions of ifenslave that open + * the slave before enalsving it !!!. + * * bond_release(): The driver also takes care of closing the slave + * and restoring its original mac address. + * - Removed the code that restores all base driver's flags. + * Flags are automatically restored once all undo stages are done + * properly. + * - Block possibility of enslaving before the master is up. This + * prevents putting the system in an unstable state. + * + * 2003/03/18 - Amir Noam <amir.noam at intel dot com>, + * Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Shmulik Hen <shmulik.hen at intel dot com> + * - Added support for IEEE 802.3ad Dynamic link aggregation mode. + * + * 2003/05/01 - Amir Noam <amir.noam at intel dot com> + * - Added ABI version control to restore compatibility between + * new/old ifenslave and new/old bonding. + * + * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> + * - Fixed bug in bond_release_all(): save old value of current_slave + * before setting it to NULL. + * - Changed driver versioning scheme to include version number instead + * of release date (that is already in another field). There are 3 + * fields X.Y.Z where: + * X - Major version - big behavior changes + * Y - Minor version - addition of features + * Z - Extra version - minor changes and bug fixes + * The current version is 1.0.0 as a base line. + * + * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Amir Noam <amir.noam at intel dot com> + * - Added support for lacp_rate module param. + * - Code beautification and style changes (mainly in comments). 
+ * new version - 1.0.1 + * + * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> + * - Based on discussion on mailing list, changed locking scheme + * to use lock/unlock or lock_bh/unlock_bh appropriately instead + * of lock_irqsave/unlock_irqrestore. The new scheme helps exposing + * hidden bugs and solves system hangs that occurred due to the fact + * that holding lock_irqsave doesn't prevent softirqs from running. + * This also increases total throughput since interrupts are not + * blocked on each transmitted packets or monitor timeout. + * new version - 2.0.0 + * + * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> + * - Added support for Transmit load balancing mode. + * - Concentrate all assignments of current_slave to a single point + * so specific modes can take actions when the primary adapter is + * changed. + * - Take the updelay parameter into consideration during bond_enslave + * since some adapters loose their link during setting the device. + * - Renamed bond_3ad_link_status_changed() to + * bond_3ad_handle_link_change() for compatibility with TLB. + * new version - 2.1.0 + * + * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> + * - Added support for Adaptive load balancing mode which is + * equivalent to Transmit load balancing + Receive load balancing. 
+ * new version - 2.2.0 */ #include <linux/config.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/sched.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/interrupt.h> +#include <linux/ptrace.h> #include <linux/ioport.h> #include <linux/in.h> #include <linux/ip.h> @@ -316,28 +390,32 @@ #include <linux/timer.h> #include <linux/socket.h> #include <linux/ctype.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <asm/io.h> +#include <asm/dma.h> +#include <asm/uaccess.h> #include <linux/errno.h> + #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/rtnetlink.h> + #include <linux/if_bonding.h> #include <linux/smp.h> #include <linux/if_ether.h> #include <net/arp.h> #include <linux/mii.h> #include <linux/ethtool.h> +#include "bonding.h" +#include "bond_3ad.h" +#include "bond_alb.h" -#include <asm/system.h> -#include <asm/bitops.h> -#include <asm/io.h> -#include <asm/dma.h> -#include <asm/uaccess.h> - -#define DRV_VERSION "2.5.65-20030320" -#define DRV_RELDATE "March 20, 2003" +#define DRV_VERSION "2.2.0" +#define DRV_RELDATE "April 15, 2003" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" @@ -357,6 +435,11 @@ DRV_NAME ".c:v" DRV_VERSION " (" DRV_RELDATE ")\n"; #define MAX_ARP_IP_TARGETS 16 #endif +struct bond_parm_tbl { + char *modename; + int mode; +}; + static int arp_interval = BOND_LINK_ARP_INTERV; static char *arp_ip_target[MAX_ARP_IP_TARGETS] = { NULL, }; static unsigned long arp_target[MAX_ARP_IP_TARGETS] = { 0, } ; @@ -366,6 +449,15 @@ char *arp_target_hw_addr = NULL; static char *primary= NULL; +static int app_abi_ver = 0; +static int orig_app_abi_ver = -1; /* This is used to save the first ABI version + * we receive from the application. 
Once set, + * it won't be changed, and the module will + * refuse to enslave/release interfaces if the + * command comes from an application using + * another ABI version. + */ + static int max_bonds = BOND_DEFAULT_MAX_BONDS; static int miimon = BOND_LINK_MON_INTERV; static int use_carrier = 1; @@ -380,6 +472,9 @@ static struct bond_parm_tbl bond_mode_tbl[] = { { "active-backup", BOND_MODE_ACTIVEBACKUP}, { "balance-xor", BOND_MODE_XOR}, { "broadcast", BOND_MODE_BROADCAST}, +{ "802.3ad", BOND_MODE_8023AD}, +{ "tlb", BOND_MODE_TLB}, +{ "alb", BOND_MODE_ALB}, { NULL, -1}, }; @@ -393,6 +488,15 @@ static struct bond_parm_tbl bond_mc_tbl[] = { { NULL, -1}, }; +static int lacp_fast = 0; +static char *lacp_rate = NULL; + +static struct bond_parm_tbl bond_lacp_tbl[] = { +{ "slow", AD_LACP_SLOW}, +{ "fast", AD_LACP_FAST}, +{ NULL, -1}, +}; + static int first_pass = 1; static struct bonding *these_bonds = NULL; static struct net_device *dev_bonds = NULL; @@ -417,6 +521,8 @@ MODULE_PARM(primary, "s"); MODULE_PARM_DESC(primary, "Primary network device to use"); MODULE_PARM(multicast, "s"); MODULE_PARM_DESC(multicast, "Mode for multicast support : 0 for none, 1 for active slave, 2 for all slaves (default)"); +MODULE_PARM(lacp_rate, "s"); +MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner (slow/fast)"); static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev); static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev); @@ -426,7 +532,6 @@ static void bond_mii_monitor(struct net_device *dev); static void loadbalance_arp_monitor(struct net_device *dev); static void activebackup_arp_monitor(struct net_device *dev); static int bond_event(struct notifier_block *this, unsigned long event, void *ptr); -static void bond_restore_slave_flags(slave_t *slave); static void bond_mc_list_destroy(struct bonding *bond); static void bond_mc_add(bonding_t *bond, void *addr, int alen); static void bond_mc_delete(bonding_t *bond, void *addr, 
int alen); @@ -436,8 +541,6 @@ static void bond_set_promiscuity(bonding_t *bond, int inc); static void bond_set_allmulti(bonding_t *bond, int inc); static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list); static void bond_mc_update(bonding_t *bond, slave_t *new, slave_t *old); -static void bond_set_slave_inactive_flags(slave_t *slave); -static void bond_set_slave_active_flags(slave_t *slave); static int bond_enslave(struct net_device *master, struct net_device *slave); static int bond_release(struct net_device *master, struct net_device *slave); static int bond_release_all(struct net_device *master); @@ -451,13 +554,24 @@ static int bond_sethwaddr(struct net_device *master, struct net_device *slave); */ static int bond_get_info(char *buf, char **start, off_t offset, int length); +/* Caller must hold bond->ptrlock for write */ +static inline struct slave* +bond_assign_current_slave(struct bonding *bond,struct slave *newslave) +{ + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_assign_current_slave(bond, newslave); + } else { + bond->current_slave = newslave; + } + + return bond->current_slave; +} + /* #define BONDING_DEBUG 1 */ /* several macros */ -#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \ - (netif_running(dev) && netif_carrier_ok(dev))) - static void arp_send_all(slave_t *slave) { int i; @@ -482,7 +596,13 @@ bond_mode_name(void) return "load balancing (xor)"; case BOND_MODE_BROADCAST : return "fault-tolerance (broadcast)"; - default : + case BOND_MODE_8023AD: + return "IEEE 802.3ad Dynamic link aggregation"; + case BOND_MODE_TLB: + return "transmit load balancing"; + case BOND_MODE_ALB: + return "adaptive load balancing"; + default: return "unknown"; } } @@ -502,18 +622,13 @@ multicast_mode_name(void) } } -static void bond_restore_slave_flags(slave_t *slave) -{ - slave->dev->flags = slave->original_flags; -} - -static void bond_set_slave_inactive_flags(slave_t 
*slave) +void bond_set_slave_inactive_flags(slave_t *slave) { slave->state = BOND_STATE_BACKUP; slave->dev->flags |= IFF_NOARP; } -static void bond_set_slave_active_flags(slave_t *slave) +void bond_set_slave_active_flags(slave_t *slave) { slave->state = BOND_STATE_ACTIVE; slave->dev->flags &= ~IFF_NOARP; @@ -548,9 +663,9 @@ update_slave_cnt(bonding_t *bond, int incr) * belongs to <bond>. It returns <slave> in case it's needed. * Nothing is freed on return, structures are just unchained. * If the bond->current_slave pointer was pointing to <slave>, - * it's replaced with slave->next, or <bond> if not applicable. + * it's replaced with bond->next, or NULL if not applicable. * - * bond->lock held by caller. + * bond->lock held for writing by caller. */ static slave_t * bond_detach_slave(bonding_t *bond, slave_t *slave) @@ -565,20 +680,11 @@ bond_detach_slave(bonding_t *bond, slave_t *slave) if (bond->next == slave) { /* is the slave at the head ? */ if (bond->prev == slave) { /* is the slave alone ? 
*/ - write_lock(&bond->ptrlock); - bond->current_slave = NULL; /* no slave anymore */ - write_unlock(&bond->ptrlock); bond->prev = bond->next = (slave_t *)bond; } else { /* not alone */ bond->next = slave->next; slave->next->prev = (slave_t *)bond; bond->prev->next = slave->next; - - write_lock(&bond->ptrlock); - if (bond->current_slave == slave) { - bond->current_slave = slave->next; - } - write_unlock(&bond->ptrlock); } } else { slave->prev->next = slave->next; @@ -587,19 +693,29 @@ bond_detach_slave(bonding_t *bond, slave_t *slave) } else { slave->next->prev = slave->prev; } - - write_lock(&bond->ptrlock); - if (bond->current_slave == slave) { - bond->current_slave = slave->next; - } - write_unlock(&bond->ptrlock); } update_slave_cnt(bond, -1); + /* no need to hold ptrlock since bond lock is + * already held for writing + */ + if (slave == bond->current_slave) { + if ( bond->next != (slave_t *)bond) { /* found one slave */ + bond_assign_current_slave(bond, bond->next); + } else { + bond_assign_current_slave(bond, NULL); + } + } + return slave; } +/* + * This function attaches the slave <slave> to the list <bond>. + * + * bond->lock held for writing by caller. + */ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) { @@ -646,6 +762,59 @@ bond_attach_slave(struct bonding *bond, struct slave *new_slave) set_fs(fs); \ ret; }) +/* + * Get link speed and duplex from the slave's base driver + * using ethtool. If for some reason the call fails or the + * values are invalid, fake speed and duplex to 100/Full + * and return error. 
+ */ +static int bond_update_speed_duplex(struct slave *slave) +{ + struct net_device *dev = slave->dev; + static int (* ioctl)(struct net_device *, struct ifreq *, int); + struct ifreq ifr; + struct ethtool_cmd etool; + + ioctl = dev->do_ioctl; + if (ioctl) { + etool.cmd = ETHTOOL_GSET; + ifr.ifr_data = (char*)&etool; + if (IOCTL(dev, &ifr, SIOCETHTOOL) == 0) { + slave->speed = etool.speed; + slave->duplex = etool.duplex; + } else { + goto err_out; + } + } else { + goto err_out; + } + + switch (slave->speed) { + case SPEED_10: + case SPEED_100: + case SPEED_1000: + break; + default: + goto err_out; + } + + switch (slave->duplex) { + case DUPLEX_FULL: + case DUPLEX_HALF: + break; + default: + goto err_out; + } + + return 0; + +err_out: + /* Fake speed and duplex */ + slave->speed = SPEED_100; + slave->duplex = DUPLEX_FULL; + return -1; +} + /* * if <dev> supports MII link status reporting, check its link status. * @@ -728,21 +897,61 @@ bond_check_dev_link(struct net_device *dev, int reporting) static u16 bond_check_mii_link(bonding_t *bond) { int has_active_interface = 0; - unsigned long flags; - read_lock_irqsave(&bond->lock, flags); + read_lock_bh(&bond->lock); read_lock(&bond->ptrlock); has_active_interface = (bond->current_slave != NULL); read_unlock(&bond->ptrlock); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock_bh(&bond->lock); return (has_active_interface ? 
BMSR_LSTATUS : 0); } +/* register to receive lacpdus on a bond */ +static void bond_register_lacpdu(struct bonding *bond) +{ + struct packet_type* pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); + + /* initialize packet type */ + pk_type->type = PKT_TYPE_LACPDU; + pk_type->dev = bond->device; + pk_type->func = bond_3ad_lacpdu_recv; + pk_type->data = (void*)1; /* understand shared skbs */ + + dev_add_pack(pk_type); +} + +/* unregister to receive lacpdus on a bond */ +static void bond_unregister_lacpdu(struct bonding *bond) +{ + dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); +} + static int bond_open(struct net_device *dev) { + struct bonding *bond = (struct bonding *)(dev->priv); struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer; struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer; + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); + + /* bond_alb_initialize must be called before the timer + * is started. + */ + if (bond_alb_initialize(bond, (bond_mode == BOND_MODE_ALB))) { + /* something went wrong - fail the open operation */ + return -1; + } + + init_timer(alb_timer); + alb_timer->expires = jiffies + 1; + alb_timer->data = (unsigned long)bond; + alb_timer->function = (void *)&bond_alb_monitor; + add_timer(alb_timer); + } + MOD_INC_USE_COUNT; if (miimon > 0) { /* link check interval, in milliseconds. 
*/ @@ -764,15 +973,27 @@ static int bond_open(struct net_device *dev) } add_timer(arp_timer); } + + if (bond_mode == BOND_MODE_8023AD) { + struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); + init_timer(ad_timer); + ad_timer->expires = jiffies + (AD_TIMER_INTERVAL * HZ / 1000); + ad_timer->data = (unsigned long)bond; + ad_timer->function = (void *)&bond_3ad_state_machine_handler; + add_timer(ad_timer); + + /* register to receive LACPDUs */ + bond_register_lacpdu(bond); + } + return 0; } static int bond_close(struct net_device *master) { bonding_t *bond = (struct bonding *) master->priv; - unsigned long flags; - write_lock_irqsave(&bond->lock, flags); + write_lock_bh(&bond->lock); if (miimon > 0) { /* link check interval, in milliseconds. */ del_timer(&bond->mii_timer); @@ -785,11 +1006,26 @@ static int bond_close(struct net_device *master) } } + if (bond_mode == BOND_MODE_8023AD) { + del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); + + /* Unregister the receive of LACPDUs */ + bond_unregister_lacpdu(bond); + } + + bond_mc_list_destroy (bond); + + write_unlock_bh(&bond->lock); + /* Release the bonded slaves */ bond_release_all(master); - bond_mc_list_destroy (bond); - write_unlock_irqrestore(&bond->lock, flags); + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); + + bond_alb_deinitialize(bond); + } MOD_DEC_USE_COUNT; return 0; @@ -804,6 +1040,13 @@ static void bond_mc_list_flush(struct net_device *dev, struct net_device *flush) for (dmi = flush->mc_list; dmi != NULL; dmi = dmi->next) dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); + + if (bond_mode == BOND_MODE_8023AD) { + /* del lacpdu mc addr from mc list */ + u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; + + dev_mc_delete(dev, lacpdu_multicast, ETH_ALEN, 0); + } } /* @@ -960,14 +1203,13 @@ static void set_multicast_list(struct net_device *master) { bonding_t *bond = master->priv; struct dev_mc_list *dmi; - unsigned 
long flags = 0; if (multicast_mode == BOND_MULTICAST_DISABLED) return; /* * Lock the private data for the master */ - write_lock_irqsave(&bond->lock, flags); + write_lock_bh(&bond->lock); /* set promiscuity flag to slaves */ if ( (master->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC) ) @@ -1002,7 +1244,7 @@ static void set_multicast_list(struct net_device *master) bond_mc_list_destroy (bond); bond_mc_list_copy (master->mc_list, bond, GFP_ATOMIC); - write_unlock_irqrestore(&bond->lock, flags); + write_unlock_bh(&bond->lock); } /* @@ -1048,14 +1290,13 @@ static int bond_enslave(struct net_device *master_dev, { bonding_t *bond = NULL; slave_t *new_slave = NULL; - unsigned long flags = 0; unsigned long rflags = 0; - int ndx = 0; int err = 0; struct dev_mc_list *dmi; struct in_ifaddr **ifap; struct in_ifaddr *ifa; int link_reporting; + struct sockaddr addr; if (master_dev == NULL || slave_dev == NULL) { return -ENODEV; @@ -1068,12 +1309,13 @@ static int bond_enslave(struct net_device *master_dev, slave_dev->name); } - /* not running. */ - if ((slave_dev->flags & IFF_UP) != IFF_UP) { + + /* bond must be initialized by bond_open() before enslaving */ + if (!(master_dev->flags & IFF_UP)) { #ifdef BONDING_DEBUG - printk(KERN_CRIT "Error, slave_dev is not running\n"); + printk(KERN_CRIT "Error, master_dev is not up\n"); #endif - return -EINVAL; + return -EPERM; } /* already enslaved */ @@ -1083,25 +1325,117 @@ static int bond_enslave(struct net_device *master_dev, #endif return -EBUSY; } - - if ((new_slave = kmalloc(sizeof(slave_t), GFP_ATOMIC)) == NULL) { + + if (app_abi_ver >= 1) { + /* The application is using an ABI, which requires the + * slave interface to be closed. 
+ */ + if ((slave_dev->flags & IFF_UP)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, slave_dev is up\n"); +#endif + return -EPERM; + } + + if (slave_dev->set_mac_address == NULL) { + printk(KERN_CRIT + "The slave device you specified does not support" + " setting the MAC address.\n"); + printk(KERN_CRIT + "Your kernel likely does not support slave" + " devices.\n"); + + return -EOPNOTSUPP; + } + } else { + /* The application is not using an ABI, which requires the + * slave interface to be open. + */ + if (!(slave_dev->flags & IFF_UP)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, slave_dev is not running\n"); +#endif + return -EINVAL; + } + + if ((bond_mode == BOND_MODE_8023AD) || + (bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + printk(KERN_ERR + "bonding: Error: to use %s mode, you must " + "upgrade ifenslave.\n", bond_mode_name()); + return -EOPNOTSUPP; + } + } + + if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) { return -ENOMEM; } memset(new_slave, 0, sizeof(slave_t)); - /* save flags before call to netdev_set_master */ + /* save slave's original flags before calling + * netdev_set_master and dev_open + */ new_slave->original_flags = slave_dev->flags; - err = netdev_set_master(slave_dev, master_dev); + if (app_abi_ver >= 1) { + /* save slave's original ("permanent") mac address for + * modes that needs it, and for restoring it upon release, + * and then set it to the master's address + */ + memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); + + if (bond->slave_cnt > 0) { + /* set slave to master's mac address + * The application already set the master's + * mac address to that of the first slave + */ + memcpy(addr.sa_data, master_dev->dev_addr, ETH_ALEN); + addr.sa_family = slave_dev->type; + err = slave_dev->set_mac_address(slave_dev, &addr); + if (err) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error %d calling set_mac_address\n", err); +#endif + goto err_free; + } + } + + /* open the slave since 
the application closed it */ + err = dev_open(slave_dev); + if (err) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Openning slave %s failed\n", slave_dev->name); +#endif + goto err_restore_mac; + } + } + + err = netdev_set_master(slave_dev, master_dev); if (err) { #ifdef BONDING_DEBUG printk(KERN_CRIT "Error %d calling netdev_set_master\n", err); #endif - goto err_free; + if (app_abi_ver < 1) { + goto err_free; + } else { + goto err_close; + } } new_slave->dev = slave_dev; + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + /* bond_alb_init_slave() must be called before all other stages since + * it might fail and we do not want to have to undo everything + */ + err = bond_alb_init_slave(bond, new_slave); + if (err) { + goto err_unset_master; + } + } + if (multicast_mode == BOND_MULTICAST_ALL) { /* set promiscuity level to new slave */ if (master_dev->flags & IFF_PROMISC) @@ -1116,7 +1450,14 @@ static int bond_enslave(struct net_device *master_dev, dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); } - write_lock_irqsave(&bond->lock, flags); + if (bond_mode == BOND_MODE_8023AD) { + /* add lacpdu mc addr to mc list */ + u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; + + dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); + } + + write_lock_bh(&bond->lock); bond_attach_slave(bond, new_slave); new_slave->delay = 0; @@ -1157,19 +1498,45 @@ static int bond_enslave(struct net_device *master_dev, /* check for initial state */ if ((miimon <= 0) || (bond_check_dev_link(slave_dev, 0) == BMSR_LSTATUS)) { + if (updelay) { #ifdef BONDING_DEBUG - printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_UP\n"); + printk(KERN_CRIT "Initial state of slave_dev is " + "BOND_LINK_BACK\n"); #endif - new_slave->link = BOND_LINK_UP; + new_slave->link = BOND_LINK_BACK; + new_slave->delay = updelay; + } + else { +#ifdef BONDING_DEBUG + printk(KERN_DEBUG "Initial state of slave_dev is " + "BOND_LINK_UP\n"); +#endif + new_slave->link = BOND_LINK_UP; + 
} new_slave->jiffies = jiffies; } else { #ifdef BONDING_DEBUG - printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_DOWN\n"); + printk(KERN_CRIT "Initial state of slave_dev is " + "BOND_LINK_DOWN\n"); #endif new_slave->link = BOND_LINK_DOWN; } + if (bond_update_speed_duplex(new_slave) && + (new_slave->link != BOND_LINK_DOWN)) { + + printk(KERN_WARNING + "bond_enslave(): failed to get speed/duplex from %s, " + "speed forced to 100Mbps, duplex forced to Full.\n", + new_slave->dev->name); + if (bond_mode == BOND_MODE_8023AD) { + printk(KERN_WARNING + "Operation of 802.3ad mode requires ETHTOOL support " + "in base driver for proper aggregator selection.\n"); + } + } + /* if we're in active-backup mode, we need one and only one active * interface. The backup interfaces will have their NOARP flag set * because we need them to be completely deaf and not to respond to @@ -1180,13 +1547,13 @@ static int bond_enslave(struct net_device *master_dev, if (bond_mode == BOND_MODE_ACTIVEBACKUP) { if (((bond->current_slave == NULL) || (bond->current_slave->dev->flags & IFF_NOARP)) - && (new_slave->link == BOND_LINK_UP)) { + && (new_slave->link != BOND_LINK_DOWN)) { #ifdef BONDING_DEBUG printk(KERN_CRIT "This is the first active slave\n"); #endif /* first slave or no active slave yet, and this link is OK, so make this interface the active one */ - bond->current_slave = new_slave; + bond_assign_current_slave(bond, new_slave); bond_set_slave_active_flags(new_slave); bond_mc_update(bond, new_slave, NULL); } @@ -1203,9 +1570,47 @@ static int bond_enslave(struct net_device *master_dev, read_unlock_irqrestore(&(((struct in_device *)slave_dev->ip_ptr)->lock), rflags); /* if there is a primary slave, remember it */ - if (primary != NULL) - if( strcmp(primary, new_slave->dev->name) == 0) - bond->primary_slave = new_slave; + if (primary != NULL) { + if (strcmp(primary, new_slave->dev->name) == 0) { + bond->primary_slave = new_slave; + } + } + } else if (bond_mode == BOND_MODE_8023AD) { 
+ /* in 802.3ad mode, the internal mechanism + * will activate the slaves in the selected + * aggregator + */ + bond_set_slave_inactive_flags(new_slave); + /* if this is the first slave */ + if (new_slave == bond->next) { + SLAVE_AD_INFO(new_slave).id = 1; + /* Initialize AD with the number of times that the AD timer is called in 1 second + * can be called only after the mac address of the bond is set + */ + bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, + lacp_fast); + } else { + SLAVE_AD_INFO(new_slave).id = + SLAVE_AD_INFO(new_slave->prev).id + 1; + } + + bond_3ad_bind_slave(new_slave); + } else if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + new_slave->state = BOND_STATE_ACTIVE; + if ((bond->current_slave == NULL) && (new_slave->link != BOND_LINK_DOWN)) { + /* first slave or no active slave yet, and this link + * is OK, so make this interface the active one + */ + bond_assign_current_slave(bond, new_slave); + } + + /* if there is a primary slave, remember it */ + if (primary != NULL) { + if (strcmp(primary, new_slave->dev->name) == 0) { + bond->primary_slave = new_slave; + } + } } else { #ifdef BONDING_DEBUG printk(KERN_CRIT "This slave is always active in trunk mode\n"); @@ -1213,51 +1618,69 @@ static int bond_enslave(struct net_device *master_dev, /* always active in trunk mode */ new_slave->state = BOND_STATE_ACTIVE; if (bond->current_slave == NULL) - bond->current_slave = new_slave; + bond_assign_current_slave(bond, new_slave); } - write_unlock_irqrestore(&bond->lock, flags); + write_unlock_bh(&bond->lock); - /* - * !!! This is to support old versions of ifenslave. We can remove - * this in 2.5 because our ifenslave takes care of this for us. - * We check to see if the master has a mac address yet. If not, - * we'll give it the mac address of our slave device. - */ - for (ndx = 0; ndx < slave_dev->addr_len; ndx++) { + if (app_abi_ver < 1) { + /* + * !!! This is to support old versions of ifenslave. 
+ * We can remove this in 2.5 because our ifenslave takes + * care of this for us. + * We check to see if the master has a mac address yet. + * If not, we'll give it the mac address of our slave device. + */ + int ndx = 0; + + for (ndx = 0; ndx < slave_dev->addr_len; ndx++) { #ifdef BONDING_DEBUG - printk(KERN_CRIT "Checking ndx=%d of master_dev->dev_addr\n", - ndx); + printk(KERN_DEBUG + "Checking ndx=%d of master_dev->dev_addr\n", ndx); #endif - if (master_dev->dev_addr[ndx] != 0) { + if (master_dev->dev_addr[ndx] != 0) { #ifdef BONDING_DEBUG - printk(KERN_CRIT "Found non-zero byte at ndx=%d\n", - ndx); + printk(KERN_DEBUG + "Found non-zero byte at ndx=%d\n", ndx); #endif - break; + break; + } } - } - if (ndx == slave_dev->addr_len) { - /* - * We got all the way through the address and it was - * all 0's. - */ + if (ndx == slave_dev->addr_len) { + /* + * We got all the way through the address and it was + * all 0's. + */ #ifdef BONDING_DEBUG - printk(KERN_CRIT "%s doesn't have a MAC address yet. ", - master_dev->name); - printk(KERN_CRIT "Going to give assign it from %s.\n", - slave_dev->name); + printk(KERN_DEBUG "%s doesn't have a MAC address yet. ", + master_dev->name); + printk(KERN_DEBUG "Going to give assign it from %s.\n", + slave_dev->name); #endif - bond_sethwaddr(master_dev, slave_dev); + bond_sethwaddr(master_dev, slave_dev); + } } printk (KERN_INFO "%s: enslaving %s as a%s interface with a%s link.\n", master_dev->name, slave_dev->name, new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", - new_slave->link == BOND_LINK_UP ? "n up" : " down"); + new_slave->link != BOND_LINK_DOWN ? 
"n up" : " down"); /* enslave is successful */ return 0; + +/* Undo stages on error */ +err_unset_master: + netdev_set_master(slave_dev, NULL); + +err_close: + dev_close(slave_dev); + +err_restore_mac: + memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + slave_dev->set_mac_address(slave_dev, &addr); + err_free: kfree(new_slave); return err; @@ -1280,7 +1703,6 @@ static int bond_change_active(struct net_device *master_dev, struct net_device * slave_t *slave; slave_t *oldactive = NULL; slave_t *newactive = NULL; - unsigned long flags; int ret = 0; if (master_dev == NULL || slave_dev == NULL) { @@ -1288,7 +1710,7 @@ static int bond_change_active(struct net_device *master_dev, struct net_device * } bond = (struct bonding *) master_dev->priv; - write_lock_irqsave(&bond->lock, flags); + write_lock_bh(&bond->lock); slave = (slave_t *)bond; oldactive = bond->current_slave; @@ -1307,7 +1729,7 @@ static int bond_change_active(struct net_device *master_dev, struct net_device * bond_set_slave_inactive_flags(oldactive); bond_set_slave_active_flags(newactive); bond_mc_update(bond, newactive, oldactive); - bond->current_slave = newactive; + bond_assign_current_slave(bond, newactive); printk("%s : activate %s(old : %s)\n", master_dev->name, newactive->dev->name, oldactive->dev->name); @@ -1315,7 +1737,7 @@ static int bond_change_active(struct net_device *master_dev, struct net_device * else { ret = -EINVAL; } - write_unlock_irqrestore(&bond->lock, flags); + write_unlock_bh(&bond->lock); return ret; } @@ -1329,7 +1751,7 @@ static int bond_change_active(struct net_device *master_dev, struct net_device * * Since this function sends messages tails through printk, the caller * must have started something like `printk(KERN_INFO "xxxx ");'. * - * Warning: must put locks around the call to this function if needed. + * Warning: Caller must hold ptrlock for writing. 
*/ slave_t *change_active_interface(bonding_t *bond) { @@ -1337,22 +1759,16 @@ slave_t *change_active_interface(bonding_t *bond) slave_t *bestslave = NULL; int mintime; - read_lock(&bond->ptrlock); newslave = oldslave = bond->current_slave; - read_unlock(&bond->ptrlock); if (newslave == NULL) { /* there were no active slaves left */ if (bond->next != (slave_t *)bond) { /* found one slave */ - write_lock(&bond->ptrlock); - newslave = bond->current_slave = bond->next; - write_unlock(&bond->ptrlock); + newslave = bond_assign_current_slave(bond, bond->next); } else { printk (" but could not find any %s interface.\n", (bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other"); - write_lock(&bond->ptrlock); - bond->current_slave = (slave_t *)NULL; - write_unlock(&bond->ptrlock); + bond_assign_current_slave(bond, NULL); return NULL; /* still no slave, return NULL */ } } else if (bond_mode == BOND_MODE_ACTIVEBACKUP) { @@ -1395,9 +1811,7 @@ slave_t *change_active_interface(bonding_t *bond) newslave->dev->name); } - write_lock(&bond->ptrlock); - bond->current_slave = newslave; - write_unlock(&bond->ptrlock); + bond_assign_current_slave(bond, newslave); return newslave; } else if (newslave->link == BOND_LINK_BACK) { @@ -1425,9 +1839,7 @@ slave_t *change_active_interface(bonding_t *bond) bestslave->jiffies = jiffies; bond_set_slave_active_flags(bestslave); bond_mc_update(bond, bestslave, oldslave); - write_lock(&bond->ptrlock); - bond->current_slave = bestslave; - write_unlock(&bond->ptrlock); + bond_assign_current_slave(bond, bestslave); return bestslave; } @@ -1450,9 +1862,7 @@ slave_t *change_active_interface(bonding_t *bond) (bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other"); /* absolutely nothing found. 
let's return NULL */ - write_lock(&bond->ptrlock); - bond->current_slave = (slave_t *)NULL; - write_unlock(&bond->ptrlock); + bond_assign_current_slave(bond, NULL); return NULL; } @@ -1471,7 +1881,7 @@ static int bond_release(struct net_device *master, struct net_device *slave) { bonding_t *bond; slave_t *our_slave, *old_current; - unsigned long flags; + struct sockaddr addr; if (master == NULL || slave == NULL) { return -ENODEV; @@ -1486,12 +1896,41 @@ static int bond_release(struct net_device *master, struct net_device *slave) return -EINVAL; } - write_lock_irqsave(&bond->lock, flags); + write_lock_bh(&bond->lock); bond->current_arp_slave = NULL; our_slave = (slave_t *)bond; old_current = bond->current_slave; while ((our_slave = our_slave->prev) != (slave_t *)bond) { if (our_slave->dev == slave) { + int mac_addr_differ = memcmp(bond->device->dev_addr, + our_slave->perm_hwaddr, + ETH_ALEN); + if (!mac_addr_differ && (bond->slave_cnt > 1)) { + printk(KERN_WARNING "WARNING: the permanent HWaddr of %s " + "- %02X:%02X:%02X:%02X:%02X:%02X - " + "is still in use by %s. Set the HWaddr " + "of %s to a different address " + "to avoid conflicts.\n", + slave->name, + slave->dev_addr[0], + slave->dev_addr[1], + slave->dev_addr[2], + slave->dev_addr[3], + slave->dev_addr[4], + slave->dev_addr[5], + bond->device->name, + slave->name); + } + + /* Inform AD package of unbinding of slave. 
*/ + if (bond_mode == BOND_MODE_8023AD) { + /* must be called before the slave is + * detached from the list + */ + bond_3ad_unbind_slave(our_slave); + } + + /* release the slave from its bond */ bond_detach_slave(bond, our_slave); printk (KERN_INFO "%s: releasing %s interface %s", @@ -1516,11 +1955,20 @@ static int bond_release(struct net_device *master, struct net_device *slave) bond->primary_slave = NULL; } + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + /* must be called only after the slave has been + * detached from the list and the current_slave + * has been replaced (if our_slave == old_current) + */ + bond_alb_deinit_slave(bond, our_slave); + } + break; } } - write_unlock_irqrestore(&bond->lock, flags); + write_unlock_bh(&bond->lock); if (our_slave == (slave_t *)bond) { /* if we get here, it's because the device was not found */ @@ -1545,33 +1993,47 @@ static int bond_release(struct net_device *master, struct net_device *slave) netdev_set_master(slave, NULL); - /* only restore its RUNNING flag if monitoring set it down */ - if (slave->flags & IFF_UP) { - slave->flags |= IFF_RUNNING; + /* close slave before restoring its mac address */ + dev_close(slave); + + if (app_abi_ver >= 1) { + /* restore original ("permanent") mac address */ + memcpy(addr.sa_data, our_slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave->type; + slave->set_mac_address(slave, &addr); } - if (slave->flags & IFF_NOARP || - bond->current_slave != NULL) { - dev_close(slave); - our_slave->original_flags &= ~IFF_UP; + /* restore the original state of the + * IFF_NOARP flag that might have been + * set by bond_set_slave_inactive_flags() + */ + if ((our_slave->original_flags & IFF_NOARP) == 0) { + slave->flags &= ~IFF_NOARP; } - bond_restore_slave_flags(our_slave); - kfree(our_slave); + /* if the last slave was removed, zero the mac address + * of the master so it will be set by the application + * to the mac address of the first slave + */ + if (bond->next == 
(slave_t*)bond) { + memset(master->dev_addr, 0, master->addr_len); + } + return 0; /* deletion OK */ } /* * This function releases all slaves. - * Warning: must put write-locks around the call to this function. */ static int bond_release_all(struct net_device *master) { bonding_t *bond; - slave_t *our_slave; + slave_t *our_slave, *old_current; struct net_device *slave_dev; + struct sockaddr addr; + int err = 0; if (master == NULL) { return -ENODEV; @@ -1582,49 +2044,96 @@ static int bond_release_all(struct net_device *master) } bond = (struct bonding *) master->priv; + + write_lock_bh(&bond->lock); + if (bond->next == (struct slave *) bond) { + err = -EINVAL; + goto out; + } + + old_current = bond->current_slave; + bond_assign_current_slave(bond, NULL); bond->current_arp_slave = NULL; - bond->current_slave = NULL; bond->primary_slave = NULL; while ((our_slave = bond->prev) != (slave_t *)bond) { + /* Inform AD package of unbinding of slave + * before slave is detached from the list. + */ + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_unbind_slave(our_slave); + } + slave_dev = our_slave->dev; bond_detach_slave(bond, our_slave); + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + /* must be called only after the slave + * has been detached from the list + */ + bond_alb_deinit_slave(bond, our_slave); + } + + /* now that the slave is detached, unlock and perform + * all the undo steps that should not be called from + * within a lock. 
+ */ + write_unlock_bh(&bond->lock); + if (multicast_mode == BOND_MULTICAST_ALL || (multicast_mode == BOND_MULTICAST_ACTIVE - && bond->current_slave == our_slave)) { + && old_current == our_slave)) { /* flush master's mc_list from slave */ bond_mc_list_flush (slave_dev, master); - + /* unset promiscuity level from slave */ if (master->flags & IFF_PROMISC) dev_set_promiscuity(slave_dev, -1); - + /* unset allmulti level from slave */ if (master->flags & IFF_ALLMULTI) dev_set_allmulti(slave_dev, -1); } - kfree(our_slave); + netdev_set_master(slave_dev, NULL); - /* - * Can be safely called from inside the bond lock - * since traffic and timers have already stopped + /* close slave before restoring its mac address */ + dev_close(slave_dev); + + if (app_abi_ver >= 1) { + /* restore original ("permanent") mac address*/ + memcpy(addr.sa_data, our_slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + slave_dev->set_mac_address(slave_dev, &addr); + } + + /* restore the original state of the IFF_NOARP flag that might have + * been set by bond_set_slave_inactive_flags() */ - netdev_set_master(slave_dev, NULL); + if ((our_slave->original_flags & IFF_NOARP) == 0) { + slave_dev->flags &= ~IFF_NOARP; + } - /* only restore its RUNNING flag if monitoring set it down */ - if (slave_dev->flags & IFF_UP) - slave_dev->flags |= IFF_RUNNING; + kfree(our_slave); - if (slave_dev->flags & IFF_NOARP) - dev_close(slave_dev); + /* re-acquire the lock before getting the next slave */ + write_lock_bh(&bond->lock); } + /* zero the mac address of the master so it will be + * set by the application to the mac address of the + * first slave + */ + memset(master->dev_addr, 0, master->addr_len); + printk (KERN_INFO "%s: released all slaves\n", master->name); - return 0; +out: + write_unlock_bh(&bond->lock); + + return err; } /* this function is called regularly to monitor each slave's link. 
*/ @@ -1632,10 +2141,9 @@ static void bond_mii_monitor(struct net_device *master) { bonding_t *bond = (struct bonding *) master->priv; slave_t *slave, *bestslave, *oldcurrent; - unsigned long flags; int slave_died = 0; - read_lock_irqsave(&bond->lock, flags); + read_lock(&bond->lock); /* we will try to read the link status of each of our slaves, and * set their IFF_RUNNING flag appropriately. For each slave not @@ -1655,6 +2163,8 @@ static void bond_mii_monitor(struct net_device *master) int mindelay = updelay + 1; struct net_device *dev = slave->dev; int link_state; + u16 old_speed = slave->speed; + u8 old_duplex = slave->duplex; link_state = bond_check_dev_link(dev, 0); @@ -1702,25 +2212,36 @@ static void bond_mii_monitor(struct net_device *master) /* link down for too long time */ slave->link = BOND_LINK_DOWN; /* in active/backup mode, we must - completely disable this interface */ - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + * completely disable this interface + */ + if ((bond_mode == BOND_MODE_ACTIVEBACKUP) || + (bond_mode == BOND_MODE_8023AD)) { bond_set_slave_inactive_flags(slave); } printk(KERN_INFO "%s: link status definitely down " - "for interface %s, disabling it", + "for interface %s, disabling it\n", master->name, dev->name); - read_lock(&bond->ptrlock); + /* notify ad that the link status has changed */ + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); + } + + write_lock(&bond->ptrlock); if (slave == bond->current_slave) { - read_unlock(&bond->ptrlock); /* find a new interface and be verbose */ change_active_interface(bond); } else { - read_unlock(&bond->ptrlock); printk(".\n"); } + write_unlock(&bond->ptrlock); slave_died = 1; } else { slave->delay--; @@ -1783,7 +2304,11 @@ static void bond_mii_monitor(struct net_device *master) slave->link = BOND_LINK_UP; 
slave->jiffies = jiffies; - if (bond_mode != BOND_MODE_ACTIVEBACKUP) { + if (bond_mode == BOND_MODE_8023AD) { + /* prevent it from being the active one */ + slave->state = BOND_STATE_BACKUP; + } + else if (bond_mode != BOND_MODE_ACTIVEBACKUP) { /* make it immediately active */ slave->state = BOND_STATE_ACTIVE; } else if (slave != bond->primary_slave) { @@ -1797,13 +2322,25 @@ static void bond_mii_monitor(struct net_device *master) master->name, dev->name); - if ( (bond->primary_slave != NULL) + /* notify ad that the link status has changed */ + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_handle_link_change(slave, BOND_LINK_UP); + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); + } + + write_lock(&bond->ptrlock); + if ( (bond->primary_slave != NULL) && (slave == bond->primary_slave) ) change_active_interface(bond); + write_unlock(&bond->ptrlock); } else slave->delay--; - + /* we'll also look for the mostly eligible slave */ if (bond->primary_slave == NULL) { if (IS_UP(dev) && (slave->delay < mindelay)) { @@ -1819,6 +2356,18 @@ static void bond_mii_monitor(struct net_device *master) } break; } /* end of switch */ + + bond_update_speed_duplex(slave); + + if (bond_mode == BOND_MODE_8023AD) { + if (old_speed != slave->speed) { + bond_3ad_adapter_speed_changed(slave); + } + if (old_duplex != slave->duplex) { + bond_3ad_adapter_duplex_changed(slave); + } + } + } /* end of while */ /* @@ -1846,16 +2395,26 @@ static void bond_mii_monitor(struct net_device *master) bestslave->delay = 0; bestslave->link = BOND_LINK_UP; bestslave->jiffies = jiffies; + + /* notify ad that the link status has changed */ + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_handle_link_change(bestslave, BOND_LINK_UP); + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_handle_link_change(bond, bestslave, BOND_LINK_UP); + } } if (bond_mode == BOND_MODE_ACTIVEBACKUP) { 
bond_set_slave_active_flags(bestslave); bond_mc_update(bond, bestslave, NULL); - } else { + } else if (bond_mode != BOND_MODE_8023AD) { bestslave->state = BOND_STATE_ACTIVE; } write_lock(&bond->ptrlock); - bond->current_slave = bestslave; + bond_assign_current_slave(bond, bestslave); write_unlock(&bond->ptrlock); } else if (slave_died) { /* print this message only once a slave has just died */ @@ -1865,7 +2424,7 @@ static void bond_mii_monitor(struct net_device *master) } } - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); /* re-arm the timer */ mod_timer(&bond->mii_timer, jiffies + (miimon * HZ / 1000)); } @@ -1880,7 +2439,6 @@ static void bond_mii_monitor(struct net_device *master) static void loadbalance_arp_monitor(struct net_device *master) { bonding_t *bond; - unsigned long flags; slave_t *slave; int the_delta_in_ticks = arp_interval * HZ / 1000; int next_timer = jiffies + (arp_interval * HZ / 1000); @@ -1891,24 +2449,22 @@ static void loadbalance_arp_monitor(struct net_device *master) return; } - read_lock_irqsave(&bond->lock, flags); - /* TODO: investigate why rtnl_shlock_nowait and rtnl_exlock_nowait * are called below and add comment why they are required... */ if ((!IS_UP(master)) || rtnl_shlock_nowait()) { mod_timer(&bond->arp_timer, next_timer); - read_unlock_irqrestore(&bond->lock, flags); return; } if (rtnl_exlock_nowait()) { rtnl_shunlock(); mod_timer(&bond->arp_timer, next_timer); - read_unlock_irqrestore(&bond->lock, flags); return; } + read_lock(&bond->lock); + /* see if any of the previous devices are up now (i.e. they have * xmt and rcv traffic). the current_slave does not come into * the picture unless it is null. also, slave->jiffies is not needed @@ -1935,9 +2491,8 @@ static void loadbalance_arp_monitor(struct net_device *master) * current_slave being null after enslaving * is closed. 
*/ - read_lock(&bond->ptrlock); + write_lock(&bond->ptrlock); if (bond->current_slave == NULL) { - read_unlock(&bond->ptrlock); printk(KERN_INFO "%s: link status definitely up " "for interface %s, ", @@ -1945,12 +2500,12 @@ static void loadbalance_arp_monitor(struct net_device *master) slave->dev->name); change_active_interface(bond); } else { - read_unlock(&bond->ptrlock); printk(KERN_INFO "%s: interface %s is now up\n", master->name, slave->dev->name); } + write_unlock(&bond->ptrlock); } } else { /* slave->link == BOND_LINK_UP */ @@ -1973,13 +2528,11 @@ static void loadbalance_arp_monitor(struct net_device *master) master->name, slave->dev->name); - read_lock(&bond->ptrlock); + write_lock(&bond->ptrlock); if (slave == bond->current_slave) { - read_unlock(&bond->ptrlock); change_active_interface(bond); - } else { - read_unlock(&bond->ptrlock); } + write_unlock(&bond->ptrlock); } } @@ -1995,9 +2548,9 @@ static void loadbalance_arp_monitor(struct net_device *master) } } + read_unlock(&bond->lock); rtnl_exunlock(); rtnl_shunlock(); - read_unlock_irqrestore(&bond->lock, flags); /* re-arm the timer */ mod_timer(&bond->arp_timer, next_timer); @@ -2021,7 +2574,6 @@ static void loadbalance_arp_monitor(struct net_device *master) static void activebackup_arp_monitor(struct net_device *master) { bonding_t *bond; - unsigned long flags; slave_t *slave; int the_delta_in_ticks = arp_interval * HZ / 1000; int next_timer = jiffies + (arp_interval * HZ / 1000); @@ -2032,14 +2584,13 @@ static void activebackup_arp_monitor(struct net_device *master) return; } - read_lock_irqsave(&bond->lock, flags); - if (!IS_UP(master)) { mod_timer(&bond->arp_timer, next_timer); - read_unlock_irqrestore(&bond->lock, flags); return; } + read_lock(&bond->lock); + /* determine if any slave has come up or any backup slave has * gone down * TODO: what about up/down delay in arp mode? 
it wasn't here before @@ -2057,7 +2608,7 @@ static void activebackup_arp_monitor(struct net_device *master) if ((bond->current_slave == NULL) && ((jiffies - slave->dev->trans_start) <= the_delta_in_ticks)) { - bond->current_slave = slave; + bond_assign_current_slave(bond, slave); bond_set_slave_active_flags(slave); bond_mc_update(bond, slave, NULL); bond->current_arp_slave = NULL; @@ -2148,7 +2699,9 @@ static void activebackup_arp_monitor(struct net_device *master) "active interface %s, disabling it", master->name, slave->dev->name); + write_lock(&bond->ptrlock); slave = change_active_interface(bond); + write_unlock(&bond->ptrlock); bond->current_arp_slave = slave; if (slave != NULL) { slave->jiffies = jiffies; @@ -2169,7 +2722,7 @@ static void activebackup_arp_monitor(struct net_device *master) bond_set_slave_inactive_flags(slave); bond_mc_update(bond, bond->primary_slave, slave); write_lock(&bond->ptrlock); - bond->current_slave = bond->primary_slave; + bond_assign_current_slave(bond, bond->primary_slave); write_unlock(&bond->ptrlock); slave = bond->primary_slave; bond_set_slave_active_flags(slave); @@ -2240,8 +2793,8 @@ static void activebackup_arp_monitor(struct net_device *master) } } + read_unlock(&bond->lock); mod_timer(&bond->arp_timer, next_timer); - read_unlock_irqrestore(&bond->lock, flags); } typedef uint32_t in_addr_t; @@ -2337,17 +2890,16 @@ static int bond_info_query(struct net_device *master, struct ifbond *info) { bonding_t *bond = (struct bonding *) master->priv; slave_t *slave; - unsigned long flags; info->bond_mode = bond_mode; info->num_slaves = 0; info->miimon = miimon; - read_lock_irqsave(&bond->lock, flags); + read_lock_bh(&bond->lock); for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { info->num_slaves++; } - read_unlock_irqrestore(&bond->lock, flags); + read_unlock_bh(&bond->lock); return 0; } @@ -2358,19 +2910,18 @@ static int bond_slave_info_query(struct net_device *master, bonding_t *bond = (struct bonding *) 
master->priv; slave_t *slave; int cur_ndx = 0; - unsigned long flags; if (info->slave_id < 0) { return -ENODEV; } - read_lock_irqsave(&bond->lock, flags); + read_lock_bh(&bond->lock); for (slave = bond->prev; slave != (slave_t *)bond && cur_ndx < info->slave_id; slave = slave->prev) { cur_ndx++; } - read_unlock_irqrestore(&bond->lock, flags); + read_unlock_bh(&bond->lock); if (slave != (slave_t *)bond) { strcpy(info->slave_name, slave->dev->name); @@ -2384,6 +2935,59 @@ static int bond_slave_info_query(struct net_device *master, return 0; } +static int bond_ethtool_ioctl(struct net_device *master_dev, struct ifreq *ifr) +{ + void *addr = ifr->ifr_data; + uint32_t cmd; + + if (get_user(cmd, (uint32_t *) addr)) + return -EFAULT; + + switch (cmd) { + + case ETHTOOL_GDRVINFO: + { + struct ethtool_drvinfo info; + char *endptr; + + if (copy_from_user(&info, addr, sizeof(info))) + return -EFAULT; + + if (strcmp(info.driver, "ifenslave") == 0) { + int new_abi_ver; + + new_abi_ver = simple_strtoul(info.fw_version, + &endptr, 0); + if (*endptr) { + printk(KERN_ERR + "bonding: Error: got invalid ABI" + " version from application\n"); + + return -EINVAL; + } + + if (orig_app_abi_ver == -1) { + orig_app_abi_ver = new_abi_ver; + } + + app_abi_ver = new_abi_ver; + } + + strncpy(info.driver, DRV_NAME, 32); + strncpy(info.version, DRV_VERSION, 32); + snprintf(info.fw_version, 32, "%d", BOND_ABI_VERSION); + + if (copy_to_user(addr, &info, sizeof(info))) + return -EFAULT; + + return 0; + } + break; + default: + return -EOPNOTSUPP; + } +} + static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd) { struct net_device *slave_dev = NULL; @@ -2398,6 +3002,9 @@ static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd) #endif switch (cmd) { + case SIOCETHTOOL: + return bond_ethtool_ioctl(master_dev, ifr); + case SIOCGMIIPHY: mii = (struct mii_ioctl_data *)&ifr->ifr_data; if (mii == NULL) { @@ -2451,6 +3058,21 @@ static int bond_ioctl(struct 
net_device *master_dev, struct ifreq *ifr, int cmd) return -EPERM; } + if (orig_app_abi_ver == -1) { + /* no orig_app_abi_ver was provided yet, so we'll use the + * current one from now on, even if it's 0 + */ + orig_app_abi_ver = app_abi_ver; + + } else if (orig_app_abi_ver != app_abi_ver) { + printk(KERN_ERR + "bonding: Error: already using ifenslave ABI " + "version %d; to upgrade ifenslave to version %d," + "you must first reload bonding.\n", + orig_app_abi_ver, app_abi_ver); + return -EINVAL; + } + slave_dev = dev_get_by_name(ifr->ifr_slave); #ifdef BONDING_DEBUG @@ -2476,7 +3098,9 @@ static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd) break; case BOND_CHANGE_ACTIVE_OLD: case SIOCBONDCHANGEACTIVE: - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + if ((bond_mode == BOND_MODE_ACTIVEBACKUP) || + (bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { ret = bond_change_active(master_dev, slave_dev); } else { @@ -2505,7 +3129,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev) { slave_t *slave, *start_at; struct bonding *bond = (struct bonding *) dev->priv; - unsigned long flags; struct net_device *device_we_should_send_to = 0; if (!IS_UP(dev)) { /* bond down */ @@ -2513,7 +3136,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev) return 0; } - read_lock_irqsave(&bond->lock, flags); + read_lock(&bond->lock); read_lock(&bond->ptrlock); slave = start_at = bond->current_slave; @@ -2521,7 +3144,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev) if (slave == NULL) { /* we're at the root, get the first slave */ /* no suitable interface, frame not sent */ - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); dev_kfree_skb(skb); return 0; } @@ -2553,7 +3176,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb(skb); /* frame sent to all suitable interfaces */ - 
read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } @@ -2561,14 +3184,13 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev) { slave_t *slave, *start_at; struct bonding *bond = (struct bonding *) dev->priv; - unsigned long flags; if (!IS_UP(dev)) { /* bond down */ dev_kfree_skb(skb); return 0; } - read_lock_irqsave(&bond->lock, flags); + read_lock(&bond->lock); read_lock(&bond->ptrlock); slave = start_at = bond->current_slave; @@ -2577,7 +3199,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev) if (slave == NULL) { /* we're at the root, get the first slave */ /* no suitable interface, frame not sent */ dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } @@ -2591,30 +3213,29 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev) dev_queue_xmit(skb); write_lock(&bond->ptrlock); - bond->current_slave = slave->next; + bond_assign_current_slave(bond, slave->next); write_unlock(&bond->ptrlock); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } } while ((slave = slave->next) != start_at); /* no suitable interface, frame not sent */ dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } /* * in XOR mode, we determine the output device by performing xor on - * the source and destination hw addresses. If this device is not + * the source and destination hw adresses. If this device is not * enabled, find the next slave following this xor slave. 
*/ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev) { slave_t *slave, *start_at; struct bonding *bond = (struct bonding *) dev->priv; - unsigned long flags; struct ethhdr *data = (struct ethhdr *)skb->data; int slave_no; @@ -2623,14 +3244,14 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev) return 0; } - read_lock_irqsave(&bond->lock, flags); + read_lock(&bond->lock); slave = bond->prev; /* we're at the root, get the first slave */ if (bond->slave_cnt == 0) { /* no suitable interface, frame not sent */ dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } @@ -2651,14 +3272,14 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev) skb->priority = 1; dev_queue_xmit(skb); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } } while ((slave = slave->next) != start_at); /* no suitable interface, frame not sent */ dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } @@ -2669,7 +3290,6 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev) static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = (struct bonding *) dev->priv; - unsigned long flags; int ret; if (!IS_UP(dev)) { /* bond down */ @@ -2710,7 +3330,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) } } - read_lock_irqsave(&bond->lock, flags); + read_lock(&bond->lock); read_lock(&bond->ptrlock); if (bond->current_slave != NULL) { /* one usable interface */ @@ -2718,7 +3338,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) read_unlock(&bond->ptrlock); skb->priority = 1; ret = dev_queue_xmit(skb); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } else { @@ -2730,7 +3350,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) 
printk(KERN_INFO "There was no suitable interface, so we don't transmit\n"); #endif dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock(&bond->lock); return 0; } @@ -2739,11 +3359,10 @@ static struct net_device_stats *bond_get_stats(struct net_device *dev) bonding_t *bond = dev->priv; struct net_device_stats *stats = bond->stats, *sstats; slave_t *slave; - unsigned long flags; memset(bond->stats, 0, sizeof(struct net_device_stats)); - read_lock_irqsave(&bond->lock, flags); + read_lock_bh(&bond->lock); for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { sstats = slave->dev->get_stats(slave->dev); @@ -2776,7 +3395,7 @@ static struct net_device_stats *bond_get_stats(struct net_device *dev) } - read_unlock_irqrestore(&bond->lock, flags); + read_unlock_bh(&bond->lock); return stats; } @@ -2787,7 +3406,8 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length) off_t begin = 0; u16 link; slave_t *slave = NULL; - unsigned long flags; + + len += sprintf(buf + len, "%s\n", version); while (bond != NULL) { /* @@ -2799,8 +3419,10 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length) len += sprintf(buf + len, "Bonding Mode: %s\n", bond_mode_name()); - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - read_lock_irqsave(&bond->lock, flags); + if ((bond_mode == BOND_MODE_ACTIVEBACKUP) || + (bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + read_lock_bh(&bond->lock); read_lock(&bond->ptrlock); if (bond->current_slave != NULL) { len += sprintf(buf + len, @@ -2808,7 +3430,7 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length) bond->current_slave->dev->name); } read_unlock(&bond->ptrlock); - read_unlock_irqrestore(&bond->lock, flags); + read_unlock_bh(&bond->lock); } len += sprintf(buf + len, "MII Status: "); @@ -2823,7 +3445,32 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length) len += sprintf(buf + len, "Multicast Mode: 
%s\n", multicast_mode_name()); - read_lock_irqsave(&bond->lock, flags); + read_lock_bh(&bond->lock); + + if (bond_mode == BOND_MODE_8023AD) { + struct ad_info ad_info; + + len += sprintf(buf + len, "\n802.3ad info\n"); + + if (bond_3ad_get_active_agg_info(bond, &ad_info)) { + len += sprintf(buf + len, "bond %s has no active aggregator\n", bond->device->name); + } else { + len += sprintf(buf + len, "Active Aggregator Info:\n"); + + len += sprintf(buf + len, "\tAggregator ID: %d\n", ad_info.aggregator_id); + len += sprintf(buf + len, "\tNumber of ports: %d\n", ad_info.ports); + len += sprintf(buf + len, "\tActor Key: %d\n", ad_info.actor_key); + len += sprintf(buf + len, "\tPartner Key: %d\n", ad_info.partner_key); + len += sprintf(buf + len, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n", + ad_info.partner_system[0], + ad_info.partner_system[1], + ad_info.partner_system[2], + ad_info.partner_system[3], + ad_info.partner_system[4], + ad_info.partner_system[5]); + } + } + for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { len += sprintf(buf + len, "\nSlave Interface: %s\n", slave->dev->name); @@ -2835,8 +3482,30 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length) "up\n" : "down\n"); len += sprintf(buf + len, "Link Failure Count: %d\n", slave->link_failure_count); + + if (app_abi_ver >= 1) { + len += sprintf(buf + len, + "Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n", + slave->perm_hwaddr[0], + slave->perm_hwaddr[1], + slave->perm_hwaddr[2], + slave->perm_hwaddr[3], + slave->perm_hwaddr[4], + slave->perm_hwaddr[5]); + } + + if (bond_mode == BOND_MODE_8023AD) { + struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; + + if (agg) { + len += sprintf(buf + len, "Aggregator ID: %d\n", + agg->aggregator_identifier); + } else { + len += sprintf(buf + len, "Aggregator ID: N/A\n"); + } + } } - read_unlock_irqrestore(&bond->lock, flags); + read_unlock_bh(&bond->lock); /* * Figure out the calcs for the 
/proc/net interface @@ -2904,7 +3573,7 @@ static int bond_event(struct notifier_block *this, unsigned long event, } static struct notifier_block bond_netdev_notifier = { - .notifier_call = bond_event, + notifier_call: bond_event, }; static int __init bond_init(struct net_device *dev) @@ -2952,6 +3621,13 @@ static int __init bond_init(struct net_device *dev) case BOND_MODE_BROADCAST: dev->hard_start_xmit = bond_xmit_broadcast; break; + case BOND_MODE_8023AD: + dev->hard_start_xmit = bond_3ad_xmit_xor; + break; + case BOND_MODE_TLB: + case BOND_MODE_ALB: + dev->hard_start_xmit = bond_alb_xmit; + break; default: printk(KERN_ERR "Unknown bonding mode %d\n", bond_mode); kfree(bond->stats); @@ -3101,6 +3777,24 @@ static int __init bonding_init(void) } } + if (lacp_rate) { + if (bond_mode != BOND_MODE_8023AD) { + printk(KERN_WARNING + "lacp_rate param is irrelevant in mode %s\n", + bond_mode_name()); + } else { + lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); + if (lacp_fast == -1) { + printk(KERN_WARNING + "bonding_init(): Invalid lacp rate " + "\"%s\"\n", + lacp_rate == NULL ? 
"NULL" : lacp_rate); + + return -EINVAL; + } + } + } + if (max_bonds < 1 || max_bonds > INT_MAX) { printk(KERN_WARNING "bonding_init(): max_bonds (%d) not in range %d-%d, " @@ -3139,6 +3833,64 @@ static int __init bonding_init(void) downdelay = 0; } + /* reset values for 802.3ad */ + if (bond_mode == BOND_MODE_8023AD) { + if (arp_interval != 0) { + printk(KERN_WARNING "bonding_init(): ARP monitoring" + "can't be used simultaneously with 802.3ad, " + "disabling ARP monitoring\n"); + arp_interval = 0; + } + + if (miimon == 0) { + printk(KERN_ERR + "bonding_init(): miimon must be specified, " + "otherwise bonding will not detect link failure, " + "speed and duplex which are essential " + "for 802.3ad operation\n"); + printk(KERN_ERR "Forcing miimon to 100msec\n"); + miimon = 100; + } + + if (multicast_mode != BOND_MULTICAST_ALL) { + printk(KERN_ERR + "bonding_init(): Multicast mode must " + "be set to ALL for 802.3ad\n"); + printk(KERN_ERR "Forcing Multicast mode to ALL\n"); + multicast_mode = BOND_MULTICAST_ALL; + } + } + + /* reset values for TLB/ALB */ + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + if (miimon == 0) { + printk(KERN_ERR + "bonding_init(): miimon must be specified, " + "otherwise bonding will not detect link failure " + "and link speed which are essential " + "for TLB/ALB load balancing\n"); + printk(KERN_ERR "Forcing miimon to 100msec\n"); + miimon = 100; + } + + if (multicast_mode != BOND_MULTICAST_ACTIVE) { + printk(KERN_ERR + "bonding_init(): Multicast mode must " + "be set to ACTIVE for TLB/ALB\n"); + printk(KERN_ERR "Forcing Multicast mode to ACTIVE\n"); + multicast_mode = BOND_MULTICAST_ACTIVE; + } + } + + if (bond_mode == BOND_MODE_ALB) { + printk(KERN_INFO + "In ALB mode you might experience client disconnections" + " upon reconnection of a link if the bonding module" + " updelay parameter (%d msec) is incompatible with the" + " forwarding delay time of the switch\n", updelay); + } + if (miimon == 0) { if ((updelay 
!= 0) || (downdelay != 0)) { /* just warn the user the up/down delay will have @@ -3230,13 +3982,15 @@ static int __init bonding_init(void) "link failures! see bonding.txt for details.\n"); } - if ((primary != NULL) && (bond_mode != BOND_MODE_ACTIVEBACKUP)){ - /* currently, using a primary only makes sence - * in active backup mode + if ((primary != NULL) && (bond_mode != BOND_MODE_ACTIVEBACKUP) && + (bond_mode != BOND_MODE_TLB) && + (bond_mode != BOND_MODE_ALB)){ + /* currently, using a primary only makes sense + * in active backup, TLB or ALB modes */ printk(KERN_WARNING "bonding_init(): %s primary device specified but has " - " no effect in %s mode\n", + "no effect in %s mode\n", primary, bond_mode_name()); primary = NULL; } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h new file mode 100644 index 000000000000..6388d78abeea --- /dev/null +++ b/drivers/net/bonding/bonding.h @@ -0,0 +1,181 @@ +/* + * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. + * + * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes + * NCM: Network and Communications Management, Inc. + * + * BUT, I'm the one who modified it for ethernet, so: + * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov + * + * This software may be used and distributed according to the terms + * of the GNU Public License, incorporated herein by reference. + * + * + * 2003/03/18 - Amir Noam <amir.noam at intel dot com>, + * Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Shmulik Hen <shmulik.hen at intel dot com> + * - Added support for IEEE 802.3ad Dynamic link aggregation mode. + * + * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Amir Noam <amir.noam at intel dot com> + * - Code beautification and style changes (mainly in comments). + * + * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> + * - Added support for Transmit load balancing mode. 
+ */ + +#ifndef _LINUX_BONDING_H +#define _LINUX_BONDING_H + +#include <linux/timer.h> +#include <linux/proc_fs.h> +#include "bond_3ad.h" +#include "bond_alb.h" + +#ifdef BONDING_DEBUG + +// use this like so: BOND_PRINT_DBG(("foo = %d, bar = %d", foo, bar)); +#define BOND_PRINT_DBG(X) \ +do { \ + printk(KERN_DEBUG "%s (%d)", __FUNCTION__, __LINE__); \ + printk X; \ + printk("\n"); \ +} while(0) + +#else +#define BOND_PRINT_DBG(X) +#endif /* BONDING_DEBUG */ + +#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \ + (netif_running(dev) && netif_carrier_ok(dev))) + +/* Checks whether the dev is ready for transmit. We do not check netif_running + * since a device can be stopped by the driver for short periods of time for + * maintenance. dev_queue_xmit() handles this by queueing the packet until the + * dev is running again. Keeping packet ordering requires sticking to the + * same dev as much as possible + */ +#define SLAVE_IS_OK(slave) \ + ((((slave)->dev->flags & (IFF_UP)) == (IFF_UP)) && \ + netif_carrier_ok((slave)->dev) && \ + ((slave)->link == BOND_LINK_UP) && \ + ((slave)->state == BOND_STATE_ACTIVE)) + + +typedef struct slave { + struct slave *next; + struct slave *prev; + struct net_device *dev; + short delay; + unsigned long jiffies; + char link; /* one of BOND_LINK_XXXX */ + char state; /* one of BOND_STATE_XXXX */ + unsigned short original_flags; + u32 link_failure_count; + u16 speed; + u8 duplex; + u8 perm_hwaddr[ETH_ALEN]; + struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */ + struct tlb_slave_info tlb_info; +} slave_t; + +/* + * Here are the locking policies for the two bonding locks: + * + * 1) Get bond->lock when reading/writing slave list. + * 2) Get bond->ptrlock when reading/writing bond->current_slave. + * (It is unnecessary when the write-lock is put with bond->lock.) + * 3) When we lock with bond->ptrlock, we must lock with bond->lock + * beforehand. 
+ */ +typedef struct bonding { + slave_t *next; + slave_t *prev; + slave_t *current_slave; + slave_t *primary_slave; + slave_t *current_arp_slave; + __s32 slave_cnt; + rwlock_t lock; + rwlock_t ptrlock; + struct timer_list mii_timer; + struct timer_list arp_timer; + struct net_device_stats *stats; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *bond_proc_dir; + struct proc_dir_entry *bond_proc_info_file; +#endif /* CONFIG_PROC_FS */ + struct bonding *next_bond; + struct net_device *device; + struct dev_mc_list *mc_list; + unsigned short flags; + struct ad_bond_info ad_info; + struct alb_bond_info alb_info; +} bonding_t; + +/* Forward declarations */ +void bond_set_slave_active_flags(slave_t *slave); +void bond_set_slave_inactive_flags(slave_t *slave); + +/** + * These functions can be used for iterating the slave list + * (which is circular) + * Caller must hold bond lock for read + */ +extern inline struct slave* +bond_get_first_slave(struct bonding *bond) +{ + /* if there are no slaves return NULL */ + if (bond->next == (slave_t *)bond) { + return NULL; + } + return bond->next; +} + +/** + * Caller must hold bond lock for read + */ +extern inline struct slave* +bond_get_next_slave(struct bonding *bond, struct slave *slave) +{ + /* If we have reached the last slave return NULL */ + if (slave->next == bond->next) { + return NULL; + } + return slave->next; +} + +/** + * Returns NULL if the net_device does not belong to any of the bond's slaves + * + * Caller must hold bond lock for read + */ +extern inline struct slave* +bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev) +{ + struct slave *our_slave = bond->next; + + /* check if the list of slaves is empty */ + if (our_slave == (slave_t *)bond) { + return NULL; + } + + for (; our_slave; our_slave = bond_get_next_slave(bond, our_slave)) { + if (our_slave->dev == slave_dev) { + break; + } + } + return our_slave; +} + +extern inline struct bonding* +bond_get_bond_by_slave(struct slave *slave) +{ 
+ if (!slave || !slave->dev->master) { + return NULL; + } + + return (struct bonding *)(slave->dev->master->priv); +} + +#endif /* _LINUX_BONDING_H */ + diff --git a/drivers/net/cs89x0.h b/drivers/net/cs89x0.h index 39a07ee1657e..f1507b549dcb 100644 --- a/drivers/net/cs89x0.h +++ b/drivers/net/cs89x0.h @@ -385,11 +385,11 @@ #define A_CNF_10B_T 0x0001 #define A_CNF_AUI 0x0002 #define A_CNF_10B_2 0x0004 -#define A_CNF_MEDIA_TYPE 0x0060 -#define A_CNF_MEDIA_AUTO 0x0000 +#define A_CNF_MEDIA_TYPE 0x0070 +#define A_CNF_MEDIA_AUTO 0x0070 #define A_CNF_MEDIA_10B_T 0x0020 #define A_CNF_MEDIA_AUI 0x0040 -#define A_CNF_MEDIA_10B_2 0x0060 +#define A_CNF_MEDIA_10B_2 0x0010 #define A_CNF_DC_DC_POLARITY 0x0080 #define A_CNF_NO_AUTO_POLARITY 0x2000 #define A_CNF_LOW_RX_SQUELCH 0x4000 diff --git a/drivers/net/fec.h b/drivers/net/fec.h index e8f04e8d2474..3bdb0d78b75a 100644 --- a/drivers/net/fec.h +++ b/drivers/net/fec.h @@ -1,9 +1,10 @@ /****************************************************************************/ /* - * fec.h -- Fast Ethernet Controller for Motorola ColdFire 5272. + * fec.h -- Fast Ethernet Controller for Motorola ColdFire 5272 + * and 5282.. * - * (C) Copyright 2000-2001, Greg Ungerer (gerg@snapgear.com) + * (C) Copyright 2000-2003, Greg Ungerer (gerg@snapgear.com) * (C) Copyright 2000-2001, Lineo (www.lineo.com) */ @@ -12,6 +13,52 @@ #define FEC_H /****************************************************************************/ +#ifdef CONFIG_M5282 +/* + * Just figures, Motorola would have to change the offsets for + * registers in the same peripheral device on different models + * of the ColdFire! 
+ */ +typedef struct fec { + unsigned long fec_reserved0; + unsigned long fec_ievent; /* Interrupt event reg */ + unsigned long fec_imask; /* Interrupt mask reg */ + unsigned long fec_reserved1; + unsigned long fec_r_des_active; /* Receive descriptor reg */ + unsigned long fec_x_des_active; /* Transmit descriptor reg */ + unsigned long fec_reserved2[3]; + unsigned long fec_ecntrl; /* Ethernet control reg */ + unsigned long fec_reserved3[6]; + unsigned long fec_mii_data; /* MII manage frame reg */ + unsigned long fec_mii_speed; /* MII speed control reg */ + unsigned long fec_reserved4[7]; + unsigned long fec_mib_ctrlstat; /* MIB control/status reg */ + unsigned long fec_reserved5[7]; + unsigned long fec_r_cntrl; /* Receive control reg */ + unsigned long fec_reserved6[15]; + unsigned long fec_x_cntrl; /* Transmit Control reg */ + unsigned long fec_reserved7[7]; + unsigned long fec_addr_low; /* Low 32bits MAC address */ + unsigned long fec_addr_high; /* High 16bits MAC address */ + unsigned long fec_opd; /* Opcode + Pause duration */ + unsigned long fec_reserved8[10]; + unsigned long fec_hash_table_high; /* High 32bits hash table */ + unsigned long fec_hash_table_low; /* Low 32bits hash table */ + unsigned long fec_grp_hash_table_high;/* High 32bits hash table */ + unsigned long fec_grp_hash_table_low; /* Low 32bits hash table */ + unsigned long fec_reserved9[7]; + unsigned long fec_x_wmrk; /* FIFO transmit water mark */ + unsigned long fec_reserved10; + unsigned long fec_r_bound; /* FIFO receive bound reg */ + unsigned long fec_r_fstart; /* FIFO receive start reg */ + unsigned long fec_reserved11[11]; + unsigned long fec_r_des_start; /* Receive descriptor ring */ + unsigned long fec_x_des_start; /* Transmit descriptor ring */ + unsigned long fec_r_buff_size; /* Maximum receive buff size */ +} fec_t; + +#else + /* * Define device register set address map. 
*/ @@ -49,6 +96,8 @@ typedef struct fec { unsigned long fec_fifo_ram[112]; /* FIFO RAM buffer */ } fec_t; +#endif /* CONFIG_M5272 */ + /* * Define the buffer descriptor structure. diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 93b6f7f4d4c6..6f6c735325d5 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -207,8 +207,10 @@ KERN_INFO " Further modifications by Keith Underwood <keithu@parl.clemson.edu> /* Condensed bus+endian portability operations. */ #if ADDRLEN == 64 #define cpu_to_leXX(addr) cpu_to_le64(addr) +#define desc_to_virt(addr) bus_to_virt(le64_to_cpu(addr)) #else #define cpu_to_leXX(addr) cpu_to_le32(addr) +#define desc_to_virt(addr) bus_to_virt(le32_to_cpu(addr)) #endif @@ -1498,7 +1500,7 @@ static int hamachi_rx(struct net_device *dev) break; pci_dma_sync_single(hmp->pci_dev, desc->addr, hmp->rx_buf_sz, PCI_DMA_FROMDEVICE); - buf_addr = (u8 *)hmp->rx_ring + entry*sizeof(*desc); + buf_addr = desc_to_virt(desc->addr); frame_status = le32_to_cpu(get_unaligned((s32*)&(buf_addr[data_size - 12]))); if (hamachi_debug > 4) printk(KERN_DEBUG " hamachi_rx() status was %8.8x.\n", diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c index ad5646db78fd..36a44fe86dc8 100644 --- a/drivers/net/tlan.c +++ b/drivers/net/tlan.c @@ -1537,7 +1537,7 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int ) t = (void *) skb_put( new_skb, TLAN_MAX_FRAME_SIZE ); head_list->buffer[0].address = pci_map_single(priv->pciDev, new_skb->data, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); head_list->buffer[8].address = (u32) t; -#ifdef __LP64__ +#if BITS_PER_LONG==64 #error "Not 64bit clean" #endif head_list->buffer[9].address = (u32) new_skb; diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index 8a74d3280e73..d891b0500e46 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -2026,8 +2026,6 @@ set_multicast_list(struct net_device *dev) dev->trans_start = jiffies; } } - - return IRQ_HANDLED; } /* diff --git 
a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c index 694692b6393d..6067504eff90 100644 --- a/drivers/net/tulip/interrupt.c +++ b/drivers/net/tulip/interrupt.c @@ -197,7 +197,7 @@ static int tulip_rx(struct net_device *dev) "do not match in tulip_rx: %08x vs. %Lx %p / %p.\n", dev->name, le32_to_cpu(tp->rx_ring[entry].buffer1), - (u64)tp->rx_buffers[entry].mapping, + (long long)tp->rx_buffers[entry].mapping, skb->head, temp); } #endif diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h index 9f4289bd3a17..49c60dca3f51 100644 --- a/drivers/net/tulip/tulip.h +++ b/drivers/net/tulip/tulip.h @@ -193,16 +193,6 @@ enum desc_status_bits { }; -enum t21041_csr13_bits { - csr13_eng = (0xEF0<<4), /* for eng. purposes only, hardcode at EF0h */ - csr13_aui = (1<<3), /* clear to force 10bT, set to force AUI/BNC */ - csr13_cac = (1<<2), /* CSR13/14/15 autoconfiguration */ - csr13_srl = (1<<0), /* When reset, resets all SIA functions, machines */ - - csr13_mask_auibnc = (csr13_eng | csr13_aui | csr13_srl), - csr13_mask_10bt = (csr13_eng | csr13_srl), -}; - enum t21143_csr6_bits { csr6_sc = (1<<31), csr6_ra = (1<<30), @@ -449,9 +439,6 @@ extern const char * const medianame[]; extern const char tulip_media_cap[]; extern struct tulip_chip_table tulip_tbl[]; extern u8 t21040_csr13[]; -extern u16 t21041_csr13[]; -extern u16 t21041_csr14[]; -extern u16 t21041_csr15[]; #ifndef USE_IO_OPS #undef inb diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c index 6aed1c21e642..adc00edc3e0d 100644 --- a/drivers/net/tulip/xircom_cb.c +++ b/drivers/net/tulip/xircom_cb.c @@ -243,38 +243,29 @@ static int __devinit xircom_probe(struct pci_dev *pdev, const struct pci_device_ return -ENODEV; } - /* Before changing the hardware, allocate the memory. This way, we can fail gracefully if not enough memory is available. 
*/ - private = kmalloc(sizeof(*private),GFP_KERNEL); - memset(private, 0, sizeof(struct xircom_private)); + if ((dev = init_etherdev(NULL, sizeof(struct xircom_private))) == NULL) { + printk(KERN_ERR "xircom_probe: failed to allocate etherdev\n"); + goto device_fail; + } + private = dev->priv; /* Allocate the send/receive buffers */ private->rx_buffer = pci_alloc_consistent(pdev,8192,&private->rx_dma_handle); - if (private->rx_buffer == NULL) { printk(KERN_ERR "xircom_probe: no memory for rx buffer \n"); - kfree(private); - return -ENODEV; + goto rx_buf_fail; } private->tx_buffer = pci_alloc_consistent(pdev,8192,&private->tx_dma_handle); if (private->tx_buffer == NULL) { printk(KERN_ERR "xircom_probe: no memory for tx buffer \n"); - kfree(private->rx_buffer); - kfree(private); - return -ENODEV; - } - dev = init_etherdev(dev, 0); - if (dev == NULL) { - printk(KERN_ERR "xircom_probe: failed to allocate etherdev\n"); - kfree(private->rx_buffer); - kfree(private->tx_buffer); - kfree(private); - return -ENODEV; + goto tx_buf_fail; } + SET_MODULE_OWNER(dev); SET_NETDEV_DEV(dev, &pdev->dev); printk(KERN_INFO "%s: Xircom cardbus revision %i at irq %i \n", dev->name, chip_rev, pdev->irq); @@ -305,14 +296,21 @@ static int __devinit xircom_probe(struct pci_dev *pdev, const struct pci_device_ transceiver_voodoo(private); spin_lock_irqsave(&private->lock,flags); - activate_transmitter(private); - activate_receiver(private); + activate_transmitter(private); + activate_receiver(private); spin_unlock_irqrestore(&private->lock,flags); trigger_receive(private); leave("xircom_probe"); return 0; + +tx_buf_fail: + kfree(private->rx_buffer); +rx_buf_fail: + kfree(dev); +device_fail: + return -ENODEV; } @@ -337,7 +335,6 @@ static void __devexit xircom_remove(struct pci_dev *pdev) pci_free_consistent(pdev,8192,card->tx_buffer,card->tx_dma_handle); card->tx_buffer = NULL; } - kfree(card); } release_region(dev->base_addr, 128); unregister_netdev(dev); diff --git 
a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index 29fdf60fb5d6..63b97aa63877 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -1743,31 +1743,28 @@ static int netwave_close(struct net_device *dev) { return 0; } -static int __init init_netwave_cs(void) { - servinfo_t serv; - - DEBUG(0, "%s\n", version); +static struct pcmcia_driver netwave_driver = { + .owner = THIS_MODULE, + .drv = { + .name = "netwave_cs", + }, + .attach = netwave_attach, + .detach = netwave_detach, +}; - CardServices(GetCardServicesInfo, &serv); - if (serv.Revision != CS_RELEASE_CODE) { - printk("netwave_cs: Card Services release does not match!\n"); - return -1; - } - - register_pccard_driver(&dev_info, &netwave_attach, &netwave_detach); - - return 0; +static int __init init_netwave_cs(void) +{ + return pcmcia_register_driver(&netwave_driver); } -static void __exit exit_netwave_cs(void) { - DEBUG(1, "netwave_cs: unloading\n"); - - unregister_pccard_driver(&dev_info); +static void __exit exit_netwave_cs(void) +{ + pcmcia_unregister_driver(&netwave_driver); - /* Do some cleanup of the device list */ - netwave_flush_stale_links(); - if(dev_list != NULL) /* Critical situation */ - printk("netwave_cs: devices remaining when removing module\n"); + /* Do some cleanup of the device list */ + netwave_flush_stale_links(); + if (dev_list != NULL) /* Critical situation */ + printk("netwave_cs: devices remaining when removing module\n"); } module_init(init_netwave_cs); diff --git a/drivers/net/wireless/orinoco_cs.c b/drivers/net/wireless/orinoco_cs.c index 6b21842a0dd2..095804aa28d6 100644 --- a/drivers/net/wireless/orinoco_cs.c +++ b/drivers/net/wireless/orinoco_cs.c @@ -687,29 +687,27 @@ orinoco_cs_event(event_t event, int priority, * become const */ static char version[] __initdata = "orinoco_cs.c 0.13a (David Gibson <hermes@gibson.dropbear.id.au> and others)"; +static struct pcmcia_driver orinoco_driver = { + .owner = THIS_MODULE, + .drv 
= { + .name = "orinoco_cs", + }, + .attach = orinoco_cs_attach, + .detach = orinoco_cs_detach, +}; + static int __init init_orinoco_cs(void) { - servinfo_t serv; - printk(KERN_DEBUG "%s\n", version); - CardServices(GetCardServicesInfo, &serv); - if (serv.Revision != CS_RELEASE_CODE) { - printk(KERN_NOTICE "orinoco_cs: Card Services release " - "does not match!\n"); - return -1; - } - - register_pccard_driver(&dev_info, &orinoco_cs_attach, &orinoco_cs_detach); - - return 0; + return pcmcia_register_driver(&orinoco_driver); } static void __exit exit_orinoco_cs(void) { - unregister_pccard_driver(&dev_info); + pcmcia_unregister_driver(&orinoco_driver); if (dev_list) DEBUG(0, "orinoco_cs: Removing leftover devices.\n"); diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index 8d7148b6f958..887adc03a646 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -5026,77 +5026,28 @@ wavelan_event(event_t event, /* The event received */ return 0; } -/****************************** MODULE ******************************/ -/* - * Module entry points : insertion & removal - */ +static struct pcmcia_driver wavelan_driver = { + .owner = THIS_MODULE, + .drv = { + .name = "wavelan_cs", + }, + .attach = wavelan_attach, + .detach = wavelan_detach, +}; -/*------------------------------------------------------------------*/ -/* - * Module insertion : initialisation of the module. - * Register the card with cardmgr... 
- */ static int __init init_wavelan_cs(void) { - servinfo_t serv; - -#ifdef DEBUG_MODULE_TRACE - printk(KERN_DEBUG "-> init_wavelan_cs()\n"); -#ifdef DEBUG_VERSION_SHOW - printk(KERN_DEBUG "%s", version); -#endif -#endif - - CardServices(GetCardServicesInfo, &serv); - if(serv.Revision != CS_RELEASE_CODE) - { -#ifdef DEBUG_CONFIG_ERRORS - printk(KERN_WARNING "init_wavelan_cs: Card Services release does not match!\n"); -#endif - return -1; - } - - register_pccard_driver(&dev_info, &wavelan_attach, &wavelan_detach); - -#ifdef DEBUG_MODULE_TRACE - printk(KERN_DEBUG "<- init_wavelan_cs()\n"); -#endif - return 0; + return pcmcia_register_driver(&wavelan_driver); } -/*------------------------------------------------------------------*/ -/* - * Module removal - */ static void __exit exit_wavelan_cs(void) { -#ifdef DEBUG_MODULE_TRACE - printk(KERN_DEBUG "-> cleanup_module()\n"); -#endif -#ifdef DEBUG_BASIC_SHOW - printk(KERN_NOTICE "wavelan_cs: unloading\n"); -#endif + /* Do some cleanup of the device list */ + wv_flush_stale_links(); - /* Do some cleanup of the device list */ - wv_flush_stale_links(); - - /* If there remain some devices... 
*/ -#ifdef DEBUG_CONFIG_ERRORS - if(dev_list != NULL) - { - /* Honestly, if this happen we are in a deep s**t */ - printk(KERN_INFO "wavelan_cs: devices remaining when removing module\n"); - printk(KERN_INFO "Please flush your disks and reboot NOW !\n"); - } -#endif - - unregister_pccard_driver(&dev_info); - -#ifdef DEBUG_MODULE_TRACE - printk(KERN_DEBUG "<- cleanup_module()\n"); -#endif + pcmcia_unregister_driver(&wavelan_driver); } module_init(init_wavelan_cs); diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index a566ba9f6e36..59d6fc778039 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -127,6 +127,14 @@ static struct notifier_block module_load_nb = { }; +static void end_sync_timer(void) +{ + del_timer_sync(&sync_timer); + /* timer might have queued work, make sure it's completed. */ + flush_scheduled_work(); +} + + int sync_start(void) { int err; @@ -158,7 +166,7 @@ out3: out2: profile_event_unregister(EXIT_TASK, &exit_task_nb); out1: - del_timer_sync(&sync_timer); + end_sync_timer(); goto out; } @@ -169,9 +177,7 @@ void sync_stop(void) profile_event_unregister(EXIT_TASK, &exit_task_nb); profile_event_unregister(EXIT_MMAP, &exit_mmap_nb); profile_event_unregister(EXEC_UNMAP, &exec_unmap_nb); - del_timer_sync(&sync_timer); - /* timer might have queued work, make sure it's completed. */ - flush_scheduled_work(); + end_sync_timer(); } @@ -366,12 +372,26 @@ static inline int is_ctx_switch(unsigned long val) } -/* compute number of filled slots in cpu_buffer queue */ -static unsigned long nr_filled_slots(struct oprofile_cpu_buffer * b) +/* "acquire" as many cpu buffer slots as we can */ +static unsigned long get_slots(struct oprofile_cpu_buffer * b) { unsigned long head = b->head_pos; unsigned long tail = b->tail_pos; + /* + * Subtle. This resets the persistent last_task + * and in_kernel values used for switching notes. 
+ * BUT, there is a small window between reading + * head_pos, and this call, that means samples + * can appear at the new head position, but not + * be prefixed with the notes for switching + * kernel mode or a task switch. This small hole + * can lead to mis-attribution or samples where + * we don't know if it's in the kernel or not, + * at the start of an event buffer. + */ + cpu_buffer_reset(b); + if (head >= tail) return head - tail; @@ -408,9 +428,9 @@ static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf) /* Remember, only we can modify tail_pos */ - unsigned long const available_elements = nr_filled_slots(cpu_buf); + unsigned long const available = get_slots(cpu_buf); - for (i=0; i < available_elements; ++i) { + for (i=0; i < available; ++i) { struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; if (is_ctx_switch(s->eip)) { @@ -435,8 +455,6 @@ static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf) increment_tail(cpu_buf); } release_mm(mm); - - cpu_buffer_reset(cpu_buf); } diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 9065b9fd7219..1a57df385996 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -168,10 +168,8 @@ void oprofile_add_sample(unsigned long eip, unsigned int is_kernel, } -/* resets the cpu buffer to a sane state - should be called with - * cpu_buf->int_lock held - */ -void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf) +/* Resets the cpu buffer to a sane state. 
*/ +void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf) { /* reset these to invalid values; the next sample * collected will populate the buffer with proper diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index adaf0c407f87..bcc7ae257544 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/oprofile.h> +#include <linux/moduleparam.h> #include <asm/semaphore.h> #include "oprof.h" @@ -24,6 +25,12 @@ unsigned long oprofile_started; static unsigned long is_setup; static DECLARE_MUTEX(start_sem); +/* timer + 0 - use performance monitoring hardware if available + 1 - use the timer int mechanism regardless + */ +static int timer = 0; + int oprofile_setup(void) { int err; @@ -124,13 +131,16 @@ extern void timer_init(struct oprofile_operations ** ops); static int __init oprofile_init(void) { - int err; + int err = -ENODEV; + + if (!timer) { + /* Architecture must fill in the interrupt ops and the + * logical CPU type, or we can fall back to the timer + * interrupt profiler. + */ + err = oprofile_arch_init(&oprofile_ops); + } - /* Architecture must fill in the interrupt ops and the - * logical CPU type, or we can fall back to the timer - * interrupt profiler. 
- */ - err = oprofile_arch_init(&oprofile_ops); if (err == -ENODEV) { timer_init(&oprofile_ops); err = 0; @@ -163,6 +173,9 @@ static void __exit oprofile_exit(void) module_init(oprofile_init); module_exit(oprofile_exit); + +module_param_named(timer, timer, int, 0644); +MODULE_PARM_DESC(timer, "force use of timer interrupt"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("John Levon <levon@movementarian.org>"); diff --git a/drivers/serial/mcfserial.c b/drivers/serial/mcfserial.c index 088f71298eae..491f720e09e1 100644 --- a/drivers/serial/mcfserial.c +++ b/drivers/serial/mcfserial.c @@ -93,7 +93,11 @@ static int mcfrs_serial_refcount; #define _INLINE_ inline +#ifdef CONFIG_M5282 +#define IRQBASE 77 +#else #define IRQBASE 73 +#endif /* * Configuration table, UARTs to look for at startup. @@ -412,13 +416,6 @@ void mcfrs_interrupt(int irq, void *dev_id, struct pt_regs *regs) receive_chars(info, regs, isr); if (isr & MCFUART_UIR_TXREADY) transmit_chars(info); -#if 0 - if (isr & MCFUART_UIR_DELTABREAK) { - printk("%s(%d): delta break!\n", __FILE__, __LINE__); - receive_chars(info, regs, isr); - } -#endif - return; } @@ -737,7 +734,7 @@ static int mcfrs_write(struct tty_struct * tty, int from_user, #if 0 printk("%s(%d): mcfrs_write(tty=%x,from_user=%d,buf=%x,count=%d)\n", - __FILE__, __LINE__, tty, from_user, buf, count); + __FILE__, __LINE__, (int)tty, from_user, (int)buf, count); #endif if (serial_paranoia_check(info, tty->name, "mcfrs_write")) @@ -791,7 +788,7 @@ static int mcfrs_write_room(struct tty_struct *tty) { struct mcf_serial *info = (struct mcf_serial *)tty->driver_data; int ret; - + if (serial_paranoia_check(info, tty->name, "mcfrs_write_room")) return 0; ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1; @@ -803,7 +800,7 @@ static int mcfrs_write_room(struct tty_struct *tty) static int mcfrs_chars_in_buffer(struct tty_struct *tty) { struct mcf_serial *info = (struct mcf_serial *)tty->driver_data; - + if (serial_paranoia_check(info, tty->name, "mcfrs_chars_in_buffer")) 
return 0; return info->xmit_cnt; @@ -813,7 +810,7 @@ static void mcfrs_flush_buffer(struct tty_struct *tty) { struct mcf_serial *info = (struct mcf_serial *)tty->driver_data; unsigned long flags; - + if (serial_paranoia_check(info, tty->name, "mcfrs_flush_buffer")) return; @@ -1467,7 +1464,7 @@ int mcfrs_open(struct tty_struct *tty, struct file * filp) */ static void mcfrs_irqinit(struct mcf_serial *info) { -#ifdef CONFIG_M5272 +#if defined(CONFIG_M5272) volatile unsigned long *icrp; volatile unsigned long *portp; volatile unsigned char *uartp; @@ -1493,6 +1490,19 @@ static void mcfrs_irqinit(struct mcf_serial *info) *portp = (*portp & ~0x000000ff) | 0x00000055; portp = (volatile unsigned long *) (MCF_MBAR + MCFSIM_PDCNT); *portp = (*portp & ~0x000003fc) | 0x000002a8; +#elif defined(CONFIG_M5282) + volatile unsigned char *icrp, *uartp; + volatile unsigned long *imrp; + + uartp = (volatile unsigned char *) info->addr; + + icrp = (volatile unsigned char *) (MCF_MBAR + MCFICM_INTC0 + + MCFINTC_ICR0 + MCFINT_UART0 + info->line); + *icrp = 0x33; /* UART0 with level 6, priority 3 */ + + imrp = (volatile unsigned long *) (MCF_MBAR + MCFICM_INTC0 + + MCFINTC_IMRL); + *imrp &= ~((1 << (info->irq - 64)) | 1); #else volatile unsigned char *icrp, *uartp; diff --git a/fs/dcookies.c b/fs/dcookies.c index 7365acccf391..277ef25dff54 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -175,6 +175,11 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char * buf, size_t len) /* FIXME: (deleted) ? 
*/ path = d_path(dcs->dentry, dcs->vfsmnt, kbuf, PAGE_SIZE); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out_free; + } + err = -ERANGE; pathlen = kbuf + PAGE_SIZE - path; @@ -184,6 +189,7 @@ asmlinkage long sys_lookup_dcookie(u64 cookie64, char * buf, size_t len) err = -EFAULT; } +out_free: kfree(kbuf); out: up(&dcookie_sem); diff --git a/include/asm-m68knommu/m5282sim.h b/include/asm-m68knommu/m5282sim.h new file mode 100644 index 000000000000..977a24fc3f4f --- /dev/null +++ b/include/asm-m68knommu/m5282sim.h @@ -0,0 +1,35 @@ +/****************************************************************************/ + +/* + * m5282sim.h -- ColdFire 5282 System Integration Module support. + * + * (C) Copyright 2003, Greg Ungerer (gerg@snapgear.com) + */ + +/****************************************************************************/ +#ifndef m5282sim_h +#define m5282sim_h +/****************************************************************************/ + +#include <linux/config.h> + +/* + * Define the 5282 SIM register set addresses. 
+ */ +#define MCFICM_INTC0 0x0c00 /* Base for Interrupt Ctrl 0 */ +#define MCFICM_INTC1 0x0d00 /* Base for Interrupt Ctrl 1 */ +#define MCFINTC_IPRH 0x00 /* Interrupt pending 32-63 */ +#define MCFINTC_IPRL 0x04 /* Interrupt pending 1-31 */ +#define MCFINTC_IMRH 0x08 /* Interrupt mask 32-63 */ +#define MCFINTC_IMRL 0x0c /* Interrupt mask 1-31 */ +#define MCFINTC_INTFRCH 0x10 /* Interrupt force 32-63 */ +#define MCFINTC_INTFRCL 0x14 /* Interrupt force 1-31 */ +#define MCFINTC_IRLR 0x18 /* */ +#define MCFINTC_IACKL 0x19 /* */ +#define MCFINTC_ICR0 0x40 /* Base ICR register */ + +#define MCFINT_UART0 13 /* Interrupt number for UART0 */ +#define MCFINT_PIT1 55 /* Interrupt number for PIT1 */ + +/****************************************************************************/ +#endif /* m5282sim_h */ diff --git a/include/asm-m68knommu/mcfdma.h b/include/asm-m68knommu/mcfdma.h index 8cb3727e1b51..98fadce0a6e9 100644 --- a/include/asm-m68knommu/mcfdma.h +++ b/include/asm-m68knommu/mcfdma.h @@ -21,6 +21,12 @@ #define MCFDMA_BASE1 0x240 /* Base address of DMA 1 */ #elif defined(CONFIG_M5272) #define MCFDMA_BASE0 0x0e0 /* Base address of DMA 0 */ +#elif defined(CONFIG_M5282) +/* These are relative to the IPSBAR, not MBAR */ +#define MCFDMA_BASE0 0x100 /* Base address of DMA 0 */ +#define MCFDMA_BASE1 0x140 /* Base address of DMA 1 */ +#define MCFDMA_BASE2 0x180 /* Base address of DMA 2 */ +#define MCFDMA_BASE3 0x1C0 /* Base address of DMA 3 */ #elif defined(CONFIG_M5249) || defined(CONFIG_M5307) || defined(CONFIG_M5407) #define MCFDMA_BASE0 0x300 /* Base address of DMA 0 */ #define MCFDMA_BASE1 0x340 /* Base address of DMA 1 */ diff --git a/include/asm-m68knommu/mcfpit.h b/include/asm-m68knommu/mcfpit.h new file mode 100644 index 000000000000..76ccfc2f0fa1 --- /dev/null +++ b/include/asm-m68knommu/mcfpit.h @@ -0,0 +1,63 @@ +/****************************************************************************/ + +/* + * mcfpit.h -- ColdFire internal PIT timer support defines. 
+ * + * (C) Copyright 2003, Greg Ungerer (gerg@snapgear.com) + */ + +/****************************************************************************/ +#ifndef mcfpit_h +#define mcfpit_h +/****************************************************************************/ + +#include <linux/config.h> + +/* + * Get address specific defines for the 5282. + */ +#define MCFPIT_BASE1 0x00150000 /* Base address of TIMER1 */ +#define MCFPIT_BASE2 0x00160000 /* Base address of TIMER2 */ +#define MCFPIT_BASE3 0x00170000 /* Base address of TIMER3 */ +#define MCFPIT_BASE4 0x00180000 /* Base address of TIMER4 */ + + +/* + * Define the PIT timer register set addresses. + */ +struct mcfpit { + unsigned short pcsr; /* PIT control and status */ + unsigned short pmr; /* PIT modulus register */ + unsigned short pcntr; /* PIT count register */ +} __attribute__((packed)); + +/* + * Bit definitions for the PIT Control and Status register. + */ +#define MCFPIT_PCSR_CLK1 0x0000 /* System clock divisor */ +#define MCFPIT_PCSR_CLK2 0x0100 /* System clock divisor */ +#define MCFPIT_PCSR_CLK4 0x0200 /* System clock divisor */ +#define MCFPIT_PCSR_CLK8 0x0300 /* System clock divisor */ +#define MCFPIT_PCSR_CLK16 0x0400 /* System clock divisor */ +#define MCFPIT_PCSR_CLK32 0x0500 /* System clock divisor */ +#define MCFPIT_PCSR_CLK64 0x0600 /* System clock divisor */ +#define MCFPIT_PCSR_CLK128 0x0700 /* System clock divisor */ +#define MCFPIT_PCSR_CLK256 0x0800 /* System clock divisor */ +#define MCFPIT_PCSR_CLK512 0x0900 /* System clock divisor */ +#define MCFPIT_PCSR_CLK1024 0x0a00 /* System clock divisor */ +#define MCFPIT_PCSR_CLK2048 0x0b00 /* System clock divisor */ +#define MCFPIT_PCSR_CLK4096 0x0c00 /* System clock divisor */ +#define MCFPIT_PCSR_CLK8192 0x0d00 /* System clock divisor */ +#define MCFPIT_PCSR_CLK16384 0x0e00 /* System clock divisor */ +#define MCFPIT_PCSR_CLK32768 0x0f00 /* System clock divisor */ +#define MCFPIT_PCSR_DOZE 0x0040 /* Clock run in doze mode */ +#define 
MCFPIT_PCSR_HALTED 0x0020 /* Clock run in halt mode */ +#define MCFPIT_PCSR_OVW 0x0010 /* Overwrite PIT counter now */ +#define MCFPIT_PCSR_PIE 0x0008 /* Enable PIT interrupt */ +#define MCFPIT_PCSR_PIF 0x0004 /* PIT interrupt flag */ +#define MCFPIT_PCSR_RLD 0x0002 /* Reload counter */ +#define MCFPIT_PCSR_EN 0x0001 /* Enable PIT */ +#define MCFPIT_PCSR_DISABLE 0x0000 /* Disable PIT */ + +/****************************************************************************/ +#endif /* mcfpit_h */ diff --git a/include/asm-m68knommu/mcfsim.h b/include/asm-m68knommu/mcfsim.h index c408a513e8a4..27d6cb696815 100644 --- a/include/asm-m68knommu/mcfsim.h +++ b/include/asm-m68knommu/mcfsim.h @@ -3,7 +3,7 @@ /* * mcfsim.h -- ColdFire System Integration Module support. * - * (C) Copyright 1999-2002, Greg Ungerer (gerg@snapgear.com) + * (C) Copyright 1999-2003, Greg Ungerer (gerg@snapgear.com) * (C) Copyright 2000, Lineo Inc. (www.lineo.com) */ @@ -15,7 +15,7 @@ #include <linux/config.h> /* - * Include 5204, 5206, 5249, 5272, 5307 or 5407 specific addresses. + * Include 5204, 5206, 5249, 5272, 5282, 5307 or 5407 specific addresses. */ #if defined(CONFIG_M5204) #include <asm/m5204sim.h> @@ -25,6 +25,8 @@ #include <asm/m5249sim.h> #elif defined(CONFIG_M5272) #include <asm/m5272sim.h> +#elif defined(CONFIG_M5282) +#include <asm/m5282sim.h> #elif defined(CONFIG_M5307) #include <asm/m5307sim.h> #elif defined(CONFIG_M5407) diff --git a/include/asm-m68knommu/mcfuart.h b/include/asm-m68knommu/mcfuart.h index 18201f8a230f..07a7eca83542 100644 --- a/include/asm-m68knommu/mcfuart.h +++ b/include/asm-m68knommu/mcfuart.h @@ -3,7 +3,7 @@ /* * mcfuart.h -- ColdFire internal UART support defines. * - * (C) Copyright 1999-2002, Greg Ungerer (gerg@snapgear.com) + * (C) Copyright 1999-2003, Greg Ungerer (gerg@snapgear.com) * (C) Copyright 2000, Lineo Inc. 
(www.lineo.com) */ @@ -29,13 +29,17 @@ #define MCFUART_BASE1 0x140 /* Base address of UART1 */ #define MCFUART_BASE2 0x180 /* Base address of UART2 */ #endif +#elif defined(CONFIG_M5282) +#define MCFUART_BASE1 0x200 /* Base address of UART1 */ +#define MCFUART_BASE2 0x240 /* Base address of UART2 */ +#define MCFUART_BASE3 0x280 /* Base address of UART3 */ #elif defined(CONFIG_M5249) || defined(CONFIG_M5307) || defined(CONFIG_M5407) #if defined(CONFIG_NETtel) || defined(CONFIG_DISKtel) || defined(CONFIG_SECUREEDGEMP3) #define MCFUART_BASE1 0x200 /* Base address of UART1 */ -#define MCFUART_BASE2 0x1c0 /* Base address of UART2 */ +#define MCFUART_BASE2 0x1c0 /* Base address of UART2 */ #else #define MCFUART_BASE1 0x1c0 /* Base address of UART1 */ -#define MCFUART_BASE2 0x200 /* Base address of UART2 */ +#define MCFUART_BASE2 0x200 /* Base address of UART2 */ #endif #endif @@ -55,7 +59,13 @@ #define MCFUART_UIMR 0x14 /* Interrupt Mask (w) */ #define MCFUART_UBG1 0x18 /* Baud Rate MSB (r/w) */ #define MCFUART_UBG2 0x1c /* Baud Rate LSB (r/w) */ +#ifdef CONFIG_M5272 +#define MCFUART_UTF 0x28 /* Transmitter FIFO (r/w) */ +#define MCFUART_URF 0x2c /* Receiver FIFO (r/w) */ +#define MCFUART_UFPD 0x30 /* Frac Prec. Divider (r/w) */ +#else #define MCFUART_UIVR 0x30 /* Interrupt Vector (r/w) */ +#endif #define MCFUART_UIPR 0x34 /* Input Port (r) */ #define MCFUART_UOP1 0x38 /* Output Port Bit Set (w) */ #define MCFUART_UOP0 0x3c /* Output Port Bit Reset (w) */ @@ -170,5 +180,21 @@ #define MCFUART_UIR_RXREADY 0x02 /* Receiver ready */ #define MCFUART_UIR_TXREADY 0x01 /* Transmitter ready */ +#ifdef CONFIG_M5272 +/* + * Define bit flags in the Transmitter FIFO Register (UTF). + */ +#define MCFUART_UTF_TXB 0x1f /* Transmitter data level */ +#define MCFUART_UTF_FULL 0x20 /* Transmitter fifo full */ +#define MCFUART_UTF_TXS 0xc0 /* Transmitter status */ + +/* + * Define bit flags in the Receiver FIFO Register (URF). 
+ */ +#define MCFUART_URF_RXB 0x1f /* Receiver data level */ +#define MCFUART_URF_FULL 0x20 /* Receiver fifo full */ +#define MCFUART_URF_RXS 0xc0 /* Receiver status */ +#endif + /****************************************************************************/ #endif /* mcfuart_h */ diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h index 1733b0e28cbc..4419670188d2 100644 --- a/include/linux/if_bonding.h +++ b/include/linux/if_bonding.h @@ -11,18 +11,38 @@ * This software may be used and distributed according to the terms * of the GNU Public License, incorporated herein by reference. * + * 2003/03/18 - Amir Noam <amir.noam at intel dot com> + * - Added support for getting slave's speed and duplex via ethtool. + * Needed for 802.3ad and other future modes. + * + * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Shmulik Hen <shmulik.hen at intel dot com> + * - Enable support of modes that need to use the unique mac address of + * each slave. + * + * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Amir Noam <amir.noam at intel dot com> + * - Moved driver's private data types to bonding.h + * + * 2003/03/18 - Amir Noam <amir.noam at intel dot com>, + * Tsippy Mendelson <tsippy.mendelson at intel dot com> and + * Shmulik Hen <shmulik.hen at intel dot com> + * - Added support for IEEE 802.3ad Dynamic link aggregation mode. + * + * 2003/05/01 - Amir Noam <amir.noam at intel dot com> + * - Added ABI version control to restore compatibility between + * new/old ifenslave and new/old bonding. */ #ifndef _LINUX_IF_BONDING_H #define _LINUX_IF_BONDING_H -#ifdef __KERNEL__ -#include <linux/timer.h> #include <linux/if.h> -#include <linux/proc_fs.h> -#endif /* __KERNEL__ */ - #include <linux/types.h> +#include <linux/if_ether.h> + +/* userland - kernel ABI version (2003/05/08) */ +#define BOND_ABI_VERSION 1 /* * We can remove these ioctl definitions in 2.5. 
People should use the @@ -41,6 +61,9 @@ #define BOND_MODE_ACTIVEBACKUP 1 #define BOND_MODE_XOR 2 #define BOND_MODE_BROADCAST 3 +#define BOND_MODE_8023AD 4 +#define BOND_MODE_TLB 5 +#define BOND_MODE_ALB 6 /* TLB + RLB (receive load balancing) */ /* each slave's link has 4 states */ #define BOND_LINK_UP 0 /* link is up and running */ @@ -58,11 +81,6 @@ #define BOND_MULTICAST_ACTIVE 1 #define BOND_MULTICAST_ALL 2 -struct bond_parm_tbl { - char *modename; - int mode; -}; - typedef struct ifbond { __s32 bond_mode; __s32 num_slaves; @@ -78,52 +96,15 @@ typedef struct ifslave __u32 link_failure_count; } ifslave; -#ifdef __KERNEL__ -typedef struct slave { - struct slave *next; - struct slave *prev; - struct net_device *dev; - short delay; - unsigned long jiffies; - char link; /* one of BOND_LINK_XXXX */ - char state; /* one of BOND_STATE_XXXX */ - unsigned short original_flags; - u32 link_failure_count; -} slave_t; - -/* - * Here are the locking policies for the two bonding locks: - * - * 1) Get bond->lock when reading/writing slave list. - * 2) Get bond->ptrlock when reading/writing bond->current_slave. - * (It is unnecessary when the write-lock is put with bond->lock.) - * 3) When we lock with bond->ptrlock, we must lock with bond->lock - * beforehand. 
- */ -typedef struct bonding { - slave_t *next; - slave_t *prev; - slave_t *current_slave; - slave_t *primary_slave; - slave_t *current_arp_slave; - __s32 slave_cnt; - rwlock_t lock; - rwlock_t ptrlock; - struct timer_list mii_timer; - struct timer_list arp_timer; - struct net_device_stats *stats; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *bond_proc_dir; - struct proc_dir_entry *bond_proc_info_file; -#endif /* CONFIG_PROC_FS */ - struct bonding *next_bond; - struct net_device *device; - struct dev_mc_list *mc_list; - unsigned short flags; -} bonding_t; -#endif /* __KERNEL__ */ +struct ad_info { + __u16 aggregator_id; + __u16 ports; + __u16 actor_key; + __u16 partner_key; + __u8 partner_system[ETH_ALEN]; +}; -#endif /* _LINUX_BOND_H */ +#endif /* _LINUX_IF_BONDING_H */ /* * Local variables: diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 026cf2d5b20b..6cb10ed07188 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -148,6 +148,7 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, { struct net_device_stats *stats; + skb->real_dev = skb->dev; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { kfree_skb(skb); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6b8ab6887236..4367ef3643e5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -190,6 +190,7 @@ struct sk_buff { struct sock *sk; struct timeval stamp; struct net_device *dev; + struct net_device *real_dev; union { struct tcphdr *th; diff --git a/net/core/dev.c b/net/core/dev.c index 90db1c394ede..33bbf5ad3203 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1385,8 +1385,10 @@ static __inline__ void skb_bond(struct sk_buff *skb) { struct net_device *dev = skb->dev; - if (dev->master) + if (dev->master) { + skb->real_dev = skb->dev; skb->dev = dev->master; + } } static void net_tx_action(struct softirq_action *h) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cccc6d75bdc9..efdc436b2f00 
100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -271,6 +271,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) n->sk = NULL; C(stamp); C(dev); + C(real_dev); C(h); C(nh); C(mac); @@ -334,6 +335,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->list = NULL; new->sk = NULL; new->dev = old->dev; + new->real_dev = old->real_dev; new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); |
