From 8bf62ecee58360749c5f0e68bc97d5e02a6816b1 Mon Sep 17 00:00:00 2001 From: Albert Lee Date: Thu, 12 May 2005 15:29:42 -0400 Subject: [libata] C/H/S support, for older devices --- include/linux/ata.h | 14 ++++++++++++++ include/linux/libata.h | 6 ++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index f178894edd04..d8981402cd5b 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -125,6 +125,7 @@ enum { ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, ATA_CMD_VERIFY_EXT = 0x42, + ATA_CMD_INIT_DEV_PARAMS = 0x91, /* SETFEATURES stuff */ SETFEATURES_XFER = 0x03, @@ -174,6 +175,7 @@ enum { ATA_TFLAG_ISADDR = (1 << 1), /* enable r/w to nsect/lba regs */ ATA_TFLAG_DEVICE = (1 << 2), /* enable r/w to device reg */ ATA_TFLAG_WRITE = (1 << 3), /* data dir: host->dev==1 (write) */ + ATA_TFLAG_LBA = (1 << 4), /* enable LBA */ }; enum ata_tf_protocols { @@ -242,6 +244,18 @@ struct ata_taskfile { ((u64) (id)[(n) + 1] << 16) | \ ((u64) (id)[(n) + 0]) ) +static inline int ata_id_current_chs_valid(u16 *id) +{ + /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command + has not been issued to the device then the values of + id[54] to id[56] are vendor specific. */ + return (id[53] & 0x01) && /* Current translation valid */ + id[54] && /* cylinders in current translation */ + id[55] && /* heads in current translation */ + id[55] <= 16 && + id[56]; /* sectors in current translation */ +} + static inline int atapi_cdb_len(u16 *dev_id) { u16 tmp = dev_id[0] & 0x3; diff --git a/include/linux/libata.h b/include/linux/libata.h index 505160ab472b..bd0df84cfd87 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -95,6 +95,7 @@ enum { ATA_DFLAG_LBA48 = (1 << 0), /* device supports LBA48 */ ATA_DFLAG_PIO = (1 << 1), /* device currently in PIO mode */ ATA_DFLAG_LOCK_SECTORS = (1 << 2), /* don't adjust max_sectors */ + ATA_DFLAG_LBA = (1 << 3), /* device supports LBA */ ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ @@ -278,6 +279,11 @@ struct ata_device { u8 xfer_protocol; /* taskfile xfer protocol */ u8 read_cmd; /* opcode to use on read */ u8 write_cmd; /* opcode to use on write */ + + /* for CHS addressing */ + u16 cylinders; /* Number of cylinders */ + u16 heads; /* Number of heads */ + u16 sectors; /* Number of sectors per track */ }; struct ata_port { -- cgit v1.2.3 From 3173c8907ffb2c64456142da3df2bd0500bd59e0 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Sun, 11 Sep 2005 02:09:55 -0700 Subject: [PATCH] drivers/net: fix-up schedule_timeout() usage Use schedule_timeout_interruptible() instead of set_current_state()/schedule_timeout() to reduce kernel size. Signed-off-by: Nishanth Aravamudan Cc: Jeff Garzik Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/8139cp.c | 3 +- drivers/net/hp100.c | 48 +++++++++++-------------------- drivers/net/irda/stir4200.c | 7 ++--- drivers/net/ixgb/ixgb_ethtool.c | 7 ++--- drivers/net/ns83820.c | 3 +- drivers/net/tokenring/ibmtr.c | 9 +++--- drivers/net/tokenring/olympic.c | 2 +- drivers/net/tokenring/tms380tr.c | 3 +- drivers/net/typhoon.c | 7 ++--- drivers/net/wan/cosa.c | 6 ++-- drivers/net/wan/dscc4.c | 9 ++---- drivers/net/wan/farsync.c | 3 +- drivers/net/wireless/ipw2100.c | 17 ++++------- drivers/net/wireless/prism54/islpci_dev.c | 6 ++-- drivers/net/wireless/prism54/islpci_mgt.c | 5 ++-- include/linux/ibmtr.h | 4 +-- include/linux/netdevice.h | 6 ++-- 17 files changed, 53 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index ebc20d9e7d7b..bd99c268e2da 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -1029,8 +1029,7 @@ static void cp_reset_hw (struct cp_private *cp) if (!(cpr8(Cmd) & CmdReset)) return; - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(10); + schedule_timeout_uninterruptible(10); } printk(KERN_ERR "%s: hardware reset timeout\n", cp->dev->name); diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index cf0ac6fda1a1..b71fab6e34f4 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -2517,10 +2517,8 @@ static int hp100_down_vg_link(struct net_device *dev) do { if (hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST) break; - if (!in_interrupt()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } + if (!in_interrupt()) + schedule_timeout_interruptible(1); } while (time_after(time, jiffies)); if (time_after_eq(jiffies, time)) /* no signal->no logout */ @@ -2536,10 +2534,8 @@ static int hp100_down_vg_link(struct net_device *dev) do { if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST)) break; - if (!in_interrupt()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } + if (!in_interrupt()) + schedule_timeout_interruptible(1); } while (time_after(time, jiffies)); #ifdef HP100_DEBUG @@ -2577,10 +2573,8 @@ static int hp100_down_vg_link(struct net_device *dev) do { if (!(hp100_inb(MAC_CFG_4) & HP100_MAC_SEL_ST)) break; - if (!in_interrupt()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } + if (!in_interrupt()) + schedule_timeout_interruptible(1); } while (time_after(time, jiffies)); hp100_orb(HP100_AUTO_MODE, MAC_CFG_3); /* Autosel back on */ @@ -2591,10 +2585,8 @@ static int hp100_down_vg_link(struct net_device *dev) do { if ((hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST) == 0) break; - if (!in_interrupt()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } + if (!in_interrupt()) + schedule_timeout_interruptible(1); } while (time_after(time, jiffies)); if (time_before_eq(time, jiffies)) { @@ -2606,10 +2598,8 @@ static int hp100_down_vg_link(struct net_device *dev) time = jiffies + (2 * HZ); /* This seems to take a while.... */ do { - if (!in_interrupt()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } + if (!in_interrupt()) + schedule_timeout_interruptible(1); } while (time_after(time, jiffies)); return 0; @@ -2659,10 +2649,8 @@ static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin) do { if (~(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST)) break; - if (!in_interrupt()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } + if (!in_interrupt()) + schedule_timeout_interruptible(1); } while (time_after(time, jiffies)); /* Start an addressed training and optionally request promiscuous port */ @@ -2697,10 +2685,8 @@ static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin) do { if (hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST) break; - if (!in_interrupt()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } + if (!in_interrupt()) + schedule_timeout_interruptible(1); } while (time_before(jiffies, time)); if (time_after_eq(jiffies, time)) { @@ -2723,10 +2709,8 @@ static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin) #endif break; } - if (!in_interrupt()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } + if (!in_interrupt()) + schedule_timeout_interruptible(1); } while (time_after(time, jiffies)); } diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 15f207323d97..3961a754e920 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -678,10 +678,9 @@ static void turnaround_delay(const struct stir_cb *stir, long us) return; ticks = us / (1000000 / HZ); - if (ticks > 0) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1 + ticks); - } else + if (ticks > 0) + schedule_timeout_interruptible(1 + ticks); + else udelay(us); } diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c index 319ee4cd70d8..04e47189d830 100644 --- a/drivers/net/ixgb/ixgb_ethtool.c +++ b/drivers/net/ixgb/ixgb_ethtool.c @@ -645,11 +645,10 @@ ixgb_phys_id(struct net_device *netdev, uint32_t data) mod_timer(&adapter->blink_timer, jiffies); - set_current_state(TASK_INTERRUPTIBLE); - if(data) - schedule_timeout(data * HZ); + if (data) + schedule_timeout_interruptible(data * HZ); else - schedule_timeout(MAX_SCHEDULE_TIMEOUT); + schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT); del_timer_sync(&adapter->blink_timer); ixgb_led_off(&adapter->hw); diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index e64df4d0800b..ed72a23c85dd 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -1632,8 +1632,7 @@ static void ns83820_run_bist(struct net_device *ndev, const char *name, u32 enab timed_out = 1; break; } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); + schedule_timeout_uninterruptible(1); } if (status & fail) diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c index e7b001017b9a..8154bbbb7792 100644 --- a/drivers/net/tokenring/ibmtr.c +++ b/drivers/net/tokenring/ibmtr.c @@ -318,7 +318,7 @@ static void ibmtr_cleanup_card(struct net_device *dev) if (dev->base_addr) { outb(0,dev->base_addr+ADAPTRESET); - schedule_timeout(TR_RST_TIME); /* wait 50ms */ + schedule_timeout_uninterruptible(TR_RST_TIME); /* wait 50ms */ outb(0,dev->base_addr+ADAPTRESETREL); } @@ -859,8 +859,7 @@ static int tok_init_card(struct net_device *dev) writeb(~INT_ENABLE, ti->mmio + ACA_OFFSET + ACA_RESET + ISRP_EVEN); outb(0, PIOaddr + ADAPTRESET); - current->state=TASK_UNINTERRUPTIBLE; - schedule_timeout(TR_RST_TIME); /* wait 50ms */ + schedule_timeout_uninterruptible(TR_RST_TIME); /* wait 50ms */ outb(0, PIOaddr + ADAPTRESETREL); #ifdef ENABLE_PAGING @@ -908,8 +907,8 @@ static int tok_open(struct net_device *dev) DPRINTK("Adapter is up and running\n"); return 0; } - current->state=TASK_INTERRUPTIBLE; - i=schedule_timeout(TR_RETRY_INTERVAL); /* wait 30 seconds */ + i=schedule_timeout_interruptible(TR_RETRY_INTERVAL); + /* wait 30 seconds */ if(i!=0) break; /*prob. a signal, like the i>24*HZ case above */ } outb(0, dev->base_addr + ADAPTRESET);/* kill pending interrupts*/ diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c index 9e7923192a49..05477d24fd49 100644 --- a/drivers/net/tokenring/olympic.c +++ b/drivers/net/tokenring/olympic.c @@ -1101,7 +1101,7 @@ static int olympic_close(struct net_device *dev) while(olympic_priv->srb_queued) { - t = schedule_timeout(60*HZ); + t = schedule_timeout_interruptible(60*HZ); if(signal_pending(current)) { printk(KERN_WARNING "%s: SRB timed out.\n",dev->name); diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c index 2e39bf1f7462..c1925590a0e1 100644 --- a/drivers/net/tokenring/tms380tr.c +++ b/drivers/net/tokenring/tms380tr.c @@ -1243,8 +1243,7 @@ void tms380tr_wait(unsigned long time) tmp = jiffies + time/(1000000/HZ); do { - current->state = TASK_INTERRUPTIBLE; - tmp = schedule_timeout(tmp); + tmp = schedule_timeout_interruptible(tmp); } while(time_after(tmp, jiffies)); #else udelay(time); diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index ecfa6f8805ce..4c76cb794bfb 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -419,10 +419,9 @@ typhoon_reset(void __iomem *ioaddr, int wait_type) TYPHOON_STATUS_WAITING_FOR_HOST) goto out; - if(wait_type == WaitSleep) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - } else + if(wait_type == WaitSleep) + schedule_timeout_uninterruptible(1); + else udelay(TYPHOON_UDELAY); } diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 7ff814fd65d0..ae9e897c255e 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -1617,8 +1617,7 @@ static int get_wait_data(struct cosa_data *cosa) return r; } /* sleep if not ready to read */ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + schedule_timeout_interruptible(1); } printk(KERN_INFO "cosa: timeout in get_wait_data (status 0x%x)\n", cosa_getstatus(cosa)); @@ -1644,8 +1643,7 @@ static int put_wait_data(struct cosa_data *cosa, int data) } #if 0 /* sleep if not ready to read */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); + schedule_timeout_interruptible(1); #endif } printk(KERN_INFO "cosa%d: timeout in put_wait_data (status 0x%x)\n", diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 520a77a798e2..0c1ab4ac8bdb 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -542,8 +542,7 @@ static int dscc4_wait_ack_cec(struct dscc4_dev_priv *dpriv, msg, i); goto done; } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(10); + schedule_timeout_uninterruptible(10); rmb(); } while (++i > 0); printk(KERN_ERR "%s: %s timeout\n", dev->name, msg); @@ -588,8 +587,7 @@ static inline int dscc4_xpr_ack(struct dscc4_dev_priv *dpriv) (dpriv->iqtx[cur] & Xpr)) break; smp_rmb(); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(10); + schedule_timeout_uninterruptible(10); } while (++i > 0); return (i >= 0 ) ? i : -EAGAIN; @@ -1035,8 +1033,7 @@ static void dscc4_pci_reset(struct pci_dev *pdev, void __iomem *ioaddr) /* Flush posted writes */ readl(ioaddr + GSTAR); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(10); + schedule_timeout_uninterruptible(10); for (i = 0; i < 16; i++) pci_write_config_dword(pdev, i << 2, dscc4_pci_config_store[i]); diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 2c83cca34b86..10befb02d768 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -980,8 +980,7 @@ fst_issue_cmd(struct fst_port_info *port, unsigned short cmd) /* Wait for any previous command to complete */ while (mbval > NAK) { spin_unlock_irqrestore(&card->card_lock, flags); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); + schedule_timeout_uninterruptible(1); spin_lock_irqsave(&card->card_lock, flags); if (++safety > 2000) { diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 2414e6493aa5..e5cdb5bfabc8 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -800,8 +800,7 @@ static int ipw2100_hw_send_command(struct ipw2100_priv *priv, * doesn't seem to have as many firmware restart cycles... * * As a test, we're sticking in a 1/100s delay here */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ / 100); + schedule_timeout_uninterruptible(msecs_to_jiffies(10)); return 0; @@ -1256,8 +1255,7 @@ static int ipw2100_start_adapter(struct ipw2100_priv *priv) IPW_DEBUG_FW("Waiting for f/w initialization to complete...\n"); i = 5000; do { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(40 * HZ / 1000); + schedule_timeout_uninterruptible(msecs_to_jiffies(40)); /* Todo... wait for sync command ... */ read_register(priv->net_dev, IPW_REG_INTA, &inta); @@ -1411,8 +1409,7 @@ static int ipw2100_hw_phy_off(struct ipw2100_priv *priv) (val2 & IPW2100_COMMAND_PHY_OFF)) return 0; - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HW_PHY_OFF_LOOP_DELAY); + schedule_timeout_uninterruptible(HW_PHY_OFF_LOOP_DELAY); } return -EIO; @@ -1466,7 +1463,7 @@ fail_up: static int ipw2100_hw_stop_adapter(struct ipw2100_priv *priv) { -#define HW_POWER_DOWN_DELAY (HZ / 10) +#define HW_POWER_DOWN_DELAY (msecs_to_jiffies(100)) struct host_command cmd = { .host_command = HOST_PRE_POWER_DOWN, @@ -1520,10 +1517,8 @@ static int ipw2100_hw_stop_adapter(struct ipw2100_priv *priv) printk(KERN_WARNING DRV_NAME ": " "%s: Power down command failed: Error %d\n", priv->net_dev->name, err); - else { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HW_POWER_DOWN_DELAY); - } + else + schedule_timeout_uninterruptible(HW_POWER_DOWN_DELAY); } priv->status &= ~STATUS_ENABLED; diff --git a/drivers/net/wireless/prism54/islpci_dev.c b/drivers/net/wireless/prism54/islpci_dev.c index 6f13d4a8e2d3..10cce514c15d 100644 --- a/drivers/net/wireless/prism54/islpci_dev.c +++ b/drivers/net/wireless/prism54/islpci_dev.c @@ -439,8 +439,7 @@ prism54_bring_down(islpci_private *priv) wmb(); /* wait a while for the device to reset */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(50*HZ/1000); + schedule_timeout_uninterruptible(msecs_to_jiffies(50)); return 0; } @@ -491,8 +490,7 @@ islpci_reset_if(islpci_private *priv) /* The software reset acknowledge needs about 220 msec here. * Be conservative and wait for up to one second. */ - set_current_state(TASK_UNINTERRUPTIBLE); - remaining = schedule_timeout(HZ); + remaining = schedule_timeout_uninterruptible(HZ); if(remaining > 0) { result = 0; diff --git a/drivers/net/wireless/prism54/islpci_mgt.c b/drivers/net/wireless/prism54/islpci_mgt.c index b6f2e5a223be..4937a5ad4b2c 100644 --- a/drivers/net/wireless/prism54/islpci_mgt.c +++ b/drivers/net/wireless/prism54/islpci_mgt.c @@ -455,7 +455,7 @@ islpci_mgt_transaction(struct net_device *ndev, struct islpci_mgmtframe **recvframe) { islpci_private *priv = netdev_priv(ndev); - const long wait_cycle_jiffies = (ISL38XX_WAIT_CYCLE * 10 * HZ) / 1000; + const long wait_cycle_jiffies = msecs_to_jiffies(ISL38XX_WAIT_CYCLE * 10); long timeout_left = ISL38XX_MAX_WAIT_CYCLES * wait_cycle_jiffies; int err; DEFINE_WAIT(wait); @@ -475,8 +475,7 @@ islpci_mgt_transaction(struct net_device *ndev, int timeleft; struct islpci_mgmtframe *frame; - set_current_state(TASK_UNINTERRUPTIBLE); - timeleft = schedule_timeout(wait_cycle_jiffies); + timeleft = schedule_timeout_uninterruptible(wait_cycle_jiffies); frame = xchg(&priv->mgmt_received, NULL); if (frame) { if (frame->header->oid == oid) { diff --git a/include/linux/ibmtr.h b/include/linux/ibmtr.h index 2ef0b21517fb..1c7a0dd5536a 100644 --- a/include/linux/ibmtr.h +++ b/include/linux/ibmtr.h @@ -7,8 +7,8 @@ /* ported to the Alpha architecture 02/20/96 (just used the HZ macro) */ #define TR_RETRY_INTERVAL (30*HZ) /* 500 on PC = 5 s */ -#define TR_RST_TIME (HZ/20) /* 5 on PC = 50 ms */ -#define TR_BUSY_INTERVAL (HZ/5) /* 5 on PC = 200 ms */ +#define TR_RST_TIME (msecs_to_jiffies(50)) /* 5 on PC = 50 ms */ +#define TR_BUSY_INTERVAL (msecs_to_jiffies(200)) /* 5 on PC = 200 ms */ #define TR_SPIN_INTERVAL (3*HZ) /* 3 seconds before init timeout */ #define TR_ISA 1 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c717907896d..98c98e6cd4f3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -852,11 +852,9 @@ static inline void netif_rx_complete(struct net_device *dev) static inline void netif_poll_disable(struct net_device *dev) { - while (test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state)) { + while (test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state)) /* No hurry. */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } + schedule_timeout_interruptible(1); } static inline void netif_poll_enable(struct net_device *dev) -- cgit v1.2.3 From 8e18d1f9c9dcbf2de5b79cad771ed639983ab6cd Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Sat, 10 Sep 2005 14:45:00 -0700 Subject: [PATCH] Replace drivers/net/wan custom ctype macros with standard ones Replace the custom is_digit()/is_hex_digit() macros with isdigit()/isxdigit() from Additionaly remove unused macro is_alpha() from Signed-off-by: Tobias Klauser Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/wan/cycx_x25.c | 5 +++-- drivers/net/wan/sdla_fr.c | 4 ++-- drivers/net/wan/sdla_x25.c | 8 ++++---- include/linux/cyclomx.h | 2 -- include/linux/wanpipe.h | 9 --------- 5 files changed, 9 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c index 02d57c0b4243..a631d1c2fa14 100644 --- a/drivers/net/wan/cycx_x25.c +++ b/drivers/net/wan/cycx_x25.c @@ -78,6 +78,7 @@ #define CYCLOMX_X25_DEBUG 1 +#include /* isdigit() */ #include /* return codes */ #include /* ARPHRD_HWX25 */ #include /* printk(), and other useful stuff */ @@ -418,7 +419,7 @@ static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, /* Set channel timeouts (default if not specified) */ chan->idle_tmout = conf->idle_timeout ? conf->idle_timeout : 90; - } else if (is_digit(conf->addr[0])) { /* PVC */ + } else if (isdigit(conf->addr[0])) { /* PVC */ s16 lcn = dec_to_uint(conf->addr, 0); if (lcn >= card->u.x.lo_pvc && lcn <= card->u.x.hi_pvc) @@ -1531,7 +1532,7 @@ static unsigned dec_to_uint(u8 *str, int len) if (!len) len = strlen(str); - for (; len && is_digit(*str); ++str, --len) + for (; len && isdigit(*str); ++str, --len) val = (val * 10) + (*str - (unsigned) '0'); return val; diff --git a/drivers/net/wan/sdla_fr.c b/drivers/net/wan/sdla_fr.c index 0497dbdb8631..7f1ce9d4333e 100644 --- a/drivers/net/wan/sdla_fr.c +++ b/drivers/net/wan/sdla_fr.c @@ -822,7 +822,7 @@ static int new_if(struct wan_device* wandev, struct net_device* dev, chan->card = card; /* verify media address */ - if (is_digit(conf->addr[0])) { + if (isdigit(conf->addr[0])) { dlci = dec_to_uint(conf->addr, 0); @@ -3456,7 +3456,7 @@ static unsigned int dec_to_uint (unsigned char* str, int len) if (!len) len = strlen(str); - for (val = 0; len && is_digit(*str); ++str, --len) + for (val = 0; len && isdigit(*str); ++str, --len) val = (val * 10) + (*str - (unsigned)'0'); return val; diff --git a/drivers/net/wan/sdla_x25.c b/drivers/net/wan/sdla_x25.c index 8a95d61a2f8f..63f846d6f3a6 100644 --- a/drivers/net/wan/sdla_x25.c +++ b/drivers/net/wan/sdla_x25.c @@ -957,7 +957,7 @@ static int new_if(struct wan_device* wandev, struct net_device* dev, chan->hold_timeout = (conf->hold_timeout) ? conf->hold_timeout : 10; - }else if (is_digit(conf->addr[0])){ /* PVC */ + }else if (isdigit(conf->addr[0])){ /* PVC */ int lcn = dec_to_uint(conf->addr, 0); if ((lcn >= card->u.x.lo_pvc) && (lcn <= card->u.x.hi_pvc)){ @@ -3875,7 +3875,7 @@ static unsigned int dec_to_uint (unsigned char* str, int len) if (!len) len = strlen(str); - for (val = 0; len && is_digit(*str); ++str, --len) + for (val = 0; len && isdigit(*str); ++str, --len) val = (val * 10) + (*str - (unsigned)'0'); return val; @@ -3896,9 +3896,9 @@ static unsigned int hex_to_uint (unsigned char* str, int len) for (val = 0; len; ++str, --len) { ch = *str; - if (is_digit(ch)) + if (isdigit(ch)) val = (val << 4) + (ch - (unsigned)'0'); - else if (is_hex_digit(ch)) + else if (isxdigit(ch)) val = (val << 4) + ((ch & 0xDF) - (unsigned)'A' + 10); else break; } diff --git a/include/linux/cyclomx.h b/include/linux/cyclomx.h index 04fa7dff079c..300d704bdb9a 100644 --- a/include/linux/cyclomx.h +++ b/include/linux/cyclomx.h @@ -37,8 +37,6 @@ #include #endif -#define is_digit(ch) (((ch)>=(unsigned)'0'&&(ch)<=(unsigned)'9')?1:0) - /* Adapter Data Space. * This structure is needed because we handle multiple cards, otherwise * static data would do it. diff --git a/include/linux/wanpipe.h b/include/linux/wanpipe.h index 167d956c492b..dae9860091dd 100644 --- a/include/linux/wanpipe.h +++ b/include/linux/wanpipe.h @@ -265,15 +265,6 @@ typedef struct { #include #include - -#define is_digit(ch) (((ch)>=(unsigned)'0'&&(ch)<=(unsigned)'9')?1:0) -#define is_alpha(ch) ((((ch)>=(unsigned)'a'&&(ch)<=(unsigned)'z')||\ - ((ch)>=(unsigned)'A'&&(ch)<=(unsigned)'Z'))?1:0) -#define is_hex_digit(ch) ((((ch)>=(unsigned)'0'&&(ch)<=(unsigned)'9')||\ - ((ch)>=(unsigned)'a'&&(ch)<=(unsigned)'f')||\ - ((ch)>=(unsigned)'A'&&(ch)<=(unsigned)'F'))?1:0) - - /****** Data Structures *****************************************************/ /* Adapter Data Space. -- cgit v1.2.3 From 7665a08928f241247afe8c76865cdbe4ef5489bf Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 9 Sep 2005 23:17:28 -0700 Subject: [PATCH] drivers/net/wan/: possible cleanups This patch contains possible cleanups including the following: - make needlessly global code static - #if 0 the following unused global function: - sdladrv.c: sdla_intde - remove the following unused global variable: - lmc_media.c: lmc_t1_cables - remove the following unneeded EXPORT_SYMBOL's: - cycx_drv.c: cycx_inten - sdladrv.c: sdla_inten - sdladrv.c: sdla_intde - sdladrv.c: sdla_intack - sdladrv.c: sdla_intr - syncppp.c: sppp_input - syncppp.c: sppp_change_mtu Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/wan/cycx_drv.c | 7 ++-- drivers/net/wan/cycx_main.c | 2 +- drivers/net/wan/dscc4.c | 14 ++++--- drivers/net/wan/farsync.c | 24 ++++++------ drivers/net/wan/hdlc_fr.c | 2 +- drivers/net/wan/lmc/lmc_debug.c | 10 ++--- drivers/net/wan/lmc/lmc_media.c | 8 ---- drivers/net/wan/pc300.h | 16 -------- drivers/net/wan/pc300_drv.c | 87 +++++++++++++++++++++-------------------- drivers/net/wan/pc300_tty.c | 18 ++++----- drivers/net/wan/sdla.c | 20 +++++----- drivers/net/wan/sdladrv.c | 16 +++----- drivers/net/wan/syncppp.c | 10 ++--- include/linux/cycx_drv.h | 1 - include/linux/sdladrv.h | 4 -- include/net/syncppp.h | 1 - 16 files changed, 101 insertions(+), 139 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c index 9e56fc346ba4..e6d005726aad 100644 --- a/drivers/net/wan/cycx_drv.c +++ b/drivers/net/wan/cycx_drv.c @@ -109,7 +109,7 @@ static long cycx_2x_irq_options[] = { 7, 3, 5, 9, 10, 11, 12, 15 }; * < 0 error. * Context: process */ -int __init cycx_drv_init(void) +static int __init cycx_drv_init(void) { printk(KERN_INFO "%s v%u.%u %s\n", fullname, MOD_VERSION, MOD_RELEASE, copyright); @@ -119,7 +119,7 @@ int __init cycx_drv_init(void) /* Module 'remove' entry point. * o release all remaining system resources */ -void cycx_drv_cleanup(void) +static void cycx_drv_cleanup(void) { } @@ -184,8 +184,7 @@ int cycx_down(struct cycx_hw *hw) } /* Enable interrupt generation. */ -EXPORT_SYMBOL(cycx_inten); -void cycx_inten(struct cycx_hw *hw) +static void cycx_inten(struct cycx_hw *hw) { writeb(0, hw->dpmbase); } diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c index 7b48064364dc..430b1f630fb4 100644 --- a/drivers/net/wan/cycx_main.c +++ b/drivers/net/wan/cycx_main.c @@ -103,7 +103,7 @@ static struct cycx_device *cycx_card_array; /* adapter data space */ * < 0 error. * Context: process */ -int __init cycx_init(void) +static int __init cycx_init(void) { int cnt, err = -ENOMEM; diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 0c1ab4ac8bdb..2f61a47b4716 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -446,8 +446,8 @@ static inline unsigned int dscc4_tx_quiescent(struct dscc4_dev_priv *dpriv, return readl(dpriv->base_addr + CH0FTDA + dpriv->dev_id*4) == dpriv->ltda; } -int state_check(u32 state, struct dscc4_dev_priv *dpriv, struct net_device *dev, - const char *msg) +static int state_check(u32 state, struct dscc4_dev_priv *dpriv, + struct net_device *dev, const char *msg) { int ret = 0; @@ -466,8 +466,9 @@ int state_check(u32 state, struct dscc4_dev_priv *dpriv, struct net_device *dev, return ret; } -void dscc4_tx_print(struct net_device *dev, struct dscc4_dev_priv *dpriv, - char *msg) +static void dscc4_tx_print(struct net_device *dev, + struct dscc4_dev_priv *dpriv, + char *msg) { printk(KERN_DEBUG "%s: tx_current=%02d tx_dirty=%02d (%s)\n", dev->name, dpriv->tx_current, dpriv->tx_dirty, msg); @@ -507,7 +508,8 @@ static void dscc4_release_ring(struct dscc4_dev_priv *dpriv) } } -inline int try_get_rx_skb(struct dscc4_dev_priv *dpriv, struct net_device *dev) +static inline int try_get_rx_skb(struct dscc4_dev_priv *dpriv, + struct net_device *dev) { unsigned int dirty = dpriv->rx_dirty%RX_RING_SIZE; struct RxFD *rx_fd = dpriv->rx_fd + dirty; @@ -1891,7 +1893,7 @@ try: * It failed and locked solid. Thus the introduction of a dummy skb. * Problem is acknowledged in errata sheet DS5. Joy :o/ */ -struct sk_buff *dscc4_init_dummy_skb(struct dscc4_dev_priv *dpriv) +static struct sk_buff *dscc4_init_dummy_skb(struct dscc4_dev_priv *dpriv) { struct sk_buff *skb; diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 10befb02d768..7981a2c7906e 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -74,11 +74,11 @@ MODULE_LICENSE("GPL"); /* * Modules parameters and associated varaibles */ -int fst_txq_low = FST_LOW_WATER_MARK; -int fst_txq_high = FST_HIGH_WATER_MARK; -int fst_max_reads = 7; -int fst_excluded_cards = 0; -int fst_excluded_list[FST_MAX_CARDS]; +static int fst_txq_low = FST_LOW_WATER_MARK; +static int fst_txq_high = FST_HIGH_WATER_MARK; +static int fst_max_reads = 7; +static int fst_excluded_cards = 0; +static int fst_excluded_list[FST_MAX_CARDS]; module_param(fst_txq_low, int, 0); module_param(fst_txq_high, int, 0); @@ -572,13 +572,13 @@ static void do_bottom_half_rx(struct fst_card_info *card); static void fst_process_tx_work_q(unsigned long work_q); static void fst_process_int_work_q(unsigned long work_q); -DECLARE_TASKLET(fst_tx_task, fst_process_tx_work_q, 0); -DECLARE_TASKLET(fst_int_task, fst_process_int_work_q, 0); +static DECLARE_TASKLET(fst_tx_task, fst_process_tx_work_q, 0); +static DECLARE_TASKLET(fst_int_task, fst_process_int_work_q, 0); -struct fst_card_info *fst_card_array[FST_MAX_CARDS]; -spinlock_t fst_work_q_lock; -u64 fst_work_txq; -u64 fst_work_intq; +static struct fst_card_info *fst_card_array[FST_MAX_CARDS]; +static spinlock_t fst_work_q_lock; +static u64 fst_work_txq; +static u64 fst_work_intq; static void fst_q_work_item(u64 * queue, int card_index) @@ -1497,7 +1497,7 @@ do_bottom_half_rx(struct fst_card_info *card) * The interrupt service routine * Dev_id is our fst_card_info pointer */ -irqreturn_t +static irqreturn_t fst_intr(int irq, void *dev_id, struct pt_regs *regs) { struct fst_card_info *card; diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index a5d6891c9d4c..e1601d35dced 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -330,7 +330,7 @@ static int pvc_close(struct net_device *dev) -int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { pvc_device *pvc = dev_to_pvc(dev); fr_proto_pvc_info info; diff --git a/drivers/net/wan/lmc/lmc_debug.c b/drivers/net/wan/lmc/lmc_debug.c index 9dccd9546a17..3b94352b0d03 100644 --- a/drivers/net/wan/lmc/lmc_debug.c +++ b/drivers/net/wan/lmc/lmc_debug.c @@ -8,10 +8,10 @@ /* * Prints out len, max to 80 octets using printk, 20 per line */ -void lmcConsoleLog(char *type, unsigned char *ucData, int iLen) -{ #ifdef DEBUG #ifdef LMC_PACKET_LOG +void lmcConsoleLog(char *type, unsigned char *ucData, int iLen) +{ int iNewLine = 1; char str[80], *pstr; @@ -43,26 +43,24 @@ void lmcConsoleLog(char *type, unsigned char *ucData, int iLen) } sprintf(pstr, "\n"); printk(str); +} #endif #endif -} #ifdef DEBUG u_int32_t lmcEventLogIndex = 0; u_int32_t lmcEventLogBuf[LMC_EVENTLOGSIZE * LMC_EVENTLOGARGS]; -#endif void lmcEventLog (u_int32_t EventNum, u_int32_t arg2, u_int32_t arg3) { -#ifdef DEBUG lmcEventLogBuf[lmcEventLogIndex++] = EventNum; lmcEventLogBuf[lmcEventLogIndex++] = arg2; lmcEventLogBuf[lmcEventLogIndex++] = arg3; lmcEventLogBuf[lmcEventLogIndex++] = jiffies; lmcEventLogIndex &= (LMC_EVENTLOGSIZE * LMC_EVENTLOGARGS) - 1; -#endif } +#endif /* DEBUG */ void lmc_trace(struct net_device *dev, char *msg){ #ifdef LMC_TRACE diff --git a/drivers/net/wan/lmc/lmc_media.c b/drivers/net/wan/lmc/lmc_media.c index f55ce76b00ed..af8b55fdd9d9 100644 --- a/drivers/net/wan/lmc/lmc_media.c +++ b/drivers/net/wan/lmc/lmc_media.c @@ -47,14 +47,6 @@ * of the GNU General Public License version 2, incorporated herein by reference. */ -/* - * For lack of a better place, put the SSI cable stuff here. - */ -char *lmc_t1_cables[] = { - "V.10/RS423", "EIA530A", "reserved", "X.21", "V.35", - "EIA449/EIA530/V.36", "V.28/EIA232", "none", NULL -}; - /* * protocol independent method. */ diff --git a/drivers/net/wan/pc300.h b/drivers/net/wan/pc300.h index 73401b0f0151..2024b26b99e6 100644 --- a/drivers/net/wan/pc300.h +++ b/drivers/net/wan/pc300.h @@ -472,24 +472,8 @@ enum pc300_loopback_cmds { #ifdef __KERNEL__ /* Function Prototypes */ -int dma_buf_write(pc300_t *, int, ucchar *, int); -int dma_buf_read(pc300_t *, int, struct sk_buff *); void tx_dma_start(pc300_t *, int); -void rx_dma_start(pc300_t *, int); -void tx_dma_stop(pc300_t *, int); -void rx_dma_stop(pc300_t *, int); -int cpc_queue_xmit(struct sk_buff *, struct net_device *); -void cpc_net_rx(struct net_device *); -void cpc_sca_status(pc300_t *, int); -int cpc_change_mtu(struct net_device *, int); -int cpc_ioctl(struct net_device *, struct ifreq *, int); -int ch_config(pc300dev_t *); -int rx_config(pc300dev_t *); -int tx_config(pc300dev_t *); -void cpc_opench(pc300dev_t *); -void cpc_closech(pc300dev_t *); int cpc_open(struct net_device *dev); -int cpc_close(struct net_device *dev); int cpc_set_media(hdlc_device *, int); #endif /* __KERNEL__ */ diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c index 3e7753b10717..a3e65d1bc19b 100644 --- a/drivers/net/wan/pc300_drv.c +++ b/drivers/net/wan/pc300_drv.c @@ -291,6 +291,7 @@ static uclong detect_ram(pc300_t *); static void plx_init(pc300_t *); static void cpc_trace(struct net_device *, struct sk_buff *, char); static int cpc_attach(struct net_device *, unsigned short, unsigned short); +static int cpc_close(struct net_device *dev); #ifdef CONFIG_PC300_MLPPP void cpc_tty_init(pc300dev_t * dev); @@ -437,7 +438,7 @@ static void rx_dma_buf_check(pc300_t * card, int ch) printk("\n"); } -int dma_get_rx_frame_size(pc300_t * card, int ch) +static int dma_get_rx_frame_size(pc300_t * card, int ch) { volatile pcsca_bd_t __iomem *ptdescr; ucshort first_bd = card->chan[ch].rx_first_bd; @@ -462,7 +463,7 @@ int dma_get_rx_frame_size(pc300_t * card, int ch) * dma_buf_write: writes a frame to the Tx DMA buffers * NOTE: this function writes one frame at a time. */ -int dma_buf_write(pc300_t * card, int ch, ucchar * ptdata, int len) +static int dma_buf_write(pc300_t * card, int ch, ucchar * ptdata, int len) { int i, nchar; volatile pcsca_bd_t __iomem *ptdescr; @@ -503,7 +504,7 @@ int dma_buf_write(pc300_t * card, int ch, ucchar * ptdata, int len) * dma_buf_read: reads a frame from the Rx DMA buffers * NOTE: this function reads one frame at a time. */ -int dma_buf_read(pc300_t * card, int ch, struct sk_buff *skb) +static int dma_buf_read(pc300_t * card, int ch, struct sk_buff *skb) { int nchar; pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; @@ -560,7 +561,7 @@ int dma_buf_read(pc300_t * card, int ch, struct sk_buff *skb) return (rcvd); } -void tx_dma_stop(pc300_t * card, int ch) +static void tx_dma_stop(pc300_t * card, int ch) { void __iomem *scabase = card->hw.scabase; ucchar drr_ena_bit = 1 << (5 + 2 * ch); @@ -571,7 +572,7 @@ void tx_dma_stop(pc300_t * card, int ch) cpc_writeb(scabase + DRR, drr_rst_bit & ~drr_ena_bit); } -void rx_dma_stop(pc300_t * card, int ch) +static void rx_dma_stop(pc300_t * card, int ch) { void __iomem *scabase = card->hw.scabase; ucchar drr_ena_bit = 1 << (4 + 2 * ch); @@ -582,7 +583,7 @@ void rx_dma_stop(pc300_t * card, int ch) cpc_writeb(scabase + DRR, drr_rst_bit & ~drr_ena_bit); } -void rx_dma_start(pc300_t * card, int ch) +static void rx_dma_start(pc300_t * card, int ch) { void __iomem *scabase = card->hw.scabase; pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; @@ -607,7 +608,7 @@ void rx_dma_start(pc300_t * card, int ch) /*************************/ /*** FALC Routines ***/ /*************************/ -void falc_issue_cmd(pc300_t * card, int ch, ucchar cmd) +static void falc_issue_cmd(pc300_t * card, int ch, ucchar cmd) { void __iomem *falcbase = card->hw.falcbase; unsigned long i = 0; @@ -622,7 +623,7 @@ void falc_issue_cmd(pc300_t * card, int ch, ucchar cmd) cpc_writeb(falcbase + F_REG(CMDR, ch), cmd); } -void falc_intr_enable(pc300_t * card, int ch) +static void falc_intr_enable(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -672,7 +673,7 @@ void falc_intr_enable(pc300_t * card, int ch) } } -void falc_open_timeslot(pc300_t * card, int ch, int timeslot) +static void falc_open_timeslot(pc300_t * card, int ch, int timeslot) { void __iomem *falcbase = card->hw.falcbase; ucchar tshf = card->chan[ch].falc.offset; @@ -688,7 +689,7 @@ void falc_open_timeslot(pc300_t * card, int ch, int timeslot) (0x80 >> (timeslot & 0x07))); } -void falc_close_timeslot(pc300_t * card, int ch, int timeslot) +static void falc_close_timeslot(pc300_t * card, int ch, int timeslot) { void __iomem *falcbase = card->hw.falcbase; ucchar tshf = card->chan[ch].falc.offset; @@ -704,7 +705,7 @@ void falc_close_timeslot(pc300_t * card, int ch, int timeslot) ~(0x80 >> (timeslot & 0x07))); } -void falc_close_all_timeslots(pc300_t * card, int ch) +static void falc_close_all_timeslots(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -726,7 +727,7 @@ void falc_close_all_timeslots(pc300_t * card, int ch) } } -void falc_open_all_timeslots(pc300_t * card, int ch) +static void falc_open_all_timeslots(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -758,7 +759,7 @@ void falc_open_all_timeslots(pc300_t * card, int ch) } } -void falc_init_timeslot(pc300_t * card, int ch) +static void falc_init_timeslot(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -776,7 +777,7 @@ void falc_init_timeslot(pc300_t * card, int ch) } } -void falc_enable_comm(pc300_t * card, int ch) +static void falc_enable_comm(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; falc_t *pfalc = (falc_t *) & chan->falc; @@ -792,7 +793,7 @@ void falc_enable_comm(pc300_t * card, int ch) ~((CPLD_REG1_FALC_DCD | CPLD_REG1_FALC_CTS) << (2 * ch))); } -void falc_disable_comm(pc300_t * card, int ch) +static void falc_disable_comm(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; falc_t *pfalc = (falc_t *) & chan->falc; @@ -806,7 +807,7 @@ void falc_disable_comm(pc300_t * card, int ch) ((CPLD_REG1_FALC_DCD | CPLD_REG1_FALC_CTS) << (2 * ch))); } -void falc_init_t1(pc300_t * card, int ch) +static void falc_init_t1(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -975,7 +976,7 @@ void falc_init_t1(pc300_t * card, int ch) falc_close_all_timeslots(card, ch); } -void falc_init_e1(pc300_t * card, int ch) +static void falc_init_e1(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1155,7 +1156,7 @@ void falc_init_e1(pc300_t * card, int ch) falc_close_all_timeslots(card, ch); } -void falc_init_hdlc(pc300_t * card, int ch) +static void falc_init_hdlc(pc300_t * card, int ch) { void __iomem *falcbase = card->hw.falcbase; pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; @@ -1181,7 +1182,7 @@ void falc_init_hdlc(pc300_t * card, int ch) falc_intr_enable(card, ch); } -void te_config(pc300_t * card, int ch) +static void te_config(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1241,7 +1242,7 @@ void te_config(pc300_t * card, int ch) CPC_UNLOCK(card, flags); } -void falc_check_status(pc300_t * card, int ch, unsigned char frs0) +static void falc_check_status(pc300_t * card, int ch, unsigned char frs0) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1397,7 +1398,7 @@ void falc_check_status(pc300_t * card, int ch, unsigned char frs0) } } -void falc_update_stats(pc300_t * card, int ch) +static void falc_update_stats(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1450,7 +1451,7 @@ void falc_update_stats(pc300_t * card, int ch) * the synchronizer and then sent to the system interface. *---------------------------------------------------------------------------- */ -void falc_remote_loop(pc300_t * card, int ch, int loop_on) +static void falc_remote_loop(pc300_t * card, int ch, int loop_on) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1495,7 +1496,7 @@ void falc_remote_loop(pc300_t * card, int ch, int loop_on) * coding must be identical. *---------------------------------------------------------------------------- */ -void falc_local_loop(pc300_t * card, int ch, int loop_on) +static void falc_local_loop(pc300_t * card, int ch, int loop_on) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; falc_t *pfalc = (falc_t *) & chan->falc; @@ -1522,7 +1523,7 @@ void falc_local_loop(pc300_t * card, int ch, int loop_on) * looped. They are originated by the FALC-LH transmitter. *---------------------------------------------------------------------------- */ -void falc_payload_loop(pc300_t * card, int ch, int loop_on) +static void falc_payload_loop(pc300_t * card, int ch, int loop_on) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1576,7 +1577,7 @@ void falc_payload_loop(pc300_t * card, int ch, int loop_on) * Description: Turns XLU bit off in the proper register *---------------------------------------------------------------------------- */ -void turn_off_xlu(pc300_t * card, int ch) +static void turn_off_xlu(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1597,7 +1598,7 @@ void turn_off_xlu(pc300_t * card, int ch) * Description: Turns XLD bit off in the proper register *---------------------------------------------------------------------------- */ -void turn_off_xld(pc300_t * card, int ch) +static void turn_off_xld(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1619,7 +1620,7 @@ void turn_off_xld(pc300_t * card, int ch) * to generate a LOOP activation code over a T1/E1 line. *---------------------------------------------------------------------------- */ -void falc_generate_loop_up_code(pc300_t * card, int ch) +static void falc_generate_loop_up_code(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1652,7 +1653,7 @@ void falc_generate_loop_up_code(pc300_t * card, int ch) * to generate a LOOP deactivation code over a T1/E1 line. *---------------------------------------------------------------------------- */ -void falc_generate_loop_down_code(pc300_t * card, int ch) +static void falc_generate_loop_down_code(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1682,7 +1683,7 @@ void falc_generate_loop_down_code(pc300_t * card, int ch) * it on the reception side. *---------------------------------------------------------------------------- */ -void falc_pattern_test(pc300_t * card, int ch, unsigned int activate) +static void falc_pattern_test(pc300_t * card, int ch, unsigned int activate) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -1729,7 +1730,7 @@ void falc_pattern_test(pc300_t * card, int ch, unsigned int activate) * Description: This routine returns the bit error counter value *---------------------------------------------------------------------------- */ -ucshort falc_pattern_test_error(pc300_t * card, int ch) +static ucshort falc_pattern_test_error(pc300_t * card, int ch) { pc300ch_t *chan = (pc300ch_t *) & card->chan[ch]; falc_t *pfalc = (falc_t *) & chan->falc; @@ -1769,7 +1770,7 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx) netif_rx(skb); } -void cpc_tx_timeout(struct net_device *dev) +static void cpc_tx_timeout(struct net_device *dev) { pc300dev_t *d = (pc300dev_t *) dev->priv; pc300ch_t *chan = (pc300ch_t *) d->chan; @@ -1797,7 +1798,7 @@ void cpc_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -int cpc_queue_xmit(struct sk_buff *skb, struct net_device *dev) +static int cpc_queue_xmit(struct sk_buff *skb, struct net_device *dev) { pc300dev_t *d = (pc300dev_t *) dev->priv; pc300ch_t *chan = (pc300ch_t *) d->chan; @@ -1880,7 +1881,7 @@ int cpc_queue_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -void cpc_net_rx(struct net_device *dev) +static void cpc_net_rx(struct net_device *dev) { pc300dev_t *d = (pc300dev_t *) dev->priv; pc300ch_t *chan = (pc300ch_t *) d->chan; @@ -2403,7 +2404,7 @@ static irqreturn_t cpc_intr(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } -void cpc_sca_status(pc300_t * card, int ch) +static void cpc_sca_status(pc300_t * card, int ch) { ucchar ilar; void __iomem *scabase = card->hw.scabase; @@ -2495,7 +2496,7 @@ void cpc_sca_status(pc300_t * card, int ch) } } -void cpc_falc_status(pc300_t * card, int ch) +static void cpc_falc_status(pc300_t * card, int ch) { pc300ch_t *chan = &card->chan[ch]; falc_t *pfalc = (falc_t *) & chan->falc; @@ -2523,7 +2524,7 @@ void cpc_falc_status(pc300_t * card, int ch) CPC_UNLOCK(card, flags); } -int cpc_change_mtu(struct net_device *dev, int new_mtu) +static int cpc_change_mtu(struct net_device *dev, int new_mtu) { if ((new_mtu < 128) || (new_mtu > PC300_DEF_MTU)) return -EINVAL; @@ -2531,7 +2532,7 @@ int cpc_change_mtu(struct net_device *dev, int new_mtu) return 0; } -int cpc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int cpc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { hdlc_device *hdlc = dev_to_hdlc(dev); pc300dev_t *d = (pc300dev_t *) dev->priv; @@ -2856,7 +2857,7 @@ static int clock_rate_calc(uclong rate, uclong clock, int *br_io) } } -int ch_config(pc300dev_t * d) +static int ch_config(pc300dev_t * d) { pc300ch_t *chan = (pc300ch_t *) d->chan; pc300chconf_t *conf = (pc300chconf_t *) & chan->conf; @@ -3004,7 +3005,7 @@ int ch_config(pc300dev_t * d) return 0; } -int rx_config(pc300dev_t * d) +static int rx_config(pc300dev_t * d) { pc300ch_t *chan = (pc300ch_t *) d->chan; pc300_t *card = (pc300_t *) chan->card; @@ -3035,7 +3036,7 @@ int rx_config(pc300dev_t * d) return 0; } -int tx_config(pc300dev_t * d) +static int tx_config(pc300dev_t * d) { pc300ch_t *chan = (pc300ch_t *) d->chan; pc300_t *card = (pc300_t *) chan->card; @@ -3098,7 +3099,7 @@ static int cpc_attach(struct net_device *dev, unsigned short encoding, return 0; } -void cpc_opench(pc300dev_t * d) +static void cpc_opench(pc300dev_t * d) { pc300ch_t *chan = (pc300ch_t *) d->chan; pc300_t *card = (pc300_t *) chan->card; @@ -3116,7 +3117,7 @@ void cpc_opench(pc300dev_t * d) cpc_readb(scabase + M_REG(CTL, ch)) & ~(CTL_RTS | CTL_DTR)); } -void cpc_closech(pc300dev_t * d) +static void cpc_closech(pc300dev_t * d) { pc300ch_t *chan = (pc300ch_t *) d->chan; pc300_t *card = (pc300_t *) chan->card; @@ -3173,7 +3174,7 @@ int cpc_open(struct net_device *dev) return 0; } -int cpc_close(struct net_device *dev) +static int cpc_close(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); pc300dev_t *d = (pc300dev_t *) dev->priv; diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c index 8454bf6caaa7..52f26b9c69d2 100644 --- a/drivers/net/wan/pc300_tty.c +++ b/drivers/net/wan/pc300_tty.c @@ -112,10 +112,10 @@ typedef struct _st_cpc_tty_area { static struct tty_driver serial_drv; /* local variables */ -st_cpc_tty_area cpc_tty_area[CPC_TTY_NPORTS]; +static st_cpc_tty_area cpc_tty_area[CPC_TTY_NPORTS]; -int cpc_tty_cnt=0; /* number of intrfaces configured with MLPPP */ -int cpc_tty_unreg_flag = 0; +static int cpc_tty_cnt = 0; /* number of intrfaces configured with MLPPP */ +static int cpc_tty_unreg_flag = 0; /* TTY functions prototype */ static int cpc_tty_open(struct tty_struct *tty, struct file *flip); @@ -132,9 +132,9 @@ static void cpc_tty_trace(pc300dev_t *dev, char* buf, int len, char rxtx); static void cpc_tty_signal_off(pc300dev_t *pc300dev, unsigned char); static void cpc_tty_signal_on(pc300dev_t *pc300dev, unsigned char); -int pc300_tiocmset(struct tty_struct *, struct file *, - unsigned int, unsigned int); -int pc300_tiocmget(struct tty_struct *, struct file *); +static int pc300_tiocmset(struct tty_struct *, struct file *, + unsigned int, unsigned int); +static int pc300_tiocmget(struct tty_struct *, struct file *); /* functions called by PC300 driver */ void cpc_tty_init(pc300dev_t *dev); @@ -538,8 +538,8 @@ static int cpc_tty_chars_in_buffer(struct tty_struct *tty) return(0); } -int pc300_tiocmset(struct tty_struct *tty, struct file *file, - unsigned int set, unsigned int clear) +static int pc300_tiocmset(struct tty_struct *tty, struct file *file, + unsigned int set, unsigned int clear) { st_cpc_tty_area *cpc_tty; @@ -565,7 +565,7 @@ int pc300_tiocmset(struct tty_struct *tty, struct file *file, return 0; } -int pc300_tiocmget(struct tty_struct *tty, struct file *file) +static int pc300_tiocmget(struct tty_struct *tty, struct file *file) { unsigned int result; unsigned char status; diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 3ac9a45b20fa..036adc4f8ba7 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -182,7 +182,7 @@ static char sdla_byte(struct net_device *dev, int addr) return(byte); } -void sdla_stop(struct net_device *dev) +static void sdla_stop(struct net_device *dev) { struct frad_local *flp; @@ -209,7 +209,7 @@ void sdla_stop(struct net_device *dev) } } -void sdla_start(struct net_device *dev) +static void sdla_start(struct net_device *dev) { struct frad_local *flp; @@ -247,7 +247,7 @@ void sdla_start(struct net_device *dev) * ***************************************************/ -int sdla_z80_poll(struct net_device *dev, int z80_addr, int jiffs, char resp1, char resp2) +static int sdla_z80_poll(struct net_device *dev, int z80_addr, int jiffs, char resp1, char resp2) { unsigned long start, done, now; char resp, *temp; @@ -505,7 +505,7 @@ static int sdla_cmd(struct net_device *dev, int cmd, short dlci, short flags, static int sdla_reconfig(struct net_device *dev); -int sdla_activate(struct net_device *slave, struct net_device *master) +static int sdla_activate(struct net_device *slave, struct net_device *master) { struct frad_local *flp; int i; @@ -527,7 +527,7 @@ int sdla_activate(struct net_device *slave, struct net_device *master) return(0); } -int sdla_deactivate(struct net_device *slave, struct net_device *master) +static int sdla_deactivate(struct net_device *slave, struct net_device *master) { struct frad_local *flp; int i; @@ -549,7 +549,7 @@ int sdla_deactivate(struct net_device *slave, struct net_device *master) return(0); } -int sdla_assoc(struct net_device *slave, struct net_device *master) +static int sdla_assoc(struct net_device *slave, struct net_device *master) { struct frad_local *flp; int i; @@ -585,7 +585,7 @@ int sdla_assoc(struct net_device *slave, struct net_device *master) return(0); } -int sdla_deassoc(struct net_device *slave, struct net_device *master) +static int sdla_deassoc(struct net_device *slave, struct net_device *master) { struct frad_local *flp; int i; @@ -613,7 +613,7 @@ int sdla_deassoc(struct net_device *slave, struct net_device *master) return(0); } -int sdla_dlci_conf(struct net_device *slave, struct net_device *master, int get) +static int sdla_dlci_conf(struct net_device *slave, struct net_device *master, int get) { struct frad_local *flp; struct dlci_local *dlp; @@ -1324,7 +1324,7 @@ NOTE: This is rather a useless action right now, as the return(0); } -int sdla_change_mtu(struct net_device *dev, int new_mtu) +static int sdla_change_mtu(struct net_device *dev, int new_mtu) { struct frad_local *flp; @@ -1337,7 +1337,7 @@ int sdla_change_mtu(struct net_device *dev, int new_mtu) return(-EOPNOTSUPP); } -int sdla_set_config(struct net_device *dev, struct ifmap *map) +static int sdla_set_config(struct net_device *dev, struct ifmap *map) { struct frad_local *flp; int i; diff --git a/drivers/net/wan/sdladrv.c b/drivers/net/wan/sdladrv.c index c8bc6da57a41..7c2cf2e76300 100644 --- a/drivers/net/wan/sdladrv.c +++ b/drivers/net/wan/sdladrv.c @@ -642,9 +642,7 @@ int sdla_mapmem (sdlahw_t* hw, unsigned long addr) * Enable interrupt generation. */ -EXPORT_SYMBOL(sdla_inten); - -int sdla_inten (sdlahw_t* hw) +static int sdla_inten (sdlahw_t* hw) { unsigned port = hw->port; int tmp, i; @@ -698,8 +696,7 @@ int sdla_inten (sdlahw_t* hw) * Disable interrupt generation. */ -EXPORT_SYMBOL(sdla_intde); - +#if 0 int sdla_intde (sdlahw_t* hw) { unsigned port = hw->port; @@ -748,14 +745,13 @@ int sdla_intde (sdlahw_t* hw) } return 0; } +#endif /* 0 */ /*============================================================================ * Acknowledge SDLA hardware interrupt. */ -EXPORT_SYMBOL(sdla_intack); - -int sdla_intack (sdlahw_t* hw) +static int sdla_intack (sdlahw_t* hw) { unsigned port = hw->port; int tmp; @@ -827,8 +823,7 @@ void read_S514_int_stat (sdlahw_t* hw, u32* int_status) * Generate an interrupt to adapter's CPU. */ -EXPORT_SYMBOL(sdla_intr); - +#if 0 int sdla_intr (sdlahw_t* hw) { unsigned port = hw->port; @@ -863,6 +858,7 @@ int sdla_intr (sdlahw_t* hw) } return 0; } +#endif /* 0 */ /*============================================================================ * Execute Adapter Command. diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index b56a7b516d24..3731b22f6757 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -221,7 +221,7 @@ static void sppp_clear_timeout(struct sppp *p) * here. */ -void sppp_input (struct net_device *dev, struct sk_buff *skb) +static void sppp_input (struct net_device *dev, struct sk_buff *skb) { struct ppp_header *h; struct sppp *sp = (struct sppp *)sppp_of(dev); @@ -355,8 +355,6 @@ done: return; } -EXPORT_SYMBOL(sppp_input); - /* * Handle transmit packets. */ @@ -990,7 +988,7 @@ EXPORT_SYMBOL(sppp_reopen); * the mtu is out of range. */ -int sppp_change_mtu(struct net_device *dev, int new_mtu) +static int sppp_change_mtu(struct net_device *dev, int new_mtu) { if(new_mtu<128||new_mtu>PPP_MTU||(dev->flags&IFF_UP)) return -EINVAL; @@ -998,8 +996,6 @@ int sppp_change_mtu(struct net_device *dev, int new_mtu) return 0; } -EXPORT_SYMBOL(sppp_change_mtu); - /** * sppp_do_ioctl - Ioctl handler for ppp/hdlc * @dev: Device subject to ioctl @@ -1456,7 +1452,7 @@ static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_t return 0; } -struct packet_type sppp_packet_type = { +static struct packet_type sppp_packet_type = { .type = __constant_htons(ETH_P_WAN_PPP), .func = sppp_rcv, }; diff --git a/include/linux/cycx_drv.h b/include/linux/cycx_drv.h index 6621df86a748..12fe6b0bfcff 100644 --- a/include/linux/cycx_drv.h +++ b/include/linux/cycx_drv.h @@ -60,6 +60,5 @@ extern int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len); extern int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len); extern int cycx_exec(void __iomem *addr); -extern void cycx_inten(struct cycx_hw *hw); extern void cycx_intr(struct cycx_hw *hw); #endif /* _CYCX_DRV_H */ diff --git a/include/linux/sdladrv.h b/include/linux/sdladrv.h index 78f634007fc6..c85e103d5e7b 100644 --- a/include/linux/sdladrv.h +++ b/include/linux/sdladrv.h @@ -52,12 +52,8 @@ typedef struct sdlahw extern int sdla_setup (sdlahw_t* hw, void* sfm, unsigned len); extern int sdla_down (sdlahw_t* hw); -extern int sdla_inten (sdlahw_t* hw); -extern int sdla_intde (sdlahw_t* hw); -extern int sdla_intack (sdlahw_t* hw); extern void S514_intack (sdlahw_t* hw, u32 int_status); extern void read_S514_int_stat (sdlahw_t* hw, u32* int_status); -extern int sdla_intr (sdlahw_t* hw); extern int sdla_mapmem (sdlahw_t* hw, unsigned long addr); extern int sdla_peek (sdlahw_t* hw, unsigned long addr, void* buf, unsigned len); diff --git a/include/net/syncppp.h b/include/net/syncppp.h index 614cb6ba564e..877efa434700 100644 --- a/include/net/syncppp.h +++ b/include/net/syncppp.h @@ -86,7 +86,6 @@ static inline struct sppp *sppp_of(struct net_device *dev) void sppp_attach (struct ppp_device *pd); void sppp_detach (struct net_device *dev); -void sppp_input (struct net_device *dev, struct sk_buff *m); int sppp_do_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd); struct sk_buff *sppp_dequeue (struct net_device *dev); int sppp_isempty (struct net_device *dev); -- cgit v1.2.3 From 43ec6e95e4d8a73afc2405a44b955c380aeeb65a Mon Sep 17 00:00:00 2001 From: Dale Farnsworth Date: Tue, 23 Aug 2005 10:30:29 -0700 Subject: [PATCH] mii: Add test for GigE support Signed-off-by: Dale Farnsworth Signed-off-by: Jeff Garzik --- drivers/net/mii.c | 15 +++++++++++++++ include/linux/mii.h | 1 + 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/mii.c b/drivers/net/mii.c index c33cb3dc942b..e42aa797f08b 100644 --- a/drivers/net/mii.c +++ b/drivers/net/mii.c @@ -207,6 +207,20 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) return 0; } +int mii_check_gmii_support(struct mii_if_info *mii) +{ + int reg; + + reg = mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR); + if (reg & BMSR_ESTATEN) { + reg = mii->mdio_read(mii->dev, mii->phy_id, MII_ESTATUS); + if (reg & (ESTATUS_1000_TFULL | ESTATUS_1000_THALF)) + return 1; + } + + return 0; +} + int mii_link_ok (struct mii_if_info *mii) { /* first, a dummy read, needed to latch some MII phys */ @@ -394,5 +408,6 @@ EXPORT_SYMBOL(mii_ethtool_gset); EXPORT_SYMBOL(mii_ethtool_sset); EXPORT_SYMBOL(mii_check_link); EXPORT_SYMBOL(mii_check_media); +EXPORT_SYMBOL(mii_check_gmii_support); EXPORT_SYMBOL(generic_mii_ioctl); diff --git a/include/linux/mii.h b/include/linux/mii.h index 9b8d0476988a..68f5a0f392dd 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -158,6 +158,7 @@ extern int mii_link_ok (struct mii_if_info *mii); extern int mii_nway_restart (struct mii_if_info *mii); extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_check_gmii_support(struct mii_if_info *mii); extern void mii_check_link (struct mii_if_info *mii); extern unsigned int mii_check_media (struct mii_if_info *mii, unsigned int ok_to_print, -- cgit v1.2.3 From 17b14451fd2b187ddd6303726755a3af0a926b6c Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 15 Sep 2005 15:44:00 +0100 Subject: [PATCH] PATCH: remove function for non-PCI as requested Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 81 ++++++++++++++++++++++++++++------------------ include/linux/libata.h | 1 + 2 files changed, 50 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 5cc53cd9323e..72bdc91e148c 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -4122,6 +4122,53 @@ err_out: return 0; } +/** + * ata_host_set_remove - PCI layer callback for device removal + * @host_set: ATA host set that was removed + * + * Unregister all objects associated with this host set. Free those + * objects. + * + * LOCKING: + * Inherited from calling layer (may sleep). + */ + + +void ata_host_set_remove(struct ata_host_set *host_set) +{ + struct ata_port *ap; + unsigned int i; + + for (i = 0; i < host_set->n_ports; i++) { + ap = host_set->ports[i]; + scsi_remove_host(ap->host); + } + + free_irq(host_set->irq, host_set); + + for (i = 0; i < host_set->n_ports; i++) { + ap = host_set->ports[i]; + + ata_scsi_release(ap->host); + + if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) { + struct ata_ioports *ioaddr = &ap->ioaddr; + + if (ioaddr->cmd_addr == 0x1f0) + release_region(0x1f0, 8); + else if (ioaddr->cmd_addr == 0x170) + release_region(0x170, 8); + } + + scsi_host_put(ap->host); + } + + if (host_set->ops->host_stop) + host_set->ops->host_stop(host_set); + + kfree(host_set); +} + /** * ata_scsi_release - SCSI layer callback hook for host unload * @host: libata host to be unloaded @@ -4462,39 +4509,8 @@ void ata_pci_remove_one (struct pci_dev *pdev) { struct device *dev = pci_dev_to_dev(pdev); struct ata_host_set *host_set = dev_get_drvdata(dev); - struct ata_port *ap; - unsigned int i; - - for (i = 0; i < host_set->n_ports; i++) { - ap = host_set->ports[i]; - - scsi_remove_host(ap->host); - } - - free_irq(host_set->irq, host_set); - - for (i = 0; i < host_set->n_ports; i++) { - ap = host_set->ports[i]; - - ata_scsi_release(ap->host); - - if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) { - struct ata_ioports *ioaddr = &ap->ioaddr; - - if (ioaddr->cmd_addr == 0x1f0) - release_region(0x1f0, 8); - else if (ioaddr->cmd_addr == 0x170) - release_region(0x170, 8); - } - - scsi_host_put(ap->host); - } - - if (host_set->ops->host_stop) - host_set->ops->host_stop(host_set); - - kfree(host_set); + ata_host_set_remove(host_set); pci_release_regions(pdev); pci_disable_device(pdev); dev_set_drvdata(dev, NULL); @@ -4564,6 +4580,7 @@ module_exit(ata_exit); EXPORT_SYMBOL_GPL(ata_std_bios_param); EXPORT_SYMBOL_GPL(ata_std_ports); EXPORT_SYMBOL_GPL(ata_device_add); +EXPORT_SYMBOL_GPL(ata_host_set_remove); EXPORT_SYMBOL_GPL(ata_sg_init); EXPORT_SYMBOL_GPL(ata_sg_init_one); EXPORT_SYMBOL_GPL(ata_qc_complete); diff --git a/include/linux/libata.h b/include/linux/libata.h index 022105c745fc..ceee1fc42c60 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -393,6 +393,7 @@ extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_i extern void ata_pci_remove_one (struct pci_dev *pdev); #endif /* CONFIG_PCI */ extern int ata_device_add(struct ata_probe_ent *ent); +extern void ata_host_set_remove(struct ata_host_set *host_set); extern int ata_scsi_detect(Scsi_Host_Template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); -- cgit v1.2.3 From e86ee6682b649183c11013a98be02f25e9ae399d Mon Sep 17 00:00:00 2001 From: Andy Currid Date: Mon, 19 Sep 2005 06:17:52 -0700 Subject: [PATCH] Add NVIDIA device ID in sata_nv Signed-off-by: Andy Currid Signed-off-by: Jeff Garzik --- drivers/scsi/sata_nv.c | 2 ++ include/linux/pci_ids.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index a1d62dee3be6..c05653c7779d 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -158,6 +158,8 @@ static struct pci_device_id nv_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6c1a142286a..cb414ea42f02 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1267,7 +1267,8 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E -#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x037E +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2 0x037F #define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 #define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B -- cgit v1.2.3 From 3905ec4561b7b049e9c2e27311d072c356dbdee2 Mon Sep 17 00:00:00 2001 From: James Ketrenos Date: Wed, 21 Sep 2005 11:56:42 -0500 Subject: [PATCH] ieee80211: Added ieee80211_radiotap.h tree 383c59b2516a61f2683f02dfebbed0caf6ee5dc3 parent a04948f63fd96c4b875a43f78afad1a0874cc441 author Mike Kershaw 1124447833 -0500 committer James Ketrenos 1127313883 -0500 Added ieee80211_radiotap.h to enhance statistic reporting to user space from wireless drivers. Signed-off-by: Mike Kershaw Signed-off-by: James Ketrenos Signed-off-by: Jeff Garzik --- include/linux/if_arp.h | 1 + include/net/ieee80211_radiotap.h | 231 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 include/net/ieee80211_radiotap.h (limited to 'include/linux') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 0856548a2a08..a8b1a2071838 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -84,6 +84,7 @@ #define ARPHRD_IEEE802_TR 800 /* Magic type ident for TR */ #define ARPHRD_IEEE80211 801 /* IEEE 802.11 */ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ +#define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h new file mode 100644 index 000000000000..429b73892a5f --- /dev/null +++ b/include/net/ieee80211_radiotap.h @@ -0,0 +1,231 @@ +/* $FreeBSD: src/sys/net80211/ieee80211_radiotap.h,v 1.5 2005/01/22 20:12:05 sam Exp $ */ +/* $NetBSD: ieee80211_radiotap.h,v 1.11 2005/06/22 06:16:02 dyoung Exp $ */ + +/*- + * Copyright (c) 2003, 2004 David Young. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of David Young may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID + * YOUNG BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. + */ + +/* + * Modifications to fit into the linux IEEE 802.11 stack, + * Mike Kershaw (dragorn@kismetwireless.net) + */ + +#ifndef IEEE80211RADIOTAP_H +#define IEEE80211RADIOTAP_H + +#include +#include + +/* Radiotap header version (from official NetBSD feed) */ +#define IEEE80211RADIOTAP_VERSION "1.5" +/* Base version of the radiotap packet header data */ +#define PKTHDR_RADIOTAP_VERSION 0 + +/* A generic radio capture format is desirable. There is one for + * Linux, but it is neither rigidly defined (there were not even + * units given for some fields) nor easily extensible. + * + * I suggest the following extensible radio capture format. It is + * based on a bitmap indicating which fields are present. + * + * I am trying to describe precisely what the application programmer + * should expect in the following, and for that reason I tell the + * units and origin of each measurement (where it applies), or else I + * use sufficiently weaselly language ("is a monotonically nondecreasing + * function of...") that I cannot set false expectations for lawyerly + * readers. + */ + +/* XXX tcpdump/libpcap do not tolerate variable-length headers, + * yet, so we pad every radiotap header to 64 bytes. Ugh. + */ +#define IEEE80211_RADIOTAP_HDRLEN 64 + +/* The radio capture header precedes the 802.11 header. */ +struct ieee80211_radiotap_header { + u8 it_version; /* Version 0. Only increases + * for drastic changes, + * introduction of compatible + * new fields does not count. + */ + u8 it_pad; + u16 it_len; /* length of the whole + * header in bytes, including + * it_version, it_pad, + * it_len, and data fields. + */ + u32 it_present; /* A bitmap telling which + * fields are present. Set bit 31 + * (0x80000000) to extend the + * bitmap by another 32 bits. + * Additional extensions are made + * by setting bit 31. + */ +}; + +/* Name Data type Units + * ---- --------- ----- + * + * IEEE80211_RADIOTAP_TSFT u64 microseconds + * + * Value in microseconds of the MAC's 64-bit 802.11 Time + * Synchronization Function timer when the first bit of the + * MPDU arrived at the MAC. For received frames, only. + * + * IEEE80211_RADIOTAP_CHANNEL 2 x u16 MHz, bitmap + * + * Tx/Rx frequency in MHz, followed by flags (see below). + * + * IEEE80211_RADIOTAP_FHSS u16 see below + * + * For frequency-hopping radios, the hop set (first byte) + * and pattern (second byte). + * + * IEEE80211_RADIOTAP_RATE u8 500kb/s + * + * Tx/Rx data rate + * + * IEEE80211_RADIOTAP_DBM_ANTSIGNAL int8_t decibels from + * one milliwatt (dBm) + * + * RF signal power at the antenna, decibel difference from + * one milliwatt. + * + * IEEE80211_RADIOTAP_DBM_ANTNOISE int8_t decibels from + * one milliwatt (dBm) + * + * RF noise power at the antenna, decibel difference from one + * milliwatt. + * + * IEEE80211_RADIOTAP_DB_ANTSIGNAL u8 decibel (dB) + * + * RF signal power at the antenna, decibel difference from an + * arbitrary, fixed reference. + * + * IEEE80211_RADIOTAP_DB_ANTNOISE u8 decibel (dB) + * + * RF noise power at the antenna, decibel difference from an + * arbitrary, fixed reference point. + * + * IEEE80211_RADIOTAP_LOCK_QUALITY u16 unitless + * + * Quality of Barker code lock. Unitless. Monotonically + * nondecreasing with "better" lock strength. Called "Signal + * Quality" in datasheets. (Is there a standard way to measure + * this?) + * + * IEEE80211_RADIOTAP_TX_ATTENUATION u16 unitless + * + * Transmit power expressed as unitless distance from max + * power set at factory calibration. 0 is max power. + * Monotonically nondecreasing with lower power levels. + * + * IEEE80211_RADIOTAP_DB_TX_ATTENUATION u16 decibels (dB) + * + * Transmit power expressed as decibel distance from max power + * set at factory calibration. 0 is max power. Monotonically + * nondecreasing with lower power levels. + * + * IEEE80211_RADIOTAP_DBM_TX_POWER int8_t decibels from + * one milliwatt (dBm) + * + * Transmit power expressed as dBm (decibels from a 1 milliwatt + * reference). This is the absolute power level measured at + * the antenna port. + * + * IEEE80211_RADIOTAP_FLAGS u8 bitmap + * + * Properties of transmitted and received frames. See flags + * defined below. + * + * IEEE80211_RADIOTAP_ANTENNA u8 antenna index + * + * Unitless indication of the Rx/Tx antenna for this packet. + * The first antenna is antenna 0. + * + * IEEE80211_RADIOTAP_FCS u32 data + * + * FCS from frame in network byte order. + */ +enum ieee80211_radiotap_type { + IEEE80211_RADIOTAP_TSFT = 0, + IEEE80211_RADIOTAP_FLAGS = 1, + IEEE80211_RADIOTAP_RATE = 2, + IEEE80211_RADIOTAP_CHANNEL = 3, + IEEE80211_RADIOTAP_FHSS = 4, + IEEE80211_RADIOTAP_DBM_ANTSIGNAL = 5, + IEEE80211_RADIOTAP_DBM_ANTNOISE = 6, + IEEE80211_RADIOTAP_LOCK_QUALITY = 7, + IEEE80211_RADIOTAP_TX_ATTENUATION = 8, + IEEE80211_RADIOTAP_DB_TX_ATTENUATION = 9, + IEEE80211_RADIOTAP_DBM_TX_POWER = 10, + IEEE80211_RADIOTAP_ANTENNA = 11, + IEEE80211_RADIOTAP_DB_ANTSIGNAL = 12, + IEEE80211_RADIOTAP_DB_ANTNOISE = 13, + IEEE80211_RADIOTAP_EXT = 31, +}; + +/* Channel flags. */ +#define IEEE80211_CHAN_TURBO 0x0010 /* Turbo channel */ +#define IEEE80211_CHAN_CCK 0x0020 /* CCK channel */ +#define IEEE80211_CHAN_OFDM 0x0040 /* OFDM channel */ +#define IEEE80211_CHAN_2GHZ 0x0080 /* 2 GHz spectrum channel. */ +#define IEEE80211_CHAN_5GHZ 0x0100 /* 5 GHz spectrum channel */ +#define IEEE80211_CHAN_PASSIVE 0x0200 /* Only passive scan allowed */ +#define IEEE80211_CHAN_DYN 0x0400 /* Dynamic CCK-OFDM channel */ +#define IEEE80211_CHAN_GFSK 0x0800 /* GFSK channel (FHSS PHY) */ + +/* For IEEE80211_RADIOTAP_FLAGS */ +#define IEEE80211_RADIOTAP_F_CFP 0x01 /* sent/received + * during CFP + */ +#define IEEE80211_RADIOTAP_F_SHORTPRE 0x02 /* sent/received + * with short + * preamble + */ +#define IEEE80211_RADIOTAP_F_WEP 0x04 /* sent/received + * with WEP encryption + */ +#define IEEE80211_RADIOTAP_F_FRAG 0x08 /* sent/received + * with fragmentation + */ +#define IEEE80211_RADIOTAP_F_FCS 0x10 /* frame includes FCS */ +#define IEEE80211_RADIOTAP_F_DATAPAD 0x20 /* frame has padding between + * 802.11 header and payload + * (to 32-bit boundary) + */ + +/* Ugly macro to convert literal channel numbers into their mhz equivalents + * There are certianly some conditions that will break this (like feeding it '30') + * but they shouldn't arise since nothing talks on channel 30. */ +#define ieee80211chan2mhz(x) \ + (((x) <= 14) ? \ + (((x) == 14) ? 2484 : ((x) * 5) + 2407) : \ + ((x) + 1000) * 5) + +#endif /* IEEE80211_RADIOTAP_H */ -- cgit v1.2.3 From 590232a7150674b2036291eaefce085f3f9659c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:30:44 -0300 Subject: [LLC]: Add sysctl support for the LLC timeouts Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/sysctl.h | 26 ++++++++- include/net/llc.h | 7 +++ include/net/llc_conn.h | 10 ++-- net/llc/Makefile | 1 + net/llc/af_llc.c | 47 +++++++++++----- net/llc/llc_c_ac.c | 12 ++--- net/llc/llc_conn.c | 13 +++-- net/llc/llc_station.c | 11 ++-- net/llc/sysctl_net_llc.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 232 insertions(+), 31 deletions(-) create mode 100644 net/llc/sysctl_net_llc.c (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3a29a9f9b451..fc8e367f671e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -202,7 +202,8 @@ enum NET_TR=14, NET_DECNET=15, NET_ECONET=16, - NET_SCTP=17, + NET_SCTP=17, + NET_LLC=18, }; /* /proc/sys/kernel/random */ @@ -522,6 +523,29 @@ enum { NET_IPX_FORWARDING=2 }; +/* /proc/sys/net/llc */ +enum { + NET_LLC2=1, + NET_LLC_STATION=2, +}; + +/* /proc/sys/net/llc/llc2 */ +enum { + NET_LLC2_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/station */ +enum { + NET_LLC_STATION_ACK_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/llc2/timeout */ +enum { + NET_LLC2_ACK_TIMEOUT=1, + NET_LLC2_P_TIMEOUT=2, + NET_LLC2_REJ_TIMEOUT=3, + NET_LLC2_BUSY_TIMEOUT=4, +}; /* /proc/sys/net/appletalk */ enum { diff --git a/include/net/llc.h b/include/net/llc.h index 71769a5aeef3..8b8e2be289b1 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -98,4 +98,11 @@ extern void llc_proc_exit(void); #define llc_proc_init() (0) #define llc_proc_exit() do { } while(0) #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_SYSCTL +extern int llc_sysctl_init(void); +extern void llc_sysctl_exit(void); +#else +#define llc_sysctl_init() (0) +#define llc_sysctl_exit() do { } while(0) +#endif /* CONFIG_SYSCTL */ #endif /* LLC_H */ diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 8ad3bc2c23d7..8a8ff4810135 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -19,14 +19,14 @@ #define LLC_EVENT 1 #define LLC_PACKET 2 -#define LLC_P_TIME 2 -#define LLC_ACK_TIME 1 -#define LLC_REJ_TIME 3 -#define LLC_BUSY_TIME 3 +#define LLC2_P_TIME 2 +#define LLC2_ACK_TIME 1 +#define LLC2_REJ_TIME 3 +#define LLC2_BUSY_TIME 3 struct llc_timer { struct timer_list timer; - u16 expire; /* timer expire time */ + unsigned long expire; /* timer expire time */ }; struct llc_sock { diff --git a/net/llc/Makefile b/net/llc/Makefile index 5ebd4ed2bd42..4e260cff3c5d 100644 --- a/net/llc/Makefile +++ b/net/llc/Makefile @@ -22,3 +22,4 @@ llc2-y := llc_if.o llc_c_ev.o llc_c_ac.o llc_conn.o llc_c_st.o llc_pdu.o \ llc_sap.o llc_s_ac.o llc_s_ev.o llc_s_st.o af_llc.o llc_station.o llc2-$(CONFIG_PROC_FS) += llc_proc.o +llc2-$(CONFIG_SYSCTL) += sysctl_net_llc.o diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 95444f227510..ef125345a2db 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -877,22 +877,22 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, case LLC_OPT_ACK_TMR_EXP: if (opt > LLC_OPT_MAX_ACK_TMR_EXP) goto out; - llc->ack_timer.expire = opt; + llc->ack_timer.expire = opt * HZ; break; case LLC_OPT_P_TMR_EXP: if (opt > LLC_OPT_MAX_P_TMR_EXP) goto out; - llc->pf_cycle_timer.expire = opt; + llc->pf_cycle_timer.expire = opt * HZ; break; case LLC_OPT_REJ_TMR_EXP: if (opt > LLC_OPT_MAX_REJ_TMR_EXP) goto out; - llc->rej_sent_timer.expire = opt; + llc->rej_sent_timer.expire = opt * HZ; break; case LLC_OPT_BUSY_TMR_EXP: if (opt > LLC_OPT_MAX_BUSY_TMR_EXP) goto out; - llc->busy_state_timer.expire = opt; + llc->busy_state_timer.expire = opt * HZ; break; case LLC_OPT_TX_WIN: if (opt > LLC_OPT_MAX_WIN) @@ -942,17 +942,17 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, goto out; switch (optname) { case LLC_OPT_RETRY: - val = llc->n2; break; + val = llc->n2; break; case LLC_OPT_SIZE: - val = llc->n1; break; + val = llc->n1; break; case LLC_OPT_ACK_TMR_EXP: - val = llc->ack_timer.expire; break; + val = llc->ack_timer.expire / HZ; break; case LLC_OPT_P_TMR_EXP: - val = llc->pf_cycle_timer.expire; break; + val = llc->pf_cycle_timer.expire / HZ; break; case LLC_OPT_REJ_TMR_EXP: - val = llc->rej_sent_timer.expire; break; + val = llc->rej_sent_timer.expire / HZ; break; case LLC_OPT_BUSY_TMR_EXP: - val = llc->busy_state_timer.expire; break; + val = llc->busy_state_timer.expire / HZ; break; case LLC_OPT_TX_WIN: val = llc->k; break; case LLC_OPT_RX_WIN: @@ -999,6 +999,13 @@ static struct proto_ops llc_ui_ops = { extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); +static char llc_proc_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; +static char llc_sysctl_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the sysctl entries\n"; +static char llc_sock_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the network family\n"; + static int __init llc2_init(void) { int rc = proto_register(&llc_proto, 0); @@ -1010,13 +1017,28 @@ static int __init llc2_init(void) llc_station_init(); llc_ui_sap_last_autoport = LLC_SAP_DYN_START; rc = llc_proc_init(); - if (rc != 0) + if (rc != 0) { + printk(llc_proc_err_msg); goto out_unregister_llc_proto; - sock_register(&llc_ui_family_ops); + } + rc = llc_sysctl_init(); + if (rc) { + printk(llc_sysctl_err_msg); + goto out_proc; + } + rc = sock_register(&llc_ui_family_ops); + if (rc) { + printk(llc_sock_err_msg); + goto out_sysctl; + } llc_add_pack(LLC_DEST_SAP, llc_sap_handler); llc_add_pack(LLC_DEST_CONN, llc_conn_handler); out: return rc; +out_sysctl: + llc_sysctl_exit(); +out_proc: + llc_proc_exit(); out_unregister_llc_proto: proto_unregister(&llc_proto); goto out; @@ -1029,6 +1051,7 @@ static void __exit llc2_exit(void) llc_remove_pack(LLC_DEST_CONN); sock_unregister(PF_LLC); llc_proc_exit(); + llc_sysctl_exit(); proto_unregister(&llc_proto); } diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 50c827e13a9f..a4daa91003dd 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -620,7 +620,7 @@ int llc_conn_ac_set_remote_busy(struct sock *sk, struct sk_buff *skb) if (!llc->remote_busy_flag) { llc->remote_busy_flag = 1; mod_timer(&llc->busy_state_timer.timer, - jiffies + llc->busy_state_timer.expire * HZ); + jiffies + llc->busy_state_timer.expire); } return 0; } @@ -853,7 +853,7 @@ int llc_conn_ac_start_p_timer(struct sock *sk, struct sk_buff *skb) llc_conn_set_p_flag(sk, 1); mod_timer(&llc->pf_cycle_timer.timer, - jiffies + llc->pf_cycle_timer.expire * HZ); + jiffies + llc->pf_cycle_timer.expire); return 0; } @@ -1131,7 +1131,7 @@ int llc_conn_ac_start_ack_timer(struct sock *sk, struct sk_buff *skb) { struct llc_sock *llc = llc_sk(sk); - mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire * HZ); + mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire); return 0; } @@ -1140,7 +1140,7 @@ int llc_conn_ac_start_rej_timer(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); mod_timer(&llc->rej_sent_timer.timer, - jiffies + llc->rej_sent_timer.expire * HZ); + jiffies + llc->rej_sent_timer.expire); return 0; } @@ -1151,7 +1151,7 @@ int llc_conn_ac_start_ack_tmr_if_not_running(struct sock *sk, if (!timer_pending(&llc->ack_timer.timer)) mod_timer(&llc->ack_timer.timer, - jiffies + llc->ack_timer.expire * HZ); + jiffies + llc->ack_timer.expire); return 0; } @@ -1199,7 +1199,7 @@ int llc_conn_ac_upd_nr_received(struct sock *sk, struct sk_buff *skb) } if (unacked) mod_timer(&llc->ack_timer.timer, - jiffies + llc->ack_timer.expire * HZ); + jiffies + llc->ack_timer.expire); } else if (llc->failed_data_req) { u8 f_bit; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index ce7b893ed1ab..d3783f8ee481 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -40,6 +40,11 @@ static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, /* Offset table on connection states transition diagram */ static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV]; +int sysctl_llc2_ack_timeout = LLC2_ACK_TIME * HZ; +int sysctl_llc2_p_timeout = LLC2_P_TIME * HZ; +int sysctl_llc2_rej_timeout = LLC2_REJ_TIME * HZ; +int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; + /** * llc_conn_state_process - sends event to connection state machine * @sk: connection @@ -799,22 +804,22 @@ static void llc_sk_init(struct sock* sk) llc->dec_step = llc->connect_step = 1; init_timer(&llc->ack_timer.timer); - llc->ack_timer.expire = LLC_ACK_TIME; + llc->ack_timer.expire = sysctl_llc2_ack_timeout; llc->ack_timer.timer.data = (unsigned long)sk; llc->ack_timer.timer.function = llc_conn_ack_tmr_cb; init_timer(&llc->pf_cycle_timer.timer); - llc->pf_cycle_timer.expire = LLC_P_TIME; + llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; llc->pf_cycle_timer.timer.data = (unsigned long)sk; llc->pf_cycle_timer.timer.function = llc_conn_pf_cycle_tmr_cb; init_timer(&llc->rej_sent_timer.timer); - llc->rej_sent_timer.expire = LLC_REJ_TIME; + llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; llc->rej_sent_timer.timer.data = (unsigned long)sk; llc->rej_sent_timer.timer.function = llc_conn_rej_tmr_cb; init_timer(&llc->busy_state_timer.timer); - llc->busy_state_timer.expire = LLC_BUSY_TIME; + llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; llc->busy_state_timer.timer.data = (unsigned long)sk; llc->busy_state_timer.timer.function = llc_conn_busy_tmr_cb; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 85a7ac276141..2d764b0382ce 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -50,6 +50,10 @@ struct llc_station { struct sk_buff_head mac_pdu_q; }; +#define LLC_STATION_ACK_TIME (3 * HZ) + +int sysctl_llc_station_ack_timeout = LLC_STATION_ACK_TIME; + /* Types of events (possible values in 'ev->type') */ #define LLC_STATION_EV_TYPE_SIMPLE 1 #define LLC_STATION_EV_TYPE_CONDITION 2 @@ -218,7 +222,8 @@ static void llc_station_send_pdu(struct sk_buff *skb) static int llc_station_ac_start_ack_timer(struct sk_buff *skb) { - mod_timer(&llc_main_station.ack_timer, jiffies + LLC_ACK_TIME * HZ); + mod_timer(&llc_main_station.ack_timer, + jiffies + sysctl_llc_station_ack_timeout); return 0; } @@ -687,7 +692,8 @@ int __init llc_station_init(void) init_timer(&llc_main_station.ack_timer); llc_main_station.ack_timer.data = (unsigned long)&llc_main_station; llc_main_station.ack_timer.function = llc_station_ack_tmr_cb; - + llc_main_station.ack_timer.expires = jiffies + + sysctl_llc_station_ack_timeout; skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; @@ -695,7 +701,6 @@ int __init llc_station_init(void) llc_set_station_handler(llc_station_rcv); ev = llc_station_ev(skb); memset(ev, 0, sizeof(*ev)); - llc_main_station.ack_timer.expires = jiffies + 3 * HZ; llc_main_station.maximum_retry = 1; llc_main_station.state = LLC_STATION_STATE_DOWN; ev->type = LLC_STATION_EV_TYPE_SIMPLE; diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c new file mode 100644 index 000000000000..4d99d2f27a21 --- /dev/null +++ b/net/llc/sysctl_net_llc.c @@ -0,0 +1,136 @@ +/* + * sysctl_net_llc.c: sysctl interface to LLC net subsystem. + * + * Arnaldo Carvalho de Melo + */ + +#include +#include +#include +#include + +#ifndef CONFIG_SYSCTL +#error This file should not be compiled without CONFIG_SYSCTL defined +#endif + +extern int sysctl_llc2_ack_timeout; +extern int sysctl_llc2_busy_timeout; +extern int sysctl_llc2_p_timeout; +extern int sysctl_llc2_rej_timeout; +extern int sysctl_llc_station_ack_timeout; + +static struct ctl_table llc2_timeout_table[] = { + { + .ctl_name = NET_LLC2_ACK_TIMEOUT, + .procname = "ack", + .data = &sysctl_llc2_ack_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_BUSY_TIMEOUT, + .procname = "busy", + .data = &sysctl_llc2_busy_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_P_TIMEOUT, + .procname = "p", + .data = &sysctl_llc2_p_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_REJ_TIMEOUT, + .procname = "rej", + .data = &sysctl_llc2_rej_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { 0 }, +}; + +static struct ctl_table llc_station_table[] = { + { + .ctl_name = NET_LLC_STATION_ACK_TIMEOUT, + .procname = "ack_timeout", + .data = &sysctl_llc_station_ack_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { 0 }, +}; + +static struct ctl_table llc2_dir_timeout_table[] = { + { + .ctl_name = NET_LLC2, + .procname = "timeout", + .mode = 0555, + .child = llc2_timeout_table, + }, + { 0 }, +}; + +static struct ctl_table llc_table[] = { + { + .ctl_name = NET_LLC2, + .procname = "llc2", + .mode = 0555, + .child = llc2_dir_timeout_table, + }, + { + .ctl_name = NET_LLC_STATION, + .procname = "station", + .mode = 0555, + .child = llc_station_table, + }, + { 0 }, +}; + +static struct ctl_table llc_dir_table[] = { + { + .ctl_name = NET_LLC, + .procname = "llc", + .mode = 0555, + .child = llc_table, + }, + { 0 }, +}; + +static struct ctl_table llc_root_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = llc_dir_table, + }, + { 0 }, +}; + +static struct ctl_table_header *llc_table_header; + +int __init llc_sysctl_init(void) +{ + llc_table_header = register_sysctl_table(llc_root_table, 1); + + return llc_table_header ? 0 : -ENOMEM; +} + +void llc_sysctl_exit(void) +{ + if (llc_table_header) { + unregister_sysctl_table(llc_table_header); + llc_table_header = NULL; + } +} -- cgit v1.2.3 From d305ef5d2a4e77bfa66160513f4a7494126a506b Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Thu, 22 Sep 2005 00:47:24 -0700 Subject: [PATCH] driver core: add helper device_is_registered() add the helper and use it instead of open coding the klist_node_attached() check (which is a layering violation IMHO) idea by Alan Stern. Signed-off-by: Daniel Ritz Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/s390/cio/ccwgroup.c | 2 +- drivers/usb/core/message.c | 2 +- drivers/usb/core/usb.c | 6 +++--- include/linux/device.h | 5 +++++ 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 91ea8e4777f3..dbb3eb0e330b 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -437,7 +437,7 @@ __ccwgroup_get_gdev_by_cdev(struct ccw_device *cdev) if (cdev->dev.driver_data) { gdev = (struct ccwgroup_device *)cdev->dev.driver_data; if (get_device(&gdev->dev)) { - if (klist_node_attached(&gdev->dev.knode_bus)) + if (device_is_registered(&gdev->dev)) return gdev; put_device(&gdev->dev); } diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index c47c8052b486..f1fb67fe22a8 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -987,7 +987,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) /* remove this interface if it has been registered */ interface = dev->actconfig->interface[i]; - if (!klist_node_attached(&interface->dev.knode_bus)) + if (!device_is_registered(&interface->dev)) continue; dev_dbg (&dev->dev, "unregistering interface %s\n", interface->dev.bus_id); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 087af73a59dd..7d131509e419 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -303,7 +303,7 @@ int usb_driver_claim_interface(struct usb_driver *driver, /* if interface was already added, bind now; else let * the future device_add() bind it, bypassing probe() */ - if (klist_node_attached(&dev->knode_bus)) + if (device_is_registered(dev)) device_bind_driver(dev); return 0; @@ -336,8 +336,8 @@ void usb_driver_release_interface(struct usb_driver *driver, if (iface->condition != USB_INTERFACE_BOUND) return; - /* release only after device_add() */ - if (klist_node_attached(&dev->knode_bus)) { + /* don't release if the interface hasn't been added yet */ + if (device_is_registered(dev)) { iface->condition = USB_INTERFACE_UNBINDING; device_release_driver(dev); } diff --git a/include/linux/device.h b/include/linux/device.h index 06e5d42f2c7b..95d607a48f06 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -317,6 +317,11 @@ dev_set_drvdata (struct device *dev, void *data) dev->driver_data = data; } +static inline int device_is_registered(struct device *dev) +{ + return klist_node_attached(&dev->knode_bus); +} + /* * High level routines for use by the bus drivers */ -- cgit v1.2.3 From e4c94330e3395ae87451bded2840a25d04f27902 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Sep 2005 21:43:45 -0700 Subject: [PATCH] reboot: comment and factor the main reboot functions In the lead up to 2.6.13 I fixed a large number of reboot problems by making the calling conventions consistent. Despite checking and double checking my work it appears I missed an obvious one. This first patch simply refactors the reboot routines so all of the preparation for various kinds of reboots are in their own functions. Making it very hard to get the various kinds of reboot out of sync. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reboot.h | 4 ++++ kernel/sys.c | 52 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 3b3266ff1a95..7ab2cdb83ef0 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -59,6 +59,10 @@ extern void machine_crash_shutdown(struct pt_regs *); * Architecture independent implemenations of sys_reboot commands. */ +extern void kernel_restart_prepare(char *cmd); +extern void kernel_halt_prepare(void); +extern void kernel_power_off_prepare(void); + extern void kernel_restart(char *cmd); extern void kernel_halt(void); extern void kernel_power_off(void); diff --git a/kernel/sys.c b/kernel/sys.c index f723522e6986..2fa1ed18123c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -361,17 +361,35 @@ out_unlock: return retval; } +/** + * emergency_restart - reboot the system + * + * Without shutting down any hardware or taking any locks + * reboot the system. This is called when we know we are in + * trouble so this is our best effort to reboot. This is + * safe to call in interrupt context. + */ void emergency_restart(void) { machine_emergency_restart(); } EXPORT_SYMBOL_GPL(emergency_restart); -void kernel_restart(char *cmd) +/** + * kernel_restart - reboot the system + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. + */ +void kernel_restart_prepare(char *cmd) { notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); +} +void kernel_restart(char *cmd) +{ + kernel_restart_prepare(cmd); if (!cmd) { printk(KERN_EMERG "Restarting system.\n"); } else { @@ -382,6 +400,12 @@ void kernel_restart(char *cmd) } EXPORT_SYMBOL_GPL(kernel_restart); +/** + * kernel_kexec - reboot the system + * + * Move into place and start executing a preloaded standalone + * executable. If nothing was preloaded return an error. + */ void kernel_kexec(void) { #ifdef CONFIG_KEXEC @@ -390,9 +414,7 @@ void kernel_kexec(void) if (!image) { return; } - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); - system_state = SYSTEM_RESTART; - device_shutdown(); + kernel_restart_prepare(NULL); printk(KERN_EMERG "Starting new kernel\n"); machine_shutdown(); machine_kexec(image); @@ -400,21 +422,39 @@ void kernel_kexec(void) } EXPORT_SYMBOL_GPL(kernel_kexec); -void kernel_halt(void) +/** + * kernel_halt - halt the system + * + * Shutdown everything and perform a clean system halt. + */ +void kernel_halt_prepare(void) { notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); system_state = SYSTEM_HALT; device_shutdown(); +} +void kernel_halt(void) +{ + kernel_halt_prepare(); printk(KERN_EMERG "System halted.\n"); machine_halt(); } EXPORT_SYMBOL_GPL(kernel_halt); -void kernel_power_off(void) +/** + * kernel_power_off - power_off the system + * + * Shutdown everything and perform a clean system power_off. + */ +void kernel_power_off_prepare(void) { notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); system_state = SYSTEM_POWER_OFF; device_shutdown(); +} +void kernel_power_off(void) +{ + kernel_power_off_prepare(); printk(KERN_EMERG "Power down.\n"); machine_power_off(); } -- cgit v1.2.3 From 67497205b12e3cb408259cc09b50c3a9d12cd935 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 22 Sep 2005 23:45:24 -0700 Subject: [NETFILTER] Fix sparse endian warnings in pptp helper Signed-off-by: Alexey Dobriyan Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 118 +++++++++++----------- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 6 +- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 14 +-- 3 files changed, 70 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 389e3851d52f..50a761db5a04 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -60,8 +60,8 @@ struct ip_ct_pptp_expect { struct pptp_pkt_hdr { __u16 packetLength; - __u16 packetType; - __u32 magicCookie; + __be16 packetType; + __be32 magicCookie; }; /* PptpControlMessageType values */ @@ -93,7 +93,7 @@ struct pptp_pkt_hdr { #define PPTP_REMOVE_DEVICE_ERROR 6 struct PptpControlHeader { - __u16 messageType; + __be16 messageType; __u16 reserved; }; @@ -106,13 +106,13 @@ struct PptpControlHeader { #define PPTP_BEARER_CAP_DIGITAL 0x2 struct PptpStartSessionRequest { - __u16 protocolVersion; + __be16 protocolVersion; __u8 reserved1; __u8 reserved2; - __u32 framingCapability; - __u32 bearerCapability; - __u16 maxChannels; - __u16 firmwareRevision; + __be32 framingCapability; + __be32 bearerCapability; + __be16 maxChannels; + __be16 firmwareRevision; __u8 hostName[64]; __u8 vendorString[64]; }; @@ -125,13 +125,13 @@ struct PptpStartSessionRequest { #define PPTP_START_UNKNOWN_PROTOCOL 5 struct PptpStartSessionReply { - __u16 protocolVersion; + __be16 protocolVersion; __u8 resultCode; __u8 generalErrorCode; - __u32 framingCapability; - __u32 bearerCapability; - __u16 maxChannels; - __u16 firmwareRevision; + __be32 framingCapability; + __be32 bearerCapability; + __be16 maxChannels; + __be16 firmwareRevision; __u8 hostName[64]; __u8 vendorString[64]; }; @@ -155,7 +155,7 @@ struct PptpStopSessionReply { }; struct PptpEchoRequest { - __u32 identNumber; + __be32 identNumber; }; /* PptpEchoReplyResultCode */ @@ -163,7 +163,7 @@ struct PptpEchoRequest { #define PPTP_ECHO_GENERAL_ERROR 2 struct PptpEchoReply { - __u32 identNumber; + __be32 identNumber; __u8 resultCode; __u8 generalErrorCode; __u16 reserved; @@ -180,16 +180,16 @@ struct PptpEchoReply { #define PPTP_DONT_CARE_BEARER_TYPE 3 struct PptpOutCallRequest { - __u16 callID; - __u16 callSerialNumber; - __u32 minBPS; - __u32 maxBPS; - __u32 bearerType; - __u32 framingType; - __u16 packetWindow; - __u16 packetProcDelay; + __be16 callID; + __be16 callSerialNumber; + __be32 minBPS; + __be32 maxBPS; + __be32 bearerType; + __be32 framingType; + __be16 packetWindow; + __be16 packetProcDelay; __u16 reserved1; - __u16 phoneNumberLength; + __be16 phoneNumberLength; __u16 reserved2; __u8 phoneNumber[64]; __u8 subAddress[64]; @@ -205,24 +205,24 @@ struct PptpOutCallRequest { #define PPTP_OUTCALL_DONT_ACCEPT 7 struct PptpOutCallReply { - __u16 callID; - __u16 peersCallID; + __be16 callID; + __be16 peersCallID; __u8 resultCode; __u8 generalErrorCode; - __u16 causeCode; - __u32 connectSpeed; - __u16 packetWindow; - __u16 packetProcDelay; - __u32 physChannelID; + __be16 causeCode; + __be32 connectSpeed; + __be16 packetWindow; + __be16 packetProcDelay; + __be32 physChannelID; }; struct PptpInCallRequest { - __u16 callID; - __u16 callSerialNumber; - __u32 callBearerType; - __u32 physChannelID; - __u16 dialedNumberLength; - __u16 dialingNumberLength; + __be16 callID; + __be16 callSerialNumber; + __be32 callBearerType; + __be32 physChannelID; + __be16 dialedNumberLength; + __be16 dialingNumberLength; __u8 dialedNumber[64]; __u8 dialingNumber[64]; __u8 subAddress[64]; @@ -234,54 +234,54 @@ struct PptpInCallRequest { #define PPTP_INCALL_DONT_ACCEPT 3 struct PptpInCallReply { - __u16 callID; - __u16 peersCallID; + __be16 callID; + __be16 peersCallID; __u8 resultCode; __u8 generalErrorCode; - __u16 packetWindow; - __u16 packetProcDelay; + __be16 packetWindow; + __be16 packetProcDelay; __u16 reserved; }; struct PptpInCallConnected { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 connectSpeed; - __u16 packetWindow; - __u16 packetProcDelay; - __u32 callFramingType; + __be32 connectSpeed; + __be16 packetWindow; + __be16 packetProcDelay; + __be32 callFramingType; }; struct PptpClearCallRequest { - __u16 callID; + __be16 callID; __u16 reserved; }; struct PptpCallDisconnectNotify { - __u16 callID; + __be16 callID; __u8 resultCode; __u8 generalErrorCode; - __u16 causeCode; + __be16 causeCode; __u16 reserved; __u8 callStatistics[128]; }; struct PptpWanErrorNotify { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 crcErrors; - __u32 framingErrors; - __u32 hardwareOverRuns; - __u32 bufferOverRuns; - __u32 timeoutErrors; - __u32 alignmentErrors; + __be32 crcErrors; + __be32 framingErrors; + __be32 hardwareOverRuns; + __be32 bufferOverRuns; + __be32 timeoutErrors; + __be32 alignmentErrors; }; struct PptpSetLinkInfo { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 sendAccm; - __u32 recvAccm; + __be32 sendAccm; + __be32 recvAccm; }; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 14dc0f7b6556..20e43f018b7c 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -17,7 +17,7 @@ union ip_conntrack_manip_proto u_int16_t all; struct { - u_int16_t port; + __be16 port; } tcp; struct { u_int16_t port; @@ -29,7 +29,7 @@ union ip_conntrack_manip_proto u_int16_t port; } sctp; struct { - u_int16_t key; /* key is 32bit, pptp only uses 16 */ + __be16 key; /* key is 32bit, pptp only uses 16 */ } gre; }; @@ -65,7 +65,7 @@ struct ip_conntrack_tuple u_int16_t port; } sctp; struct { - u_int16_t key; /* key is 32bit, + __be16 key; /* key is 32bit, * pptp only uses 16 */ } gre; } u; diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 117587978700..8236ee0fb090 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -223,8 +223,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) static inline int exp_gre(struct ip_conntrack *master, u_int32_t seq, - u_int16_t callid, - u_int16_t peer_callid) + __be16 callid, + __be16 peer_callid) { struct ip_conntrack_tuple inv_tuple; struct ip_conntrack_tuple exp_tuples[] = { @@ -263,7 +263,7 @@ exp_gre(struct ip_conntrack *master, exp_orig->mask.src.ip = 0xffffffff; exp_orig->mask.src.u.all = 0; exp_orig->mask.dst.u.all = 0; - exp_orig->mask.dst.u.gre.key = 0xffff; + exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; @@ -340,7 +340,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, unsigned int reqlen; union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; + u_int16_t msg; + __be16 *cid, *pcid; u_int32_t seq; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); @@ -551,7 +552,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, unsigned int reqlen; union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; + u_int16_t msg; + __be16 *cid, *pcid; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) @@ -755,7 +757,7 @@ static struct ip_conntrack_helper pptp = { } }, .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = 0xffff } } + .u = { .tcp = { .port = __constant_htons(0xffff) } } }, .dst = { .ip = 0, .u = { .all = 0 }, -- cgit v1.2.3 From a82b748930fce0dab22c64075c38c830ae116904 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 22 Sep 2005 23:45:44 -0700 Subject: [NETFILTER] remove unneeded structure definition from conntrack helper Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 50a761db5a04..816144c75de0 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -284,13 +284,6 @@ struct PptpSetLinkInfo { __be32 recvAccm; }; - -struct pptp_priv_data { - __u16 call_id; - __u16 mcall_id; - __u16 pcall_id; -}; - union pptp_ctrl_union { struct PptpStartSessionRequest sreq; struct PptpStartSessionReply srep; -- cgit v1.2.3 From 1dfbab59498d6f227c91988bab6c71af049a5333 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 22 Sep 2005 23:46:57 -0700 Subject: [NETFILTER] Fix conntrack event cache deadlock/oops This patch fixes a number of bugs. It cannot be reasonably split up in multiple fixes, since all bugs interact with each other and affect the same function: Bug #1: The event cache code cannot be called while a lock is held. Therefore, the call to ip_conntrack_event_cache() within ip_ct_refresh_acct() needs to be moved outside of the locked section. This fixes a number of 2.6.14-rcX oops and deadlock reports. Bug #2: We used to call ct_add_counters() for unconfirmed connections without holding a lock. Since the add operations are not atomic, we could race with another CPU. Bug #3: ip_ct_refresh_acct() lost REFRESH events in some cases where refresh (and the corresponding event) are desired, but no accounting shall be performed. Both, evenst and accounting implicitly depended on the skb parameter bein non-null. We now re-introduce a non-accounting "ip_ct_refresh()" variant to explicitly state the desired behaviour. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 25 +++++++++++--- net/ipv4/netfilter/ip_conntrack_amanda.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 49 ++++++++++++++------------- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 1 - net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 2 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 2 +- 6 files changed, 49 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index bace72a76cc4..4ced38736813 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -332,11 +332,28 @@ extern void need_ip_conntrack(void); extern int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig); +extern void __ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies, + int do_acct); + +/* Refresh conntrack for this many jiffies and do accounting */ +static inline void ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __ip_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1); +} + /* Refresh conntrack for this many jiffies */ -extern void ip_ct_refresh_acct(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - unsigned long extra_jiffies); +static inline void ip_ct_refresh(struct ip_conntrack *ct, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __ip_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); +} /* These are for NAT. Icky. */ /* Update TCP window tracking data when NAT mangles the packet */ diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index dc20881004bc..fa3f914117ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); + ip_ct_refresh(ct, *pskb, master_timeout * HZ); /* No data? */ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index c1f82e0c81cf..ea65dd3e517a 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1112,45 +1112,46 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -static inline void ct_add_counters(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ -#ifdef CONFIG_IP_NF_CT_ACCT - if (skb) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - ntohs(skb->nh.iph->tot_len); - } -#endif -} - -/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ -void ip_ct_refresh_acct(struct ip_conntrack *ct, +/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ +void __ip_ct_refresh_acct(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + unsigned long extra_jiffies, + int do_acct) { + int do_event = 0; + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + IP_NF_ASSERT(skb); + + write_lock_bh(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - ct_add_counters(ct, ctinfo, skb); + do_event = 1; } else { - write_lock_bh(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - /* FIXME: We loose some REFRESH events if this function - * is called without an skb. I'll fix this later -HW */ - if (skb) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + do_event = 1; } - ct_add_counters(ct, ctinfo, skb); - write_unlock_bh(&ip_conntrack_lock); } + +#ifdef CONFIG_IP_NF_CT_ACCT + if (do_acct) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + } +#endif + + write_unlock_bh(&ip_conntrack_lock); + + /* must be unlocked when calling event cache */ + if (do_event) + ip_conntrack_event_cache(IPCT_REFRESH, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 8236ee0fb090..926a6684643d 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -172,7 +172,6 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) DEBUGP("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; - /* refresh_acct will not modify counters if skb == NULL */ if (del_timer(&sibling->timeout)) sibling->timeout.function((unsigned long)sibling); ip_conntrack_put(sibling); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 71ef19d126d0..577bac22dcc6 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, ip_conntrack_expect_related(exp); ip_conntrack_expect_put(exp); - ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ); + ip_ct_refresh(ct, *pskb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index d3c7808010ec..dd476b191f4b 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -989,7 +989,7 @@ EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); -EXPORT_SYMBOL(ip_ct_refresh_acct); +EXPORT_SYMBOL(__ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); -- cgit v1.2.3 From 094bb20b9fcab3a1652a77741caba6b78097d622 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:20 -0400 Subject: [PATCH] RPC: extract socket logic common to both client and server Clean-up: Move some code that is common to both RPC client- and server-side socket transports into its own source file, net/sunrpc/socklib.c. Test-plan: Compile kernel with CONFIG_NFS enabled. Millions of fsx operations over UDP, client and server. Connectathon over UDP. Version: Thu, 11 Aug 2005 16:03:09 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/Makefile | 2 +- net/sunrpc/socklib.c | 175 +++++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/svcsock.c | 3 - net/sunrpc/xdr.c | 75 ------------------- net/sunrpc/xprt.c | 64 ----------------- 6 files changed, 177 insertions(+), 143 deletions(-) create mode 100644 net/sunrpc/socklib.c (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 23448d0fb5bc..d8b7656bca41 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -161,6 +161,7 @@ typedef struct { typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); +extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, skb_reader_t *, skb_read_actor_t); diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 46a2ce00a29b..f0a955627177 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ -sunrpc-y := clnt.o xprt.o sched.o \ +sunrpc-y := clnt.o xprt.o socklib.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ pmap_clnt.o timer.o xdr.o \ diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c new file mode 100644 index 000000000000..8f97e90f36c8 --- /dev/null +++ b/net/sunrpc/socklib.c @@ -0,0 +1,175 @@ +/* + * linux/net/sunrpc/socklib.c + * + * Common socket helper routines for RPC client and server + * + * Copyright (C) 1995, 1996 Olaf Kirch + */ + +#include +#include +#include +#include + + +/** + * skb_read_bits - copy some data bits from skb to internal buffer + * @desc: sk_buff copy helper + * @to: copy destination + * @len: number of bytes to copy + * + * Possibly called several times to iterate over an sk_buff and copy + * data out of it. + */ +static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) +{ + if (len > desc->count) + len = desc->count; + if (skb_copy_bits(desc->skb, desc->offset, to, len)) + return 0; + desc->count -= len; + desc->offset += len; + return len; +} + +/** + * skb_read_and_csum_bits - copy and checksum from skb to buffer + * @desc: sk_buff copy helper + * @to: copy destination + * @len: number of bytes to copy + * + * Same as skb_read_bits, but calculate a checksum at the same time. + */ +static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) +{ + unsigned int csum2, pos; + + if (len > desc->count) + len = desc->count; + pos = desc->offset; + csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); + desc->csum = csum_block_add(desc->csum, csum2, pos); + desc->count -= len; + desc->offset += len; + return len; +} + +/** + * xdr_partial_copy_from_skb - copy data out of an skb + * @xdr: target XDR buffer + * @base: starting offset + * @desc: sk_buff copy helper + * @copy_actor: virtual method for copying data + * + */ +ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) +{ + struct page **ppage = xdr->pages; + unsigned int len, pglen = xdr->page_len; + ssize_t copied = 0; + int ret; + + len = xdr->head[0].iov_len; + if (base < len) { + len -= base; + ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); + copied += ret; + if (ret != len || !desc->count) + goto out; + base = 0; + } else + base -= len; + + if (unlikely(pglen == 0)) + goto copy_tail; + if (unlikely(base >= pglen)) { + base -= pglen; + goto copy_tail; + } + if (base || xdr->page_base) { + pglen -= base; + base += xdr->page_base; + ppage += base >> PAGE_CACHE_SHIFT; + base &= ~PAGE_CACHE_MASK; + } + do { + char *kaddr; + + /* ACL likes to be lazy in allocating pages - ACLs + * are small by default but can get huge. */ + if (unlikely(*ppage == NULL)) { + *ppage = alloc_page(GFP_ATOMIC); + if (unlikely(*ppage == NULL)) { + if (copied == 0) + copied = -ENOMEM; + goto out; + } + } + + len = PAGE_CACHE_SIZE; + kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); + if (base) { + len -= base; + if (pglen < len) + len = pglen; + ret = copy_actor(desc, kaddr + base, len); + base = 0; + } else { + if (pglen < len) + len = pglen; + ret = copy_actor(desc, kaddr, len); + } + flush_dcache_page(*ppage); + kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); + copied += ret; + if (ret != len || !desc->count) + goto out; + ppage++; + } while ((pglen -= len) != 0); +copy_tail: + len = xdr->tail[0].iov_len; + if (base < len) + copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); +out: + return copied; +} + +/** + * csum_partial_copy_to_xdr - checksum and copy data + * @xdr: target XDR buffer + * @skb: source skb + * + * We have set things up such that we perform the checksum of the UDP + * packet in parallel with the copies into the RPC client iovec. -DaveM + */ +int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + skb_reader_t desc; + + desc.skb = skb; + desc.offset = sizeof(struct udphdr); + desc.count = skb->len - desc.offset; + + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + goto no_checksum; + + desc.csum = csum_partial(skb->data, desc.offset, skb->csum); + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) + return -1; + if (desc.offset != skb->len) { + unsigned int csum2; + csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); + desc.csum = csum_block_add(desc.csum, csum2, desc.offset); + } + if (desc.count) + return -1; + if ((unsigned short)csum_fold(desc.csum)) + return -1; + return 0; +no_checksum: + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) + return -1; + if (desc.count) + return -1; + return 0; +} diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 30ec3efc48a6..130f2b5d93dd 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -548,9 +548,6 @@ svc_write_space(struct sock *sk) /* * Receive a datagram from a UDP socket. */ -extern int -csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb); - static int svc_udp_recvfrom(struct svc_rqst *rqstp) { diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index fde16f40a581..9cc12aeed22c 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -176,81 +176,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, xdr->buflen += len; } -ssize_t -xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, - skb_reader_t *desc, - skb_read_actor_t copy_actor) -{ - struct page **ppage = xdr->pages; - unsigned int len, pglen = xdr->page_len; - ssize_t copied = 0; - int ret; - - len = xdr->head[0].iov_len; - if (base < len) { - len -= base; - ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); - copied += ret; - if (ret != len || !desc->count) - goto out; - base = 0; - } else - base -= len; - - if (pglen == 0) - goto copy_tail; - if (base >= pglen) { - base -= pglen; - goto copy_tail; - } - if (base || xdr->page_base) { - pglen -= base; - base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - base &= ~PAGE_CACHE_MASK; - } - do { - char *kaddr; - - /* ACL likes to be lazy in allocating pages - ACLs - * are small by default but can get huge. */ - if (unlikely(*ppage == NULL)) { - *ppage = alloc_page(GFP_ATOMIC); - if (unlikely(*ppage == NULL)) { - if (copied == 0) - copied = -ENOMEM; - goto out; - } - } - - len = PAGE_CACHE_SIZE; - kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); - if (base) { - len -= base; - if (pglen < len) - len = pglen; - ret = copy_actor(desc, kaddr + base, len); - base = 0; - } else { - if (pglen < len) - len = pglen; - ret = copy_actor(desc, kaddr, len); - } - flush_dcache_page(*ppage); - kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); - copied += ret; - if (ret != len || !desc->count) - goto out; - ppage++; - } while ((pglen -= len) != 0); -copy_tail: - len = xdr->tail[0].iov_len; - if (base < len) - copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); -out: - return copied; -} - int xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0e4ffdaa0129..67444f494fea 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -691,70 +691,6 @@ xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) return; } -static size_t -skb_read_bits(skb_reader_t *desc, void *to, size_t len) -{ - if (len > desc->count) - len = desc->count; - if (skb_copy_bits(desc->skb, desc->offset, to, len)) - return 0; - desc->count -= len; - desc->offset += len; - return len; -} - -static size_t -skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) -{ - unsigned int csum2, pos; - - if (len > desc->count) - len = desc->count; - pos = desc->offset; - csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); - desc->csum = csum_block_add(desc->csum, csum2, pos); - desc->count -= len; - desc->offset += len; - return len; -} - -/* - * We have set things up such that we perform the checksum of the UDP - * packet in parallel with the copies into the RPC client iovec. -DaveM - */ -int -csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) -{ - skb_reader_t desc; - - desc.skb = skb; - desc.offset = sizeof(struct udphdr); - desc.count = skb->len - desc.offset; - - if (skb->ip_summed == CHECKSUM_UNNECESSARY) - goto no_checksum; - - desc.csum = csum_partial(skb->data, desc.offset, skb->csum); - if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) - return -1; - if (desc.offset != skb->len) { - unsigned int csum2; - csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); - desc.csum = csum_block_add(desc.csum, csum2, desc.offset); - } - if (desc.count) - return -1; - if ((unsigned short)csum_fold(desc.csum)) - return -1; - return 0; -no_checksum: - if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) - return -1; - if (desc.count) - return -1; - return 0; -} - /* * Input handler for RPC replies. Called from a bottom half and hence * atomic. -- cgit v1.2.3 From a246b0105bbd9a70a698f69baae2042996f2a0e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:23 -0400 Subject: [PATCH] RPC: introduce client-side transport switch Move the bulk of client-side socket-specific code into a separate source file, net/sunrpc/xprtsock.c. Test-plan: Millions of fsx operations. Performance characterization such as "sio" or "iozone". Destructive testing (unplugging the network temporarily, server reboots). Connectathon with v2, v3, and v4. Version: Thu, 11 Aug 2005 16:03:38 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 5 - include/linux/sunrpc/xprt.h | 38 +- net/sunrpc/Makefile | 2 +- net/sunrpc/clnt.c | 3 +- net/sunrpc/sysctl.c | 3 + net/sunrpc/xdr.c | 102 +---- net/sunrpc/xprt.c | 916 ++------------------------------------ net/sunrpc/xprtsock.c | 1021 +++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1101 insertions(+), 989 deletions(-) create mode 100644 net/sunrpc/xprtsock.c (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index d8b7656bca41..5da968729cf8 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -165,11 +165,6 @@ extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, skb_reader_t *, skb_read_actor_t); -struct socket; -struct sockaddr; -extern int xdr_sendpages(struct socket *, struct sockaddr *, int, - struct xdr_buf *, unsigned int, int); - extern int xdr_encode_word(struct xdr_buf *, int, u32); extern int xdr_decode_word(struct xdr_buf *, int, u32 *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e618c1649814..d82b47ab73cb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -59,7 +59,13 @@ extern unsigned int xprt_tcp_slot_table_entries; */ #define RPC_REESTABLISH_TIMEOUT (15*HZ) -/* RPC call and reply header size as number of 32bit words (verifier +/* + * RPC transport idle timeout. + */ +#define RPC_IDLE_DISCONNECT_TIMEOUT (5*60*HZ) + +/* + * RPC call and reply header size as number of 32bit words (verifier * size computed separately) */ #define RPC_CALLHDRSIZE 6 @@ -121,12 +127,19 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len -#define XPRT_LAST_FRAG (1 << 0) -#define XPRT_COPY_RECM (1 << 1) -#define XPRT_COPY_XID (1 << 2) -#define XPRT_COPY_DATA (1 << 3) +struct rpc_task; +struct rpc_xprt; + +struct rpc_xprt_ops { + void (*set_buffer_size)(struct rpc_xprt *xprt); + void (*connect)(struct rpc_task *task); + int (*send_request)(struct rpc_task *task); + void (*close)(struct rpc_xprt *xprt); + void (*destroy)(struct rpc_xprt *xprt); +}; struct rpc_xprt { + struct rpc_xprt_ops * ops; /* transport methods */ struct socket * sock; /* BSD socket layer */ struct sock * inet; /* INET layer */ @@ -199,14 +212,22 @@ struct rpc_xprt { wait_queue_head_t cong_wait; }; +#define XPRT_LAST_FRAG (1 << 0) +#define XPRT_COPY_RECM (1 << 1) +#define XPRT_COPY_XID (1 << 2) +#define XPRT_COPY_DATA (1 << 3) + #ifdef __KERNEL__ struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *toparms); +void xprt_disconnect(struct rpc_xprt *); int xprt_destroy(struct rpc_xprt *); void xprt_set_timeout(struct rpc_timeout *, unsigned int, unsigned long); - +struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *, u32); +void xprt_complete_rqst(struct rpc_xprt *, + struct rpc_rqst *, int); void xprt_reserve(struct rpc_task *); int xprt_prepare_transmit(struct rpc_task *); void xprt_transmit(struct rpc_task *); @@ -214,7 +235,10 @@ void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release(struct rpc_task *); void xprt_connect(struct rpc_task *); -void xprt_sock_setbufsize(struct rpc_xprt *); +int xs_setup_udp(struct rpc_xprt *, + struct rpc_timeout *); +int xs_setup_tcp(struct rpc_xprt *, + struct rpc_timeout *); #define XPRT_LOCKED 0 #define XPRT_CONNECT 1 diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index f0a955627177..cdcab9ca4c60 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ -sunrpc-y := clnt.o xprt.o socklib.o sched.o \ +sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ pmap_clnt.o timer.o xdr.o \ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2d3cf0a52d82..ab50c3c9e6a8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -525,8 +525,7 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize xprt->rcvsize = 0; if (rcvsize) xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; - if (xprt_connected(xprt)) - xprt_sock_setbufsize(xprt); + xprt->ops->set_buffer_size(xprt); } /* diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 1b9616a12e24..ef483262f17f 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -119,6 +119,9 @@ done: return 0; } +unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; + static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 9cc12aeed22c..32df43372ee9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -6,15 +6,12 @@ * Copyright (C) 1995, 1996 Olaf Kirch */ +#include #include -#include #include #include #include #include -#include -#include -#include #include #include @@ -177,103 +174,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, } -int -xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, - struct xdr_buf *xdr, unsigned int base, int msgflags) -{ - struct page **ppage = xdr->pages; - unsigned int len, pglen = xdr->page_len; - int err, ret = 0; - ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); - - len = xdr->head[0].iov_len; - if (base < len || (addr != NULL && base == 0)) { - struct kvec iov = { - .iov_base = xdr->head[0].iov_base + base, - .iov_len = len - base, - }; - struct msghdr msg = { - .msg_name = addr, - .msg_namelen = addrlen, - .msg_flags = msgflags, - }; - if (xdr->len > len) - msg.msg_flags |= MSG_MORE; - - if (iov.iov_len != 0) - err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); - else - err = kernel_sendmsg(sock, &msg, NULL, 0, 0); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - if (err != iov.iov_len) - goto out; - base = 0; - } else - base -= len; - - if (pglen == 0) - goto copy_tail; - if (base >= pglen) { - base -= pglen; - goto copy_tail; - } - if (base || xdr->page_base) { - pglen -= base; - base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - base &= ~PAGE_CACHE_MASK; - } - - sendpage = sock->ops->sendpage ? : sock_no_sendpage; - do { - int flags = msgflags; - - len = PAGE_CACHE_SIZE; - if (base) - len -= base; - if (pglen < len) - len = pglen; - - if (pglen != len || xdr->tail[0].iov_len != 0) - flags |= MSG_MORE; - - /* Hmm... We might be dealing with highmem pages */ - if (PageHighMem(*ppage)) - sendpage = sock_no_sendpage; - err = sendpage(sock, *ppage, base, len, flags); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - if (err != len) - goto out; - base = 0; - ppage++; - } while ((pglen -= len) != 0); -copy_tail: - len = xdr->tail[0].iov_len; - if (base < len) { - struct kvec iov = { - .iov_base = xdr->tail[0].iov_base + base, - .iov_len = len - base, - }; - struct msghdr msg = { - .msg_flags = msgflags, - }; - err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - } -out: - return ret; -} - - /* * Helper routines for doing 'memmove' like operations on a struct xdr_buf * diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 67444f494fea..4342acf4d1cd 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -32,37 +32,16 @@ * tasks that rely on callbacks. * * Copyright (C) 1995-1997, Olaf Kirch - * - * TCP callback races fixes (C) 1998 Red Hat Software - * TCP send fixes (C) 1998 Red Hat Software - * TCP NFS related read + write fixes - * (C) 1999 Dave Airlie, University of Limerick, Ireland - * - * Rewrite of larges part of the code in order to stabilize TCP stuff. - * Fix behaviour when socket buffer is full. - * (C) 1999 Trond Myklebust */ +#include + #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include +#include /* * Local variables @@ -74,64 +53,17 @@ #endif #define XPRT_MAX_BACKOFF (8) -#define XPRT_IDLE_TIMEOUT (5*60*HZ) -#define XPRT_MAX_RESVPORT (800) /* * Local functions */ static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static inline void do_xprt_reserve(struct rpc_task *); -static void xprt_disconnect(struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); -static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, - struct rpc_timeout *to); -static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); -static void xprt_bind_socket(struct rpc_xprt *, struct socket *); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); static int xprt_clear_backlog(struct rpc_xprt *xprt); -#ifdef RPC_DEBUG_DATA -/* - * Print the buffer contents (first 128 bytes only--just enough for - * diropres return). - */ -static void -xprt_pktdump(char *msg, u32 *packet, unsigned int count) -{ - u8 *buf = (u8 *) packet; - int j; - - dprintk("RPC: %s\n", msg); - for (j = 0; j < count && j < 128; j += 4) { - if (!(j & 31)) { - if (j) - dprintk("\n"); - dprintk("0x%04x ", j); - } - dprintk("%02x%02x%02x%02x ", - buf[j], buf[j+1], buf[j+2], buf[j+3]); - } - dprintk("\n"); -} -#else -static inline void -xprt_pktdump(char *msg, u32 *packet, unsigned int count) -{ - /* NOP */ -} -#endif - -/* - * Look up RPC transport given an INET socket - */ -static inline struct rpc_xprt * -xprt_from_sock(struct sock *sk) -{ - return (struct rpc_xprt *) sk->sk_user_data; -} - /* * Serialize write access to sockets, in order to prevent different * requests from interfering with each other. @@ -234,62 +166,6 @@ xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) spin_unlock_bh(&xprt->sock_lock); } -/* - * Write data to socket. - */ -static inline int -xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) -{ - struct socket *sock = xprt->sock; - struct xdr_buf *xdr = &req->rq_snd_buf; - struct sockaddr *addr = NULL; - int addrlen = 0; - unsigned int skip; - int result; - - if (!sock) - return -ENOTCONN; - - xprt_pktdump("packet data:", - req->rq_svec->iov_base, - req->rq_svec->iov_len); - - /* For UDP, we need to provide an address */ - if (!xprt->stream) { - addr = (struct sockaddr *) &xprt->addr; - addrlen = sizeof(xprt->addr); - } - /* Dont repeat bytes */ - skip = req->rq_bytes_sent; - - clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); - result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); - - dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); - - if (result >= 0) - return result; - - switch (result) { - case -ECONNREFUSED: - /* When the server has died, an ICMP port unreachable message - * prompts ECONNREFUSED. - */ - case -EAGAIN: - break; - case -ECONNRESET: - case -ENOTCONN: - case -EPIPE: - /* connection broken */ - if (xprt->stream) - result = -ENOTCONN; - break; - default: - printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); - } - return result; -} - /* * Van Jacobson congestion avoidance. Check if the congestion window * overflowed. Put the task to sleep if this is the case. @@ -405,48 +281,20 @@ int xprt_adjust_timeout(struct rpc_rqst *req) return status; } -/* - * Close down a transport socket - */ -static void -xprt_close(struct rpc_xprt *xprt) -{ - struct socket *sock = xprt->sock; - struct sock *sk = xprt->inet; - - if (!sk) - return; - - write_lock_bh(&sk->sk_callback_lock); - xprt->inet = NULL; - xprt->sock = NULL; - - sk->sk_user_data = NULL; - sk->sk_data_ready = xprt->old_data_ready; - sk->sk_state_change = xprt->old_state_change; - sk->sk_write_space = xprt->old_write_space; - write_unlock_bh(&sk->sk_callback_lock); - - sk->sk_no_check = 0; - - sock_release(sock); -} - static void xprt_socket_autoclose(void *args) { struct rpc_xprt *xprt = (struct rpc_xprt *)args; xprt_disconnect(xprt); - xprt_close(xprt); + xprt->ops->close(xprt); xprt_release_write(xprt, NULL); } /* * Mark a transport as disconnected */ -static void -xprt_disconnect(struct rpc_xprt *xprt) +void xprt_disconnect(struct rpc_xprt *xprt) { dprintk("RPC: disconnected transport %p\n", xprt); spin_lock_bh(&xprt->sock_lock); @@ -479,57 +327,6 @@ out_abort: spin_unlock(&xprt->sock_lock); } -static void xprt_socket_connect(void *args) -{ - struct rpc_xprt *xprt = (struct rpc_xprt *)args; - struct socket *sock = xprt->sock; - int status = -EIO; - - if (xprt->shutdown || xprt->addr.sin_port == 0) - goto out; - - /* - * Start by resetting any existing state - */ - xprt_close(xprt); - sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); - if (sock == NULL) { - /* couldn't create socket or bind to reserved port; - * this is likely a permanent error, so cause an abort */ - goto out; - } - xprt_bind_socket(xprt, sock); - xprt_sock_setbufsize(xprt); - - status = 0; - if (!xprt->stream) - goto out; - - /* - * Tell the socket layer to start connecting... - */ - status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), O_NONBLOCK); - dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - } - } -out: - if (status < 0) - rpc_wake_up_status(&xprt->pending, status); - else - rpc_wake_up(&xprt->pending); -out_clear: - smp_mb__before_clear_bit(); - clear_bit(XPRT_CONNECTING, &xprt->sockstate); - smp_mb__after_clear_bit(); -} - /* * Attempt to connect a TCP socket. * @@ -552,30 +349,16 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; if (xprt_connected(xprt)) - goto out_write; + xprt_release_write(xprt, task); + else { + if (task->tk_rqstp) + task->tk_rqstp->rq_bytes_sent = 0; - if (task->tk_rqstp) - task->tk_rqstp->rq_bytes_sent = 0; - - task->tk_timeout = RPC_CONNECT_TIMEOUT; - rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); - if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { - /* Note: if we are here due to a dropped connection - * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ - * seconds - */ - if (xprt->sock != NULL) - schedule_delayed_work(&xprt->sock_connect, - RPC_REESTABLISH_TIMEOUT); - else { - schedule_work(&xprt->sock_connect); - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); - } + task->tk_timeout = RPC_CONNECT_TIMEOUT; + rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); + xprt->ops->connect(task); } return; - out_write: - xprt_release_write(xprt, task); } /* @@ -624,8 +407,7 @@ xprt_connect_status(struct rpc_task *task) /* * Look up the RPC request corresponding to a reply, and then lock it. */ -static inline struct rpc_rqst * -xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) +struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) { struct list_head *pos; struct rpc_rqst *req = NULL; @@ -644,8 +426,7 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) * Complete reply received. * The TCP code relies on us to remove the request from xprt->pending. */ -static void -xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) +void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) { struct rpc_task *task = req->rq_task; struct rpc_clnt *clnt = task->tk_client; @@ -691,409 +472,6 @@ xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) return; } -/* - * Input handler for RPC replies. Called from a bottom half and hence - * atomic. - */ -static void -udp_data_ready(struct sock *sk, int len) -{ - struct rpc_task *task; - struct rpc_xprt *xprt; - struct rpc_rqst *rovr; - struct sk_buff *skb; - int err, repsize, copied; - u32 _xid, *xp; - - read_lock(&sk->sk_callback_lock); - dprintk("RPC: udp_data_ready...\n"); - if (!(xprt = xprt_from_sock(sk))) { - printk("RPC: udp_data_ready request not found!\n"); - goto out; - } - - dprintk("RPC: udp_data_ready client %p\n", xprt); - - if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) - goto out; - - if (xprt->shutdown) - goto dropit; - - repsize = skb->len - sizeof(struct udphdr); - if (repsize < 4) { - printk("RPC: impossible RPC reply size %d!\n", repsize); - goto dropit; - } - - /* Copy the XID from the skb... */ - xp = skb_header_pointer(skb, sizeof(struct udphdr), - sizeof(_xid), &_xid); - if (xp == NULL) - goto dropit; - - /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->sock_lock); - rovr = xprt_lookup_rqst(xprt, *xp); - if (!rovr) - goto out_unlock; - task = rovr->rq_task; - - dprintk("RPC: %4d received reply\n", task->tk_pid); - - if ((copied = rovr->rq_private_buf.buflen) > repsize) - copied = repsize; - - /* Suck it into the iovec, verify checksum if not done by hw. */ - if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) - goto out_unlock; - - /* Something worked... */ - dst_confirm(skb->dst); - - xprt_complete_rqst(xprt, rovr, copied); - - out_unlock: - spin_unlock(&xprt->sock_lock); - dropit: - skb_free_datagram(sk, skb); - out: - read_unlock(&sk->sk_callback_lock); -} - -/* - * Copy from an skb into memory and shrink the skb. - */ -static inline size_t -tcp_copy_data(skb_reader_t *desc, void *p, size_t len) -{ - if (len > desc->count) - len = desc->count; - if (skb_copy_bits(desc->skb, desc->offset, p, len)) { - dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", - len, desc->count); - return 0; - } - desc->offset += len; - desc->count -= len; - dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", - len, desc->count); - return len; -} - -/* - * TCP read fragment marker - */ -static inline void -tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) -{ - size_t len, used; - char *p; - - p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; - len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; - used = tcp_copy_data(desc, p, len); - xprt->tcp_offset += used; - if (used != len) - return; - xprt->tcp_reclen = ntohl(xprt->tcp_recm); - if (xprt->tcp_reclen & 0x80000000) - xprt->tcp_flags |= XPRT_LAST_FRAG; - else - xprt->tcp_flags &= ~XPRT_LAST_FRAG; - xprt->tcp_reclen &= 0x7fffffff; - xprt->tcp_flags &= ~XPRT_COPY_RECM; - xprt->tcp_offset = 0; - /* Sanity check of the record length */ - if (xprt->tcp_reclen < 4) { - printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); - xprt_disconnect(xprt); - } - dprintk("RPC: reading TCP record fragment of length %d\n", - xprt->tcp_reclen); -} - -static void -tcp_check_recm(struct rpc_xprt *xprt) -{ - dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); - if (xprt->tcp_offset == xprt->tcp_reclen) { - xprt->tcp_flags |= XPRT_COPY_RECM; - xprt->tcp_offset = 0; - if (xprt->tcp_flags & XPRT_LAST_FRAG) { - xprt->tcp_flags &= ~XPRT_COPY_DATA; - xprt->tcp_flags |= XPRT_COPY_XID; - xprt->tcp_copied = 0; - } - } -} - -/* - * TCP read xid - */ -static inline void -tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) -{ - size_t len, used; - char *p; - - len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; - dprintk("RPC: reading XID (%Zu bytes)\n", len); - p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; - used = tcp_copy_data(desc, p, len); - xprt->tcp_offset += used; - if (used != len) - return; - xprt->tcp_flags &= ~XPRT_COPY_XID; - xprt->tcp_flags |= XPRT_COPY_DATA; - xprt->tcp_copied = 4; - dprintk("RPC: reading reply for XID %08x\n", - ntohl(xprt->tcp_xid)); - tcp_check_recm(xprt); -} - -/* - * TCP read and complete request - */ -static inline void -tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) -{ - struct rpc_rqst *req; - struct xdr_buf *rcvbuf; - size_t len; - ssize_t r; - - /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->sock_lock); - req = xprt_lookup_rqst(xprt, xprt->tcp_xid); - if (!req) { - xprt->tcp_flags &= ~XPRT_COPY_DATA; - dprintk("RPC: XID %08x request not found!\n", - ntohl(xprt->tcp_xid)); - spin_unlock(&xprt->sock_lock); - return; - } - - rcvbuf = &req->rq_private_buf; - len = desc->count; - if (len > xprt->tcp_reclen - xprt->tcp_offset) { - skb_reader_t my_desc; - - len = xprt->tcp_reclen - xprt->tcp_offset; - memcpy(&my_desc, desc, sizeof(my_desc)); - my_desc.count = len; - r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, - &my_desc, tcp_copy_data); - desc->count -= r; - desc->offset += r; - } else - r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, - desc, tcp_copy_data); - - if (r > 0) { - xprt->tcp_copied += r; - xprt->tcp_offset += r; - } - if (r != len) { - /* Error when copying to the receive buffer, - * usually because we weren't able to allocate - * additional buffer pages. All we can do now - * is turn off XPRT_COPY_DATA, so the request - * will not receive any additional updates, - * and time out. - * Any remaining data from this record will - * be discarded. - */ - xprt->tcp_flags &= ~XPRT_COPY_DATA; - dprintk("RPC: XID %08x truncated request\n", - ntohl(xprt->tcp_xid)); - dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); - goto out; - } - - dprintk("RPC: XID %08x read %Zd bytes\n", - ntohl(xprt->tcp_xid), r); - dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); - - if (xprt->tcp_copied == req->rq_private_buf.buflen) - xprt->tcp_flags &= ~XPRT_COPY_DATA; - else if (xprt->tcp_offset == xprt->tcp_reclen) { - if (xprt->tcp_flags & XPRT_LAST_FRAG) - xprt->tcp_flags &= ~XPRT_COPY_DATA; - } - -out: - if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { - dprintk("RPC: %4d received reply complete\n", - req->rq_task->tk_pid); - xprt_complete_rqst(xprt, req, xprt->tcp_copied); - } - spin_unlock(&xprt->sock_lock); - tcp_check_recm(xprt); -} - -/* - * TCP discard extra bytes from a short read - */ -static inline void -tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) -{ - size_t len; - - len = xprt->tcp_reclen - xprt->tcp_offset; - if (len > desc->count) - len = desc->count; - desc->count -= len; - desc->offset += len; - xprt->tcp_offset += len; - dprintk("RPC: discarded %Zu bytes\n", len); - tcp_check_recm(xprt); -} - -/* - * TCP record receive routine - * We first have to grab the record marker, then the XID, then the data. - */ -static int -tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, - unsigned int offset, size_t len) -{ - struct rpc_xprt *xprt = rd_desc->arg.data; - skb_reader_t desc = { - .skb = skb, - .offset = offset, - .count = len, - .csum = 0 - }; - - dprintk("RPC: tcp_data_recv\n"); - do { - /* Read in a new fragment marker if necessary */ - /* Can we ever really expect to get completely empty fragments? */ - if (xprt->tcp_flags & XPRT_COPY_RECM) { - tcp_read_fraghdr(xprt, &desc); - continue; - } - /* Read in the xid if necessary */ - if (xprt->tcp_flags & XPRT_COPY_XID) { - tcp_read_xid(xprt, &desc); - continue; - } - /* Read in the request data */ - if (xprt->tcp_flags & XPRT_COPY_DATA) { - tcp_read_request(xprt, &desc); - continue; - } - /* Skip over any trailing bytes on short reads */ - tcp_read_discard(xprt, &desc); - } while (desc.count); - dprintk("RPC: tcp_data_recv done\n"); - return len - desc.count; -} - -static void tcp_data_ready(struct sock *sk, int bytes) -{ - struct rpc_xprt *xprt; - read_descriptor_t rd_desc; - - read_lock(&sk->sk_callback_lock); - dprintk("RPC: tcp_data_ready...\n"); - if (!(xprt = xprt_from_sock(sk))) { - printk("RPC: tcp_data_ready socket info not found!\n"); - goto out; - } - if (xprt->shutdown) - goto out; - - /* We use rd_desc to pass struct xprt to tcp_data_recv */ - rd_desc.arg.data = xprt; - rd_desc.count = 65536; - tcp_read_sock(sk, &rd_desc, tcp_data_recv); -out: - read_unlock(&sk->sk_callback_lock); -} - -static void -tcp_state_change(struct sock *sk) -{ - struct rpc_xprt *xprt; - - read_lock(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk))) - goto out; - dprintk("RPC: tcp_state_change client %p...\n", xprt); - dprintk("RPC: state %x conn %d dead %d zapped %d\n", - sk->sk_state, xprt_connected(xprt), - sock_flag(sk, SOCK_DEAD), - sock_flag(sk, SOCK_ZAPPED)); - - switch (sk->sk_state) { - case TCP_ESTABLISHED: - spin_lock_bh(&xprt->sock_lock); - if (!xprt_test_and_set_connected(xprt)) { - /* Reset TCP record info */ - xprt->tcp_offset = 0; - xprt->tcp_reclen = 0; - xprt->tcp_copied = 0; - xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; - rpc_wake_up(&xprt->pending); - } - spin_unlock_bh(&xprt->sock_lock); - break; - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - default: - xprt_disconnect(xprt); - break; - } - out: - read_unlock(&sk->sk_callback_lock); -} - -/* - * Called when more output buffer space is available for this socket. - * We try not to wake our writers until they can make "significant" - * progress, otherwise we'll waste resources thrashing sock_sendmsg - * with a bunch of small requests. - */ -static void -xprt_write_space(struct sock *sk) -{ - struct rpc_xprt *xprt; - struct socket *sock; - - read_lock(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) - goto out; - if (xprt->shutdown) - goto out; - - /* Wait until we have enough socket memory */ - if (xprt->stream) { - /* from net/core/stream.c:sk_stream_write_space */ - if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) - goto out; - } else { - /* from net/core/sock.c:sock_def_write_space */ - if (!sock_writeable(sk)) - goto out; - } - - if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) - goto out; - - spin_lock_bh(&xprt->sock_lock); - if (xprt->snd_task) - rpc_wake_up_task(xprt->snd_task); - spin_unlock_bh(&xprt->sock_lock); -out: - read_unlock(&sk->sk_callback_lock); -} - /* * RPC receive timeout handler. */ @@ -1161,19 +539,10 @@ xprt_transmit(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status, retry = 0; - + int status; dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - /* set up everything as needed. */ - /* Write the record marker */ - if (xprt->stream) { - u32 *marker = req->rq_svec[0].iov_base; - - *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); - } - smp_rmb(); if (!req->rq_received) { if (list_empty(&req->rq_list)) { @@ -1191,41 +560,9 @@ xprt_transmit(struct rpc_task *task) } else if (!req->rq_bytes_sent) return; - /* Continue transmitting the packet/record. We must be careful - * to cope with writespace callbacks arriving _after_ we have - * called xprt_sendmsg(). - */ - while (1) { - req->rq_xtime = jiffies; - status = xprt_sendmsg(xprt, req); - - if (status < 0) - break; - - if (xprt->stream) { - req->rq_bytes_sent += status; - - /* If we've sent the entire packet, immediately - * reset the count of bytes sent. */ - if (req->rq_bytes_sent >= req->rq_slen) { - req->rq_bytes_sent = 0; - goto out_receive; - } - } else { - if (status >= req->rq_slen) - goto out_receive; - status = -EAGAIN; - break; - } - - dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", - task->tk_pid, req->rq_slen - req->rq_bytes_sent, - req->rq_slen); - - status = -EAGAIN; - if (retry++ > 50) - break; - } + status = xprt->ops->send_request(task); + if (!status) + goto out_receive; /* Note: at this point, task->tk_sleeping has not yet been set, * hence there is no danger of the waking up task being put on @@ -1234,26 +571,10 @@ xprt_transmit(struct rpc_task *task) task->tk_status = status; switch (status) { - case -EAGAIN: - if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { - /* Protect against races with xprt_write_space */ - spin_lock_bh(&xprt->sock_lock); - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { - task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&xprt->pending, task, NULL, NULL); - } - spin_unlock_bh(&xprt->sock_lock); - return; - } - /* Keep holding the socket if it is blocked */ - rpc_delay(task, HZ>>4); - return; case -ECONNREFUSED: task->tk_timeout = RPC_REESTABLISH_TIMEOUT; rpc_sleep_on(&xprt->sending, task, NULL, NULL); + case -EAGAIN: case -ENOTCONN: return; default: @@ -1367,7 +688,8 @@ xprt_release(struct rpc_task *task) list_del(&req->rq_list); xprt->last_used = jiffies; if (list_empty(&xprt->recv) && !xprt->shutdown) - mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); + mod_timer(&xprt->timer, + xprt->last_used + RPC_IDLE_DISCONNECT_TIMEOUT); spin_unlock_bh(&xprt->sock_lock); task->tk_rqstp = NULL; memset(req, 0, sizeof(*req)); /* mark unused */ @@ -1380,18 +702,6 @@ xprt_release(struct rpc_task *task) spin_unlock(&xprt->xprt_lock); } -/* - * Set default timeout parameters - */ -static void -xprt_default_timeout(struct rpc_timeout *to, int proto) -{ - if (proto == IPPROTO_UDP) - xprt_set_timeout(to, 5, 5 * HZ); - else - xprt_set_timeout(to, 2, 60 * HZ); -} - /* * Set constant timeout */ @@ -1405,68 +715,51 @@ xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) to->to_exponential = 0; } -unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; -unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; - /* * Initialize an RPC client */ static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) { + int result; struct rpc_xprt *xprt; - unsigned int entries; - size_t slot_table_size; struct rpc_rqst *req; - dprintk("RPC: setting up %s transport...\n", - proto == IPPROTO_UDP? "UDP" : "TCP"); - - entries = (proto == IPPROTO_TCP)? - xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; - if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) return ERR_PTR(-ENOMEM); memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ - xprt->max_reqs = entries; - slot_table_size = entries * sizeof(xprt->slot[0]); - xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); - if (xprt->slot == NULL) { - kfree(xprt); - return ERR_PTR(-ENOMEM); - } - memset(xprt->slot, 0, slot_table_size); xprt->addr = *ap; - xprt->prot = proto; - xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; - if (xprt->stream) { - xprt->cwnd = RPC_MAXCWND(xprt); - xprt->nocong = 1; - xprt->max_payload = (1U << 31) - 1; - } else { - xprt->cwnd = RPC_INITCWND; - xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); + + switch (proto) { + case IPPROTO_UDP: + result = xs_setup_udp(xprt, to); + break; + case IPPROTO_TCP: + result = xs_setup_tcp(xprt, to); + break; + default: + printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", + proto); + result = -EIO; + break; + } + if (result) { + kfree(xprt); + return ERR_PTR(result); } + spin_lock_init(&xprt->sock_lock); spin_lock_init(&xprt->xprt_lock); init_waitqueue_head(&xprt->cong_wait); INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); - INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); init_timer(&xprt->timer); xprt->timer.function = xprt_init_autodisconnect; xprt->timer.data = (unsigned long) xprt; xprt->last_used = jiffies; - xprt->port = XPRT_MAX_RESVPORT; - - /* Set timeout parameters */ - if (to) { - xprt->timeout = *to; - } else - xprt_default_timeout(&xprt->timeout, xprt->prot); rpc_init_wait_queue(&xprt->pending, "xprt_pending"); rpc_init_wait_queue(&xprt->sending, "xprt_sending"); @@ -1474,134 +767,17 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); /* initialize free list */ - for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) + for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) list_add(&req->rq_list, &xprt->free); xprt_init_xid(xprt); - /* Check whether we want to use a reserved port */ - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; - dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); return xprt; } -/* - * Bind to a reserved port - */ -static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) -{ - struct sockaddr_in myaddr = { - .sin_family = AF_INET, - }; - int err, port; - - /* Were we already bound to a given port? Try to reuse it */ - port = xprt->port; - do { - myaddr.sin_port = htons(port); - err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, - sizeof(myaddr)); - if (err == 0) { - xprt->port = port; - return 0; - } - if (--port == 0) - port = XPRT_MAX_RESVPORT; - } while (err == -EADDRINUSE && port != xprt->port); - - printk("RPC: Can't bind to reserved port (%d).\n", -err); - return err; -} - -static void -xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (xprt->inet) - return; - - write_lock_bh(&sk->sk_callback_lock); - sk->sk_user_data = xprt; - xprt->old_data_ready = sk->sk_data_ready; - xprt->old_state_change = sk->sk_state_change; - xprt->old_write_space = sk->sk_write_space; - if (xprt->prot == IPPROTO_UDP) { - sk->sk_data_ready = udp_data_ready; - sk->sk_no_check = UDP_CSUM_NORCV; - xprt_set_connected(xprt); - } else { - tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ - sk->sk_data_ready = tcp_data_ready; - sk->sk_state_change = tcp_state_change; - xprt_clear_connected(xprt); - } - sk->sk_write_space = xprt_write_space; - - /* Reset to new socket */ - xprt->sock = sock; - xprt->inet = sk; - write_unlock_bh(&sk->sk_callback_lock); - - return; -} - -/* - * Set socket buffer length - */ -void -xprt_sock_setbufsize(struct rpc_xprt *xprt) -{ - struct sock *sk = xprt->inet; - - if (xprt->stream) - return; - if (xprt->rcvsize) { - sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; - } - if (xprt->sndsize) { - sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; - sk->sk_write_space(sk); - } -} - -/* - * Datastream sockets are created here, but xprt_connect will create - * and connect stream sockets. - */ -static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) -{ - struct socket *sock; - int type, err; - - dprintk("RPC: xprt_create_socket(%s %d)\n", - (proto == IPPROTO_UDP)? "udp" : "tcp", proto); - - type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; - - if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { - printk("RPC: can't create socket (%d).\n", -err); - return NULL; - } - - /* If the caller has the capability, bind to a reserved port */ - if (resvport && xprt_bindresvport(xprt, sock) < 0) { - printk("RPC: can't bind to reserved port.\n"); - goto failed; - } - - return sock; - -failed: - sock_release(sock); - return NULL; -} - /* * Create an RPC client transport given the protocol and peer address. */ @@ -1631,10 +807,6 @@ xprt_shutdown(struct rpc_xprt *xprt) rpc_wake_up(&xprt->backlog); wake_up(&xprt->cong_wait); del_timer_sync(&xprt->timer); - - /* synchronously wait for connect worker to finish */ - cancel_delayed_work(&xprt->sock_connect); - flush_scheduled_work(); } /* @@ -1655,9 +827,7 @@ xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); xprt_shutdown(xprt); - xprt_disconnect(xprt); - xprt_close(xprt); - kfree(xprt->slot); + xprt->ops->destroy(xprt); kfree(xprt); return 0; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c new file mode 100644 index 000000000000..fa1180ac4823 --- /dev/null +++ b/net/sunrpc/xprtsock.c @@ -0,0 +1,1021 @@ +/* + * linux/net/sunrpc/xprtsock.c + * + * Client-side transport implementation for sockets. + * + * TCP callback races fixes (C) 1998 Red Hat Software + * TCP send fixes (C) 1998 Red Hat Software + * TCP NFS related read + write fixes + * (C) 1999 Dave Airlie, University of Limerick, Ireland + * + * Rewrite of larges part of the code in order to stabilize TCP stuff. + * Fix behaviour when socket buffer is full. + * (C) 1999 Trond Myklebust + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# undef RPC_DEBUG_DATA +# define RPCDBG_FACILITY RPCDBG_XPRT +#endif + +#define XPRT_MAX_RESVPORT (800) + +#ifdef RPC_DEBUG_DATA +/* + * Print the buffer contents (first 128 bytes only--just enough for + * diropres return). + */ +static void +xprt_pktdump(char *msg, u32 *packet, unsigned int count) +{ + u8 *buf = (u8 *) packet; + int j; + + dprintk("RPC: %s\n", msg); + for (j = 0; j < count && j < 128; j += 4) { + if (!(j & 31)) { + if (j) + dprintk("\n"); + dprintk("0x%04x ", j); + } + dprintk("%02x%02x%02x%02x ", + buf[j], buf[j+1], buf[j+2], buf[j+3]); + } + dprintk("\n"); +} +#else +static inline void +xprt_pktdump(char *msg, u32 *packet, unsigned int count) +{ + /* NOP */ +} +#endif + +/* + * Look up RPC transport given an INET socket + */ +static inline struct rpc_xprt * +xprt_from_sock(struct sock *sk) +{ + return (struct rpc_xprt *) sk->sk_user_data; +} + +static int +xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, + struct xdr_buf *xdr, unsigned int base, int msgflags) +{ + struct page **ppage = xdr->pages; + unsigned int len, pglen = xdr->page_len; + int err, ret = 0; + ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); + + len = xdr->head[0].iov_len; + if (base < len || (addr != NULL && base == 0)) { + struct kvec iov = { + .iov_base = xdr->head[0].iov_base + base, + .iov_len = len - base, + }; + struct msghdr msg = { + .msg_name = addr, + .msg_namelen = addrlen, + .msg_flags = msgflags, + }; + if (xdr->len > len) + msg.msg_flags |= MSG_MORE; + + if (iov.iov_len != 0) + err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); + else + err = kernel_sendmsg(sock, &msg, NULL, 0, 0); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + if (err != iov.iov_len) + goto out; + base = 0; + } else + base -= len; + + if (pglen == 0) + goto copy_tail; + if (base >= pglen) { + base -= pglen; + goto copy_tail; + } + if (base || xdr->page_base) { + pglen -= base; + base += xdr->page_base; + ppage += base >> PAGE_CACHE_SHIFT; + base &= ~PAGE_CACHE_MASK; + } + + sendpage = sock->ops->sendpage ? : sock_no_sendpage; + do { + int flags = msgflags; + + len = PAGE_CACHE_SIZE; + if (base) + len -= base; + if (pglen < len) + len = pglen; + + if (pglen != len || xdr->tail[0].iov_len != 0) + flags |= MSG_MORE; + + /* Hmm... We might be dealing with highmem pages */ + if (PageHighMem(*ppage)) + sendpage = sock_no_sendpage; + err = sendpage(sock, *ppage, base, len, flags); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + if (err != len) + goto out; + base = 0; + ppage++; + } while ((pglen -= len) != 0); +copy_tail: + len = xdr->tail[0].iov_len; + if (base < len) { + struct kvec iov = { + .iov_base = xdr->tail[0].iov_base + base, + .iov_len = len - base, + }; + struct msghdr msg = { + .msg_flags = msgflags, + }; + err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + } +out: + return ret; +} + +/* + * Write data to socket. + */ +static inline int +xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + struct socket *sock = xprt->sock; + struct xdr_buf *xdr = &req->rq_snd_buf; + struct sockaddr *addr = NULL; + int addrlen = 0; + unsigned int skip; + int result; + + if (!sock) + return -ENOTCONN; + + xprt_pktdump("packet data:", + req->rq_svec->iov_base, + req->rq_svec->iov_len); + + /* For UDP, we need to provide an address */ + if (!xprt->stream) { + addr = (struct sockaddr *) &xprt->addr; + addrlen = sizeof(xprt->addr); + } + /* Dont repeat bytes */ + skip = req->rq_bytes_sent; + + clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); + + dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); + + if (result >= 0) + return result; + + switch (result) { + case -ECONNREFUSED: + /* When the server has died, an ICMP port unreachable message + * prompts ECONNREFUSED. + */ + case -EAGAIN: + break; + case -ECONNRESET: + case -ENOTCONN: + case -EPIPE: + /* connection broken */ + if (xprt->stream) + result = -ENOTCONN; + break; + default: + printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); + } + return result; +} + +static int +xprt_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + int status, retry = 0; + + /* set up everything as needed. */ + /* Write the record marker */ + if (xprt->stream) { + u32 *marker = req->rq_svec[0].iov_base; + + *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); + } + + /* Continue transmitting the packet/record. We must be careful + * to cope with writespace callbacks arriving _after_ we have + * called xprt_sendmsg(). + */ + while (1) { + req->rq_xtime = jiffies; + status = xprt_sendmsg(xprt, req); + + if (status < 0) + break; + + if (xprt->stream) { + req->rq_bytes_sent += status; + + /* If we've sent the entire packet, immediately + * reset the count of bytes sent. */ + if (req->rq_bytes_sent >= req->rq_slen) { + req->rq_bytes_sent = 0; + return 0; + } + } else { + if (status >= req->rq_slen) + return 0; + status = -EAGAIN; + break; + } + + dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", + task->tk_pid, req->rq_slen - req->rq_bytes_sent, + req->rq_slen); + + status = -EAGAIN; + if (retry++ > 50) + break; + } + + if (status == -EAGAIN) { + if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { + /* Protect against races with xprt_write_space */ + spin_lock_bh(&xprt->sock_lock); + /* Don't race with disconnect */ + if (!xprt_connected(xprt)) + task->tk_status = -ENOTCONN; + else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { + task->tk_timeout = req->rq_timeout; + rpc_sleep_on(&xprt->pending, task, NULL, NULL); + } + spin_unlock_bh(&xprt->sock_lock); + return status; + } + /* Keep holding the socket if it is blocked */ + rpc_delay(task, HZ>>4); + } + return status; +} + +/* + * Close down a transport socket + */ +static void +xprt_close(struct rpc_xprt *xprt) +{ + struct socket *sock = xprt->sock; + struct sock *sk = xprt->inet; + + if (!sk) + return; + + write_lock_bh(&sk->sk_callback_lock); + xprt->inet = NULL; + xprt->sock = NULL; + + sk->sk_user_data = NULL; + sk->sk_data_ready = xprt->old_data_ready; + sk->sk_state_change = xprt->old_state_change; + sk->sk_write_space = xprt->old_write_space; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_no_check = 0; + + sock_release(sock); +} + +static void xprt_socket_destroy(struct rpc_xprt *xprt) +{ + cancel_delayed_work(&xprt->sock_connect); + flush_scheduled_work(); + + xprt_disconnect(xprt); + xprt_close(xprt); + kfree(xprt->slot); +} + +/* + * Input handler for RPC replies. Called from a bottom half and hence + * atomic. + */ +static void +udp_data_ready(struct sock *sk, int len) +{ + struct rpc_task *task; + struct rpc_xprt *xprt; + struct rpc_rqst *rovr; + struct sk_buff *skb; + int err, repsize, copied; + u32 _xid, *xp; + + read_lock(&sk->sk_callback_lock); + dprintk("RPC: udp_data_ready...\n"); + if (!(xprt = xprt_from_sock(sk))) { + printk("RPC: udp_data_ready request not found!\n"); + goto out; + } + + dprintk("RPC: udp_data_ready client %p\n", xprt); + + if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) + goto out; + + if (xprt->shutdown) + goto dropit; + + repsize = skb->len - sizeof(struct udphdr); + if (repsize < 4) { + printk("RPC: impossible RPC reply size %d!\n", repsize); + goto dropit; + } + + /* Copy the XID from the skb... */ + xp = skb_header_pointer(skb, sizeof(struct udphdr), + sizeof(_xid), &_xid); + if (xp == NULL) + goto dropit; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->sock_lock); + rovr = xprt_lookup_rqst(xprt, *xp); + if (!rovr) + goto out_unlock; + task = rovr->rq_task; + + dprintk("RPC: %4d received reply\n", task->tk_pid); + + if ((copied = rovr->rq_private_buf.buflen) > repsize) + copied = repsize; + + /* Suck it into the iovec, verify checksum if not done by hw. */ + if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) + goto out_unlock; + + /* Something worked... */ + dst_confirm(skb->dst); + + xprt_complete_rqst(xprt, rovr, copied); + + out_unlock: + spin_unlock(&xprt->sock_lock); + dropit: + skb_free_datagram(sk, skb); + out: + read_unlock(&sk->sk_callback_lock); +} + +/* + * Copy from an skb into memory and shrink the skb. + */ +static inline size_t +tcp_copy_data(skb_reader_t *desc, void *p, size_t len) +{ + if (len > desc->count) + len = desc->count; + if (skb_copy_bits(desc->skb, desc->offset, p, len)) { + dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", + len, desc->count); + return 0; + } + desc->offset += len; + desc->count -= len; + dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", + len, desc->count); + return len; +} + +/* + * TCP read fragment marker + */ +static inline void +tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) +{ + size_t len, used; + char *p; + + p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; + len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; + used = tcp_copy_data(desc, p, len); + xprt->tcp_offset += used; + if (used != len) + return; + xprt->tcp_reclen = ntohl(xprt->tcp_recm); + if (xprt->tcp_reclen & 0x80000000) + xprt->tcp_flags |= XPRT_LAST_FRAG; + else + xprt->tcp_flags &= ~XPRT_LAST_FRAG; + xprt->tcp_reclen &= 0x7fffffff; + xprt->tcp_flags &= ~XPRT_COPY_RECM; + xprt->tcp_offset = 0; + /* Sanity check of the record length */ + if (xprt->tcp_reclen < 4) { + printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); + xprt_disconnect(xprt); + } + dprintk("RPC: reading TCP record fragment of length %d\n", + xprt->tcp_reclen); +} + +static void +tcp_check_recm(struct rpc_xprt *xprt) +{ + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); + if (xprt->tcp_offset == xprt->tcp_reclen) { + xprt->tcp_flags |= XPRT_COPY_RECM; + xprt->tcp_offset = 0; + if (xprt->tcp_flags & XPRT_LAST_FRAG) { + xprt->tcp_flags &= ~XPRT_COPY_DATA; + xprt->tcp_flags |= XPRT_COPY_XID; + xprt->tcp_copied = 0; + } + } +} + +/* + * TCP read xid + */ +static inline void +tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) +{ + size_t len, used; + char *p; + + len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; + dprintk("RPC: reading XID (%Zu bytes)\n", len); + p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; + used = tcp_copy_data(desc, p, len); + xprt->tcp_offset += used; + if (used != len) + return; + xprt->tcp_flags &= ~XPRT_COPY_XID; + xprt->tcp_flags |= XPRT_COPY_DATA; + xprt->tcp_copied = 4; + dprintk("RPC: reading reply for XID %08x\n", + ntohl(xprt->tcp_xid)); + tcp_check_recm(xprt); +} + +/* + * TCP read and complete request + */ +static inline void +tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) +{ + struct rpc_rqst *req; + struct xdr_buf *rcvbuf; + size_t len; + ssize_t r; + + /* Find and lock the request corresponding to this xid */ + spin_lock(&xprt->sock_lock); + req = xprt_lookup_rqst(xprt, xprt->tcp_xid); + if (!req) { + xprt->tcp_flags &= ~XPRT_COPY_DATA; + dprintk("RPC: XID %08x request not found!\n", + ntohl(xprt->tcp_xid)); + spin_unlock(&xprt->sock_lock); + return; + } + + rcvbuf = &req->rq_private_buf; + len = desc->count; + if (len > xprt->tcp_reclen - xprt->tcp_offset) { + skb_reader_t my_desc; + + len = xprt->tcp_reclen - xprt->tcp_offset; + memcpy(&my_desc, desc, sizeof(my_desc)); + my_desc.count = len; + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + &my_desc, tcp_copy_data); + desc->count -= r; + desc->offset += r; + } else + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + desc, tcp_copy_data); + + if (r > 0) { + xprt->tcp_copied += r; + xprt->tcp_offset += r; + } + if (r != len) { + /* Error when copying to the receive buffer, + * usually because we weren't able to allocate + * additional buffer pages. All we can do now + * is turn off XPRT_COPY_DATA, so the request + * will not receive any additional updates, + * and time out. + * Any remaining data from this record will + * be discarded. + */ + xprt->tcp_flags &= ~XPRT_COPY_DATA; + dprintk("RPC: XID %08x truncated request\n", + ntohl(xprt->tcp_xid)); + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); + goto out; + } + + dprintk("RPC: XID %08x read %Zd bytes\n", + ntohl(xprt->tcp_xid), r); + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); + + if (xprt->tcp_copied == req->rq_private_buf.buflen) + xprt->tcp_flags &= ~XPRT_COPY_DATA; + else if (xprt->tcp_offset == xprt->tcp_reclen) { + if (xprt->tcp_flags & XPRT_LAST_FRAG) + xprt->tcp_flags &= ~XPRT_COPY_DATA; + } + +out: + if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { + dprintk("RPC: %4d received reply complete\n", + req->rq_task->tk_pid); + xprt_complete_rqst(xprt, req, xprt->tcp_copied); + } + spin_unlock(&xprt->sock_lock); + tcp_check_recm(xprt); +} + +/* + * TCP discard extra bytes from a short read + */ +static inline void +tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) +{ + size_t len; + + len = xprt->tcp_reclen - xprt->tcp_offset; + if (len > desc->count) + len = desc->count; + desc->count -= len; + desc->offset += len; + xprt->tcp_offset += len; + dprintk("RPC: discarded %Zu bytes\n", len); + tcp_check_recm(xprt); +} + +/* + * TCP record receive routine + * We first have to grab the record marker, then the XID, then the data. + */ +static int +tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, + unsigned int offset, size_t len) +{ + struct rpc_xprt *xprt = rd_desc->arg.data; + skb_reader_t desc = { + .skb = skb, + .offset = offset, + .count = len, + .csum = 0 + }; + + dprintk("RPC: tcp_data_recv\n"); + do { + /* Read in a new fragment marker if necessary */ + /* Can we ever really expect to get completely empty fragments? */ + if (xprt->tcp_flags & XPRT_COPY_RECM) { + tcp_read_fraghdr(xprt, &desc); + continue; + } + /* Read in the xid if necessary */ + if (xprt->tcp_flags & XPRT_COPY_XID) { + tcp_read_xid(xprt, &desc); + continue; + } + /* Read in the request data */ + if (xprt->tcp_flags & XPRT_COPY_DATA) { + tcp_read_request(xprt, &desc); + continue; + } + /* Skip over any trailing bytes on short reads */ + tcp_read_discard(xprt, &desc); + } while (desc.count); + dprintk("RPC: tcp_data_recv done\n"); + return len - desc.count; +} + +static void tcp_data_ready(struct sock *sk, int bytes) +{ + struct rpc_xprt *xprt; + read_descriptor_t rd_desc; + + read_lock(&sk->sk_callback_lock); + dprintk("RPC: tcp_data_ready...\n"); + if (!(xprt = xprt_from_sock(sk))) { + printk("RPC: tcp_data_ready socket info not found!\n"); + goto out; + } + if (xprt->shutdown) + goto out; + + /* We use rd_desc to pass struct xprt to tcp_data_recv */ + rd_desc.arg.data = xprt; + rd_desc.count = 65536; + tcp_read_sock(sk, &rd_desc, tcp_data_recv); +out: + read_unlock(&sk->sk_callback_lock); +} + +static void +tcp_state_change(struct sock *sk) +{ + struct rpc_xprt *xprt; + + read_lock(&sk->sk_callback_lock); + if (!(xprt = xprt_from_sock(sk))) + goto out; + dprintk("RPC: tcp_state_change client %p...\n", xprt); + dprintk("RPC: state %x conn %d dead %d zapped %d\n", + sk->sk_state, xprt_connected(xprt), + sock_flag(sk, SOCK_DEAD), + sock_flag(sk, SOCK_ZAPPED)); + + switch (sk->sk_state) { + case TCP_ESTABLISHED: + spin_lock_bh(&xprt->sock_lock); + if (!xprt_test_and_set_connected(xprt)) { + /* Reset TCP record info */ + xprt->tcp_offset = 0; + xprt->tcp_reclen = 0; + xprt->tcp_copied = 0; + xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; + rpc_wake_up(&xprt->pending); + } + spin_unlock_bh(&xprt->sock_lock); + break; + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + default: + xprt_disconnect(xprt); + break; + } + out: + read_unlock(&sk->sk_callback_lock); +} + +/* + * Called when more output buffer space is available for this socket. + * We try not to wake our writers until they can make "significant" + * progress, otherwise we'll waste resources thrashing sock_sendmsg + * with a bunch of small requests. + */ +static void +xprt_write_space(struct sock *sk) +{ + struct rpc_xprt *xprt; + struct socket *sock; + + read_lock(&sk->sk_callback_lock); + if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) + goto out; + if (xprt->shutdown) + goto out; + + /* Wait until we have enough socket memory */ + if (xprt->stream) { + /* from net/core/stream.c:sk_stream_write_space */ + if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) + goto out; + } else { + /* from net/core/sock.c:sock_def_write_space */ + if (!sock_writeable(sk)) + goto out; + } + + if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) + goto out; + + spin_lock_bh(&xprt->sock_lock); + if (xprt->snd_task) + rpc_wake_up_task(xprt->snd_task); + spin_unlock_bh(&xprt->sock_lock); +out: + read_unlock(&sk->sk_callback_lock); +} + +/* + * Set socket buffer length + */ +static void +xprt_sock_setbufsize(struct rpc_xprt *xprt) +{ + struct sock *sk = xprt->inet; + + if (xprt->stream) + return; + if (xprt->rcvsize) { + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; + } + if (xprt->sndsize) { + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; + sk->sk_write_space(sk); + } +} + +/* + * Bind to a reserved port + */ +static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sockaddr_in myaddr = { + .sin_family = AF_INET, + }; + int err, port; + + /* Were we already bound to a given port? Try to reuse it */ + port = xprt->port; + do { + myaddr.sin_port = htons(port); + err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (err == 0) { + xprt->port = port; + return 0; + } + if (--port == 0) + port = XPRT_MAX_RESVPORT; + } while (err == -EADDRINUSE && port != xprt->port); + + printk("RPC: Can't bind to reserved port (%d).\n", -err); + return err; +} + +static void +xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (xprt->inet) + return; + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = xprt; + xprt->old_data_ready = sk->sk_data_ready; + xprt->old_state_change = sk->sk_state_change; + xprt->old_write_space = sk->sk_write_space; + if (xprt->prot == IPPROTO_UDP) { + sk->sk_data_ready = udp_data_ready; + sk->sk_no_check = UDP_CSUM_NORCV; + xprt_set_connected(xprt); + } else { + tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ + sk->sk_data_ready = tcp_data_ready; + sk->sk_state_change = tcp_state_change; + xprt_clear_connected(xprt); + } + sk->sk_write_space = xprt_write_space; + + /* Reset to new socket */ + xprt->sock = sock; + xprt->inet = sk; + write_unlock_bh(&sk->sk_callback_lock); + + return; +} + +/* + * Datastream sockets are created here, but xprt_connect will create + * and connect stream sockets. + */ +static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) +{ + struct socket *sock; + int type, err; + + dprintk("RPC: xprt_create_socket(%s %d)\n", + (proto == IPPROTO_UDP)? "udp" : "tcp", proto); + + type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; + + if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { + printk("RPC: can't create socket (%d).\n", -err); + return NULL; + } + + /* If the caller has the capability, bind to a reserved port */ + if (resvport && xprt_bindresvport(xprt, sock) < 0) { + printk("RPC: can't bind to reserved port.\n"); + goto failed; + } + + return sock; + +failed: + sock_release(sock); + return NULL; +} + +static void xprt_socket_connect(void *args) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + struct socket *sock = xprt->sock; + int status = -EIO; + + if (xprt->shutdown || xprt->addr.sin_port == 0) + goto out; + + /* + * Start by resetting any existing state + */ + xprt_close(xprt); + sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); + if (sock == NULL) { + /* couldn't create socket or bind to reserved port; + * this is likely a permanent error, so cause an abort */ + goto out; + } + xprt_bind_socket(xprt, sock); + xprt_sock_setbufsize(xprt); + + status = 0; + if (!xprt->stream) + goto out; + + /* + * Tell the socket layer to start connecting... + */ + status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, + sizeof(xprt->addr), O_NONBLOCK); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); + if (status < 0) { + switch (status) { + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + } + } +out: + if (status < 0) + rpc_wake_up_status(&xprt->pending, status); + else + rpc_wake_up(&xprt->pending); +out_clear: + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTING, &xprt->sockstate); + smp_mb__after_clear_bit(); +} + +static void +xprt_connect_sock(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { + /* Note: if we are here due to a dropped connection + * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ + * seconds + */ + if (xprt->sock != NULL) + schedule_delayed_work(&xprt->sock_connect, + RPC_REESTABLISH_TIMEOUT); + else { + schedule_work(&xprt->sock_connect); + /* flush_scheduled_work can sleep... */ + if (!RPC_IS_ASYNC(task)) + flush_scheduled_work(); + } + } +} + +/* + * Set default timeout parameters + */ +static void +xprt_default_timeout(struct rpc_timeout *to, int proto) +{ + if (proto == IPPROTO_UDP) + xprt_set_timeout(to, 5, 5 * HZ); + else + xprt_set_timeout(to, 2, 60 * HZ); +} + +static struct rpc_xprt_ops xprt_socket_ops = { + .set_buffer_size = xprt_sock_setbufsize, + .connect = xprt_connect_sock, + .send_request = xprt_send_request, + .close = xprt_close, + .destroy = xprt_socket_destroy, +}; + +extern unsigned int xprt_udp_slot_table_entries; +extern unsigned int xprt_tcp_slot_table_entries; + +int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) +{ + size_t slot_table_size; + + dprintk("RPC: setting up udp-ipv4 transport...\n"); + + xprt->max_reqs = xprt_udp_slot_table_entries; + slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); + xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); + if (xprt->slot == NULL) + return -ENOMEM; + memset(xprt->slot, 0, slot_table_size); + + xprt->prot = IPPROTO_UDP; + xprt->port = XPRT_MAX_RESVPORT; + xprt->stream = 0; + xprt->nocong = 0; + xprt->cwnd = RPC_INITCWND; + xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; + /* XXX: header size can vary due to auth type, IPv6, etc. */ + xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); + + INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); + + xprt->ops = &xprt_socket_ops; + + if (to) + xprt->timeout = *to; + else + xprt_default_timeout(to, xprt->prot); + + return 0; +} + +int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) +{ + size_t slot_table_size; + + dprintk("RPC: setting up tcp-ipv4 transport...\n"); + + xprt->max_reqs = xprt_tcp_slot_table_entries; + slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); + xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); + if (xprt->slot == NULL) + return -ENOMEM; + memset(xprt->slot, 0, slot_table_size); + + xprt->prot = IPPROTO_TCP; + xprt->port = XPRT_MAX_RESVPORT; + xprt->stream = 1; + xprt->nocong = 1; + xprt->cwnd = RPC_MAXCWND(xprt); + xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; + xprt->max_payload = (1U << 31) - 1; + + INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); + + xprt->ops = &xprt_socket_ops; + + if (to) + xprt->timeout = *to; + else + xprt_default_timeout(to, xprt->prot); + + return 0; +} -- cgit v1.2.3 From 9903cd1c27a1f30e8efea75e125be3b2002f7cb9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:26 -0400 Subject: [PATCH] RPC: transport switch function naming Introduce block header comments and a function naming convention to the socket transport implementation. Provide a debug setting for transports that is separate from RPCDBG_XPRT. Eliminate xprt_default_timeout(). Provide block comments for exposed interfaces in xprt.c, and eliminate the useless obvious comments. Convert printk's to dprintk's. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:04:04 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 1 + net/sunrpc/xprt.c | 147 +++++++------- net/sunrpc/xprtsock.c | 464 ++++++++++++++++++++++--------------------- 3 files changed, 311 insertions(+), 301 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index eadb31e3c198..42d299747956 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -32,6 +32,7 @@ #define RPCDBG_AUTH 0x0010 #define RPCDBG_PMAP 0x0020 #define RPCDBG_SCHED 0x0040 +#define RPCDBG_TRANS 0x0080 #define RPCDBG_SVCSOCK 0x0100 #define RPCDBG_SVCDSP 0x0200 #define RPCDBG_MISC 0x0400 diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4342acf4d1cd..589195e630ef 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -227,9 +227,6 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) xprt->cwnd = cwnd; } -/* - * Reset the major timeout value - */ static void xprt_reset_majortimeo(struct rpc_rqst *req) { struct rpc_timeout *to = &req->rq_xprt->timeout; @@ -244,8 +241,10 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req) req->rq_majortimeo += jiffies; } -/* - * Adjust timeout values etc for next retransmit +/** + * xprt_adjust_timeout - adjust timeout values for next retransmit + * @req: RPC request containing parameters to use for the adjustment + * */ int xprt_adjust_timeout(struct rpc_rqst *req) { @@ -291,8 +290,10 @@ xprt_socket_autoclose(void *args) xprt_release_write(xprt, NULL); } -/* - * Mark a transport as disconnected +/** + * xprt_disconnect - mark a transport as disconnected + * @xprt: transport to flag for disconnect + * */ void xprt_disconnect(struct rpc_xprt *xprt) { @@ -303,9 +304,6 @@ void xprt_disconnect(struct rpc_xprt *xprt) spin_unlock_bh(&xprt->sock_lock); } -/* - * Used to allow disconnection when we've been idle - */ static void xprt_init_autodisconnect(unsigned long data) { @@ -327,8 +325,9 @@ out_abort: spin_unlock(&xprt->sock_lock); } -/* - * Attempt to connect a TCP socket. +/** + * xprt_connect - schedule a transport connect operation + * @task: RPC task that is requesting the connect * */ void xprt_connect(struct rpc_task *task) @@ -361,11 +360,7 @@ void xprt_connect(struct rpc_task *task) return; } -/* - * We arrive here when awoken from waiting on connection establishment. - */ -static void -xprt_connect_status(struct rpc_task *task) +static void xprt_connect_status(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; @@ -404,8 +399,11 @@ xprt_connect_status(struct rpc_task *task) } } -/* - * Look up the RPC request corresponding to a reply, and then lock it. +/** + * xprt_lookup_rqst - find an RPC request corresponding to an XID + * @xprt: transport on which the original request was transmitted + * @xid: RPC XID of incoming reply + * */ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) { @@ -422,9 +420,12 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) return req; } -/* - * Complete reply received. - * The TCP code relies on us to remove the request from xprt->pending. +/** + * xprt_complete_rqst - called when reply processing is complete + * @xprt: controlling transport + * @req: RPC request that just completed + * @copied: actual number of bytes received from the transport + * */ void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) { @@ -498,12 +499,12 @@ out: spin_unlock(&xprt->sock_lock); } -/* - * Place the actual RPC call. - * We have to copy the iovec because sendmsg fiddles with its contents. +/** + * xprt_prepare_transmit - reserve the transport before sending a request + * @task: RPC task about to send a request + * */ -int -xprt_prepare_transmit(struct rpc_task *task) +int xprt_prepare_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; @@ -533,8 +534,13 @@ out_unlock: return err; } -void -xprt_transmit(struct rpc_task *task) +/** + * xprt_transmit - send an RPC request on a transport + * @task: controlling RPC task + * + * We have to copy the iovec because sendmsg fiddles with its contents. + */ +void xprt_transmit(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; @@ -604,11 +610,7 @@ xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->sock_lock); } -/* - * Reserve an RPC call slot. - */ -static inline void -do_xprt_reserve(struct rpc_task *task) +static inline void do_xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; @@ -628,8 +630,14 @@ do_xprt_reserve(struct rpc_task *task) rpc_sleep_on(&xprt->backlog, task, NULL, NULL); } -void -xprt_reserve(struct rpc_task *task) +/** + * xprt_reserve - allocate an RPC request slot + * @task: RPC task requesting a slot allocation + * + * If no more slots are available, place the task on the transport's + * backlog queue. + */ +void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; @@ -641,9 +649,6 @@ xprt_reserve(struct rpc_task *task) } } -/* - * Allocate a 'unique' XID - */ static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) { return xprt->xid++; @@ -654,11 +659,7 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt) get_random_bytes(&xprt->xid, sizeof(xprt->xid)); } -/* - * Initialize RPC request - */ -static void -xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) +static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) { struct rpc_rqst *req = task->tk_rqstp; @@ -670,11 +671,12 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req, ntohl(req->rq_xid)); } -/* - * Release an RPC call slot +/** + * xprt_release - release an RPC request slot + * @task: task which is finished with the slot + * */ -void -xprt_release(struct rpc_task *task) +void xprt_release(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; @@ -702,11 +704,14 @@ xprt_release(struct rpc_task *task) spin_unlock(&xprt->xprt_lock); } -/* - * Set constant timeout +/** + * xprt_set_timeout - set constant RPC timeout + * @to: RPC timeout parameters to set up + * @retr: number of retries + * @incr: amount of increase after each retry + * */ -void -xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) +void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) { to->to_initval = to->to_increment = incr; @@ -715,11 +720,7 @@ xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) to->to_exponential = 0; } -/* - * Initialize an RPC client - */ -static struct rpc_xprt * -xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) +static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) { int result; struct rpc_xprt *xprt; @@ -778,11 +779,14 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) return xprt; } -/* - * Create an RPC client transport given the protocol and peer address. +/** + * xprt_create_proto - create an RPC client transport + * @proto: requested transport protocol + * @sap: remote peer's address + * @to: timeout parameters for new transport + * */ -struct rpc_xprt * -xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) +struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) { struct rpc_xprt *xprt; @@ -794,11 +798,7 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) return xprt; } -/* - * Prepare for transport shutdown. - */ -static void -xprt_shutdown(struct rpc_xprt *xprt) +static void xprt_shutdown(struct rpc_xprt *xprt) { xprt->shutdown = 1; rpc_wake_up(&xprt->sending); @@ -809,21 +809,18 @@ xprt_shutdown(struct rpc_xprt *xprt) del_timer_sync(&xprt->timer); } -/* - * Clear the xprt backlog queue - */ -static int -xprt_clear_backlog(struct rpc_xprt *xprt) { +static int xprt_clear_backlog(struct rpc_xprt *xprt) { rpc_wake_up_next(&xprt->backlog); wake_up(&xprt->cong_wait); return 1; } -/* - * Destroy an RPC transport, killing off all requests. +/** + * xprt_destroy - destroy an RPC transport, killing off all requests. + * @xprt: transport to destroy + * */ -int -xprt_destroy(struct rpc_xprt *xprt) +int xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); xprt_shutdown(xprt); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fa1180ac4823..80222de3afa4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -33,23 +33,21 @@ #include #include +/* + * Maximum port number to use when requesting a reserved port. + */ +#define XS_MAX_RESVPORT (800U) + #ifdef RPC_DEBUG # undef RPC_DEBUG_DATA -# define RPCDBG_FACILITY RPCDBG_XPRT +# define RPCDBG_FACILITY RPCDBG_TRANS #endif -#define XPRT_MAX_RESVPORT (800) - #ifdef RPC_DEBUG_DATA -/* - * Print the buffer contents (first 128 bytes only--just enough for - * diropres return). - */ -static void -xprt_pktdump(char *msg, u32 *packet, unsigned int count) +static void xs_pktdump(char *msg, u32 *packet, unsigned int count) { - u8 *buf = (u8 *) packet; - int j; + u8 *buf = (u8 *) packet; + int j; dprintk("RPC: %s\n", msg); for (j = 0; j < count && j < 128; j += 4) { @@ -64,25 +62,22 @@ xprt_pktdump(char *msg, u32 *packet, unsigned int count) dprintk("\n"); } #else -static inline void -xprt_pktdump(char *msg, u32 *packet, unsigned int count) +static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) { /* NOP */ } #endif -/* - * Look up RPC transport given an INET socket +/** + * xs_sendpages - write pages directly to a socket + * @sock: socket to send on + * @addr: UDP only -- address of destination + * @addrlen: UDP only -- length of destination address + * @xdr: buffer containing this request + * @base: starting position in the buffer + * */ -static inline struct rpc_xprt * -xprt_from_sock(struct sock *sk) -{ - return (struct rpc_xprt *) sk->sk_user_data; -} - -static int -xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, - struct xdr_buf *xdr, unsigned int base, int msgflags) +static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, int msgflags) { struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; @@ -125,7 +120,7 @@ xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, } if (base || xdr->page_base) { pglen -= base; - base += xdr->page_base; + base += xdr->page_base; ppage += base >> PAGE_CACHE_SHIFT; base &= ~PAGE_CACHE_MASK; } @@ -176,23 +171,25 @@ out: return ret; } -/* - * Write data to socket. +/** + * xs_sendmsg - write an RPC request to a socket + * @xprt: generic transport + * @req: the RPC request to write + * */ -static inline int -xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) +static int xs_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) { - struct socket *sock = xprt->sock; - struct xdr_buf *xdr = &req->rq_snd_buf; + struct socket *sock = xprt->sock; + struct xdr_buf *xdr = &req->rq_snd_buf; struct sockaddr *addr = NULL; int addrlen = 0; - unsigned int skip; - int result; + unsigned int skip; + int result; if (!sock) return -ENOTCONN; - xprt_pktdump("packet data:", + xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); @@ -201,13 +198,13 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) addr = (struct sockaddr *) &xprt->addr; addrlen = sizeof(xprt->addr); } - /* Dont repeat bytes */ + /* Don't repeat bytes */ skip = req->rq_bytes_sent; clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); - result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); + result = xs_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); - dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); + dprintk("RPC: xs_sendmsg(%d) = %d\n", xdr->len - skip, result); if (result >= 0) return result; @@ -215,8 +212,7 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) switch (result) { case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message - * prompts ECONNREFUSED. - */ + * prompts ECONNREFUSED. */ case -EAGAIN: break; case -ECONNRESET: @@ -227,13 +223,25 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) result = -ENOTCONN; break; default: - printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); + break; } return result; } -static int -xprt_send_request(struct rpc_task *task) +/** + * xs_send_request - write an RPC request to a socket + * @task: address of RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * other: Some other error occured, the request was not sent + * + * XXX: In the case of soft timeouts, should we eventually give up + * if the socket is not able to make progress? + */ +static int xs_send_request(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; @@ -242,18 +250,18 @@ xprt_send_request(struct rpc_task *task) /* set up everything as needed. */ /* Write the record marker */ if (xprt->stream) { - u32 *marker = req->rq_svec[0].iov_base; + u32 *marker = req->rq_svec[0].iov_base; *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); } /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have - * called xprt_sendmsg(). + * called sendmsg(). */ while (1) { req->rq_xtime = jiffies; - status = xprt_sendmsg(xprt, req); + status = xs_sendmsg(xprt, req); if (status < 0) break; @@ -285,7 +293,7 @@ xprt_send_request(struct rpc_task *task) if (status == -EAGAIN) { if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { - /* Protect against races with xprt_write_space */ + /* Protect against races with xs_write_space */ spin_lock_bh(&xprt->sock_lock); /* Don't race with disconnect */ if (!xprt_connected(xprt)) @@ -303,65 +311,77 @@ xprt_send_request(struct rpc_task *task) return status; } -/* - * Close down a transport socket +/** + * xs_close - close a socket + * @xprt: transport + * */ -static void -xprt_close(struct rpc_xprt *xprt) +static void xs_close(struct rpc_xprt *xprt) { - struct socket *sock = xprt->sock; - struct sock *sk = xprt->inet; + struct socket *sock = xprt->sock; + struct sock *sk = xprt->inet; if (!sk) return; + dprintk("RPC: xs_close xprt %p\n", xprt); + write_lock_bh(&sk->sk_callback_lock); xprt->inet = NULL; xprt->sock = NULL; - sk->sk_user_data = NULL; - sk->sk_data_ready = xprt->old_data_ready; + sk->sk_user_data = NULL; + sk->sk_data_ready = xprt->old_data_ready; sk->sk_state_change = xprt->old_state_change; - sk->sk_write_space = xprt->old_write_space; + sk->sk_write_space = xprt->old_write_space; write_unlock_bh(&sk->sk_callback_lock); - sk->sk_no_check = 0; + sk->sk_no_check = 0; sock_release(sock); } -static void xprt_socket_destroy(struct rpc_xprt *xprt) +/** + * xs_destroy - prepare to shutdown a transport + * @xprt: doomed transport + * + */ +static void xs_destroy(struct rpc_xprt *xprt) { + dprintk("RPC: xs_destroy xprt %p\n", xprt); + cancel_delayed_work(&xprt->sock_connect); flush_scheduled_work(); xprt_disconnect(xprt); - xprt_close(xprt); + xs_close(xprt); kfree(xprt->slot); } -/* - * Input handler for RPC replies. Called from a bottom half and hence - * atomic. +static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) +{ + return (struct rpc_xprt *) sk->sk_user_data; +} + +/** + * xs_udp_data_ready - "data ready" callback for UDP sockets + * @sk: socket with data to read + * @len: how much data to read + * */ -static void -udp_data_ready(struct sock *sk, int len) +static void xs_udp_data_ready(struct sock *sk, int len) { - struct rpc_task *task; - struct rpc_xprt *xprt; + struct rpc_task *task; + struct rpc_xprt *xprt; struct rpc_rqst *rovr; - struct sk_buff *skb; + struct sk_buff *skb; int err, repsize, copied; u32 _xid, *xp; read_lock(&sk->sk_callback_lock); - dprintk("RPC: udp_data_ready...\n"); - if (!(xprt = xprt_from_sock(sk))) { - printk("RPC: udp_data_ready request not found!\n"); + dprintk("RPC: xs_udp_data_ready...\n"); + if (!(xprt = xprt_from_sock(sk))) goto out; - } - - dprintk("RPC: udp_data_ready client %p\n", xprt); if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) goto out; @@ -371,7 +391,7 @@ udp_data_ready(struct sock *sk, int len) repsize = skb->len - sizeof(struct udphdr); if (repsize < 4) { - printk("RPC: impossible RPC reply size %d!\n", repsize); + dprintk("RPC: impossible RPC reply size %d!\n", repsize); goto dropit; } @@ -410,11 +430,7 @@ udp_data_ready(struct sock *sk, int len) read_unlock(&sk->sk_callback_lock); } -/* - * Copy from an skb into memory and shrink the skb. - */ -static inline size_t -tcp_copy_data(skb_reader_t *desc, void *p, size_t len) +static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) { if (len > desc->count) len = desc->count; @@ -430,18 +446,14 @@ tcp_copy_data(skb_reader_t *desc, void *p, size_t len) return len; } -/* - * TCP read fragment marker - */ -static inline void -tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) +static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) { size_t len, used; char *p; p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; - used = tcp_copy_data(desc, p, len); + used = xs_tcp_copy_data(desc, p, len); xprt->tcp_offset += used; if (used != len) return; @@ -455,15 +467,15 @@ tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) xprt->tcp_offset = 0; /* Sanity check of the record length */ if (xprt->tcp_reclen < 4) { - printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); + dprintk("RPC: invalid TCP record fragment length\n"); xprt_disconnect(xprt); + return; } dprintk("RPC: reading TCP record fragment of length %d\n", xprt->tcp_reclen); } -static void -tcp_check_recm(struct rpc_xprt *xprt) +static void xs_tcp_check_recm(struct rpc_xprt *xprt) { dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); @@ -478,11 +490,7 @@ tcp_check_recm(struct rpc_xprt *xprt) } } -/* - * TCP read xid - */ -static inline void -tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) +static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) { size_t len, used; char *p; @@ -490,7 +498,7 @@ tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; dprintk("RPC: reading XID (%Zu bytes)\n", len); p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; - used = tcp_copy_data(desc, p, len); + used = xs_tcp_copy_data(desc, p, len); xprt->tcp_offset += used; if (used != len) return; @@ -499,14 +507,10 @@ tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) xprt->tcp_copied = 4; dprintk("RPC: reading reply for XID %08x\n", ntohl(xprt->tcp_xid)); - tcp_check_recm(xprt); + xs_tcp_check_recm(xprt); } -/* - * TCP read and complete request - */ -static inline void -tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) +static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) { struct rpc_rqst *req; struct xdr_buf *rcvbuf; @@ -533,12 +537,12 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) memcpy(&my_desc, desc, sizeof(my_desc)); my_desc.count = len; r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, - &my_desc, tcp_copy_data); + &my_desc, xs_tcp_copy_data); desc->count -= r; desc->offset += r; } else r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, - desc, tcp_copy_data); + desc, xs_tcp_copy_data); if (r > 0) { xprt->tcp_copied += r; @@ -581,14 +585,10 @@ out: xprt_complete_rqst(xprt, req, xprt->tcp_copied); } spin_unlock(&xprt->sock_lock); - tcp_check_recm(xprt); + xs_tcp_check_recm(xprt); } -/* - * TCP discard extra bytes from a short read - */ -static inline void -tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) +static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) { size_t len; @@ -599,16 +599,10 @@ tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) desc->offset += len; xprt->tcp_offset += len; dprintk("RPC: discarded %Zu bytes\n", len); - tcp_check_recm(xprt); + xs_tcp_check_recm(xprt); } -/* - * TCP record receive routine - * We first have to grab the record marker, then the XID, then the data. - */ -static int -tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, - unsigned int offset, size_t len) +static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct rpc_xprt *xprt = rd_desc->arg.data; skb_reader_t desc = { @@ -616,64 +610,72 @@ tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, .offset = offset, .count = len, .csum = 0 - }; + }; - dprintk("RPC: tcp_data_recv\n"); + dprintk("RPC: xs_tcp_data_recv started\n"); do { /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? */ if (xprt->tcp_flags & XPRT_COPY_RECM) { - tcp_read_fraghdr(xprt, &desc); + xs_tcp_read_fraghdr(xprt, &desc); continue; } /* Read in the xid if necessary */ if (xprt->tcp_flags & XPRT_COPY_XID) { - tcp_read_xid(xprt, &desc); + xs_tcp_read_xid(xprt, &desc); continue; } /* Read in the request data */ if (xprt->tcp_flags & XPRT_COPY_DATA) { - tcp_read_request(xprt, &desc); + xs_tcp_read_request(xprt, &desc); continue; } /* Skip over any trailing bytes on short reads */ - tcp_read_discard(xprt, &desc); + xs_tcp_read_discard(xprt, &desc); } while (desc.count); - dprintk("RPC: tcp_data_recv done\n"); + dprintk("RPC: xs_tcp_data_recv done\n"); return len - desc.count; } -static void tcp_data_ready(struct sock *sk, int bytes) +/** + * xs_tcp_data_ready - "data ready" callback for TCP sockets + * @sk: socket with data to read + * @bytes: how much data to read + * + */ +static void xs_tcp_data_ready(struct sock *sk, int bytes) { struct rpc_xprt *xprt; read_descriptor_t rd_desc; read_lock(&sk->sk_callback_lock); - dprintk("RPC: tcp_data_ready...\n"); - if (!(xprt = xprt_from_sock(sk))) { - printk("RPC: tcp_data_ready socket info not found!\n"); + dprintk("RPC: xs_tcp_data_ready...\n"); + if (!(xprt = xprt_from_sock(sk))) goto out; - } if (xprt->shutdown) goto out; - /* We use rd_desc to pass struct xprt to tcp_data_recv */ + /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ rd_desc.arg.data = xprt; rd_desc.count = 65536; - tcp_read_sock(sk, &rd_desc, tcp_data_recv); + tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); out: read_unlock(&sk->sk_callback_lock); } -static void -tcp_state_change(struct sock *sk) +/** + * xs_tcp_state_change - callback to handle TCP socket state changes + * @sk: socket whose state has changed + * + */ +static void xs_tcp_state_change(struct sock *sk) { - struct rpc_xprt *xprt; + struct rpc_xprt *xprt; read_lock(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; - dprintk("RPC: tcp_state_change client %p...\n", xprt); + dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); dprintk("RPC: state %x conn %d dead %d zapped %d\n", sk->sk_state, xprt_connected(xprt), sock_flag(sk, SOCK_DEAD), @@ -703,17 +705,20 @@ tcp_state_change(struct sock *sk) read_unlock(&sk->sk_callback_lock); } -/* +/** + * xs_write_space - callback invoked when socket buffer space becomes + * available + * @sk: socket whose state has changed + * * Called when more output buffer space is available for this socket. * We try not to wake our writers until they can make "significant" * progress, otherwise we'll waste resources thrashing sock_sendmsg * with a bunch of small requests. */ -static void -xprt_write_space(struct sock *sk) +static void xs_write_space(struct sock *sk) { - struct rpc_xprt *xprt; - struct socket *sock; + struct rpc_xprt *xprt; + struct socket *sock; read_lock(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) @@ -743,11 +748,15 @@ out: read_unlock(&sk->sk_callback_lock); } -/* - * Set socket buffer length +/** + * xs_set_buffer_size - set send and receive limits + * @xprt: generic transport + * + * Set socket send and receive limits based on the + * sndsize and rcvsize fields in the generic transport + * structure. This applies only to UDP sockets. */ -static void -xprt_sock_setbufsize(struct rpc_xprt *xprt) +static void xs_set_buffer_size(struct rpc_xprt *xprt) { struct sock *sk = xprt->inet; @@ -764,15 +773,12 @@ xprt_sock_setbufsize(struct rpc_xprt *xprt) } } -/* - * Bind to a reserved port - */ -static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) +static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, }; - int err, port; + int err, port; /* Were we already bound to a given port? Try to reuse it */ port = xprt->port; @@ -782,20 +788,47 @@ static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) sizeof(myaddr)); if (err == 0) { xprt->port = port; + dprintk("RPC: xs_bindresvport bound to port %u\n", + port); return 0; } if (--port == 0) - port = XPRT_MAX_RESVPORT; + port = XS_MAX_RESVPORT; } while (err == -EADDRINUSE && port != xprt->port); - printk("RPC: Can't bind to reserved port (%d).\n", -err); + dprintk("RPC: can't bind to reserved port (%d).\n", -err); return err; } -static void -xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) +static struct socket *xs_create(struct rpc_xprt *xprt, int proto, int resvport) { - struct sock *sk = sock->sk; + struct socket *sock; + int type, err; + + dprintk("RPC: xs_create(%s %d)\n", + (proto == IPPROTO_UDP)? "udp" : "tcp", proto); + + type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; + + if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { + dprintk("RPC: can't create socket (%d).\n", -err); + return NULL; + } + + /* If the caller has the capability, bind to a reserved port */ + if (resvport && xs_bindresvport(xprt, sock) < 0) + goto failed; + + return sock; + +failed: + sock_release(sock); + return NULL; +} + +static void xs_bind(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock *sk = sock->sk; if (xprt->inet) return; @@ -806,16 +839,16 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) xprt->old_state_change = sk->sk_state_change; xprt->old_write_space = sk->sk_write_space; if (xprt->prot == IPPROTO_UDP) { - sk->sk_data_ready = udp_data_ready; + sk->sk_data_ready = xs_udp_data_ready; sk->sk_no_check = UDP_CSUM_NORCV; xprt_set_connected(xprt); } else { tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ - sk->sk_data_ready = tcp_data_ready; - sk->sk_state_change = tcp_state_change; + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; xprt_clear_connected(xprt); } - sk->sk_write_space = xprt_write_space; + sk->sk_write_space = xs_write_space; /* Reset to new socket */ xprt->sock = sock; @@ -825,39 +858,13 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) return; } -/* - * Datastream sockets are created here, but xprt_connect will create - * and connect stream sockets. +/** + * xs_connect_worker - try to connect a socket to a remote endpoint + * @args: RPC transport to connect + * + * Invoked by a work queue tasklet. */ -static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) -{ - struct socket *sock; - int type, err; - - dprintk("RPC: xprt_create_socket(%s %d)\n", - (proto == IPPROTO_UDP)? "udp" : "tcp", proto); - - type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; - - if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { - printk("RPC: can't create socket (%d).\n", -err); - return NULL; - } - - /* If the caller has the capability, bind to a reserved port */ - if (resvport && xprt_bindresvport(xprt, sock) < 0) { - printk("RPC: can't bind to reserved port.\n"); - goto failed; - } - - return sock; - -failed: - sock_release(sock); - return NULL; -} - -static void xprt_socket_connect(void *args) +static void xs_connect_worker(void *args) { struct rpc_xprt *xprt = (struct rpc_xprt *)args; struct socket *sock = xprt->sock; @@ -866,18 +873,20 @@ static void xprt_socket_connect(void *args) if (xprt->shutdown || xprt->addr.sin_port == 0) goto out; + dprintk("RPC: xs_connect_worker xprt %p\n", xprt); + /* * Start by resetting any existing state */ - xprt_close(xprt); - sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); + xs_close(xprt); + sock = xs_create(xprt, xprt->prot, xprt->resvport); if (sock == NULL) { /* couldn't create socket or bind to reserved port; * this is likely a permanent error, so cause an abort */ goto out; } - xprt_bind_socket(xprt, sock); - xprt_sock_setbufsize(xprt); + xs_bind(xprt, sock); + xs_set_buffer_size(xprt); status = 0; if (!xprt->stream) @@ -908,20 +917,23 @@ out_clear: smp_mb__after_clear_bit(); } -static void -xprt_connect_sock(struct rpc_task *task) +/** + * xs_connect - connect a socket to a remote endpoint + * @task: address of RPC task that manages state of connect request + * + * TCP: If the remote end dropped the connection, delay reconnecting. + */ +static void xs_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { - /* Note: if we are here due to a dropped connection - * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ - * seconds - */ - if (xprt->sock != NULL) + if (xprt->sock != NULL) { + dprintk("RPC: xs_connect delayed xprt %p\n", xprt); schedule_delayed_work(&xprt->sock_connect, RPC_REESTABLISH_TIMEOUT); - else { + } else { + dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); schedule_work(&xprt->sock_connect); /* flush_scheduled_work can sleep... */ if (!RPC_IS_ASYNC(task)) @@ -930,29 +942,23 @@ xprt_connect_sock(struct rpc_task *task) } } -/* - * Set default timeout parameters - */ -static void -xprt_default_timeout(struct rpc_timeout *to, int proto) -{ - if (proto == IPPROTO_UDP) - xprt_set_timeout(to, 5, 5 * HZ); - else - xprt_set_timeout(to, 2, 60 * HZ); -} - -static struct rpc_xprt_ops xprt_socket_ops = { - .set_buffer_size = xprt_sock_setbufsize, - .connect = xprt_connect_sock, - .send_request = xprt_send_request, - .close = xprt_close, - .destroy = xprt_socket_destroy, +static struct rpc_xprt_ops xs_ops = { + .set_buffer_size = xs_set_buffer_size, + .connect = xs_connect, + .send_request = xs_send_request, + .close = xs_close, + .destroy = xs_destroy, }; extern unsigned int xprt_udp_slot_table_entries; extern unsigned int xprt_tcp_slot_table_entries; +/** + * xs_setup_udp - Set up transport to use a UDP socket + * @xprt: transport to set up + * @to: timeout parameters + * + */ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; @@ -967,7 +973,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) memset(xprt->slot, 0, slot_table_size); xprt->prot = IPPROTO_UDP; - xprt->port = XPRT_MAX_RESVPORT; + xprt->port = XS_MAX_RESVPORT; xprt->stream = 0; xprt->nocong = 0; xprt->cwnd = RPC_INITCWND; @@ -975,18 +981,24 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); + INIT_WORK(&xprt->sock_connect, xs_connect_worker, xprt); - xprt->ops = &xprt_socket_ops; + xprt->ops = &xs_ops; if (to) xprt->timeout = *to; else - xprt_default_timeout(to, xprt->prot); + xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); return 0; } +/** + * xs_setup_tcp - Set up transport to use a TCP socket + * @xprt: transport to set up + * @to: timeout parameters + * + */ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; @@ -1001,21 +1013,21 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) memset(xprt->slot, 0, slot_table_size); xprt->prot = IPPROTO_TCP; - xprt->port = XPRT_MAX_RESVPORT; + xprt->port = XS_MAX_RESVPORT; xprt->stream = 1; xprt->nocong = 1; xprt->cwnd = RPC_MAXCWND(xprt); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = (1U << 31) - 1; - INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); + INIT_WORK(&xprt->sock_connect, xs_connect_worker, xprt); - xprt->ops = &xprt_socket_ops; + xprt->ops = &xs_ops; if (to) xprt->timeout = *to; else - xprt_default_timeout(to, xprt->prot); + xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); return 0; } -- cgit v1.2.3 From 4a0f8c04f2ece949d54a0c4fd7490259cf23a58a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:32 -0400 Subject: [PATCH] RPC: Rename sock_lock Clean-up: replace a name reference to sockets in the generic parts of the RPC client by renaming sock_lock in the rpc_xprt structure. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:05:00 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- net/sunrpc/xprt.c | 44 ++++++++++++++++++++++---------------------- net/sunrpc/xprtsock.c | 22 +++++++++++----------- 3 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d82b47ab73cb..c4f903f0e17c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -198,7 +198,7 @@ struct rpc_xprt { /* * Send stuff */ - spinlock_t sock_lock; /* lock socket info */ + spinlock_t transport_lock; /* lock transport info */ spinlock_t xprt_lock; /* lock xprt info */ struct rpc_task * snd_task; /* Task blocked in send */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 589195e630ef..1f0da8c1a3b0 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -106,9 +106,9 @@ xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { int retval; - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); retval = __xprt_lock_write(xprt, task); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); return retval; } @@ -161,9 +161,9 @@ __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); __xprt_release_write(xprt, task); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); } /* @@ -266,9 +266,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req) req->rq_retries = 0; xprt_reset_majortimeo(req); /* Reset the RTT counters == "slow start" */ - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); pprintk("RPC: %lu timeout\n", jiffies); status = -ETIMEDOUT; } @@ -298,10 +298,10 @@ xprt_socket_autoclose(void *args) void xprt_disconnect(struct rpc_xprt *xprt) { dprintk("RPC: disconnected transport %p\n", xprt); - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); xprt_clear_connected(xprt); rpc_wake_up_status(&xprt->pending, -ENOTCONN); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); } static void @@ -309,12 +309,12 @@ xprt_init_autodisconnect(unsigned long data) { struct rpc_xprt *xprt = (struct rpc_xprt *)data; - spin_lock(&xprt->sock_lock); + spin_lock(&xprt->transport_lock); if (!list_empty(&xprt->recv) || xprt->shutdown) goto out_abort; if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) goto out_abort; - spin_unlock(&xprt->sock_lock); + spin_unlock(&xprt->transport_lock); /* Let keventd close the socket */ if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) xprt_release_write(xprt, NULL); @@ -322,7 +322,7 @@ xprt_init_autodisconnect(unsigned long data) schedule_work(&xprt->task_cleanup); return; out_abort: - spin_unlock(&xprt->sock_lock); + spin_unlock(&xprt->transport_lock); } /** @@ -482,7 +482,7 @@ xprt_timer(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - spin_lock(&xprt->sock_lock); + spin_lock(&xprt->transport_lock); if (req->rq_received) goto out; @@ -496,7 +496,7 @@ xprt_timer(struct rpc_task *task) out: task->tk_timeout = 0; rpc_wake_up_task(task); - spin_unlock(&xprt->sock_lock); + spin_unlock(&xprt->transport_lock); } /** @@ -515,7 +515,7 @@ int xprt_prepare_transmit(struct rpc_task *task) if (xprt->shutdown) return -EIO; - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); if (req->rq_received && !req->rq_bytes_sent) { err = req->rq_received; goto out_unlock; @@ -530,7 +530,7 @@ int xprt_prepare_transmit(struct rpc_task *task) goto out_unlock; } out_unlock: - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); return err; } @@ -552,13 +552,13 @@ void xprt_transmit(struct rpc_task *task) smp_rmb(); if (!req->rq_received) { if (list_empty(&req->rq_list)) { - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); /* Update the softirq receive buffer */ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); /* Add request to the receive list */ list_add_tail(&req->rq_list, &xprt->recv); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); xprt_reset_majortimeo(req); /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); @@ -592,7 +592,7 @@ void xprt_transmit(struct rpc_task *task) out_receive: dprintk("RPC: %4d xmit complete\n", task->tk_pid); /* Set the task's receive timeout value */ - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); if (!xprt->nocong) { int timer = task->tk_msg.rpc_proc->p_timer; task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); @@ -607,7 +607,7 @@ void xprt_transmit(struct rpc_task *task) else if (!req->rq_received) rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); __xprt_release_write(xprt, task); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); } static inline void do_xprt_reserve(struct rpc_task *task) @@ -683,7 +683,7 @@ void xprt_release(struct rpc_task *task) if (!(req = task->tk_rqstp)) return; - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); __xprt_release_write(xprt, task); __xprt_put_cong(xprt, req); if (!list_empty(&req->rq_list)) @@ -692,7 +692,7 @@ void xprt_release(struct rpc_task *task) if (list_empty(&xprt->recv) && !xprt->shutdown) mod_timer(&xprt->timer, xprt->last_used + RPC_IDLE_DISCONNECT_TIMEOUT); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); task->tk_rqstp = NULL; memset(req, 0, sizeof(*req)); /* mark unused */ @@ -750,7 +750,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc return ERR_PTR(result); } - spin_lock_init(&xprt->sock_lock); + spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->xprt_lock); init_waitqueue_head(&xprt->cong_wait); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a5a04203a6b0..bc90caab6088 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -307,7 +307,7 @@ static int xs_send_request(struct rpc_task *task) if (status == -EAGAIN) { if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { /* Protect against races with xs_write_space */ - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; @@ -315,7 +315,7 @@ static int xs_send_request(struct rpc_task *task) task->tk_timeout = req->rq_timeout; rpc_sleep_on(&xprt->pending, task, NULL, NULL); } - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); return status; } /* Keep holding the socket if it is blocked */ @@ -415,7 +415,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) goto dropit; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->sock_lock); + spin_lock(&xprt->transport_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; @@ -436,7 +436,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) xprt_complete_rqst(xprt, rovr, copied); out_unlock: - spin_unlock(&xprt->sock_lock); + spin_unlock(&xprt->transport_lock); dropit: skb_free_datagram(sk, skb); out: @@ -531,13 +531,13 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc ssize_t r; /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->sock_lock); + spin_lock(&xprt->transport_lock); req = xprt_lookup_rqst(xprt, xprt->tcp_xid); if (!req) { xprt->tcp_flags &= ~XPRT_COPY_DATA; dprintk("RPC: XID %08x request not found!\n", ntohl(xprt->tcp_xid)); - spin_unlock(&xprt->sock_lock); + spin_unlock(&xprt->transport_lock); return; } @@ -597,7 +597,7 @@ out: req->rq_task->tk_pid); xprt_complete_rqst(xprt, req, xprt->tcp_copied); } - spin_unlock(&xprt->sock_lock); + spin_unlock(&xprt->transport_lock); xs_tcp_check_recm(xprt); } @@ -696,7 +696,7 @@ static void xs_tcp_state_change(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { /* Reset TCP record info */ xprt->tcp_offset = 0; @@ -705,7 +705,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; rpc_wake_up(&xprt->pending); } - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); break; case TCP_SYN_SENT: case TCP_SYN_RECV: @@ -753,10 +753,10 @@ static void xs_write_space(struct sock *sk) if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) goto out; - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); if (xprt->snd_task) rpc_wake_up_task(xprt->snd_task); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); out: read_unlock(&sk->sk_callback_lock); } -- cgit v1.2.3 From 5dc07727f86b25851e95193a0c484ea21b531c47 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:35 -0400 Subject: [PATCH] RPC: Rename xprt_lock Clean-up: Replace the xprt_lock with something more aptly named. This lock single-threads the XID and request slot reservation process. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:05:26 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- net/sunrpc/xprt.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c4f903f0e17c..41ce296dded1 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -199,7 +199,7 @@ struct rpc_xprt { * Send stuff */ spinlock_t transport_lock; /* lock transport info */ - spinlock_t xprt_lock; /* lock xprt info */ + spinlock_t reserve_lock; /* lock slot table */ struct rpc_task * snd_task; /* Task blocked in send */ struct list_head recv; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1f0da8c1a3b0..9c45c522e3ef 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -643,9 +643,9 @@ void xprt_reserve(struct rpc_task *task) task->tk_status = -EIO; if (!xprt->shutdown) { - spin_lock(&xprt->xprt_lock); + spin_lock(&xprt->reserve_lock); do_xprt_reserve(task); - spin_unlock(&xprt->xprt_lock); + spin_unlock(&xprt->reserve_lock); } } @@ -698,10 +698,10 @@ void xprt_release(struct rpc_task *task) dprintk("RPC: %4d release request %p\n", task->tk_pid, req); - spin_lock(&xprt->xprt_lock); + spin_lock(&xprt->reserve_lock); list_add(&req->rq_list, &xprt->free); xprt_clear_backlog(xprt); - spin_unlock(&xprt->xprt_lock); + spin_unlock(&xprt->reserve_lock); } /** @@ -751,7 +751,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc } spin_lock_init(&xprt->transport_lock); - spin_lock_init(&xprt->xprt_lock); + spin_lock_init(&xprt->reserve_lock); init_waitqueue_head(&xprt->cong_wait); INIT_LIST_HEAD(&xprt->free); -- cgit v1.2.3 From 2226feb6bcd0e5e117a9be3ea3dd3ffc14f3e41e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:38 -0400 Subject: [PATCH] RPC: rename the sockstate field Clean-up: get rid of a name reference to sockets in the generic parts of the RPC client by renaming the sockstate field in the rpc_xprt structure. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:05:53 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 60 ++++++++++++++++++++++++++++++++++++--------- net/sunrpc/xprt.c | 14 +++++------ net/sunrpc/xprtsock.c | 6 ++--- 3 files changed, 58 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 41ce296dded1..009a3bb4f997 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -163,7 +163,7 @@ struct rpc_xprt { struct list_head free; /* free slots */ struct rpc_rqst * slot; /* slot table storage */ unsigned int max_reqs; /* total slots */ - unsigned long sockstate; /* Socket state */ + unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ nocong : 1, /* no congestion control */ resvport : 1, /* use a reserved port */ @@ -240,16 +240,54 @@ int xs_setup_udp(struct rpc_xprt *, int xs_setup_tcp(struct rpc_xprt *, struct rpc_timeout *); -#define XPRT_LOCKED 0 -#define XPRT_CONNECT 1 -#define XPRT_CONNECTING 2 - -#define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_test_and_clear_connected(xp) \ - (test_and_clear_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) +/* + * Reserved bit positions in xprt->state + */ +#define XPRT_LOCKED (0) +#define XPRT_CONNECTED (1) +#define XPRT_CONNECTING (2) + +static inline void xprt_set_connected(struct rpc_xprt *xprt) +{ + set_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline void xprt_clear_connected(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_connected(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_test_and_set_connected(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_test_and_clear_connected(struct rpc_xprt *xprt) +{ + return test_and_clear_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline void xprt_clear_connecting(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTING, &xprt->state); + smp_mb__after_clear_bit(); +} + +static inline int xprt_connecting(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_CONNECTING, &xprt->state); +} + +static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_CONNECTING, &xprt->state); +} #endif /* __KERNEL__*/ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9c45c522e3ef..57c5e77b155e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -74,7 +74,7 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) return 1; goto out_sleep; @@ -88,7 +88,7 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) return 1; } smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->sockstate); + clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); out_sleep: dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); @@ -118,7 +118,7 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) { struct rpc_task *task; - if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) goto out_unlock; @@ -139,7 +139,7 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) } out_unlock: smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->sockstate); + clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); } @@ -152,7 +152,7 @@ __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) if (xprt->snd_task == task) { xprt->snd_task = NULL; smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->sockstate); + clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); __xprt_lock_write_next(xprt); } @@ -312,11 +312,11 @@ xprt_init_autodisconnect(unsigned long data) spin_lock(&xprt->transport_lock); if (!list_empty(&xprt->recv) || xprt->shutdown) goto out_abort; - if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); /* Let keventd close the socket */ - if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) + if (xprt_connecting(xprt)) xprt_release_write(xprt, NULL); else schedule_work(&xprt->task_cleanup); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bc90caab6088..76a33b54f436 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -925,9 +925,7 @@ out: else rpc_wake_up(&xprt->pending); out_clear: - smp_mb__before_clear_bit(); - clear_bit(XPRT_CONNECTING, &xprt->sockstate); - smp_mb__after_clear_bit(); + xprt_clear_connecting(xprt); } /** @@ -940,7 +938,7 @@ static void xs_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { + if (!xprt_test_and_set_connecting(xprt)) { if (xprt->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p\n", xprt); schedule_delayed_work(&xprt->sock_connect, -- cgit v1.2.3 From 44fbac2288dfed6f1963ac00bf922c3bcd779cd1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:44 -0400 Subject: [PATCH] RPC: Add helper for waking tasks pending on a transport Clean-up: remove only reference to xprt->pending from the socket transport implementation. This makes a cleaner interface for other transport implementations as well. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:06:52 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 18 ++++++++++++++++-- net/sunrpc/xprtsock.c | 7 ++----- 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 009a3bb4f997..d5223993fca9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -232,6 +232,7 @@ void xprt_reserve(struct rpc_task *); int xprt_prepare_transmit(struct rpc_task *); void xprt_transmit(struct rpc_task *); void xprt_receive(struct rpc_task *); +void xprt_wake_pending_tasks(struct rpc_xprt *, int); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release(struct rpc_task *); void xprt_connect(struct rpc_task *); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 57c5e77b155e..2f9cd468b953 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -227,6 +227,20 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) xprt->cwnd = cwnd; } +/** + * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue + * @xprt: transport with waiting tasks + * @status: result code to plant in each task before waking it + * + */ +void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) +{ + if (status < 0) + rpc_wake_up_status(&xprt->pending, status); + else + rpc_wake_up(&xprt->pending); +} + static void xprt_reset_majortimeo(struct rpc_rqst *req) { struct rpc_timeout *to = &req->rq_xprt->timeout; @@ -300,7 +314,7 @@ void xprt_disconnect(struct rpc_xprt *xprt) dprintk("RPC: disconnected transport %p\n", xprt); spin_lock_bh(&xprt->transport_lock); xprt_clear_connected(xprt); - rpc_wake_up_status(&xprt->pending, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -ENOTCONN); spin_unlock_bh(&xprt->transport_lock); } @@ -803,7 +817,7 @@ static void xprt_shutdown(struct rpc_xprt *xprt) xprt->shutdown = 1; rpc_wake_up(&xprt->sending); rpc_wake_up(&xprt->resend); - rpc_wake_up(&xprt->pending); + xprt_wake_pending_tasks(xprt, -EIO); rpc_wake_up(&xprt->backlog); wake_up(&xprt->cong_wait); del_timer_sync(&xprt->timer); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 76a33b54f436..182da2edf61c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -703,7 +703,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt->tcp_reclen = 0; xprt->tcp_copied = 0; xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; - rpc_wake_up(&xprt->pending); + xprt_wake_pending_tasks(xprt, 0); } spin_unlock_bh(&xprt->transport_lock); break; @@ -920,10 +920,7 @@ static void xs_connect_worker(void *args) } } out: - if (status < 0) - rpc_wake_up_status(&xprt->pending, status); - else - rpc_wake_up(&xprt->pending); + xprt_wake_pending_tasks(xprt, status); out_clear: xprt_clear_connecting(xprt); } -- cgit v1.2.3 From 55aa4f58aa43dc9a51fb80010630d94b96053a2e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:47 -0400 Subject: [PATCH] RPC: client-side transport switch cleanup Clean-up: change some comments to reflect the realities of the new RPC transport switch mechanism. Get rid of unused xprt_receive() prototype. Also, organize function prototypes in xprt.h by usage and scope. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:07:21 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 61 ++++++++++++++++++++++++++------------------- net/sunrpc/clnt.c | 2 +- net/sunrpc/xprt.c | 26 +++++++++---------- net/sunrpc/xprtsock.c | 12 +++++---- 4 files changed, 55 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d5223993fca9..bfbc492ae36d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -1,5 +1,5 @@ /* - * linux/include/linux/sunrpc/clnt_xprt.h + * linux/include/linux/sunrpc/xprt.h * * Declarations for the RPC transport interface. * @@ -150,8 +150,8 @@ struct rpc_xprt { unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ - unsigned int rcvsize, /* socket receive buffer size */ - sndsize; /* socket send buffer size */ + unsigned int rcvsize, /* transport rcv buffer size */ + sndsize; /* transport send buffer size */ size_t max_payload; /* largest RPC payload size, in bytes */ @@ -184,12 +184,12 @@ struct rpc_xprt { unsigned long tcp_copied, /* copied to request */ tcp_flags; /* - * Connection of sockets + * Connection of transports */ - struct work_struct sock_connect; + struct work_struct connect_worker; unsigned short port; /* - * Disconnection of idle sockets + * Disconnection of idle transports */ struct work_struct task_cleanup; struct timer_list timer; @@ -219,27 +219,36 @@ struct rpc_xprt { #ifdef __KERNEL__ -struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, - struct rpc_timeout *toparms); -void xprt_disconnect(struct rpc_xprt *); -int xprt_destroy(struct rpc_xprt *); -void xprt_set_timeout(struct rpc_timeout *, unsigned int, - unsigned long); -struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *, u32); -void xprt_complete_rqst(struct rpc_xprt *, - struct rpc_rqst *, int); -void xprt_reserve(struct rpc_task *); -int xprt_prepare_transmit(struct rpc_task *); -void xprt_transmit(struct rpc_task *); -void xprt_receive(struct rpc_task *); -void xprt_wake_pending_tasks(struct rpc_xprt *, int); +/* + * Transport operations used by ULPs + */ +struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to); +void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr); + +/* + * Generic internal transport functions + */ +void xprt_connect(struct rpc_task *task); +void xprt_reserve(struct rpc_task *task); +int xprt_prepare_transmit(struct rpc_task *task); +void xprt_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); -void xprt_release(struct rpc_task *); -void xprt_connect(struct rpc_task *); -int xs_setup_udp(struct rpc_xprt *, - struct rpc_timeout *); -int xs_setup_tcp(struct rpc_xprt *, - struct rpc_timeout *); +void xprt_release(struct rpc_task *task); +int xprt_destroy(struct rpc_xprt *xprt); + +/* + * Transport switch helper functions + */ +void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); +struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); +void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); +void xprt_disconnect(struct rpc_xprt *xprt); + +/* + * Socket transport setup operations + */ +int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to); +int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); /* * Reserved bit positions in xprt->state diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0d1b010a4a01..4677959d2834 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1,5 +1,5 @@ /* - * linux/net/sunrpc/rpcclnt.c + * linux/net/sunrpc/clnt.c * * This file contains the high-level RPC interface. * It is modeled as a finite state machine to support both synchronous diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2f9cd468b953..247fa1ec870c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -10,12 +10,12 @@ * one is available. Otherwise, it sleeps on the backlog queue * (xprt_reserve). * - Next, the caller puts together the RPC message, stuffs it into - * the request struct, and calls xprt_call(). - * - xprt_call transmits the message and installs the caller on the - * socket's wait list. At the same time, it installs a timer that + * the request struct, and calls xprt_transmit(). + * - xprt_transmit sends the message and installs the caller on the + * transport's wait list. At the same time, it installs a timer that * is run after the packet's timeout has expired. * - When a packet arrives, the data_ready handler walks the list of - * pending requests for that socket. If a matching XID is found, the + * pending requests for that transport. If a matching XID is found, the * caller is woken up, and the timer removed. * - When no reply arrives within the timeout interval, the timer is * fired by the kernel and runs xprt_timer(). It either adjusts the @@ -32,6 +32,8 @@ * tasks that rely on callbacks. * * Copyright (C) 1995-1997, Olaf Kirch + * + * Transport switch API copyright (C) 2005, Chuck Lever */ #include @@ -52,8 +54,6 @@ # define RPCDBG_FACILITY RPCDBG_XPRT #endif -#define XPRT_MAX_BACKOFF (8) - /* * Local functions */ @@ -65,9 +65,9 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); static int xprt_clear_backlog(struct rpc_xprt *xprt); /* - * Serialize write access to sockets, in order to prevent different + * Serialize write access to transports, in order to prevent different * requests from interfering with each other. - * Also prevents TCP socket connects from colliding with writes. + * Also prevents transport connects from colliding with writes. */ static int __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) @@ -91,7 +91,7 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); out_sleep: - dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); + dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) @@ -144,7 +144,7 @@ out_unlock: } /* - * Releases the socket for use by other requests. + * Releases the transport for use by other requests. */ static void __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) @@ -294,8 +294,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req) return status; } -static void -xprt_socket_autoclose(void *args) +static void xprt_autoclose(void *args) { struct rpc_xprt *xprt = (struct rpc_xprt *)args; @@ -329,7 +328,6 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - /* Let keventd close the socket */ if (xprt_connecting(xprt)) xprt_release_write(xprt, NULL); else @@ -770,7 +768,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); - INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); + INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); init_timer(&xprt->timer); xprt->timer.function = xprt_init_autodisconnect; xprt->timer.data = (unsigned long) xprt; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 182da2edf61c..7f0b9f7f167b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -11,6 +11,8 @@ * Rewrite of larges part of the code in order to stabilize TCP stuff. * Fix behaviour when socket buffer is full. * (C) 1999 Trond Myklebust + * + * IP socket transport implementation, (C) 2005 Chuck Lever */ #include @@ -363,7 +365,7 @@ static void xs_destroy(struct rpc_xprt *xprt) { dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work(&xprt->sock_connect); + cancel_delayed_work(&xprt->connect_worker); flush_scheduled_work(); xprt_disconnect(xprt); @@ -938,11 +940,11 @@ static void xs_connect(struct rpc_task *task) if (!xprt_test_and_set_connecting(xprt)) { if (xprt->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p\n", xprt); - schedule_delayed_work(&xprt->sock_connect, + schedule_delayed_work(&xprt->connect_worker, RPC_REESTABLISH_TIMEOUT); } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - schedule_work(&xprt->sock_connect); + schedule_work(&xprt->connect_worker); /* flush_scheduled_work can sleep... */ if (!RPC_IS_ASYNC(task)) flush_scheduled_work(); @@ -989,7 +991,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_WORK(&xprt->sock_connect, xs_connect_worker, xprt); + INIT_WORK(&xprt->connect_worker, xs_connect_worker, xprt); xprt->ops = &xs_ops; @@ -1028,7 +1030,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = (1U << 31) - 1; - INIT_WORK(&xprt->sock_connect, xs_connect_worker, xprt); + INIT_WORK(&xprt->connect_worker, xs_connect_worker, xprt); xprt->ops = &xs_ops; -- cgit v1.2.3 From c7b2cae8a634015b72941ba2fc6c4bc9b8d3a129 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:50 -0400 Subject: [PATCH] RPC: separate TCP and UDP write space callbacks Split the socket write space callback function into a TCP version and UDP version, eliminating one dependence on the "xprt->stream" variable. Keep the common pieces of this path in xprt.c so other transports can use it too. Test-plan: Write-intensive workload on a single mount point. Version: Thu, 11 Aug 2005 16:07:51 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/xprt.c | 34 ++++++++++++++++++ net/sunrpc/xprtsock.c | 84 ++++++++++++++++++++++++++++----------------- 3 files changed, 89 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bfbc492ae36d..e73174c7e450 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -240,6 +240,8 @@ int xprt_destroy(struct rpc_xprt *xprt); * Transport switch helper functions */ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); +void xprt_wait_for_buffer_space(struct rpc_task *task); +void xprt_write_space(struct rpc_xprt *xprt); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); void xprt_disconnect(struct rpc_xprt *xprt); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 247fa1ec870c..31ef7dc7eed6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -241,6 +241,40 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) rpc_wake_up(&xprt->pending); } +/** + * xprt_wait_for_buffer_space - wait for transport output buffer to clear + * @task: task to be put to sleep + * + */ +void xprt_wait_for_buffer_space(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + task->tk_timeout = req->rq_timeout; + rpc_sleep_on(&xprt->pending, task, NULL, NULL); +} + +/** + * xprt_write_space - wake the task waiting for transport output buffer space + * @xprt: transport with waiting tasks + * + * Can be called in a soft IRQ context, so xprt_write_space never sleeps. + */ +void xprt_write_space(struct rpc_xprt *xprt) +{ + if (unlikely(xprt->shutdown)) + return; + + spin_lock_bh(&xprt->transport_lock); + if (xprt->snd_task) { + dprintk("RPC: write space: waking waiting task on xprt %p\n", + xprt); + rpc_wake_up_task(xprt->snd_task); + } + spin_unlock_bh(&xprt->transport_lock); +} + static void xprt_reset_majortimeo(struct rpc_rqst *req) { struct rpc_timeout *to = &req->rq_xprt->timeout; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7f0b9f7f167b..70a772d7a796 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -308,15 +308,13 @@ static int xs_send_request(struct rpc_task *task) if (status == -EAGAIN) { if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { - /* Protect against races with xs_write_space */ + /* Protect against races with write_space */ spin_lock_bh(&xprt->transport_lock); /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { - task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&xprt->pending, task, NULL, NULL); - } + else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) + xprt_wait_for_buffer_space(task); spin_unlock_bh(&xprt->transport_lock); return status; } @@ -721,45 +719,68 @@ static void xs_tcp_state_change(struct sock *sk) } /** - * xs_write_space - callback invoked when socket buffer space becomes - * available + * xs_udp_write_space - callback invoked when socket buffer space + * becomes available * @sk: socket whose state has changed * * Called when more output buffer space is available for this socket. * We try not to wake our writers until they can make "significant" - * progress, otherwise we'll waste resources thrashing sock_sendmsg + * progress, otherwise we'll waste resources thrashing kernel_sendmsg * with a bunch of small requests. */ -static void xs_write_space(struct sock *sk) +static void xs_udp_write_space(struct sock *sk) { - struct rpc_xprt *xprt; - struct socket *sock; - read_lock(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) - goto out; - if (xprt->shutdown) - goto out; - /* Wait until we have enough socket memory */ - if (xprt->stream) { - /* from net/core/stream.c:sk_stream_write_space */ - if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) + /* from net/core/sock.c:sock_def_write_space */ + if (sock_writeable(sk)) { + struct socket *sock; + struct rpc_xprt *xprt; + + if (unlikely(!(sock = sk->sk_socket))) goto out; - } else { - /* from net/core/sock.c:sock_def_write_space */ - if (!sock_writeable(sk)) + if (unlikely(!(xprt = xprt_from_sock(sk)))) + goto out; + if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) goto out; + + xprt_write_space(xprt); } - if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) - goto out; + out: + read_unlock(&sk->sk_callback_lock); +} - spin_lock_bh(&xprt->transport_lock); - if (xprt->snd_task) - rpc_wake_up_task(xprt->snd_task); - spin_unlock_bh(&xprt->transport_lock); -out: +/** + * xs_tcp_write_space - callback invoked when socket buffer space + * becomes available + * @sk: socket whose state has changed + * + * Called when more output buffer space is available for this socket. + * We try not to wake our writers until they can make "significant" + * progress, otherwise we'll waste resources thrashing kernel_sendmsg + * with a bunch of small requests. + */ +static void xs_tcp_write_space(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + + /* from net/core/stream.c:sk_stream_write_space */ + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + struct socket *sock; + struct rpc_xprt *xprt; + + if (unlikely(!(sock = sk->sk_socket))) + goto out; + if (unlikely(!(xprt = xprt_from_sock(sk)))) + goto out; + if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + goto out; + + xprt_write_space(xprt); + } + + out: read_unlock(&sk->sk_callback_lock); } @@ -855,15 +876,16 @@ static void xs_bind(struct rpc_xprt *xprt, struct socket *sock) xprt->old_write_space = sk->sk_write_space; if (xprt->prot == IPPROTO_UDP) { sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; sk->sk_no_check = UDP_CSUM_NORCV; xprt_set_connected(xprt); } else { tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; xprt_clear_connected(xprt); } - sk->sk_write_space = xs_write_space; /* Reset to new socket */ xprt->sock = sock; -- cgit v1.2.3 From 808012fbb23a52ec59352445d2076d175ad4ab26 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:49 -0700 Subject: [PATCH] RPC: skip over transport-specific heads automatically Add a generic mechanism for skipping over transport-specific headers when constructing an RPC request. This removes another "xprt->stream" dependency. Test-plan: Write-intensive workload on a single mount point (try both UDP and TCP). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/msg_prot.h | 25 +++++++++++++++++++++++++ include/linux/sunrpc/xprt.h | 7 +++++++ net/sunrpc/auth_gss/auth_gss.c | 6 ++---- net/sunrpc/clnt.c | 5 ++--- net/sunrpc/xprtsock.c | 24 +++++++++++++++++------- 5 files changed, 53 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 15f115332389..f43f237360ae 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -76,5 +76,30 @@ enum rpc_auth_stat { #define RPC_MAXNETNAMELEN 256 +/* + * From RFC 1831: + * + * "A record is composed of one or more record fragments. A record + * fragment is a four-byte header followed by 0 to (2**31) - 1 bytes of + * fragment data. The bytes encode an unsigned binary number; as with + * XDR integers, the byte order is from highest to lowest. The number + * encodes two values -- a boolean which indicates whether the fragment + * is the last fragment of the record (bit value 1 implies the fragment + * is the last fragment) and a 31-bit unsigned binary value which is the + * length in bytes of the fragment's data. The boolean value is the + * highest-order bit of the header; the length is the 31 low-order bits. + * (Note that this record specification is NOT in XDR standard form!)" + * + * The Linux RPC client always sends its requests in a single record + * fragment, limiting the maximum payload size for stream transports to + * 2GB. + */ + +typedef u32 rpc_fraghdr; + +#define RPC_LAST_STREAM_FRAGMENT (1U << 31) +#define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT) +#define RPC_MAX_FRAGMENT_SIZE ((1U << 31) - 1) + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e73174c7e450..966c456a0f6d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -155,6 +155,8 @@ struct rpc_xprt { size_t max_payload; /* largest RPC payload size, in bytes */ + unsigned int tsh_size; /* size of transport specific + header */ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue resend; /* requests waiting to resend */ @@ -236,6 +238,11 @@ int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release(struct rpc_task *task); int xprt_destroy(struct rpc_xprt *xprt); +static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) +{ + return p + xprt->tsh_size; +} + /* * Transport switch helper functions */ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 53a030acdf75..d2b08f16c257 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -844,10 +844,8 @@ gss_marshal(struct rpc_task *task, u32 *p) /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ - iov.iov_base = req->rq_snd_buf.head[0].iov_base; - if (task->tk_client->cl_xprt->stream) - /* See clnt.c:call_header() */ - iov.iov_base += 4; + iov.iov_base = xprt_skip_transport_header(task->tk_xprt, + req->rq_snd_buf.head[0].iov_base); iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4677959d2834..cc1b773a79d3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1075,13 +1075,12 @@ static u32 * call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; struct rpc_rqst *req = task->tk_rqstp; u32 *p = req->rq_svec[0].iov_base; /* FIXME: check buffer size? */ - if (xprt->stream) - *p++ = 0; /* fill in later */ + + p = xprt_skip_transport_header(task->tk_xprt, p); *p++ = req->rq_xid; /* XID */ *p++ = htonl(RPC_CALL); /* CALL */ *p++ = htonl(RPC_VERSION); /* RPC version */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 57988300640a..aaf053b1a0c4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -282,6 +282,13 @@ static int xs_udp_send_request(struct rpc_task *task) return status; } +static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) +{ + u32 reclen = buf->len - sizeof(rpc_fraghdr); + rpc_fraghdr *base = buf->head[0].iov_base; + *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); +} + /** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request @@ -301,11 +308,9 @@ static int xs_tcp_send_request(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct xdr_buf *xdr = &req->rq_snd_buf; - u32 *marker = req->rq_svec[0].iov_base; int status, retry = 0; - /* Write the record marker */ - *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); + xs_encode_tcp_record_marker(&req->rq_snd_buf); xs_pktdump("packet data:", req->rq_svec->iov_base, @@ -503,16 +508,19 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc xprt->tcp_offset += used; if (used != len) return; + xprt->tcp_reclen = ntohl(xprt->tcp_recm); - if (xprt->tcp_reclen & 0x80000000) + if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) xprt->tcp_flags |= XPRT_LAST_FRAG; else xprt->tcp_flags &= ~XPRT_LAST_FRAG; - xprt->tcp_reclen &= 0x7fffffff; + xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; + xprt->tcp_flags &= ~XPRT_COPY_RECM; xprt->tcp_offset = 0; + /* Sanity check of the record length */ - if (xprt->tcp_reclen < 4) { + if (unlikely(xprt->tcp_reclen < 4)) { dprintk("RPC: invalid TCP record fragment length\n"); xprt_disconnect(xprt); return; @@ -1065,6 +1073,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_UDP; xprt->port = XS_MAX_RESVPORT; + xprt->tsh_size = 0; xprt->stream = 0; xprt->nocong = 0; xprt->cwnd = RPC_INITCWND; @@ -1105,11 +1114,12 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_TCP; xprt->port = XS_MAX_RESVPORT; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->stream = 1; xprt->nocong = 1; xprt->cwnd = RPC_MAXCWND(xprt); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; - xprt->max_payload = (1U << 31) - 1; + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); -- cgit v1.2.3 From 43118c29dea2b23798bd42a147015cceee7fa885 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:49 -0700 Subject: [PATCH] RPC: get rid of xprt->stream Now we can fix up the last few places that use the "xprt->stream" variable, and get rid of it from the rpc_xprt structure. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 3 +-- include/linux/sunrpc/xprt.h | 3 +-- net/sunrpc/xprt.c | 3 +-- net/sunrpc/xprtsock.c | 28 ++++++++++++++++++---------- 4 files changed, 21 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 82c77df81c5f..7901f5b8092c 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -173,11 +173,10 @@ nlm_bind_host(struct nlm_host *host) /* If we've already created an RPC client, check whether * RPC rebind is required - * Note: why keep rebinding if we're on a tcp connection? */ if ((clnt = host->h_rpcclnt) != NULL) { xprt = clnt->cl_xprt; - if (!xprt->stream && time_after_eq(jiffies, host->h_nextrebind)) { + if (time_after_eq(jiffies, host->h_nextrebind)) { clnt->cl_port = 0; host->h_nextrebind = jiffies + NLM_HOST_REBIND; dprintk("lockd: next rebind in %ld jiffies\n", diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 966c456a0f6d..c9477f022efb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -168,8 +168,7 @@ struct rpc_xprt { unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ nocong : 1, /* no congestion control */ - resvport : 1, /* use a reserved port */ - stream : 1; /* TCP */ + resvport : 1; /* use a reserved port */ /* * XID diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 31ef7dc7eed6..43fef7626442 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -630,8 +630,7 @@ void xprt_transmit(struct rpc_task *task) case -ENOTCONN: return; default: - if (xprt->stream) - xprt_disconnect(xprt); + break; } xprt_release_write(xprt, task); return; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index aaf053b1a0c4..5bb6fed3df34 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -356,6 +356,7 @@ static int xs_tcp_send_request(struct rpc_task *task) default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); + xprt_disconnect(xprt); break; } @@ -826,19 +827,17 @@ static void xs_tcp_write_space(struct sock *sk) } /** - * xs_set_buffer_size - set send and receive limits + * xs_udp_set_buffer_size - set send and receive limits * @xprt: generic transport * * Set socket send and receive limits based on the * sndsize and rcvsize fields in the generic transport - * structure. This applies only to UDP sockets. + * structure. */ -static void xs_set_buffer_size(struct rpc_xprt *xprt) +static void xs_udp_set_buffer_size(struct rpc_xprt *xprt) { struct sock *sk = xprt->inet; - if (xprt->stream) - return; if (xprt->rcvsize) { sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; @@ -850,6 +849,17 @@ static void xs_set_buffer_size(struct rpc_xprt *xprt) } } +/** + * xs_tcp_set_buffer_size - set send and receive limits + * @xprt: generic transport + * + * Nothing to do for TCP. + */ +static void xs_tcp_set_buffer_size(struct rpc_xprt *xprt) +{ + return; +} + static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) { struct sockaddr_in myaddr = { @@ -928,7 +938,7 @@ static void xs_udp_connect_worker(void *args) write_unlock_bh(&sk->sk_callback_lock); } - xs_set_buffer_size(xprt); + xs_udp_set_buffer_size(xprt); status = 0; out: xprt_wake_pending_tasks(xprt, status); @@ -1034,7 +1044,7 @@ static void xs_connect(struct rpc_task *task) } static struct rpc_xprt_ops xs_udp_ops = { - .set_buffer_size = xs_set_buffer_size, + .set_buffer_size = xs_udp_set_buffer_size, .connect = xs_connect, .send_request = xs_udp_send_request, .close = xs_close, @@ -1042,7 +1052,7 @@ static struct rpc_xprt_ops xs_udp_ops = { }; static struct rpc_xprt_ops xs_tcp_ops = { - .set_buffer_size = xs_set_buffer_size, + .set_buffer_size = xs_tcp_set_buffer_size, .connect = xs_connect, .send_request = xs_tcp_send_request, .close = xs_close, @@ -1074,7 +1084,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_UDP; xprt->port = XS_MAX_RESVPORT; xprt->tsh_size = 0; - xprt->stream = 0; xprt->nocong = 0; xprt->cwnd = RPC_INITCWND; xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; @@ -1115,7 +1124,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_TCP; xprt->port = XS_MAX_RESVPORT; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); - xprt->stream = 1; xprt->nocong = 1; xprt->cwnd = RPC_MAXCWND(xprt); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; -- cgit v1.2.3 From fe3aca290f17ae4978bd73d02aa4029f1c9c024c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:50 -0700 Subject: [PATCH] RPC: add API to set transport-specific timeouts Prepare the way to remove the "xprt->nocong" variable by adding a callout to the RPC client transport switch API to handle setting RPC retransmit timeouts. Add a pair of generic helper functions that provide the ability to set a simple fixed timeout, or to set a timeout based on the state of a round- trip estimator. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++ net/sunrpc/xprt.c | 67 ++++++++++++++++++++++++++++++--------------- net/sunrpc/xprtsock.c | 2 ++ 3 files changed, 50 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c9477f022efb..ac08e99a81cb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -134,6 +134,7 @@ struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt); void (*connect)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); + void (*set_retrans_timeout)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); }; @@ -245,6 +246,8 @@ static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) /* * Transport switch helper functions */ +void xprt_set_retrans_timeout_def(struct rpc_task *task); +void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); void xprt_wait_for_buffer_space(struct rpc_task *task); void xprt_write_space(struct rpc_xprt *xprt); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 43fef7626442..1ac2fbe05102 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -275,6 +275,38 @@ void xprt_write_space(struct rpc_xprt *xprt) spin_unlock_bh(&xprt->transport_lock); } +/** + * xprt_set_retrans_timeout_def - set a request's retransmit timeout + * @task: task whose timeout is to be set + * + * Set a request's retransmit timeout based on the transport's + * default timeout parameters. Used by transports that don't adjust + * the retransmit timeout based on round-trip time estimation. + */ +void xprt_set_retrans_timeout_def(struct rpc_task *task) +{ + task->tk_timeout = task->tk_rqstp->rq_timeout; +} + +/* + * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout + * @task: task whose timeout is to be set + * + * Set a request's retransmit timeout using the RTT estimator. + */ +void xprt_set_retrans_timeout_rtt(struct rpc_task *task) +{ + int timer = task->tk_msg.rpc_proc->p_timer; + struct rpc_rtt *rtt = task->tk_client->cl_rtt; + struct rpc_rqst *req = task->tk_rqstp; + unsigned long max_timeout = req->rq_xprt->timeout.to_maxval; + + task->tk_timeout = rpc_calc_rto(rtt, timer); + task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; + if (task->tk_timeout > max_timeout || task->tk_timeout == 0) + task->tk_timeout = max_timeout; +} + static void xprt_reset_majortimeo(struct rpc_rqst *req) { struct rpc_timeout *to = &req->rq_xprt->timeout; @@ -588,7 +620,6 @@ out_unlock: */ void xprt_transmit(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; int status; @@ -613,8 +644,19 @@ void xprt_transmit(struct rpc_task *task) return; status = xprt->ops->send_request(task); - if (!status) - goto out_receive; + if (status == 0) { + dprintk("RPC: %4d xmit complete\n", task->tk_pid); + spin_lock_bh(&xprt->transport_lock); + xprt->ops->set_retrans_timeout(task); + /* Don't race with disconnect */ + if (!xprt_connected(xprt)) + task->tk_status = -ENOTCONN; + else if (!req->rq_received) + rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); + __xprt_release_write(xprt, task); + spin_unlock_bh(&xprt->transport_lock); + return; + } /* Note: at this point, task->tk_sleeping has not yet been set, * hence there is no danger of the waking up task being put on @@ -634,25 +676,6 @@ void xprt_transmit(struct rpc_task *task) } xprt_release_write(xprt, task); return; - out_receive: - dprintk("RPC: %4d xmit complete\n", task->tk_pid); - /* Set the task's receive timeout value */ - spin_lock_bh(&xprt->transport_lock); - if (!xprt->nocong) { - int timer = task->tk_msg.rpc_proc->p_timer; - task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); - task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries; - if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0) - task->tk_timeout = xprt->timeout.to_maxval; - } else - task->tk_timeout = req->rq_timeout; - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); - __xprt_release_write(xprt, task); - spin_unlock_bh(&xprt->transport_lock); } static inline void do_xprt_reserve(struct rpc_task *task) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5bb6fed3df34..79433ffd1df0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1047,6 +1047,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .connect = xs_connect, .send_request = xs_udp_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_rtt, .close = xs_close, .destroy = xs_destroy, }; @@ -1055,6 +1056,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .set_buffer_size = xs_tcp_set_buffer_size, .connect = xs_connect, .send_request = xs_tcp_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, .destroy = xs_destroy, }; -- cgit v1.2.3 From 12a804698b29d040b7cdd92e8a44b0e75164dae9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:51 -0700 Subject: [PATCH] RPC: expose API for serializing access to RPC transports The next several patches introduce an API that allows transports to choose whether the RPC client provides congestion control or whether the transport itself provides it. The first method we abstract is the one that serializes access to the RPC transport to prevent the bytes from different requests from mingling together. This method provides proper request serialization and the opportunity to prevent new requests from being started because the transport is congested. The normal situation is for the transport to handle congestion control itself. Although NFS over UDP was first, it has been recognized after years of experience that having the transport provide congestion control is much better than doing it in the RPC client. Thus TCP, and probably every future transport implementation, will use the default method, xprt_lock_write, provided in xprt.c, which does not provide any kind of congestion control. UDP can continue using the xprt.c-provided Van Jacobson congestion avoidance implementation. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +++ net/sunrpc/xprt.c | 64 ++++++++++++++++++++++++++++++++++++--------- net/sunrpc/xprtsock.c | 2 ++ 3 files changed, 57 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ac08e99a81cb..eee1c6877851 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -132,6 +132,7 @@ struct rpc_xprt; struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt); + int (*reserve_xprt)(struct rpc_task *task); void (*connect)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); @@ -232,6 +233,8 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long */ void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); +int xprt_reserve_xprt(struct rpc_task *task); +int xprt_reserve_xprt_cong(struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1ac2fbe05102..2d1e8b83dd68 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -64,14 +64,56 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); static int xprt_clear_backlog(struct rpc_xprt *xprt); +/** + * xprt_reserve_xprt - serialize write access to transports + * @task: task that is requesting access to the transport + * + * This prevents mixing the payload of separate requests, and prevents + * transport connects from colliding with writes. No congestion control + * is provided. + */ +int xprt_reserve_xprt(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_rqst *req = task->tk_rqstp; + + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { + if (task == xprt->snd_task) + return 1; + if (task == NULL) + return 0; + goto out_sleep; + } + xprt->snd_task = task; + if (req) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } + return 1; + +out_sleep: + dprintk("RPC: %4d failed to lock transport %p\n", + task->tk_pid, xprt); + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (req && req->rq_ntrans) + rpc_sleep_on(&xprt->resend, task, NULL, NULL); + else + rpc_sleep_on(&xprt->sending, task, NULL, NULL); + return 0; +} + /* - * Serialize write access to transports, in order to prevent different - * requests from interfering with each other. - * Also prevents transport connects from colliding with writes. + * xprt_reserve_xprt_cong - serialize write access to transports + * @task: task that is requesting access to the transport + * + * Same as xprt_reserve_xprt, but Van Jacobson congestion control is + * integrated into the decision of whether a request is allowed to be + * woken up and given access to the transport. */ -static int -__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) +int xprt_reserve_xprt_cong(struct rpc_task *task) { + struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { @@ -79,7 +121,7 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) return 1; goto out_sleep; } - if (xprt->nocong || __xprt_get_cong(xprt, task)) { + if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; if (req) { req->rq_bytes_sent = 0; @@ -101,20 +143,18 @@ out_sleep: return 0; } -static inline int -xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) +static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { int retval; spin_lock_bh(&xprt->transport_lock); - retval = __xprt_lock_write(xprt, task); + retval = xprt->ops->reserve_xprt(task); spin_unlock_bh(&xprt->transport_lock); return retval; } -static void -__xprt_lock_write_next(struct rpc_xprt *xprt) +static void __xprt_lock_write_next(struct rpc_xprt *xprt) { struct rpc_task *task; @@ -598,7 +638,7 @@ int xprt_prepare_transmit(struct rpc_task *task) err = req->rq_received; goto out_unlock; } - if (!__xprt_lock_write(xprt, task)) { + if (!xprt->ops->reserve_xprt(task)) { err = -EAGAIN; goto out_unlock; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 79433ffd1df0..fc4fbe8ea346 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1045,6 +1045,7 @@ static void xs_connect(struct rpc_task *task) static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, + .reserve_xprt = xprt_reserve_xprt_cong, .connect = xs_connect, .send_request = xs_udp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_rtt, @@ -1054,6 +1055,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .set_buffer_size = xs_tcp_set_buffer_size, + .reserve_xprt = xprt_reserve_xprt, .connect = xs_connect, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, -- cgit v1.2.3 From 49e9a89086b3cae784a4868ca852863e4f4ea3fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:51 -0700 Subject: [PATCH] RPC: expose API for serializing access to RPC transports The next method we abstract is the one that releases a transport, allowing another task to have access to the transport. Again, one generic version of this is provided for transports that don't need the RPC client to perform congestion control, and one version is for transports that can use the original Van Jacobson implementation in xprt.c. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++ net/sunrpc/xprt.c | 77 ++++++++++++++++++++++++++++++++++++--------- net/sunrpc/xprtsock.c | 2 ++ 3 files changed, 68 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eee1c6877851..86833b725bb5 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,7 @@ struct rpc_xprt; struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt); int (*reserve_xprt)(struct rpc_task *task); + void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*connect)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); @@ -238,6 +239,8 @@ int xprt_reserve_xprt_cong(struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); +void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); +void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); int xprt_destroy(struct rpc_xprt *xprt); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2d1e8b83dd68..e92ea99dd318 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -153,14 +153,42 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) return retval; } - static void __xprt_lock_write_next(struct rpc_xprt *xprt) +{ + struct rpc_task *task; + struct rpc_rqst *req; + + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + + task = rpc_wake_up_next(&xprt->resend); + if (!task) { + task = rpc_wake_up_next(&xprt->sending); + if (!task) + goto out_unlock; + } + + req = task->tk_rqstp; + xprt->snd_task = task; + if (req) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } + return; + +out_unlock: + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_clear_bit(); +} + +static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) { struct rpc_task *task; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; - if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) + if (RPCXPRT_CONGESTED(xprt)) goto out_unlock; task = rpc_wake_up_next(&xprt->resend); if (!task) { @@ -168,7 +196,7 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) if (!task) goto out_unlock; } - if (xprt->nocong || __xprt_get_cong(xprt, task)) { + if (__xprt_get_cong(xprt, task)) { struct rpc_rqst *req = task->tk_rqstp; xprt->snd_task = task; if (req) { @@ -183,11 +211,14 @@ out_unlock: smp_mb__after_clear_bit(); } -/* - * Releases the transport for use by other requests. +/** + * xprt_release_xprt - allow other requests to use a transport + * @xprt: transport with other tasks potentially waiting + * @task: task that is releasing access to the transport + * + * Note that "task" can be NULL. No congestion control is provided. */ -static void -__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) +void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { xprt->snd_task = NULL; @@ -198,11 +229,29 @@ __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) } } -static inline void -xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) +/** + * xprt_release_xprt_cong - allow other requests to use a transport + * @xprt: transport with other tasks potentially waiting + * @task: task that is releasing access to the transport + * + * Note that "task" can be NULL. Another task is awoken to use the + * transport if the transport's congestion window allows it. + */ +void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) +{ + if (xprt->snd_task == task) { + xprt->snd_task = NULL; + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_clear_bit(); + __xprt_lock_write_next_cong(xprt); + } +} + +static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { spin_lock_bh(&xprt->transport_lock); - __xprt_release_write(xprt, task); + xprt->ops->release_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); } @@ -237,7 +286,7 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) return; req->rq_cong = 0; xprt->cong -= RPC_CWNDSCALE; - __xprt_lock_write_next(xprt); + __xprt_lock_write_next_cong(xprt); } /* @@ -256,7 +305,7 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; if (cwnd > RPC_MAXCWND(xprt)) cwnd = RPC_MAXCWND(xprt); - __xprt_lock_write_next(xprt); + __xprt_lock_write_next_cong(xprt); } else if (result == -ETIMEDOUT) { cwnd >>= 1; if (cwnd < RPC_CWNDSCALE) @@ -693,7 +742,7 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = -ENOTCONN; else if (!req->rq_received) rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); - __xprt_release_write(xprt, task); + xprt->ops->release_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); return; } @@ -792,7 +841,7 @@ void xprt_release(struct rpc_task *task) if (!(req = task->tk_rqstp)) return; spin_lock_bh(&xprt->transport_lock); - __xprt_release_write(xprt, task); + xprt->ops->release_xprt(xprt, task); __xprt_put_cong(xprt, req); if (!list_empty(&req->rq_list)) list_del(&req->rq_list); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fc4fbe8ea346..8589c1ad55e3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1046,6 +1046,7 @@ static void xs_connect(struct rpc_task *task) static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, + .release_xprt = xprt_release_xprt_cong, .connect = xs_connect, .send_request = xs_udp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_rtt, @@ -1056,6 +1057,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .set_buffer_size = xs_tcp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xprt_release_xprt, .connect = xs_connect, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, -- cgit v1.2.3 From 46c0ee8bc4ad3743de05e8b8b20201df44dcb6d3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:52 -0700 Subject: [PATCH] RPC: separate xprt_timer implementations Allow transports to hook the retransmit timer interrupt. Some transports calculate their congestion window here so that a retransmit timeout has immediate effect on the congestion window. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/xprt.c | 45 ++++++++++++++++++++------------------------- net/sunrpc/xprtsock.c | 12 ++++++++++++ 3 files changed, 34 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 86833b725bb5..443c3f984cf9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -137,6 +137,7 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); + void (*timer)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); }; @@ -257,6 +258,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); void xprt_wait_for_buffer_space(struct rpc_task *task); void xprt_write_space(struct rpc_xprt *xprt); +void xprt_adjust_cwnd(struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); void xprt_disconnect(struct rpc_xprt *xprt); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e92ea99dd318..ffc595592af3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -289,16 +289,19 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) __xprt_lock_write_next_cong(xprt); } -/* - * Adjust RPC congestion window +/** + * xprt_adjust_cwnd - adjust transport congestion window + * @task: recently completed RPC request used to adjust window + * @result: result code of completed RPC request + * * We use a time-smoothed congestion estimator to avoid heavy oscillation. */ -static void -xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) +void xprt_adjust_cwnd(struct rpc_task *task, int result) { - unsigned long cwnd; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; + unsigned long cwnd = xprt->cwnd; - cwnd = xprt->cwnd; if (result >= 0 && cwnd <= xprt->cong) { /* The (cwnd >> 1) term makes sure * the result gets rounded properly. */ @@ -314,6 +317,7 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", xprt->cong, xprt->cwnd, cwnd); xprt->cwnd = cwnd; + __xprt_put_cong(xprt, req); } /** @@ -602,8 +606,7 @@ void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) /* Adjust congestion window */ if (!xprt->nocong) { unsigned timer = task->tk_msg.rpc_proc->p_timer; - xprt_adjust_cwnd(xprt, copied); - __xprt_put_cong(xprt, req); + xprt_adjust_cwnd(task, copied); if (timer) { if (req->rq_ntrans == 1) rpc_update_rtt(clnt->cl_rtt, timer, @@ -640,27 +643,19 @@ void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) return; } -/* - * RPC receive timeout handler. - */ -static void -xprt_timer(struct rpc_task *task) +static void xprt_timer(struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; + struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - spin_lock(&xprt->transport_lock); - if (req->rq_received) - goto out; - - xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); - __xprt_put_cong(xprt, req); + dprintk("RPC: %4d xprt_timer\n", task->tk_pid); - dprintk("RPC: %4d xprt_timer (%s request)\n", - task->tk_pid, req ? "pending" : "backlogged"); - - task->tk_status = -ETIMEDOUT; -out: + spin_lock(&xprt->transport_lock); + if (!req->rq_received) { + if (xprt->ops->timer) + xprt->ops->timer(task); + task->tk_status = -ETIMEDOUT; + } task->tk_timeout = 0; rpc_wake_up_task(task); spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8589c1ad55e3..c3658ff027a6 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -860,6 +860,17 @@ static void xs_tcp_set_buffer_size(struct rpc_xprt *xprt) return; } +/** + * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport + * @task: task that timed out + * + * Adjust the congestion window after a retransmit timeout has occurred. + */ +static void xs_udp_timer(struct rpc_task *task) +{ + xprt_adjust_cwnd(task, -ETIMEDOUT); +} + static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) { struct sockaddr_in myaddr = { @@ -1050,6 +1061,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .connect = xs_connect, .send_request = xs_udp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_rtt, + .timer = xs_udp_timer, .close = xs_close, .destroy = xs_destroy, }; -- cgit v1.2.3 From 1570c1e41eabf6b7031f3e4322a2cf1cbe319fee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:52 -0700 Subject: [PATCH] RPC: add generic interface for adjusting the congestion window A new interface that allows transports to adjust their congestion window using the Van Jacobson implementation in xprt.c is provided. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- net/sunrpc/xprt.c | 66 ++++++++++++++++++--------------------------- net/sunrpc/xprtsock.c | 13 ++++----- 3 files changed, 33 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 443c3f984cf9..2e48752d55d9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -258,9 +258,10 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); void xprt_wait_for_buffer_space(struct rpc_task *task); void xprt_write_space(struct rpc_xprt *xprt); +void xprt_update_rtt(struct rpc_task *task); void xprt_adjust_cwnd(struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); -void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); +void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_disconnect(struct rpc_xprt *xprt); /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ffc595592af3..707806fe1a23 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -591,56 +591,42 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) return req; } +/** + * xprt_update_rtt - update an RPC client's RTT state after receiving a reply + * @task: RPC request that recently completed + * + */ +void xprt_update_rtt(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_rtt *rtt = task->tk_client->cl_rtt; + unsigned timer = task->tk_msg.rpc_proc->p_timer; + + if (timer) { + if (req->rq_ntrans == 1) + rpc_update_rtt(rtt, timer, + (long)jiffies - req->rq_xtime); + rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); + } +} + /** * xprt_complete_rqst - called when reply processing is complete - * @xprt: controlling transport - * @req: RPC request that just completed + * @task: RPC request that recently completed * @copied: actual number of bytes received from the transport * + * Caller holds transport lock. */ -void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) -{ - struct rpc_task *task = req->rq_task; - struct rpc_clnt *clnt = task->tk_client; - - /* Adjust congestion window */ - if (!xprt->nocong) { - unsigned timer = task->tk_msg.rpc_proc->p_timer; - xprt_adjust_cwnd(task, copied); - if (timer) { - if (req->rq_ntrans == 1) - rpc_update_rtt(clnt->cl_rtt, timer, - (long)jiffies - req->rq_xtime); - rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); - } - } +void xprt_complete_rqst(struct rpc_task *task, int copied) +{ + struct rpc_rqst *req = task->tk_rqstp; -#ifdef RPC_PROFILE - /* Profile only reads for now */ - if (copied > 1024) { - static unsigned long nextstat; - static unsigned long pkt_rtt, pkt_len, pkt_cnt; - - pkt_cnt++; - pkt_len += req->rq_slen + copied; - pkt_rtt += jiffies - req->rq_xtime; - if (time_before(nextstat, jiffies)) { - printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd); - printk("RPC: %ld %ld %ld %ld stat\n", - jiffies, pkt_cnt, pkt_len, pkt_rtt); - pkt_rtt = pkt_len = pkt_cnt = 0; - nextstat = jiffies + 5 * HZ; - } - } -#endif + dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", + task->tk_pid, ntohl(req->rq_xid), copied); - dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); list_del_init(&req->rq_list); req->rq_received = req->rq_private_buf.len = copied; - - /* ... and wake up the process. */ rpc_wake_up_task(task); - return; } static void xprt_timer(struct rpc_task *task) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c3658ff027a6..980f26504f48 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -460,8 +460,6 @@ static void xs_udp_data_ready(struct sock *sk, int len) goto out_unlock; task = rovr->rq_task; - dprintk("RPC: %4d received reply\n", task->tk_pid); - if ((copied = rovr->rq_private_buf.buflen) > repsize) copied = repsize; @@ -472,7 +470,9 @@ static void xs_udp_data_ready(struct sock *sk, int len) /* Something worked... */ dst_confirm(skb->dst); - xprt_complete_rqst(xprt, rovr, copied); + xprt_adjust_cwnd(task, copied); + xprt_update_rtt(task); + xprt_complete_rqst(task, copied); out_unlock: spin_unlock(&xprt->transport_lock); @@ -634,11 +634,8 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc } out: - if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { - dprintk("RPC: %4d received reply complete\n", - req->rq_task->tk_pid); - xprt_complete_rqst(xprt, req, xprt->tcp_copied); - } + if (!(xprt->tcp_flags & XPRT_COPY_DATA)) + xprt_complete_rqst(req->rq_task, xprt->tcp_copied); spin_unlock(&xprt->transport_lock); xs_tcp_check_recm(xprt); } -- cgit v1.2.3 From a58dd398f5db4f73d5c581069fd70a4304cc4f0a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:53 -0700 Subject: [PATCH] RPC: add a release_rqst callout to the RPC transport switch The final place where congestion control state is adjusted is in xprt_release, where each request is finally released. Add a callout there to allow transports to perform additional processing when a request is about to be released. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/xprt.c | 14 +++++++++++++- net/sunrpc/xprtsock.c | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 2e48752d55d9..64e77658fa30 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -138,6 +138,7 @@ struct rpc_xprt_ops { int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_task *task); + void (*release_request)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); }; @@ -262,6 +263,7 @@ void xprt_update_rtt(struct rpc_task *task); void xprt_adjust_cwnd(struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); void xprt_complete_rqst(struct rpc_task *task, int copied); +void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect(struct rpc_xprt *xprt); /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 707806fe1a23..e8d11bd6158e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -289,6 +289,17 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) __xprt_lock_write_next_cong(xprt); } +/** + * xprt_release_rqst_cong - housekeeping when request is complete + * @task: RPC request that recently completed + * + * Useful for transports that require congestion control. + */ +void xprt_release_rqst_cong(struct rpc_task *task) +{ + __xprt_put_cong(task->tk_xprt, task->tk_rqstp); +} + /** * xprt_adjust_cwnd - adjust transport congestion window * @task: recently completed RPC request used to adjust window @@ -823,7 +834,8 @@ void xprt_release(struct rpc_task *task) return; spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); - __xprt_put_cong(xprt, req); + if (xprt->ops->release_request) + xprt->ops->release_request(task); if (!list_empty(&req->rq_list)) list_del(&req->rq_list); xprt->last_used = jiffies; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 980f26504f48..6c2f5dcea416 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1059,6 +1059,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .send_request = xs_udp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_rtt, .timer = xs_udp_timer, + .release_request = xprt_release_rqst_cong, .close = xs_close, .destroy = xs_destroy, }; -- cgit v1.2.3 From ed63c003701a314c4893c11eceb9d68f8f46c662 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:53 -0700 Subject: [PATCH] RPC: remove xprt->nocong Get rid of the "xprt->nocong" variable. Test-plan: Use WAN simulation to cause sporadic bursty packet loss with UDP mounts. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 1 - include/linux/sunrpc/xprt.h | 1 - net/sunrpc/xprtsock.c | 2 -- 3 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7901f5b8092c..c4c8601096e0 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -188,7 +188,6 @@ nlm_bind_host(struct nlm_host *host) goto forgetit; xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); - xprt->nocong = 1; /* No congestion control for NLM */ xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 64e77658fa30..559fb471f6f2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -172,7 +172,6 @@ struct rpc_xprt { unsigned int max_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ - nocong : 1, /* no congestion control */ resvport : 1; /* use a reserved port */ /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6c2f5dcea416..7e5e020fe78d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1100,7 +1100,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_UDP; xprt->port = XS_MAX_RESVPORT; xprt->tsh_size = 0; - xprt->nocong = 0; xprt->cwnd = RPC_INITCWND; xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ @@ -1140,7 +1139,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_TCP; xprt->port = XS_MAX_RESVPORT; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); - xprt->nocong = 1; xprt->cwnd = RPC_MAXCWND(xprt); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; -- cgit v1.2.3 From 555ee3af161b037865793bd4bebc06b58daafde6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:54 -0700 Subject: [PATCH] RPC: clean up after nocong was removed Clean-up: Move some macros that are specific to the Van Jacobson implementation into xprt.c. Get rid of the cong_wait field in rpc_xprt, which is no longer used. Get rid of xprt_clear_backlog. Test-plan: Compile with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 22 ---------------------- net/sunrpc/xprt.c | 29 +++++++++++++++++++---------- net/sunrpc/xprtsock.c | 2 -- 3 files changed, 19 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 559fb471f6f2..dcf0326bda01 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -15,20 +15,6 @@ #include #include -/* - * The transport code maintains an estimate on the maximum number of out- - * standing RPC requests, using a smoothed version of the congestion - * avoidance implemented in 44BSD. This is basically the Van Jacobson - * congestion algorithm: If a retransmit occurs, the congestion window is - * halved; otherwise, it is incremented by 1/cwnd when - * - * - a reply is received and - * - a full number of requests are outstanding and - * - the congestion window hasn't been updated recently. - * - * Upper procedures may check whether a request would block waiting for - * a free RPC slot by using the RPC_CONGESTED() macro. - */ extern unsigned int xprt_udp_slot_table_entries; extern unsigned int xprt_tcp_slot_table_entries; @@ -36,12 +22,6 @@ extern unsigned int xprt_tcp_slot_table_entries; #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) -#define RPC_CWNDSHIFT (8U) -#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) -#define RPC_INITCWND RPC_CWNDSCALE -#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) -#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) - /* Default timeout values */ #define RPC_MAX_UDP_TIMEOUT (60*HZ) #define RPC_MAX_TCP_TIMEOUT (600*HZ) @@ -213,8 +193,6 @@ struct rpc_xprt { void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); - - wait_queue_head_t cong_wait; }; #define XPRT_LAST_FRAG (1 << 0) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e8d11bd6158e..0458319a1bdd 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,7 +62,23 @@ static inline void do_xprt_reserve(struct rpc_task *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); -static int xprt_clear_backlog(struct rpc_xprt *xprt); +/* + * The transport code maintains an estimate on the maximum number of out- + * standing RPC requests, using a smoothed version of the congestion + * avoidance implemented in 44BSD. This is basically the Van Jacobson + * congestion algorithm: If a retransmit occurs, the congestion window is + * halved; otherwise, it is incremented by 1/cwnd when + * + * - a reply is received and + * - a full number of requests are outstanding and + * - the congestion window hasn't been updated recently. + */ +#define RPC_CWNDSHIFT (8U) +#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) +#define RPC_INITCWND RPC_CWNDSCALE +#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) + +#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) /** * xprt_reserve_xprt - serialize write access to transports @@ -850,7 +866,7 @@ void xprt_release(struct rpc_task *task) spin_lock(&xprt->reserve_lock); list_add(&req->rq_list, &xprt->free); - xprt_clear_backlog(xprt); + rpc_wake_up_next(&xprt->backlog); spin_unlock(&xprt->reserve_lock); } @@ -902,7 +918,6 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); - init_waitqueue_head(&xprt->cong_wait); INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); @@ -911,6 +926,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc xprt->timer.function = xprt_init_autodisconnect; xprt->timer.data = (unsigned long) xprt; xprt->last_used = jiffies; + xprt->cwnd = RPC_INITCWND; rpc_init_wait_queue(&xprt->pending, "xprt_pending"); rpc_init_wait_queue(&xprt->sending, "xprt_sending"); @@ -955,16 +971,9 @@ static void xprt_shutdown(struct rpc_xprt *xprt) rpc_wake_up(&xprt->resend); xprt_wake_pending_tasks(xprt, -EIO); rpc_wake_up(&xprt->backlog); - wake_up(&xprt->cong_wait); del_timer_sync(&xprt->timer); } -static int xprt_clear_backlog(struct rpc_xprt *xprt) { - rpc_wake_up_next(&xprt->backlog); - wake_up(&xprt->cong_wait); - return 1; -} - /** * xprt_destroy - destroy an RPC transport, killing off all requests. * @xprt: transport to destroy diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7e5e020fe78d..26402c063f00 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1100,7 +1100,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_UDP; xprt->port = XS_MAX_RESVPORT; xprt->tsh_size = 0; - xprt->cwnd = RPC_INITCWND; xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); @@ -1139,7 +1138,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_TCP; xprt->port = XS_MAX_RESVPORT; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); - xprt->cwnd = RPC_MAXCWND(xprt); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; -- cgit v1.2.3 From 529b33c6db0120126b1381faa51406dc463acdc9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:54 -0700 Subject: [PATCH] RPC: allow RPC client's port range to be adjustable Select an RPC client source port between 650 and 1023 instead of between 1 and 800. The old range conflicts with a number of network services. Provide sysctls to allow admins to select a different port range. Note that this doesn't affect user-level RPC library behavior, which still uses 1 to 800. Based on a suggestion by Olaf Kirch . Test-plan: Repeated mount and unmount. Destructive testing. Idle timeouts. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 2 ++ include/linux/sunrpc/xprt.h | 17 ++++++++++++++--- net/sunrpc/sysctl.c | 29 +++++++++++++++++++++++++++++ net/sunrpc/xprtsock.c | 23 ++++++++--------------- 4 files changed, 53 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 42d299747956..1a42d902bc11 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -95,6 +95,8 @@ enum { CTL_NLMDEBUG, CTL_SLOTTABLE_UDP, CTL_SLOTTABLE_TCP, + CTL_MIN_RESVPORT, + CTL_MAX_RESVPORT, }; #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index dcf0326bda01..9d9266cf8a36 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -51,6 +51,17 @@ extern unsigned int xprt_tcp_slot_table_entries; #define RPC_CALLHDRSIZE 6 #define RPC_REPHDRSIZE 4 +/* + * Parameters for choosing a free port + */ +extern unsigned int xprt_min_resvport; +extern unsigned int xprt_max_resvport; + +#define RPC_MIN_RESVPORT (1U) +#define RPC_MAX_RESVPORT (65535U) +#define RPC_DEF_MIN_RESVPORT (650U) +#define RPC_DEF_MAX_RESVPORT (1023U) + /* * This describes a timeout strategy */ @@ -62,6 +73,9 @@ struct rpc_timeout { unsigned char to_exponential; }; +struct rpc_task; +struct rpc_xprt; + /* * This describes a complete RPC request */ @@ -107,9 +121,6 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len -struct rpc_task; -struct rpc_xprt; - struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt); int (*reserve_xprt)(struct rpc_task *task); diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index ef483262f17f..d0c9f460e411 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -121,9 +121,16 @@ done: unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; +EXPORT_SYMBOL(xprt_min_resvport); +unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +EXPORT_SYMBOL(xprt_max_resvport); + static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; +static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; +static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; static ctl_table debug_table[] = { { @@ -180,6 +187,28 @@ static ctl_table debug_table[] = { .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, + { + .ctl_name = CTL_MIN_RESVPORT, + .procname = "min_resvport", + .data = &xprt_min_resvport, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xprt_min_resvport_limit, + .extra2 = &xprt_max_resvport_limit + }, + { + .ctl_name = CTL_MAX_RESVPORT, + .procname = "max_resvport", + .data = &xprt_max_resvport, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xprt_min_resvport_limit, + .extra2 = &xprt_max_resvport_limit + }, { .ctl_name = 0 } }; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 26402c063f00..62c2e7caa345 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -35,11 +35,6 @@ #include #include -/* - * Maximum port number to use when requesting a reserved port. - */ -#define XS_MAX_RESVPORT (800U) - /* * How many times to try sending a request on a socket before waiting * for the socket buffer to clear. @@ -873,10 +868,9 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) struct sockaddr_in myaddr = { .sin_family = AF_INET, }; - int err, port; + int err; + unsigned short port = xprt->port; - /* Were we already bound to a given port? Try to reuse it */ - port = xprt->port; do { myaddr.sin_port = htons(port); err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, @@ -887,8 +881,10 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) port); return 0; } - if (--port == 0) - port = XS_MAX_RESVPORT; + if (port <= xprt_min_resvport) + port = xprt_max_resvport; + else + port--; } while (err == -EADDRINUSE && port != xprt->port); dprintk("RPC: can't bind to reserved port (%d).\n", -err); @@ -1075,9 +1071,6 @@ static struct rpc_xprt_ops xs_tcp_ops = { .destroy = xs_destroy, }; -extern unsigned int xprt_udp_slot_table_entries; -extern unsigned int xprt_tcp_slot_table_entries; - /** * xs_setup_udp - Set up transport to use a UDP socket * @xprt: transport to set up @@ -1098,7 +1091,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) memset(xprt->slot, 0, slot_table_size); xprt->prot = IPPROTO_UDP; - xprt->port = XS_MAX_RESVPORT; + xprt->port = xprt_max_resvport; xprt->tsh_size = 0; xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ @@ -1136,7 +1129,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) memset(xprt->slot, 0, slot_table_size); xprt->prot = IPPROTO_TCP; - xprt->port = XS_MAX_RESVPORT; + xprt->port = xprt_max_resvport; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; -- cgit v1.2.3 From 03bf4b707eee06706c9db343dd5c905b7ee47ed2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:55 -0700 Subject: [PATCH] RPC: parametrize various transport connect timeouts Each transport implementation can now set unique bind, connect, reestablishment, and idle timeout values. These are variables, allowing the values to be modified dynamically. This permits exponential backoff of any of these values, for instance. As an example, we implement exponential backoff for the connection reestablishment timeout. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 10 +++---- include/linux/nfs_fs.h | 4 +++ include/linux/sunrpc/xprt.h | 29 ++++--------------- net/sunrpc/clnt.c | 2 +- net/sunrpc/xprt.c | 5 ++-- net/sunrpc/xprtsock.c | 68 +++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 84 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b6a1ca508e60..062911e7ceb5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -369,8 +369,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned case IPPROTO_TCP: if (!to->to_initval) to->to_initval = 60 * HZ; - if (to->to_initval > RPC_MAX_TCP_TIMEOUT) - to->to_initval = RPC_MAX_TCP_TIMEOUT; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; to->to_increment = to->to_initval; to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); to->to_exponential = 0; @@ -379,9 +379,9 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned default: if (!to->to_initval) to->to_initval = 11 * HZ / 10; - if (to->to_initval > RPC_MAX_UDP_TIMEOUT) - to->to_initval = RPC_MAX_UDP_TIMEOUT; - to->to_maxval = RPC_MAX_UDP_TIMEOUT; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; to->to_exponential = 1; break; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9a6047ff1b25..7bac2785c6e4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -41,6 +41,10 @@ #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 +/* Default timeout values */ +#define NFS_MAX_UDP_TIMEOUT (60*HZ) +#define NFS_MAX_TCP_TIMEOUT (600*HZ) + /* * superblock magic number for NFS */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9d9266cf8a36..2543adf18551 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -22,28 +22,6 @@ extern unsigned int xprt_tcp_slot_table_entries; #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) -/* Default timeout values */ -#define RPC_MAX_UDP_TIMEOUT (60*HZ) -#define RPC_MAX_TCP_TIMEOUT (600*HZ) - -/* - * Wait duration for an RPC TCP connection to be established. Solaris - * NFS over TCP uses 60 seconds, for example, which is in line with how - * long a server takes to reboot. - */ -#define RPC_CONNECT_TIMEOUT (60*HZ) - -/* - * Delay an arbitrary number of seconds before attempting to reconnect - * after an error. - */ -#define RPC_REESTABLISH_TIMEOUT (15*HZ) - -/* - * RPC transport idle timeout. - */ -#define RPC_IDLE_DISCONNECT_TIMEOUT (5*60*HZ) - /* * RPC call and reply header size as number of 32bit words (verifier * size computed separately) @@ -182,14 +160,19 @@ struct rpc_xprt { /* * Connection of transports */ + unsigned long connect_timeout, + bind_timeout, + reestablish_timeout; struct work_struct connect_worker; unsigned short port; + /* * Disconnection of idle transports */ struct work_struct task_cleanup; struct timer_list timer; - unsigned long last_used; + unsigned long last_used, + idle_timeout; /* * Send stuff diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cc1b773a79d3..24b44e73f391 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -740,7 +740,7 @@ call_bind(struct rpc_task *task) task->tk_action = call_connect; if (!clnt->cl_port) { task->tk_action = call_bind_status; - task->tk_timeout = RPC_CONNECT_TIMEOUT; + task->tk_timeout = task->tk_xprt->bind_timeout; rpc_getport(task, clnt); } } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0458319a1bdd..215be0d0ef6b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -551,7 +551,7 @@ void xprt_connect(struct rpc_task *task) if (task->tk_rqstp) task->tk_rqstp->rq_bytes_sent = 0; - task->tk_timeout = RPC_CONNECT_TIMEOUT; + task->tk_timeout = xprt->connect_timeout; rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); xprt->ops->connect(task); } @@ -763,7 +763,6 @@ void xprt_transmit(struct rpc_task *task) switch (status) { case -ECONNREFUSED: - task->tk_timeout = RPC_REESTABLISH_TIMEOUT; rpc_sleep_on(&xprt->sending, task, NULL, NULL); case -EAGAIN: case -ENOTCONN: @@ -857,7 +856,7 @@ void xprt_release(struct rpc_task *task) xprt->last_used = jiffies; if (list_empty(&xprt->recv) && !xprt->shutdown) mod_timer(&xprt->timer, - xprt->last_used + RPC_IDLE_DISCONNECT_TIMEOUT); + xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); task->tk_rqstp = NULL; memset(req, 0, sizeof(*req)); /* mark unused */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 88ac71fcd335..06c2d95484e0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -41,6 +41,50 @@ */ #define XS_SENDMSG_RETRY (10U) +/* + * Time out for an RPC UDP socket connect. UDP socket connects are + * synchronous, but we set a timeout anyway in case of resource + * exhaustion on the local host. + */ +#define XS_UDP_CONN_TO (5U * HZ) + +/* + * Wait duration for an RPC TCP connection to be established. Solaris + * NFS over TCP uses 60 seconds, for example, which is in line with how + * long a server takes to reboot. + */ +#define XS_TCP_CONN_TO (60U * HZ) + +/* + * Wait duration for a reply from the RPC portmapper. + */ +#define XS_BIND_TO (60U * HZ) + +/* + * Delay if a UDP socket connect error occurs. This is most likely some + * kind of resource problem on the local host. + */ +#define XS_UDP_REEST_TO (2U * HZ) + +/* + * The reestablish timeout allows clients to delay for a bit before attempting + * to reconnect to a server that just dropped our connection. + * + * We implement an exponential backoff when trying to reestablish a TCP + * transport connection with the server. Some servers like to drop a TCP + * connection when they are overworked, so we start with a short timeout and + * increase over time if the server is down or not responding. + */ +#define XS_TCP_INIT_REEST_TO (3U * HZ) +#define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) + +/* + * TCP idle timeout; client drops the transport socket if it is idle + * for this long. Note that we also timeout UDP sockets to prevent + * holding port numbers when there is no RPC traffic. + */ +#define XS_IDLE_DISC_TO (5U * 60 * HZ) + #ifdef RPC_DEBUG # undef RPC_DEBUG_DATA # define RPCDBG_FACILITY RPCDBG_TRANS @@ -739,6 +783,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt->tcp_reclen = 0; xprt->tcp_copied = 0; xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; xprt_wake_pending_tasks(xprt, 0); } spin_unlock_bh(&xprt->transport_lock); @@ -1066,6 +1111,13 @@ out_clear: * @task: address of RPC task that manages state of connect request * * TCP: If the remote end dropped the connection, delay reconnecting. + * + * UDP socket connects are synchronous, but we use a work queue anyway + * to guarantee that even unprivileged user processes can set up a + * socket on a privileged port. + * + * If a UDP socket connect fails, the delay behavior here prevents + * retry floods (hard mounts). */ static void xs_connect(struct rpc_task *task) { @@ -1075,9 +1127,13 @@ static void xs_connect(struct rpc_task *task) return; if (xprt->sock != NULL) { - dprintk("RPC: xs_connect delayed xprt %p\n", xprt); + dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", + xprt, xprt->reestablish_timeout / HZ); schedule_delayed_work(&xprt->connect_worker, - RPC_REESTABLISH_TIMEOUT); + xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) + xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); schedule_work(&xprt->connect_worker); @@ -1139,6 +1195,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); + xprt->bind_timeout = XS_BIND_TO; + xprt->connect_timeout = XS_UDP_CONN_TO; + xprt->reestablish_timeout = XS_UDP_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; xprt->ops = &xs_udp_ops; @@ -1176,6 +1236,10 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); + xprt->bind_timeout = XS_BIND_TO; + xprt->connect_timeout = XS_TCP_CONN_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; xprt->ops = &xs_tcp_ops; -- cgit v1.2.3 From 470056c288334eb0b37be26c9ff8aee37ed1cc7a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:56 -0700 Subject: [PATCH] RPC: rationalize set_buffer_size In fact, ->set_buffer_size should be completely functionless for non-UDP. Test-plan: Check socket buffer size on UDP sockets over time. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 4 ++-- net/sunrpc/clnt.c | 10 ++-------- net/sunrpc/xprtsock.c | 30 +++++++++++++++--------------- 3 files changed, 19 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 2543adf18551..99cad3ead81d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -100,7 +100,7 @@ struct rpc_rqst { #define rq_slen rq_snd_buf.len struct rpc_xprt_ops { - void (*set_buffer_size)(struct rpc_xprt *xprt); + void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*connect)(struct rpc_task *task); @@ -124,7 +124,7 @@ struct rpc_xprt { unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ - unsigned int rcvsize, /* transport rcv buffer size */ + size_t rcvsize, /* transport rcv buffer size */ sndsize; /* transport send buffer size */ size_t max_payload; /* largest RPC payload size, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 24b44e73f391..5a8f01d726e9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -517,14 +517,8 @@ void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { struct rpc_xprt *xprt = clnt->cl_xprt; - - xprt->sndsize = 0; - if (sndsize) - xprt->sndsize = sndsize + RPC_SLACK_SPACE; - xprt->rcvsize = 0; - if (rcvsize) - xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; - xprt->ops->set_buffer_size(xprt); + if (xprt->ops->set_buffer_size) + xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 06c2d95484e0..2e1529217e65 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -865,15 +865,7 @@ static void xs_tcp_write_space(struct sock *sk) read_unlock(&sk->sk_callback_lock); } -/** - * xs_udp_set_buffer_size - set send and receive limits - * @xprt: generic transport - * - * Set socket send and receive limits based on the - * sndsize and rcvsize fields in the generic transport - * structure. - */ -static void xs_udp_set_buffer_size(struct rpc_xprt *xprt) +static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) { struct sock *sk = xprt->inet; @@ -889,14 +881,23 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt) } /** - * xs_tcp_set_buffer_size - set send and receive limits + * xs_udp_set_buffer_size - set send and receive limits * @xprt: generic transport + * @sndsize: requested size of send buffer, in bytes + * @rcvsize: requested size of receive buffer, in bytes * - * Nothing to do for TCP. + * Set socket send and receive buffer size limits. */ -static void xs_tcp_set_buffer_size(struct rpc_xprt *xprt) +static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) { - return; + xprt->sndsize = 0; + if (sndsize) + xprt->sndsize = sndsize + 1024; + xprt->rcvsize = 0; + if (rcvsize) + xprt->rcvsize = rcvsize + 1024; + + xs_udp_do_set_buffer_size(xprt); } /** @@ -989,7 +990,7 @@ static void xs_udp_connect_worker(void *args) write_unlock_bh(&sk->sk_callback_lock); } - xs_udp_set_buffer_size(xprt); + xs_udp_do_set_buffer_size(xprt); status = 0; out: xprt_wake_pending_tasks(xprt, status); @@ -1158,7 +1159,6 @@ static struct rpc_xprt_ops xs_udp_ops = { }; static struct rpc_xprt_ops xs_tcp_ops = { - .set_buffer_size = xs_tcp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, .connect = xs_connect, -- cgit v1.2.3 From 278c995c8a153bb2a9bc427e931cfb9c8034c9d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Jul 2005 23:53:01 +0100 Subject: [PATCH] RPC,NFS: new rpc_pipefs patch Currently rpc_mkdir/rpc_rmdir and rpc_mkpipe/mk_unlink have an API that's a little unfortunate. They take a path relative to the rpc_pipefs root and thus need to perform a full lookup. If you look at debugfs or usbfs they always store the dentry for directories they created and thus can pass in a dentry + single pathname component pair into their equivalents of the above functions. And in fact rpc_pipefs actually stores a dentry for all but one component so this change not only simplifies the core rpc_pipe code but also the callers. Unfortuntately this code path is only used by the NFS4 idmapper and AUTH_GSSAPI for which I don't have a test enviroment. Could someone give it a spin? It's the last bit needed before we can rework the lookup_hash API Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 10 +- include/linux/sunrpc/clnt.h | 2 +- include/linux/sunrpc/rpc_pipe_fs.h | 9 +- net/sunrpc/auth_gss/auth_gss.c | 9 +- net/sunrpc/clnt.c | 53 +++++--- net/sunrpc/rpc_pipe.c | 268 +++++++++++++------------------------ 6 files changed, 142 insertions(+), 209 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index ffb8df91dc34..1d0a5bf0d264 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -66,7 +66,6 @@ struct idmap_hashtable { }; struct idmap { - char idmap_path[48]; struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; @@ -102,11 +101,8 @@ nfs_idmap_new(struct nfs4_client *clp) memset(idmap, 0, sizeof(*idmap)); - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), - "%s/idmap", clp->cl_rpcclient->cl_pathname); - - idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, - idmap, &idmap_upcall_ops, 0); + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, + "idmap", idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { kfree(idmap); return; @@ -128,7 +124,7 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; - rpc_unlink(idmap->idmap_path); + rpc_unlink(idmap->idmap_dentry); clp->cl_idmap = NULL; kfree(idmap); } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab151bbb66df..b5b51c196690 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -59,7 +59,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - char cl_pathname[30];/* Path in rpc_pipe_fs */ + struct dentry * __cl_parent_dentry; struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 63929349571f..63878d05c9a9 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -41,10 +41,11 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); -extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); -extern int rpc_rmdir(char *); -extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); -extern int rpc_unlink(char *); +extern struct dentry *rpc_mkdir(struct dentry *, char *, struct rpc_clnt *); +extern void rpc_rmdir(struct dentry *); +extern struct dentry *rpc_mkpipe(struct dentry *, char *, void *, + struct rpc_pipe_ops *, int flags); +extern void rpc_unlink(struct dentry *); #endif #endif diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d2b08f16c257..bd2555139fa9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -87,7 +87,6 @@ struct gss_auth { struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - char path[48]; spinlock_t lock; }; @@ -690,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) if (err) goto err_put_mech; - snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", - clnt->cl_pathname, - gss_auth->mech->gm_name); - gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, + clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(gss_auth->dentry)) { err = PTR_ERR(gss_auth->dentry); goto err_put_mech; @@ -718,7 +715,7 @@ gss_destroy(struct rpc_auth *auth) auth, auth->au_flavor); gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->path); + rpc_unlink(gss_auth->dentry); gss_mech_put(gss_auth->mech); rpcauth_free_credcache(auth); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a8f01d726e9..63bf591310e0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -67,26 +67,42 @@ static u32 * call_verify(struct rpc_task *task); static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { - static uint32_t clntid; + static unsigned int clntid; + char name[128]; int error; if (dir_name == NULL) return 0; - for (;;) { - snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), - "%s/clnt%x", dir_name, - (unsigned int)clntid++); - clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); - if (!IS_ERR(clnt->cl_dentry)) - return 0; + + retry_parent: + clnt->__cl_parent_dentry = rpc_mkdir(NULL, dir_name, NULL); + if (IS_ERR(clnt->__cl_parent_dentry)) { + error = PTR_ERR(clnt->__cl_parent_dentry); + if (error == -EEXIST) + goto retry_parent; /* XXX(hch): WTF? */ + + printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", + dir_name, error); + return error; + } + + + retry_child: + snprintf(name, sizeof(name), "clnt%x", clntid++); + name[sizeof(name) - 1] = '\0'; + + clnt->cl_dentry = rpc_mkdir(clnt->__cl_parent_dentry, name, clnt); + if (IS_ERR(clnt->cl_dentry)) { error = PTR_ERR(clnt->cl_dentry); - if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", - clnt->cl_pathname, error); - return error; - } + if (error == -EEXIST) + goto retry_child; + printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", + name, error); + rpc_rmdir(clnt->__cl_parent_dentry); + return error; } + + return 0; } /* @@ -174,7 +190,8 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, return clnt; out_no_auth: - rpc_rmdir(clnt->cl_pathname); + rpc_rmdir(clnt->cl_dentry); + rpc_rmdir(clnt->__cl_parent_dentry); out_no_path: if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); @@ -302,8 +319,10 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpc_destroy_client(clnt->cl_parent); goto out_free; } - if (clnt->cl_pathname[0]) - rpc_rmdir(clnt->cl_pathname); + if (clnt->cl_dentry) + rpc_rmdir(clnt->cl_dentry); + if (clnt->__cl_parent_dentry) + rpc_rmdir(clnt->__cl_parent_dentry); if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ded6c63f11ec..b382809726d8 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -414,38 +414,6 @@ rpc_put_mount(void) simple_release_fs(&rpc_mount, &rpc_mount_count); } -static int -rpc_lookup_parent(char *path, struct nameidata *nd) -{ - if (path[0] == '\0') - return -ENOENT; - if (rpc_get_mount()) { - printk(KERN_WARNING "%s: %s failed to mount " - "pseudofilesystem \n", __FILE__, __FUNCTION__); - return -ENODEV; - } - nd->mnt = mntget(rpc_mount); - nd->dentry = dget(rpc_mount->mnt_root); - nd->last_type = LAST_ROOT; - nd->flags = LOOKUP_PARENT; - nd->depth = 0; - - if (path_walk(path, nd)) { - printk(KERN_WARNING "%s: %s failed to find path %s\n", - __FILE__, __FUNCTION__, path); - rpc_put_mount(); - return -ENOENT; - } - return 0; -} - -static void -rpc_release_path(struct nameidata *nd) -{ - path_release(nd); - rpc_put_mount(); -} - static struct inode * rpc_get_inode(struct super_block *sb, int mode) { @@ -550,197 +518,149 @@ out_bad: return -ENOMEM; } -static int -__rpc_mkdir(struct inode *dir, struct dentry *dentry) +struct dentry * +rpc_mkdir(struct dentry *parent, char *name, struct rpc_clnt *rpc_client) { + struct inode *dir; + struct dentry *dentry; struct inode *inode; - - inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR); - if (!inode) - goto out_err; - inode->i_ino = iunique(dir->i_sb, 100); - d_instantiate(dentry, inode); - dir->i_nlink++; - inode_dir_notify(dir, DN_CREATE); - rpc_get_mount(); - return 0; -out_err: - printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", - __FILE__, __FUNCTION__, dentry->d_name.name); - return -ENOMEM; -} - -static int -__rpc_rmdir(struct inode *dir, struct dentry *dentry) -{ int error; - shrink_dcache_parent(dentry); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - rpc_inode_setowner(dentry->d_inode, NULL); - } - if ((error = simple_rmdir(dir, dentry)) != 0) - return error; - if (!error) { - inode_dir_notify(dir, DN_DELETE); - d_drop(dentry); - rpc_put_mount(); - } - return 0; -} + if (!parent) + parent = rpc_mount->mnt_root; -static struct dentry * -rpc_lookup_negative(char *path, struct nameidata *nd) -{ - struct dentry *dentry; - struct inode *dir; - int error; - - if ((error = rpc_lookup_parent(path, nd)) != 0) + dir = parent->d_inode; + + error = rpc_get_mount(); + if (error) return ERR_PTR(error); - dir = nd->dentry->d_inode; + down(&dir->i_sem); - dentry = lookup_hash(&nd->last, nd->dentry); + dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) - goto out_err; + goto out_unlock; if (dentry->d_inode) { - dput(dentry); dentry = ERR_PTR(-EEXIST); - goto out_err; + goto out_dput; } - return dentry; -out_err: - up(&dir->i_sem); - rpc_release_path(nd); - return dentry; -} + inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR); + if (!inode) + goto out_dput; + inode->i_ino = iunique(dir->i_sb, 100); + dir->i_nlink++; + RPC_I(dentry->d_inode)->private = rpc_client; -struct dentry * -rpc_mkdir(char *path, struct rpc_clnt *rpc_client) -{ - struct nameidata nd; - struct dentry *dentry; - struct inode *dir; - int error; + d_instantiate(dentry, inode); + dget(dentry); + up(&dir->i_sem); + + inode_dir_notify(dir, DN_CREATE); - dentry = rpc_lookup_negative(path, &nd); - if (IS_ERR(dentry)) - return dentry; - dir = nd.dentry->d_inode; - if ((error = __rpc_mkdir(dir, dentry)) != 0) - goto err_dput; - RPC_I(dentry->d_inode)->private = rpc_client; error = rpc_populate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); if (error) - goto err_depopulate; -out: - up(&dir->i_sem); - rpc_release_path(&nd); + goto out_depopulate; + return dentry; -err_depopulate: - rpc_depopulate(dentry); - __rpc_rmdir(dir, dentry); -err_dput: + + out_depopulate: + rpc_rmdir(dentry); + out_dput: dput(dentry); - printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n", - __FILE__, __FUNCTION__, path, error); - dentry = ERR_PTR(error); - goto out; + out_unlock: + up(&dir->i_sem); + rpc_put_mount(); + return dentry; } -int -rpc_rmdir(char *path) +void +rpc_rmdir(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; - struct inode *dir; - int error; + struct dentry *parent = dentry->d_parent; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; - down(&dir->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } rpc_depopulate(dentry); - error = __rpc_rmdir(dir, dentry); - dput(dentry); -out_release: - up(&dir->i_sem); - rpc_release_path(&nd); - return error; + + down(&parent->d_inode->i_sem); + if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); + rpc_inode_setowner(dentry->d_inode, NULL); + simple_rmdir(parent->d_inode, dentry); + } + up(&parent->d_inode->i_sem); + + inode_dir_notify(parent->d_inode, DN_DELETE); + rpc_put_mount(); } struct dentry * -rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) +rpc_mkpipe(struct dentry *parent, char *name, void *private, + struct rpc_pipe_ops *ops, int flags) { - struct nameidata nd; + struct inode *dir = parent->d_inode; struct dentry *dentry; - struct inode *dir, *inode; + struct inode *inode; struct rpc_inode *rpci; + int error; + + error = rpc_get_mount(); + if (error) + return ERR_PTR(error); - dentry = rpc_lookup_negative(path, &nd); + down(&parent->d_inode->i_sem); + dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) - return dentry; - dir = nd.dentry->d_inode; - inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); - if (!inode) - goto err_dput; + goto out_unlock; + if (dentry->d_inode) { + dentry = ERR_PTR(-EEXIST); + goto out_dput; + } + + inode = rpc_get_inode(parent->d_inode->i_sb, + S_IFSOCK | S_IRUSR | S_IWUSR); + if (!inode) { + dentry = ERR_PTR(-ENOMEM); + goto out_dput; + } + inode->i_ino = iunique(dir->i_sb, 100); inode->i_fop = &rpc_pipe_fops; - d_instantiate(dentry, inode); + rpci = RPC_I(inode); rpci->private = private; rpci->flags = flags; rpci->ops = ops; + + d_instantiate(dentry, inode); + dget(dentry); + up(&parent->d_inode->i_sem); + inode_dir_notify(dir, DN_CREATE); -out: - up(&dir->i_sem); - rpc_release_path(&nd); return dentry; -err_dput: + + out_dput: dput(dentry); - dentry = ERR_PTR(-ENOMEM); - printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", - __FILE__, __FUNCTION__, path, -ENOMEM); - goto out; + out_unlock: + up(&parent->d_inode->i_sem); + rpc_put_mount(); + return dentry; } -int -rpc_unlink(char *path) +void +rpc_unlink(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; - struct inode *dir; - int error; + struct dentry *parent = dentry->d_parent; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; - down(&dir->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } - d_drop(dentry); + down(&parent->d_inode->i_sem); if (dentry->d_inode) { rpc_close_pipes(dentry->d_inode); rpc_inode_setowner(dentry->d_inode, NULL); - error = simple_unlink(dir, dentry); + simple_unlink(parent->d_inode, dentry); } - dput(dentry); - inode_dir_notify(dir, DN_DELETE); -out_release: - up(&dir->i_sem); - rpc_release_path(&nd); - return error; + up(&parent->d_inode->i_sem); + + inode_dir_notify(parent->d_inode, DN_DELETE); + rpc_put_mount(); } /* -- cgit v1.2.3 From f134585a7343d71f9be7f0cf97e2145f21dd10c6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Sep 2005 11:08:25 -0400 Subject: Revert "[PATCH] RPC,NFS: new rpc_pipefs patch" This reverts 17f4e6febca160a9f9dd4bdece9784577a2f4524 commit. --- fs/nfs/idmap.c | 10 +- include/linux/sunrpc/clnt.h | 2 +- include/linux/sunrpc/rpc_pipe_fs.h | 9 +- net/sunrpc/auth_gss/auth_gss.c | 9 +- net/sunrpc/clnt.c | 53 +++----- net/sunrpc/rpc_pipe.c | 268 ++++++++++++++++++++++++------------- 6 files changed, 209 insertions(+), 142 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 1d0a5bf0d264..ffb8df91dc34 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -66,6 +66,7 @@ struct idmap_hashtable { }; struct idmap { + char idmap_path[48]; struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; @@ -101,8 +102,11 @@ nfs_idmap_new(struct nfs4_client *clp) memset(idmap, 0, sizeof(*idmap)); - idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, - "idmap", idmap, &idmap_upcall_ops, 0); + snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), + "%s/idmap", clp->cl_rpcclient->cl_pathname); + + idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, + idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { kfree(idmap); return; @@ -124,7 +128,7 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; - rpc_unlink(idmap->idmap_dentry); + rpc_unlink(idmap->idmap_path); clp->cl_idmap = NULL; kfree(idmap); } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b5b51c196690..ab151bbb66df 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -59,7 +59,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - struct dentry * __cl_parent_dentry; + char cl_pathname[30];/* Path in rpc_pipe_fs */ struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 63878d05c9a9..63929349571f 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -41,11 +41,10 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); -extern struct dentry *rpc_mkdir(struct dentry *, char *, struct rpc_clnt *); -extern void rpc_rmdir(struct dentry *); -extern struct dentry *rpc_mkpipe(struct dentry *, char *, void *, - struct rpc_pipe_ops *, int flags); -extern void rpc_unlink(struct dentry *); +extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); +extern int rpc_rmdir(char *); +extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); +extern int rpc_unlink(char *); #endif #endif diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index bd2555139fa9..d2b08f16c257 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -87,6 +87,7 @@ struct gss_auth { struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; + char path[48]; spinlock_t lock; }; @@ -689,8 +690,10 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) if (err) goto err_put_mech; - gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, - clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", + clnt->cl_pathname, + gss_auth->mech->gm_name); + gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(gss_auth->dentry)) { err = PTR_ERR(gss_auth->dentry); goto err_put_mech; @@ -715,7 +718,7 @@ gss_destroy(struct rpc_auth *auth) auth, auth->au_flavor); gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->dentry); + rpc_unlink(gss_auth->path); gss_mech_put(gss_auth->mech); rpcauth_free_credcache(auth); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 63bf591310e0..5a8f01d726e9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -67,42 +67,26 @@ static u32 * call_verify(struct rpc_task *task); static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { - static unsigned int clntid; - char name[128]; + static uint32_t clntid; int error; if (dir_name == NULL) return 0; - - retry_parent: - clnt->__cl_parent_dentry = rpc_mkdir(NULL, dir_name, NULL); - if (IS_ERR(clnt->__cl_parent_dentry)) { - error = PTR_ERR(clnt->__cl_parent_dentry); - if (error == -EEXIST) - goto retry_parent; /* XXX(hch): WTF? */ - - printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", - dir_name, error); - return error; - } - - - retry_child: - snprintf(name, sizeof(name), "clnt%x", clntid++); - name[sizeof(name) - 1] = '\0'; - - clnt->cl_dentry = rpc_mkdir(clnt->__cl_parent_dentry, name, clnt); - if (IS_ERR(clnt->cl_dentry)) { + for (;;) { + snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), + "%s/clnt%x", dir_name, + (unsigned int)clntid++); + clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; + clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); + if (!IS_ERR(clnt->cl_dentry)) + return 0; error = PTR_ERR(clnt->cl_dentry); - if (error == -EEXIST) - goto retry_child; - printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", - name, error); - rpc_rmdir(clnt->__cl_parent_dentry); - return error; + if (error != -EEXIST) { + printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", + clnt->cl_pathname, error); + return error; + } } - - return 0; } /* @@ -190,8 +174,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, return clnt; out_no_auth: - rpc_rmdir(clnt->cl_dentry); - rpc_rmdir(clnt->__cl_parent_dentry); + rpc_rmdir(clnt->cl_pathname); out_no_path: if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); @@ -319,10 +302,8 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpc_destroy_client(clnt->cl_parent); goto out_free; } - if (clnt->cl_dentry) - rpc_rmdir(clnt->cl_dentry); - if (clnt->__cl_parent_dentry) - rpc_rmdir(clnt->__cl_parent_dentry); + if (clnt->cl_pathname[0]) + rpc_rmdir(clnt->cl_pathname); if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b382809726d8..ded6c63f11ec 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -414,6 +414,38 @@ rpc_put_mount(void) simple_release_fs(&rpc_mount, &rpc_mount_count); } +static int +rpc_lookup_parent(char *path, struct nameidata *nd) +{ + if (path[0] == '\0') + return -ENOENT; + if (rpc_get_mount()) { + printk(KERN_WARNING "%s: %s failed to mount " + "pseudofilesystem \n", __FILE__, __FUNCTION__); + return -ENODEV; + } + nd->mnt = mntget(rpc_mount); + nd->dentry = dget(rpc_mount->mnt_root); + nd->last_type = LAST_ROOT; + nd->flags = LOOKUP_PARENT; + nd->depth = 0; + + if (path_walk(path, nd)) { + printk(KERN_WARNING "%s: %s failed to find path %s\n", + __FILE__, __FUNCTION__, path); + rpc_put_mount(); + return -ENOENT; + } + return 0; +} + +static void +rpc_release_path(struct nameidata *nd) +{ + path_release(nd); + rpc_put_mount(); +} + static struct inode * rpc_get_inode(struct super_block *sb, int mode) { @@ -518,149 +550,197 @@ out_bad: return -ENOMEM; } -struct dentry * -rpc_mkdir(struct dentry *parent, char *name, struct rpc_clnt *rpc_client) +static int +__rpc_mkdir(struct inode *dir, struct dentry *dentry) { - struct inode *dir; - struct dentry *dentry; struct inode *inode; + + inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR); + if (!inode) + goto out_err; + inode->i_ino = iunique(dir->i_sb, 100); + d_instantiate(dentry, inode); + dir->i_nlink++; + inode_dir_notify(dir, DN_CREATE); + rpc_get_mount(); + return 0; +out_err: + printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", + __FILE__, __FUNCTION__, dentry->d_name.name); + return -ENOMEM; +} + +static int +__rpc_rmdir(struct inode *dir, struct dentry *dentry) +{ int error; - if (!parent) - parent = rpc_mount->mnt_root; + shrink_dcache_parent(dentry); + if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); + rpc_inode_setowner(dentry->d_inode, NULL); + } + if ((error = simple_rmdir(dir, dentry)) != 0) + return error; + if (!error) { + inode_dir_notify(dir, DN_DELETE); + d_drop(dentry); + rpc_put_mount(); + } + return 0; +} - dir = parent->d_inode; - - error = rpc_get_mount(); - if (error) - return ERR_PTR(error); +static struct dentry * +rpc_lookup_negative(char *path, struct nameidata *nd) +{ + struct dentry *dentry; + struct inode *dir; + int error; + if ((error = rpc_lookup_parent(path, nd)) != 0) + return ERR_PTR(error); + dir = nd->dentry->d_inode; down(&dir->i_sem); - dentry = lookup_one_len(name, parent, strlen(name)); + dentry = lookup_hash(&nd->last, nd->dentry); if (IS_ERR(dentry)) - goto out_unlock; + goto out_err; if (dentry->d_inode) { + dput(dentry); dentry = ERR_PTR(-EEXIST); - goto out_dput; + goto out_err; } - - inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR); - if (!inode) - goto out_dput; - inode->i_ino = iunique(dir->i_sb, 100); - dir->i_nlink++; - RPC_I(dentry->d_inode)->private = rpc_client; - - d_instantiate(dentry, inode); - dget(dentry); + return dentry; +out_err: up(&dir->i_sem); + rpc_release_path(nd); + return dentry; +} - inode_dir_notify(dir, DN_CREATE); +struct dentry * +rpc_mkdir(char *path, struct rpc_clnt *rpc_client) +{ + struct nameidata nd; + struct dentry *dentry; + struct inode *dir; + int error; + + dentry = rpc_lookup_negative(path, &nd); + if (IS_ERR(dentry)) + return dentry; + dir = nd.dentry->d_inode; + if ((error = __rpc_mkdir(dir, dentry)) != 0) + goto err_dput; + RPC_I(dentry->d_inode)->private = rpc_client; error = rpc_populate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); if (error) - goto out_depopulate; - - return dentry; - - out_depopulate: - rpc_rmdir(dentry); - out_dput: - dput(dentry); - out_unlock: + goto err_depopulate; +out: up(&dir->i_sem); - rpc_put_mount(); + rpc_release_path(&nd); return dentry; +err_depopulate: + rpc_depopulate(dentry); + __rpc_rmdir(dir, dentry); +err_dput: + dput(dentry); + printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n", + __FILE__, __FUNCTION__, path, error); + dentry = ERR_PTR(error); + goto out; } -void -rpc_rmdir(struct dentry *dentry) +int +rpc_rmdir(char *path) { - struct dentry *parent = dentry->d_parent; - - rpc_depopulate(dentry); + struct nameidata nd; + struct dentry *dentry; + struct inode *dir; + int error; - down(&parent->d_inode->i_sem); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - rpc_inode_setowner(dentry->d_inode, NULL); - simple_rmdir(parent->d_inode, dentry); + if ((error = rpc_lookup_parent(path, &nd)) != 0) + return error; + dir = nd.dentry->d_inode; + down(&dir->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto out_release; } - up(&parent->d_inode->i_sem); - - inode_dir_notify(parent->d_inode, DN_DELETE); - rpc_put_mount(); + rpc_depopulate(dentry); + error = __rpc_rmdir(dir, dentry); + dput(dentry); +out_release: + up(&dir->i_sem); + rpc_release_path(&nd); + return error; } struct dentry * -rpc_mkpipe(struct dentry *parent, char *name, void *private, - struct rpc_pipe_ops *ops, int flags) +rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) { - struct inode *dir = parent->d_inode; + struct nameidata nd; struct dentry *dentry; - struct inode *inode; + struct inode *dir, *inode; struct rpc_inode *rpci; - int error; - - error = rpc_get_mount(); - if (error) - return ERR_PTR(error); - down(&parent->d_inode->i_sem); - dentry = lookup_one_len(name, parent, strlen(name)); + dentry = rpc_lookup_negative(path, &nd); if (IS_ERR(dentry)) - goto out_unlock; - if (dentry->d_inode) { - dentry = ERR_PTR(-EEXIST); - goto out_dput; - } - - inode = rpc_get_inode(parent->d_inode->i_sb, - S_IFSOCK | S_IRUSR | S_IWUSR); - if (!inode) { - dentry = ERR_PTR(-ENOMEM); - goto out_dput; - } - + return dentry; + dir = nd.dentry->d_inode; + inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); + if (!inode) + goto err_dput; inode->i_ino = iunique(dir->i_sb, 100); inode->i_fop = &rpc_pipe_fops; - + d_instantiate(dentry, inode); rpci = RPC_I(inode); rpci->private = private; rpci->flags = flags; rpci->ops = ops; - - d_instantiate(dentry, inode); - dget(dentry); - up(&parent->d_inode->i_sem); - inode_dir_notify(dir, DN_CREATE); +out: + up(&dir->i_sem); + rpc_release_path(&nd); return dentry; - - out_dput: +err_dput: dput(dentry); - out_unlock: - up(&parent->d_inode->i_sem); - rpc_put_mount(); - return dentry; + dentry = ERR_PTR(-ENOMEM); + printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", + __FILE__, __FUNCTION__, path, -ENOMEM); + goto out; } -void -rpc_unlink(struct dentry *dentry) +int +rpc_unlink(char *path) { - struct dentry *parent = dentry->d_parent; + struct nameidata nd; + struct dentry *dentry; + struct inode *dir; + int error; - down(&parent->d_inode->i_sem); + if ((error = rpc_lookup_parent(path, &nd)) != 0) + return error; + dir = nd.dentry->d_inode; + down(&dir->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto out_release; + } + d_drop(dentry); if (dentry->d_inode) { rpc_close_pipes(dentry->d_inode); rpc_inode_setowner(dentry->d_inode, NULL); - simple_unlink(parent->d_inode, dentry); + error = simple_unlink(dir, dentry); } - up(&parent->d_inode->i_sem); - - inode_dir_notify(parent->d_inode, DN_DELETE); - rpc_put_mount(); + dput(dentry); + inode_dir_notify(dir, DN_DELETE); +out_release: + up(&dir->i_sem); + rpc_release_path(&nd); + return error; } /* -- cgit v1.2.3 From 8c3520d4eb3b1bbf2e45fbae8dcfb8db06d5e775 Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Sun, 21 Aug 2005 22:29:26 -0700 Subject: [PATCH] yenta: auto-tune EnE bridges for CardBus cards Echo Audio cardbus products are known to be incompatible with EnE bridges. in order to maybe solve the problem a EnE specific test bit has to be set, another cleared...but other setups have a good chance to break when just forcing the bits. so do the whole thingy automatically. The patch adds a hook in cb_alloc() that allows special tuning for the different chipsets. for ene just match the Echo products and set/clear the test bits, defaults to do the same thing as w/o the patch to not break working setups. Signed-off-by: Daniel Ritz Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Dominik Brodowski --- drivers/pcmcia/cardbus.c | 5 +++ drivers/pcmcia/ti113x.h | 86 ++++++++++++++++++++++++++++++++++++++----- drivers/pcmcia/yenta_socket.c | 15 ++++++-- include/linux/pci_ids.h | 5 +++ include/pcmcia/ss.h | 9 ++++- 5 files changed, 105 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index 1d755e20880c..3f6d51d11374 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -228,6 +228,11 @@ int cb_alloc(struct pcmcia_socket * s) pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); cardbus_assign_irqs(bus, s->pci_irq); + + /* socket specific tune function */ + if (s->tune_bridge) + s->tune_bridge(s, bus); + pci_enable_bridges(bus); pci_bus_add_devices(bus); diff --git a/drivers/pcmcia/ti113x.h b/drivers/pcmcia/ti113x.h index fbe233e19ceb..d319f2e7d053 100644 --- a/drivers/pcmcia/ti113x.h +++ b/drivers/pcmcia/ti113x.h @@ -153,6 +153,12 @@ /* EnE test register */ #define ENE_TEST_C9 0xc9 /* 8bit */ #define ENE_TEST_C9_TLTENABLE 0x02 +#define ENE_TEST_C9_PFENABLE_F0 0x04 +#define ENE_TEST_C9_PFENABLE_F1 0x08 +#define ENE_TEST_C9_PFENABLE (ENE_TEST_C9_PFENABLE_F0 | ENE_TEST_C9_PFENABLE_F0) +#define ENE_TEST_C9_WPDISALBLE_F0 0x40 +#define ENE_TEST_C9_WPDISALBLE_F1 0x80 +#define ENE_TEST_C9_WPDISALBLE (ENE_TEST_C9_WPDISALBLE_F0 | ENE_TEST_C9_WPDISALBLE_F1) /* * Texas Instruments CardBus controller overrides. @@ -790,16 +796,6 @@ static int ti12xx_override(struct yenta_socket *socket) if (val_orig != val) config_writel(socket, TI113X_SYSTEM_CONTROL, val); - /* - * for EnE bridges only: clear testbit TLTEnable. this makes the - * RME Hammerfall DSP sound card working. - */ - if (socket->dev->vendor == PCI_VENDOR_ID_ENE) { - u8 test_c9 = config_readb(socket, ENE_TEST_C9); - test_c9 &= ~ENE_TEST_C9_TLTENABLE; - config_writeb(socket, ENE_TEST_C9, test_c9); - } - /* * Yenta expects controllers to use CSCINT to route * CSC interrupts to PCI rather than INTVAL. @@ -841,5 +837,75 @@ static int ti1250_override(struct yenta_socket *socket) return ti12xx_override(socket); } + +/** + * EnE specific part. EnE bridges are register compatible with TI bridges but + * have their own test registers and more important their own little problems. + * Some fixup code to make everybody happy (TM). + */ + +/** + * set/clear various test bits: + * Defaults to clear the bit. + * - mask (u8) defines what bits to change + * - bits (u8) is the values to change them to + * -> it's + * current = (current & ~mask) | bits + */ +/* pci ids of devices that wants to have the bit set */ +#define DEVID(_vend,_dev,_subvend,_subdev,mask,bits) { \ + .vendor = _vend, \ + .device = _dev, \ + .subvendor = _subvend, \ + .subdevice = _subdev, \ + .driver_data = ((mask) << 8 | (bits)), \ + } +static struct pci_device_id ene_tune_tbl[] = { + /* Echo Audio products based on motorola DSP56301 and DSP56361 */ + DEVID(PCI_VENDOR_ID_MOTOROLA, 0x1801, 0xECC0, PCI_ANY_ID, + ENE_TEST_C9_TLTENABLE | ENE_TEST_C9_PFENABLE, ENE_TEST_C9_TLTENABLE), + DEVID(PCI_VENDOR_ID_MOTOROLA, 0x3410, 0xECC0, PCI_ANY_ID, + ENE_TEST_C9_TLTENABLE | ENE_TEST_C9_PFENABLE, ENE_TEST_C9_TLTENABLE), + + {} +}; + +static void ene_tune_bridge(struct pcmcia_socket *sock, struct pci_bus *bus) +{ + struct yenta_socket *socket = container_of(sock, struct yenta_socket, socket); + struct pci_dev *dev; + struct pci_device_id *id = NULL; + u8 test_c9, old_c9, mask, bits; + + list_for_each_entry(dev, &bus->devices, bus_list) { + id = (struct pci_device_id *) pci_match_id(ene_tune_tbl, dev); + if (id) + break; + } + + test_c9 = old_c9 = config_readb(socket, ENE_TEST_C9); + if (id) { + mask = (id->driver_data >> 8) & 0xFF; + bits = id->driver_data & 0xFF; + + test_c9 = (test_c9 & ~mask) | bits; + } + else + /* default to clear TLTEnable bit, old behaviour */ + test_c9 &= ~ENE_TEST_C9_TLTENABLE; + + printk(KERN_INFO "yenta EnE: chaning testregister 0xC9, %02x -> %02x\n", old_c9, test_c9); + config_writeb(socket, ENE_TEST_C9, test_c9); +} + + +static int ene_override(struct yenta_socket *socket) +{ + /* install tune_bridge() function */ + socket->socket.tune_bridge = ene_tune_bridge; + + return ti1250_override(socket); +} + #endif /* _LINUX_TI113X_H */ diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index ba4d78e5b121..fd2a6f892c41 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -819,6 +819,7 @@ enum { CARDBUS_TYPE_TOPIC95, CARDBUS_TYPE_TOPIC97, CARDBUS_TYPE_O2MICRO, + CARDBUS_TYPE_ENE, }; /* @@ -865,6 +866,12 @@ static struct cardbus_type cardbus_type[] = { .override = o2micro_override, .restore_state = o2micro_restore_state, }, + [CARDBUS_TYPE_ENE] = { + .override = ene_override, + .save_state = ti_save_state, + .restore_state = ti_restore_state, + .sock_init = ti_init, + }, }; @@ -1265,10 +1272,10 @@ static struct pci_device_id yenta_table [] = { CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1250, TI1250), CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1410, TI1250), - CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1211, TI12XX), - CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1225, TI12XX), - CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1410, TI1250), - CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1420, TI12XX), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1211, ENE), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1225, ENE), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1410, ENE), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1420, ENE), CB_ID(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C465, RICOH), CB_ID(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C466, RICOH), diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b86a4b77007e..92efb2c767f9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2187,7 +2187,12 @@ #define PCI_DEVICE_ID_ENE_1211 0x1211 #define PCI_DEVICE_ID_ENE_1225 0x1225 #define PCI_DEVICE_ID_ENE_1410 0x1410 +#define PCI_DEVICE_ID_ENE_710 0x1411 +#define PCI_DEVICE_ID_ENE_712 0x1412 #define PCI_DEVICE_ID_ENE_1420 0x1420 +#define PCI_DEVICE_ID_ENE_720 0x1421 +#define PCI_DEVICE_ID_ENE_722 0x1422 + #define PCI_VENDOR_ID_CHELSIO 0x1425 #define PCI_VENDOR_ID_MIPS 0x153f diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 0f7aacc33fe9..c8592c7e8eaa 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -21,6 +21,9 @@ #include #include #include +#ifdef CONFIG_CARDBUS +#include +#endif /* Definitions for card status flags for GetStatus */ #define SS_WRPROT 0x0001 @@ -233,7 +236,11 @@ struct pcmcia_socket { /* so is power hook */ int (*power_hook)(struct pcmcia_socket *sock, int operation); - +#ifdef CONFIG_CARDBUS + /* allows tuning the CB bridge before loading driver for the CB card */ + void (*tune_bridge)(struct pcmcia_socket *sock, struct pci_bus *bus); +#endif + /* state thread */ struct semaphore skt_sem; /* protects socket h/w state */ -- cgit v1.2.3 From 6c1a10dba92cbacb58563f5eacf93807125b488a Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Tue, 20 Sep 2005 14:12:17 -0700 Subject: [PATCH] yenta: add support for more TI bridges Support some more TI cardbus bridges. most of them are multifunction devices which adds 1394 controllers, smartcard readers etc. this could also help with the various problems with the XX21 controllers seen on the linux-pcmcia list. Signed-off-by: Daniel Ritz Signed-off-by: Dominik Brodowski --- drivers/pcmcia/ti113x.h | 29 +++++++++++++++++++++++++++++ drivers/pcmcia/yenta_socket.c | 8 ++++++++ include/linux/pci_ids.h | 7 +++++++ 3 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/drivers/pcmcia/ti113x.h b/drivers/pcmcia/ti113x.h index d319f2e7d053..da0b404561c9 100644 --- a/drivers/pcmcia/ti113x.h +++ b/drivers/pcmcia/ti113x.h @@ -59,6 +59,7 @@ #define TI122X_SCR_SER_STEP 0xc0000000 #define TI122X_SCR_INTRTIE 0x20000000 +#define TIXX21_SCR_TIEALL 0x10000000 #define TI122X_SCR_CBRSVD 0x00400000 #define TI122X_SCR_MRBURSTDN 0x00008000 #define TI122X_SCR_MRBURSTUP 0x00004000 @@ -624,6 +625,7 @@ static int ti12xx_2nd_slot_empty(struct yenta_socket *socket) int devfn; unsigned int state; int ret = 1; + u32 sysctl; /* catch the two-slot controllers */ switch (socket->dev->device) { @@ -646,6 +648,24 @@ static int ti12xx_2nd_slot_empty(struct yenta_socket *socket) */ break; + case PCI_DEVICE_ID_TI_X515: + case PCI_DEVICE_ID_TI_X420: + case PCI_DEVICE_ID_TI_X620: + case PCI_DEVICE_ID_TI_XX21_XX11: + case PCI_DEVICE_ID_TI_7410: + case PCI_DEVICE_ID_TI_7610: + /* + * those are either single or dual slot CB with additional functions + * like 1394, smartcard reader, etc. check the TIEALL flag for them + * the TIEALL flag binds the IRQ of all functions toghether. + * we catch the single slot variants later. + */ + sysctl = config_readl(socket, TI113X_SYSTEM_CONTROL); + if (sysctl & TIXX21_SCR_TIEALL) + return 0; + + break; + /* single-slot controllers have the 2nd slot empty always :) */ default: return 1; @@ -658,6 +678,15 @@ static int ti12xx_2nd_slot_empty(struct yenta_socket *socket) if (!func) return 1; + /* + * check that the device id of both slots match. this is needed for the + * XX21 and the XX11 controller that share the same device id for single + * and dual slot controllers. return '2nd slot empty'. we already checked + * if the interrupt is tied to another function. + */ + if (socket->dev->device != func->device) + goto out; + slot2 = pci_get_drvdata(func); if (!slot2) goto out; diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 80806c9b43ad..c3e22fca105a 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -1249,6 +1249,14 @@ static struct pci_device_id yenta_table [] = { CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1250, TI1250), CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1410, TI1250), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_X515, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_X420, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_X620, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_7410, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_7510, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_7610, TI12XX), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1211, ENE), CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1225, ENE), CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1410, ENE), diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 92efb2c767f9..68f11ac1a314 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -769,6 +769,8 @@ #define PCI_DEVICE_ID_TI_TVP4010 0x3d04 #define PCI_DEVICE_ID_TI_TVP4020 0x3d07 #define PCI_DEVICE_ID_TI_4450 0x8011 +#define PCI_DEVICE_ID_TI_XX21_XX11 0x8031 +#define PCI_DEVICE_ID_TI_X515 0x8036 #define PCI_DEVICE_ID_TI_1130 0xac12 #define PCI_DEVICE_ID_TI_1031 0xac13 #define PCI_DEVICE_ID_TI_1131 0xac15 @@ -785,12 +787,17 @@ #define PCI_DEVICE_ID_TI_4451 0xac42 #define PCI_DEVICE_ID_TI_4510 0xac44 #define PCI_DEVICE_ID_TI_4520 0xac46 +#define PCI_DEVICE_ID_TI_7510 0xac47 +#define PCI_DEVICE_ID_TI_7610 0xac48 +#define PCI_DEVICE_ID_TI_7410 0xac49 #define PCI_DEVICE_ID_TI_1410 0xac50 #define PCI_DEVICE_ID_TI_1420 0xac51 #define PCI_DEVICE_ID_TI_1451A 0xac52 #define PCI_DEVICE_ID_TI_1620 0xac54 #define PCI_DEVICE_ID_TI_1520 0xac55 #define PCI_DEVICE_ID_TI_1510 0xac56 +#define PCI_DEVICE_ID_TI_X620 0xac8d +#define PCI_DEVICE_ID_TI_X420 0xac8e #define PCI_VENDOR_ID_SONY 0x104d #define PCI_DEVICE_ID_SONY_CXD3222 0x8039 -- cgit v1.2.3 From 4fb7edce52e5b6cf41e3375822d74a27f0b6f2dd Mon Sep 17 00:00:00 2001 From: Kars de Jong Date: Sun, 25 Sep 2005 14:39:46 +0200 Subject: [PATCH] pcmcia: fix cross-platform issues with pcmcia module aliases - Added a missing TO_NATIVE call to scripts/mod/file2alias.c:do_pcmcia_entry() - Add an alignment attribute to struct pcmcia_device_no to solve an alignment issue seen when cross-compiling on x86 for m68k. Signed-off-by: Kars de Jong Signed-off-by: Dominik Brodowski --- include/linux/mod_devicetable.h | 5 +++-- scripts/mod/file2alias.c | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 47da39ba3f03..4ed2107bc020 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -209,10 +209,11 @@ struct pcmcia_device_id { /* for real multi-function devices */ __u8 function; - /* for pseude multi-function devices */ + /* for pseudo multi-function devices */ __u8 device_no; - __u32 prod_id_hash[4]; + __u32 prod_id_hash[4] + __attribute__((aligned(sizeof(__u32)))); /* not matched against in kernelspace*/ #ifdef __KERNEL__ diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index d8ee38aede26..f2ee673329a7 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -295,11 +295,13 @@ static int do_pcmcia_entry(const char *filename, { unsigned int i; + id->match_flags = TO_NATIVE(id->match_flags); id->manf_id = TO_NATIVE(id->manf_id); id->card_id = TO_NATIVE(id->card_id); id->func_id = TO_NATIVE(id->func_id); id->function = TO_NATIVE(id->function); id->device_no = TO_NATIVE(id->device_no); + for (i=0; i<4; i++) { id->prod_id_hash[i] = TO_NATIVE(id->prod_id_hash[i]); } -- cgit v1.2.3 From acd042bb2de50d4e6fb969281a00cc8b8b71e46d Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Mon, 26 Sep 2005 15:06:50 -0700 Subject: [CONNECTOR]: async connector mode. If input message rate from userspace is too high, do not drop them, but try to deliver using work queue allocation. Failing there is some kind of congestion control. It also removes warn_on on this condition, which scares people. Signed-off-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- drivers/connector/cn_queue.c | 32 +++++++++++-------- drivers/connector/connector.c | 74 ++++++++++++++++++++++--------------------- include/linux/connector.h | 21 ++++++++---- 3 files changed, 72 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c index 966632182e2d..9f2f00d82917 100644 --- a/drivers/connector/cn_queue.c +++ b/drivers/connector/cn_queue.c @@ -31,16 +31,19 @@ #include #include -static void cn_queue_wrapper(void *data) +void cn_queue_wrapper(void *data) { - struct cn_callback_entry *cbq = data; + struct cn_callback_data *d = data; - cbq->cb->callback(cbq->cb->priv); - cbq->destruct_data(cbq->ddata); - cbq->ddata = NULL; + d->callback(d->callback_priv); + + d->destruct_data(d->ddata); + d->ddata = NULL; + + kfree(d->free); } -static struct cn_callback_entry *cn_queue_alloc_callback_entry(struct cn_callback *cb) +static struct cn_callback_entry *cn_queue_alloc_callback_entry(char *name, struct cb_id *id, void (*callback)(void *)) { struct cn_callback_entry *cbq; @@ -50,8 +53,11 @@ static struct cn_callback_entry *cn_queue_alloc_callback_entry(struct cn_callbac return NULL; } - cbq->cb = cb; - INIT_WORK(&cbq->work, &cn_queue_wrapper, cbq); + snprintf(cbq->id.name, sizeof(cbq->id.name), "%s", name); + memcpy(&cbq->id.id, id, sizeof(struct cb_id)); + cbq->data.callback = callback; + + INIT_WORK(&cbq->work, &cn_queue_wrapper, &cbq->data); return cbq; } @@ -68,12 +74,12 @@ int cn_cb_equal(struct cb_id *i1, struct cb_id *i2) return ((i1->idx == i2->idx) && (i1->val == i2->val)); } -int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb) +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)) { struct cn_callback_entry *cbq, *__cbq; int found = 0; - cbq = cn_queue_alloc_callback_entry(cb); + cbq = cn_queue_alloc_callback_entry(name, id, callback); if (!cbq) return -ENOMEM; @@ -82,7 +88,7 @@ int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb) spin_lock_bh(&dev->queue_lock); list_for_each_entry(__cbq, &dev->queue_list, callback_entry) { - if (cn_cb_equal(&__cbq->cb->id, &cb->id)) { + if (cn_cb_equal(&__cbq->id.id, id)) { found = 1; break; } @@ -99,7 +105,7 @@ int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb) cbq->nls = dev->nls; cbq->seq = 0; - cbq->group = cbq->cb->id.idx; + cbq->group = cbq->id.id.idx; return 0; } @@ -111,7 +117,7 @@ void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id) spin_lock_bh(&dev->queue_lock); list_for_each_entry_safe(cbq, n, &dev->queue_list, callback_entry) { - if (cn_cb_equal(&cbq->cb->id, id)) { + if (cn_cb_equal(&cbq->id.id, id)) { list_del(&cbq->callback_entry); found = 1; break; diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index aaf6d468a8b9..bb0b3a8de14b 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -84,7 +84,7 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask) spin_lock_bh(&dev->cbdev->queue_lock); list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) { - if (cn_cb_equal(&__cbq->cb->id, &msg->id)) { + if (cn_cb_equal(&__cbq->id.id, &msg->id)) { found = 1; group = __cbq->group; } @@ -127,42 +127,56 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v { struct cn_callback_entry *__cbq; struct cn_dev *dev = &cdev; - int found = 0; + int err = -ENODEV; spin_lock_bh(&dev->cbdev->queue_lock); list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) { - if (cn_cb_equal(&__cbq->cb->id, &msg->id)) { - /* - * Let's scream if there is some magic and the - * data will arrive asynchronously here. - * [i.e. netlink messages will be queued]. - * After the first warning I will fix it - * quickly, but now I think it is - * impossible. --zbr (2004_04_27). - */ + if (cn_cb_equal(&__cbq->id.id, &msg->id)) { if (likely(!test_bit(0, &__cbq->work.pending) && - __cbq->ddata == NULL)) { - __cbq->cb->priv = msg; + __cbq->data.ddata == NULL)) { + __cbq->data.callback_priv = msg; - __cbq->ddata = data; - __cbq->destruct_data = destruct_data; + __cbq->data.ddata = data; + __cbq->data.destruct_data = destruct_data; if (queue_work(dev->cbdev->cn_queue, &__cbq->work)) - found = 1; + err = 0; } else { - printk("%s: cbq->data=%p, " - "work->pending=%08lx.\n", - __func__, __cbq->ddata, - __cbq->work.pending); - WARN_ON(1); + struct work_struct *w; + struct cn_callback_data *d; + + w = kzalloc(sizeof(*w) + sizeof(*d), GFP_ATOMIC); + if (w) { + d = (struct cn_callback_data *)(w+1); + + d->callback_priv = msg; + d->callback = __cbq->data.callback; + d->ddata = data; + d->destruct_data = destruct_data; + d->free = w; + + INIT_LIST_HEAD(&w->entry); + w->pending = 0; + w->func = &cn_queue_wrapper; + w->data = d; + init_timer(&w->timer); + + if (queue_work(dev->cbdev->cn_queue, w)) + err = 0; + else { + kfree(w); + err = -EINVAL; + } + } else + err = -ENOMEM; } break; } } spin_unlock_bh(&dev->cbdev->queue_lock); - return found ? 0 : -ENODEV; + return err; } /* @@ -291,22 +305,10 @@ int cn_add_callback(struct cb_id *id, char *name, void (*callback)(void *)) { int err; struct cn_dev *dev = &cdev; - struct cn_callback *cb; - - cb = kzalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - scnprintf(cb->name, sizeof(cb->name), "%s", name); - memcpy(&cb->id, id, sizeof(cb->id)); - cb->callback = callback; - - err = cn_queue_add_callback(dev->cbdev, cb); - if (err) { - kfree(cb); + err = cn_queue_add_callback(dev->cbdev, name, id, callback); + if (err) return err; - } cn_notify(id, 0); diff --git a/include/linux/connector.h b/include/linux/connector.h index 96de26301f84..86d4b0a81713 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -104,12 +104,19 @@ struct cn_queue_dev { struct sock *nls; }; -struct cn_callback { +struct cn_callback_id { unsigned char name[CN_CBQ_NAMELEN]; - struct cb_id id; +}; + +struct cn_callback_data { + void (*destruct_data) (void *); + void *ddata; + + void *callback_priv; void (*callback) (void *); - void *priv; + + void *free; }; struct cn_callback_entry { @@ -118,8 +125,8 @@ struct cn_callback_entry { struct work_struct work; struct cn_queue_dev *pdev; - void (*destruct_data) (void *); - void *ddata; + struct cn_callback_id id; + struct cn_callback_data data; int seq, group; struct sock *nls; @@ -144,7 +151,7 @@ int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); int cn_netlink_send(struct cn_msg *, u32, int); -int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb); +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *); @@ -152,6 +159,8 @@ void cn_queue_free_dev(struct cn_queue_dev *dev); int cn_cb_equal(struct cb_id *, struct cb_id *); +void cn_queue_wrapper(void *data); + extern int cn_already_initialized; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 188bab3ae0ed164bc18f98be932512d777dd038b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 26 Sep 2005 15:25:11 -0700 Subject: [NETFILTER]: Fix invalid module autoloading by splitting iptable_nat When you've enabled conntrack and NAT as a module (standard case in all distributions), and you've also enabled the new conntrack netlink interface, loading ip_conntrack_netlink.ko will auto-load iptable_nat.ko. This causes a huge performance penalty, since for every packet you iterate the nat code, even if you don't want it. This patch splits iptable_nat.ko into the NAT core (ip_nat.ko) and the iptables frontend (iptable_nat.ko). Threfore, ip_conntrack_netlink.ko will only pull ip_nat.ko, but not the frontend. ip_nat.ko will "only" allocate some resources, but not affect runtime performance. This separation is also a nice step in anticipation of new packet filters (nf-hipac, ipset, pkttables) being able to use the NAT core. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat_core.h | 12 +++++----- net/ipv4/netfilter/Makefile | 5 +++-- net/ipv4/netfilter/ip_nat_core.c | 35 ++++++++++++++++++++---------- net/ipv4/netfilter/ip_nat_helper.c | 4 ++++ net/ipv4/netfilter/ip_nat_standalone.c | 25 ++++----------------- 5 files changed, 40 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h index 3b50eb91f007..30db23f06b03 100644 --- a/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -5,16 +5,14 @@ /* This header used to share core functionality between the standalone NAT module, and the compatibility layer's use of NAT for masquerading. */ -extern int ip_nat_init(void); -extern void ip_nat_cleanup(void); -extern unsigned int nat_packet(struct ip_conntrack *ct, +extern unsigned int ip_nat_packet(struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo, unsigned int hooknum, struct sk_buff **pskb); -extern int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir); +extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir); #endif /* _IP_NAT_CORE_H */ diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 89002533f2a2..dab4b58dd31e 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,7 +4,8 @@ # objects for the standalone - connection tracking / NAT ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o -iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o @@ -40,7 +41,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index c3ea891d38e7..c5e3abd24672 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -74,12 +74,14 @@ ip_nat_proto_find_get(u_int8_t protonum) return p; } +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); void ip_nat_proto_put(struct ip_nat_protocol *p) { module_put(p->me); } +EXPORT_SYMBOL_GPL(ip_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -111,6 +113,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) return csum_fold(csum_partial((char *)diffs, sizeof(diffs), oldcheck^0xFFFF)); } +EXPORT_SYMBOL(ip_nat_cheat_check); /* Is this tuple already taken? (not by us) */ int @@ -127,6 +130,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, invert_tuplepr(&reply, tuple); return ip_conntrack_tuple_taken(&reply, ignored_conntrack); } +EXPORT_SYMBOL(ip_nat_used_tuple); /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ @@ -347,6 +351,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, return NF_ACCEPT; } +EXPORT_SYMBOL(ip_nat_setup_info); /* Returns true if succeeded. */ static int @@ -387,10 +392,10 @@ manip_pkt(u_int16_t proto, } /* Do packet manipulations according to ip_nat_setup_info. */ -unsigned int nat_packet(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +unsigned int ip_nat_packet(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -417,12 +422,13 @@ unsigned int nat_packet(struct ip_conntrack *ct, } return NF_ACCEPT; } +EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir) { struct { struct icmphdr icmp; @@ -509,6 +515,7 @@ int icmp_reply_translation(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation); /* Protocol registration. */ int ip_nat_protocol_register(struct ip_nat_protocol *proto) @@ -525,6 +532,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto) write_unlock_bh(&ip_nat_lock); return ret; } +EXPORT_SYMBOL(ip_nat_protocol_register); /* Noone stores the protocol anywhere; simply delete it. */ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) @@ -536,6 +544,7 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) /* Someone could be still looking at the proto in a bh. */ synchronize_net(); } +EXPORT_SYMBOL(ip_nat_protocol_unregister); #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) @@ -582,7 +591,7 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); #endif -int __init ip_nat_init(void) +static int __init ip_nat_init(void) { size_t i; @@ -624,10 +633,14 @@ static int clean_nat(struct ip_conntrack *i, void *data) return 0; } -/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ -void ip_nat_cleanup(void) +static void __exit ip_nat_cleanup(void) { ip_ct_iterate_cleanup(&clean_nat, NULL); ip_conntrack_destroyed = NULL; vfree(bysource); } + +MODULE_LICENSE("GPL"); + +module_init(ip_nat_init); +module_exit(ip_nat_cleanup); diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index d2dd5d313556..5d506e0564d5 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -199,6 +199,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, } return 1; } +EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX @@ -256,6 +257,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL(ip_nat_mangle_udp_packet); /* Adjust one found SACK option including checksum correction */ static void @@ -399,6 +401,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL(ip_nat_seq_adjust); /* Setup NAT on this expected conntrack so it follows master. */ /* If we fail to get a free NAT slot, we'll get dropped on confirm */ @@ -425,3 +428,4 @@ void ip_nat_follow_master(struct ip_conntrack *ct, /* hook doesn't matter, but it has to do destination manip */ ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } +EXPORT_SYMBOL(ip_nat_follow_master); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 0ff368b131f6..30cd4e18c129 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -108,8 +108,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) + if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, + CTINFO2DIR(ctinfo))) return NF_DROP; else return NF_ACCEPT; @@ -152,7 +152,7 @@ ip_nat_fn(unsigned int hooknum, } IP_NF_ASSERT(info); - return nat_packet(ct, ctinfo, hooknum, pskb); + return ip_nat_packet(ct, ctinfo, hooknum, pskb); } static unsigned int @@ -325,15 +325,10 @@ static int init_or_cleanup(int init) printk("ip_nat_init: can't setup rules.\n"); goto cleanup_nothing; } - ret = ip_nat_init(); - if (ret < 0) { - printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_rule_init; - } ret = nf_register_hook(&ip_nat_in_ops); if (ret < 0) { printk("ip_nat_init: can't register in hook.\n"); - goto cleanup_nat; + goto cleanup_rule_init; } ret = nf_register_hook(&ip_nat_out_ops); if (ret < 0) { @@ -374,8 +369,6 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_nat_out_ops); cleanup_inops: nf_unregister_hook(&ip_nat_in_ops); - cleanup_nat: - ip_nat_cleanup(); cleanup_rule_init: ip_nat_rule_cleanup(); cleanup_nothing: @@ -395,14 +388,4 @@ static void __exit fini(void) module_init(init); module_exit(fini); -EXPORT_SYMBOL(ip_nat_setup_info); -EXPORT_SYMBOL(ip_nat_protocol_register); -EXPORT_SYMBOL(ip_nat_protocol_unregister); -EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); -EXPORT_SYMBOL_GPL(ip_nat_proto_put); -EXPORT_SYMBOL(ip_nat_cheat_check); -EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); -EXPORT_SYMBOL(ip_nat_mangle_udp_packet); -EXPORT_SYMBOL(ip_nat_used_tuple); -EXPORT_SYMBOL(ip_nat_follow_master); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c4a3e0a529ab3e65223e81681c7c6b1bc188fa58 Mon Sep 17 00:00:00 2001 From: "Bagalkote, Sreenivas" Date: Tue, 20 Sep 2005 17:46:58 -0400 Subject: [SCSI] MegaRAID SAS RAID: new driver Signed-off-by: Sreenivas Bagalkote Signed-off-by: James Bottomley --- drivers/scsi/Makefile | 1 + drivers/scsi/megaraid/Kconfig.megaraid | 9 + drivers/scsi/megaraid/Makefile | 1 + drivers/scsi/megaraid/megaraid_sas.c | 2805 ++++++++++++++++++++++++++++++++ drivers/scsi/megaraid/megaraid_sas.h | 1142 +++++++++++++ include/linux/pci_ids.h | 2 + 6 files changed, 3960 insertions(+) create mode 100644 drivers/scsi/megaraid/megaraid_sas.c create mode 100644 drivers/scsi/megaraid/megaraid_sas.h (limited to 'include/linux') diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 1e4edbdf2730..48529d180ca8 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_SCSI_DC395x) += dc395x.o obj-$(CONFIG_SCSI_DC390T) += tmscsim.o obj-$(CONFIG_MEGARAID_LEGACY) += megaraid.o obj-$(CONFIG_MEGARAID_NEWGEN) += megaraid/ +obj-$(CONFIG_MEGARAID_SAS) += megaraid/ obj-$(CONFIG_SCSI_ACARD) += atp870u.o obj-$(CONFIG_SCSI_SUNESP) += esp.o obj-$(CONFIG_SCSI_GDTH) += gdth.o diff --git a/drivers/scsi/megaraid/Kconfig.megaraid b/drivers/scsi/megaraid/Kconfig.megaraid index 917d591d90b2..7363e12663ac 100644 --- a/drivers/scsi/megaraid/Kconfig.megaraid +++ b/drivers/scsi/megaraid/Kconfig.megaraid @@ -76,3 +76,12 @@ config MEGARAID_LEGACY To compile this driver as a module, choose M here: the module will be called megaraid endif + +config MEGARAID_SAS + tristate "LSI Logic MegaRAID SAS RAID Module" + depends on PCI && SCSI + help + Module for LSI Logic's SAS based RAID controllers. + To compile this driver as a module, choose 'm' here. + Module will be called megaraid_sas + diff --git a/drivers/scsi/megaraid/Makefile b/drivers/scsi/megaraid/Makefile index 6dd99f275722..f469915b97c3 100644 --- a/drivers/scsi/megaraid/Makefile +++ b/drivers/scsi/megaraid/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_MEGARAID_MM) += megaraid_mm.o obj-$(CONFIG_MEGARAID_MAILBOX) += megaraid_mbox.o +obj-$(CONFIG_MEGARAID_SAS) += megaraid_sas.o diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c new file mode 100644 index 000000000000..1b3148e842af --- /dev/null +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -0,0 +1,2805 @@ +/* + * + * Linux MegaRAID driver for SAS based RAID controllers + * + * Copyright (c) 2003-2005 LSI Logic Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * FILE : megaraid_sas.c + * Version : v00.00.02.00-rc4 + * + * Authors: + * Sreenivas Bagalkote + * Sumant Patro + * + * List of supported controllers + * + * OEM Product Name VID DID SSVID SSID + * --- ------------ --- --- ---- ---- + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "megaraid_sas.h" + +MODULE_LICENSE("GPL"); +MODULE_VERSION(MEGASAS_VERSION); +MODULE_AUTHOR("sreenivas.bagalkote@lsil.com"); +MODULE_DESCRIPTION("LSI Logic MegaRAID SAS Driver"); + +/* + * PCI ID table for all supported controllers + */ +static struct pci_device_id megasas_pci_table[] = { + + { + PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS1064R, + PCI_ANY_ID, + PCI_ANY_ID, + }, + { + PCI_VENDOR_ID_DELL, + PCI_DEVICE_ID_DELL_PERC5, + PCI_ANY_ID, + PCI_ANY_ID, + }, + {0} /* Terminating entry */ +}; + +MODULE_DEVICE_TABLE(pci, megasas_pci_table); + +static int megasas_mgmt_majorno; +static struct megasas_mgmt_info megasas_mgmt_info; +static struct fasync_struct *megasas_async_queue; +static DECLARE_MUTEX(megasas_async_queue_mutex); + +/** + * megasas_get_cmd - Get a command from the free pool + * @instance: Adapter soft state + * + * Returns a free command from the pool + */ +static inline struct megasas_cmd *megasas_get_cmd(struct megasas_instance + *instance) +{ + unsigned long flags; + struct megasas_cmd *cmd = NULL; + + spin_lock_irqsave(&instance->cmd_pool_lock, flags); + + if (!list_empty(&instance->cmd_pool)) { + cmd = list_entry((&instance->cmd_pool)->next, + struct megasas_cmd, list); + list_del_init(&cmd->list); + } else { + printk(KERN_ERR "megasas: Command pool empty!\n"); + } + + spin_unlock_irqrestore(&instance->cmd_pool_lock, flags); + return cmd; +} + +/** + * megasas_return_cmd - Return a cmd to free command pool + * @instance: Adapter soft state + * @cmd: Command packet to be returned to free command pool + */ +static inline void +megasas_return_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd) +{ + unsigned long flags; + + spin_lock_irqsave(&instance->cmd_pool_lock, flags); + + cmd->scmd = NULL; + list_add_tail(&cmd->list, &instance->cmd_pool); + + spin_unlock_irqrestore(&instance->cmd_pool_lock, flags); +} + +/** + * megasas_enable_intr - Enables interrupts + * @regs: MFI register set + */ +static inline void +megasas_enable_intr(struct megasas_register_set __iomem * regs) +{ + writel(1, &(regs)->outbound_intr_mask); + + /* Dummy readl to force pci flush */ + readl(®s->outbound_intr_mask); +} + +/** + * megasas_disable_intr - Disables interrupts + * @regs: MFI register set + */ +static inline void +megasas_disable_intr(struct megasas_register_set __iomem * regs) +{ + u32 mask = readl(®s->outbound_intr_mask) & (~0x00000001); + writel(mask, ®s->outbound_intr_mask); + + /* Dummy readl to force pci flush */ + readl(®s->outbound_intr_mask); +} + +/** + * megasas_issue_polled - Issues a polling command + * @instance: Adapter soft state + * @cmd: Command packet to be issued + * + * For polling, MFI requires the cmd_status to be set to 0xFF before posting. + */ +static int +megasas_issue_polled(struct megasas_instance *instance, struct megasas_cmd *cmd) +{ + int i; + u32 msecs = MFI_POLL_TIMEOUT_SECS * 1000; + + struct megasas_header *frame_hdr = &cmd->frame->hdr; + + frame_hdr->cmd_status = 0xFF; + frame_hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; + + /* + * Issue the frame using inbound queue port + */ + writel(cmd->frame_phys_addr >> 3, + &instance->reg_set->inbound_queue_port); + + /* + * Wait for cmd_status to change + */ + for (i = 0; (i < msecs) && (frame_hdr->cmd_status == 0xff); i++) { + rmb(); + msleep(1); + } + + if (frame_hdr->cmd_status == 0xff) + return -ETIME; + + return 0; +} + +/** + * megasas_issue_blocked_cmd - Synchronous wrapper around regular FW cmds + * @instance: Adapter soft state + * @cmd: Command to be issued + * + * This function waits on an event for the command to be returned from ISR. + * Used to issue ioctl commands. + */ +static int +megasas_issue_blocked_cmd(struct megasas_instance *instance, + struct megasas_cmd *cmd) +{ + cmd->cmd_status = ENODATA; + + writel(cmd->frame_phys_addr >> 3, + &instance->reg_set->inbound_queue_port); + + wait_event(instance->int_cmd_wait_q, (cmd->cmd_status != ENODATA)); + + return 0; +} + +/** + * megasas_issue_blocked_abort_cmd - Aborts previously issued cmd + * @instance: Adapter soft state + * @cmd_to_abort: Previously issued cmd to be aborted + * + * MFI firmware can abort previously issued AEN comamnd (automatic event + * notification). The megasas_issue_blocked_abort_cmd() issues such abort + * cmd and blocks till it is completed. + */ +static int +megasas_issue_blocked_abort_cmd(struct megasas_instance *instance, + struct megasas_cmd *cmd_to_abort) +{ + struct megasas_cmd *cmd; + struct megasas_abort_frame *abort_fr; + + cmd = megasas_get_cmd(instance); + + if (!cmd) + return -1; + + abort_fr = &cmd->frame->abort; + + /* + * Prepare and issue the abort frame + */ + abort_fr->cmd = MFI_CMD_ABORT; + abort_fr->cmd_status = 0xFF; + abort_fr->flags = 0; + abort_fr->abort_context = cmd_to_abort->index; + abort_fr->abort_mfi_phys_addr_lo = cmd_to_abort->frame_phys_addr; + abort_fr->abort_mfi_phys_addr_hi = 0; + + cmd->sync_cmd = 1; + cmd->cmd_status = 0xFF; + + writel(cmd->frame_phys_addr >> 3, + &instance->reg_set->inbound_queue_port); + + /* + * Wait for this cmd to complete + */ + wait_event(instance->abort_cmd_wait_q, (cmd->cmd_status != 0xFF)); + + megasas_return_cmd(instance, cmd); + return 0; +} + +/** + * megasas_make_sgl32 - Prepares 32-bit SGL + * @instance: Adapter soft state + * @scp: SCSI command from the mid-layer + * @mfi_sgl: SGL to be filled in + * + * If successful, this function returns the number of SG elements. Otherwise, + * it returnes -1. + */ +static inline int +megasas_make_sgl32(struct megasas_instance *instance, struct scsi_cmnd *scp, + union megasas_sgl *mfi_sgl) +{ + int i; + int sge_count; + struct scatterlist *os_sgl; + + /* + * Return 0 if there is no data transfer + */ + if (!scp->request_buffer || !scp->request_bufflen) + return 0; + + if (!scp->use_sg) { + mfi_sgl->sge32[0].phys_addr = pci_map_single(instance->pdev, + scp-> + request_buffer, + scp-> + request_bufflen, + scp-> + sc_data_direction); + mfi_sgl->sge32[0].length = scp->request_bufflen; + + return 1; + } + + os_sgl = (struct scatterlist *)scp->request_buffer; + sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg, + scp->sc_data_direction); + + for (i = 0; i < sge_count; i++, os_sgl++) { + mfi_sgl->sge32[i].length = sg_dma_len(os_sgl); + mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl); + } + + return sge_count; +} + +/** + * megasas_make_sgl64 - Prepares 64-bit SGL + * @instance: Adapter soft state + * @scp: SCSI command from the mid-layer + * @mfi_sgl: SGL to be filled in + * + * If successful, this function returns the number of SG elements. Otherwise, + * it returnes -1. + */ +static inline int +megasas_make_sgl64(struct megasas_instance *instance, struct scsi_cmnd *scp, + union megasas_sgl *mfi_sgl) +{ + int i; + int sge_count; + struct scatterlist *os_sgl; + + /* + * Return 0 if there is no data transfer + */ + if (!scp->request_buffer || !scp->request_bufflen) + return 0; + + if (!scp->use_sg) { + mfi_sgl->sge64[0].phys_addr = pci_map_single(instance->pdev, + scp-> + request_buffer, + scp-> + request_bufflen, + scp-> + sc_data_direction); + + mfi_sgl->sge64[0].length = scp->request_bufflen; + + return 1; + } + + os_sgl = (struct scatterlist *)scp->request_buffer; + sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg, + scp->sc_data_direction); + + for (i = 0; i < sge_count; i++, os_sgl++) { + mfi_sgl->sge64[i].length = sg_dma_len(os_sgl); + mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl); + } + + return sge_count; +} + +/** + * megasas_build_dcdb - Prepares a direct cdb (DCDB) command + * @instance: Adapter soft state + * @scp: SCSI command + * @cmd: Command to be prepared in + * + * This function prepares CDB commands. These are typcially pass-through + * commands to the devices. + */ +static inline int +megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp, + struct megasas_cmd *cmd) +{ + u32 sge_sz; + int sge_bytes; + u32 is_logical; + u32 device_id; + u16 flags = 0; + struct megasas_pthru_frame *pthru; + + is_logical = MEGASAS_IS_LOGICAL(scp); + device_id = MEGASAS_DEV_INDEX(instance, scp); + pthru = (struct megasas_pthru_frame *)cmd->frame; + + if (scp->sc_data_direction == PCI_DMA_TODEVICE) + flags = MFI_FRAME_DIR_WRITE; + else if (scp->sc_data_direction == PCI_DMA_FROMDEVICE) + flags = MFI_FRAME_DIR_READ; + else if (scp->sc_data_direction == PCI_DMA_NONE) + flags = MFI_FRAME_DIR_NONE; + + /* + * Prepare the DCDB frame + */ + pthru->cmd = (is_logical) ? MFI_CMD_LD_SCSI_IO : MFI_CMD_PD_SCSI_IO; + pthru->cmd_status = 0x0; + pthru->scsi_status = 0x0; + pthru->target_id = device_id; + pthru->lun = scp->device->lun; + pthru->cdb_len = scp->cmd_len; + pthru->timeout = 0; + pthru->flags = flags; + pthru->data_xfer_len = scp->request_bufflen; + + memcpy(pthru->cdb, scp->cmnd, scp->cmd_len); + + /* + * Construct SGL + */ + sge_sz = (IS_DMA64) ? sizeof(struct megasas_sge64) : + sizeof(struct megasas_sge32); + + if (IS_DMA64) { + pthru->flags |= MFI_FRAME_SGL64; + pthru->sge_count = megasas_make_sgl64(instance, scp, + &pthru->sgl); + } else + pthru->sge_count = megasas_make_sgl32(instance, scp, + &pthru->sgl); + + /* + * Sense info specific + */ + pthru->sense_len = SCSI_SENSE_BUFFERSIZE; + pthru->sense_buf_phys_addr_hi = 0; + pthru->sense_buf_phys_addr_lo = cmd->sense_phys_addr; + + sge_bytes = sge_sz * pthru->sge_count; + + /* + * Compute the total number of frames this command consumes. FW uses + * this number to pull sufficient number of frames from host memory. + */ + cmd->frame_count = (sge_bytes / MEGAMFI_FRAME_SIZE) + + ((sge_bytes % MEGAMFI_FRAME_SIZE) ? 1 : 0) + 1; + + if (cmd->frame_count > 7) + cmd->frame_count = 8; + + return cmd->frame_count; +} + +/** + * megasas_build_ldio - Prepares IOs to logical devices + * @instance: Adapter soft state + * @scp: SCSI command + * @cmd: Command to to be prepared + * + * Frames (and accompanying SGLs) for regular SCSI IOs use this function. + */ +static inline int +megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp, + struct megasas_cmd *cmd) +{ + u32 sge_sz; + int sge_bytes; + u32 device_id; + u8 sc = scp->cmnd[0]; + u16 flags = 0; + struct megasas_io_frame *ldio; + + device_id = MEGASAS_DEV_INDEX(instance, scp); + ldio = (struct megasas_io_frame *)cmd->frame; + + if (scp->sc_data_direction == PCI_DMA_TODEVICE) + flags = MFI_FRAME_DIR_WRITE; + else if (scp->sc_data_direction == PCI_DMA_FROMDEVICE) + flags = MFI_FRAME_DIR_READ; + + /* + * Preare the Logical IO frame: 2nd bit is zero for all read cmds + */ + ldio->cmd = (sc & 0x02) ? MFI_CMD_LD_WRITE : MFI_CMD_LD_READ; + ldio->cmd_status = 0x0; + ldio->scsi_status = 0x0; + ldio->target_id = device_id; + ldio->timeout = 0; + ldio->reserved_0 = 0; + ldio->pad_0 = 0; + ldio->flags = flags; + ldio->start_lba_hi = 0; + ldio->access_byte = (scp->cmd_len != 6) ? scp->cmnd[1] : 0; + + /* + * 6-byte READ(0x08) or WRITE(0x0A) cdb + */ + if (scp->cmd_len == 6) { + ldio->lba_count = (u32) scp->cmnd[4]; + ldio->start_lba_lo = ((u32) scp->cmnd[1] << 16) | + ((u32) scp->cmnd[2] << 8) | (u32) scp->cmnd[3]; + + ldio->start_lba_lo &= 0x1FFFFF; + } + + /* + * 10-byte READ(0x28) or WRITE(0x2A) cdb + */ + else if (scp->cmd_len == 10) { + ldio->lba_count = (u32) scp->cmnd[8] | + ((u32) scp->cmnd[7] << 8); + ldio->start_lba_lo = ((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + } + + /* + * 12-byte READ(0xA8) or WRITE(0xAA) cdb + */ + else if (scp->cmd_len == 12) { + ldio->lba_count = ((u32) scp->cmnd[6] << 24) | + ((u32) scp->cmnd[7] << 16) | + ((u32) scp->cmnd[8] << 8) | (u32) scp->cmnd[9]; + + ldio->start_lba_lo = ((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + } + + /* + * 16-byte READ(0x88) or WRITE(0x8A) cdb + */ + else if (scp->cmd_len == 16) { + ldio->lba_count = ((u32) scp->cmnd[10] << 24) | + ((u32) scp->cmnd[11] << 16) | + ((u32) scp->cmnd[12] << 8) | (u32) scp->cmnd[13]; + + ldio->start_lba_lo = ((u32) scp->cmnd[6] << 24) | + ((u32) scp->cmnd[7] << 16) | + ((u32) scp->cmnd[8] << 8) | (u32) scp->cmnd[9]; + + ldio->start_lba_hi = ((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + + } + + /* + * Construct SGL + */ + sge_sz = (IS_DMA64) ? sizeof(struct megasas_sge64) : + sizeof(struct megasas_sge32); + + if (IS_DMA64) { + ldio->flags |= MFI_FRAME_SGL64; + ldio->sge_count = megasas_make_sgl64(instance, scp, &ldio->sgl); + } else + ldio->sge_count = megasas_make_sgl32(instance, scp, &ldio->sgl); + + /* + * Sense info specific + */ + ldio->sense_len = SCSI_SENSE_BUFFERSIZE; + ldio->sense_buf_phys_addr_hi = 0; + ldio->sense_buf_phys_addr_lo = cmd->sense_phys_addr; + + sge_bytes = sge_sz * ldio->sge_count; + + cmd->frame_count = (sge_bytes / MEGAMFI_FRAME_SIZE) + + ((sge_bytes % MEGAMFI_FRAME_SIZE) ? 1 : 0) + 1; + + if (cmd->frame_count > 7) + cmd->frame_count = 8; + + return cmd->frame_count; +} + +/** + * megasas_build_cmd - Prepares a command packet + * @instance: Adapter soft state + * @scp: SCSI command + * @frame_count: [OUT] Number of frames used to prepare this command + */ +static inline struct megasas_cmd *megasas_build_cmd(struct megasas_instance + *instance, + struct scsi_cmnd *scp, + int *frame_count) +{ + u32 logical_cmd; + struct megasas_cmd *cmd; + + /* + * Find out if this is logical or physical drive command. + */ + logical_cmd = MEGASAS_IS_LOGICAL(scp); + + /* + * Logical drive command + */ + if (logical_cmd) { + + if (scp->device->id >= MEGASAS_MAX_LD) { + scp->result = DID_BAD_TARGET << 16; + return NULL; + } + + switch (scp->cmnd[0]) { + + case READ_10: + case WRITE_10: + case READ_12: + case WRITE_12: + case READ_6: + case WRITE_6: + case READ_16: + case WRITE_16: + /* + * Fail for LUN > 0 + */ + if (scp->device->lun) { + scp->result = DID_BAD_TARGET << 16; + return NULL; + } + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + scp->result = DID_IMM_RETRY << 16; + return NULL; + } + + *frame_count = megasas_build_ldio(instance, scp, cmd); + + if (!(*frame_count)) { + megasas_return_cmd(instance, cmd); + return NULL; + } + + return cmd; + + default: + /* + * Fail for LUN > 0 + */ + if (scp->device->lun) { + scp->result = DID_BAD_TARGET << 16; + return NULL; + } + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + scp->result = DID_IMM_RETRY << 16; + return NULL; + } + + *frame_count = megasas_build_dcdb(instance, scp, cmd); + + if (!(*frame_count)) { + megasas_return_cmd(instance, cmd); + return NULL; + } + + return cmd; + } + } else { + cmd = megasas_get_cmd(instance); + + if (!cmd) { + scp->result = DID_IMM_RETRY << 16; + return NULL; + } + + *frame_count = megasas_build_dcdb(instance, scp, cmd); + + if (!(*frame_count)) { + megasas_return_cmd(instance, cmd); + return NULL; + } + + return cmd; + } + + return NULL; +} + +/** + * megasas_queue_command - Queue entry point + * @scmd: SCSI command to be queued + * @done: Callback entry point + */ +static int +megasas_queue_command(struct scsi_cmnd *scmd, void (*done) (struct scsi_cmnd *)) +{ + u32 frame_count; + unsigned long flags; + struct megasas_cmd *cmd; + struct megasas_instance *instance; + + instance = (struct megasas_instance *) + scmd->device->host->hostdata; + scmd->scsi_done = done; + scmd->result = 0; + + cmd = megasas_build_cmd(instance, scmd, &frame_count); + + if (!cmd) { + done(scmd); + return 0; + } + + cmd->scmd = scmd; + scmd->SCp.ptr = (char *)cmd; + scmd->SCp.sent_command = jiffies; + + /* + * Issue the command to the FW + */ + spin_lock_irqsave(&instance->instance_lock, flags); + instance->fw_outstanding++; + spin_unlock_irqrestore(&instance->instance_lock, flags); + + writel(((cmd->frame_phys_addr >> 3) | (cmd->frame_count - 1)), + &instance->reg_set->inbound_queue_port); + + return 0; +} + +/** + * megasas_wait_for_outstanding - Wait for all outstanding cmds + * @instance: Adapter soft state + * + * This function waits for upto MEGASAS_RESET_WAIT_TIME seconds for FW to + * complete all its outstanding commands. Returns error if one or more IOs + * are pending after this time period. It also marks the controller dead. + */ +static int megasas_wait_for_outstanding(struct megasas_instance *instance) +{ + int i; + u32 wait_time = MEGASAS_RESET_WAIT_TIME; + + for (i = 0; i < wait_time; i++) { + + if (!instance->fw_outstanding) + break; + + if (!(i % MEGASAS_RESET_NOTICE_INTERVAL)) { + printk(KERN_NOTICE "megasas: [%2d]waiting for %d " + "commands to complete\n", i, + instance->fw_outstanding); + } + + msleep(1000); + } + + if (instance->fw_outstanding) { + instance->hw_crit_error = 1; + return FAILED; + } + + return SUCCESS; +} + +/** + * megasas_generic_reset - Generic reset routine + * @scmd: Mid-layer SCSI command + * + * This routine implements a generic reset handler for device, bus and host + * reset requests. Device, bus and host specific reset handlers can use this + * function after they do their specific tasks. + */ +static int megasas_generic_reset(struct scsi_cmnd *scmd) +{ + int ret_val; + struct megasas_instance *instance; + + instance = (struct megasas_instance *)scmd->device->host->hostdata; + + printk(KERN_NOTICE "megasas: RESET -%ld cmd=%x \n", + scmd->serial_number, scmd->cmnd[0], scmd->device->channel, + scmd->device->id, scmd->device->lun); + + if (instance->hw_crit_error) { + printk(KERN_ERR "megasas: cannot recover from previous reset " + "failures\n"); + return FAILED; + } + + spin_unlock(scmd->device->host->host_lock); + + ret_val = megasas_wait_for_outstanding(instance); + + if (ret_val == SUCCESS) + printk(KERN_NOTICE "megasas: reset successful \n"); + else + printk(KERN_ERR "megasas: failed to do reset\n"); + + spin_lock(scmd->device->host->host_lock); + + return ret_val; +} + +static enum scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) +{ + unsigned long seconds; + + if (scmd->SCp.ptr) { + seconds = (jiffies - scmd->SCp.sent_command) / HZ; + + if (seconds < 90) { + return EH_RESET_TIMER; + } else { + return EH_NOT_HANDLED; + } + } + + return EH_HANDLED; +} + +/** + * megasas_reset_device - Device reset handler entry point + */ +static int megasas_reset_device(struct scsi_cmnd *scmd) +{ + int ret; + + /* + * First wait for all commands to complete + */ + ret = megasas_generic_reset(scmd); + + return ret; +} + +/** + * megasas_reset_bus_host - Bus & host reset handler entry point + */ +static int megasas_reset_bus_host(struct scsi_cmnd *scmd) +{ + int ret; + + /* + * Frist wait for all commands to complete + */ + ret = megasas_generic_reset(scmd); + + return ret; +} + +/** + * megasas_service_aen - Processes an event notification + * @instance: Adapter soft state + * @cmd: AEN command completed by the ISR + * + * For AEN, driver sends a command down to FW that is held by the FW till an + * event occurs. When an event of interest occurs, FW completes the command + * that it was previously holding. + * + * This routines sends SIGIO signal to processes that have registered with the + * driver for AEN. + */ +static void +megasas_service_aen(struct megasas_instance *instance, struct megasas_cmd *cmd) +{ + /* + * Don't signal app if it is just an aborted previously registered aen + */ + if (!cmd->abort_aen) + kill_fasync(&megasas_async_queue, SIGIO, POLL_IN); + else + cmd->abort_aen = 0; + + instance->aen_cmd = NULL; + megasas_return_cmd(instance, cmd); +} + +/* + * Scsi host template for megaraid_sas driver + */ +static struct scsi_host_template megasas_template = { + + .module = THIS_MODULE, + .name = "LSI Logic SAS based MegaRAID driver", + .proc_name = "megaraid_sas", + .queuecommand = megasas_queue_command, + .eh_device_reset_handler = megasas_reset_device, + .eh_bus_reset_handler = megasas_reset_bus_host, + .eh_host_reset_handler = megasas_reset_bus_host, + .eh_timed_out = megasas_reset_timer, + .use_clustering = ENABLE_CLUSTERING, +}; + +/** + * megasas_complete_int_cmd - Completes an internal command + * @instance: Adapter soft state + * @cmd: Command to be completed + * + * The megasas_issue_blocked_cmd() function waits for a command to complete + * after it issues a command. This function wakes up that waiting routine by + * calling wake_up() on the wait queue. + */ +static void +megasas_complete_int_cmd(struct megasas_instance *instance, + struct megasas_cmd *cmd) +{ + cmd->cmd_status = cmd->frame->io.cmd_status; + + if (cmd->cmd_status == ENODATA) { + cmd->cmd_status = 0; + } + wake_up(&instance->int_cmd_wait_q); +} + +/** + * megasas_complete_abort - Completes aborting a command + * @instance: Adapter soft state + * @cmd: Cmd that was issued to abort another cmd + * + * The megasas_issue_blocked_abort_cmd() function waits on abort_cmd_wait_q + * after it issues an abort on a previously issued command. This function + * wakes up all functions waiting on the same wait queue. + */ +static void +megasas_complete_abort(struct megasas_instance *instance, + struct megasas_cmd *cmd) +{ + if (cmd->sync_cmd) { + cmd->sync_cmd = 0; + cmd->cmd_status = 0; + wake_up(&instance->abort_cmd_wait_q); + } + + return; +} + +/** + * megasas_unmap_sgbuf - Unmap SG buffers + * @instance: Adapter soft state + * @cmd: Completed command + */ +static inline void +megasas_unmap_sgbuf(struct megasas_instance *instance, struct megasas_cmd *cmd) +{ + dma_addr_t buf_h; + u8 opcode; + + if (cmd->scmd->use_sg) { + pci_unmap_sg(instance->pdev, cmd->scmd->request_buffer, + cmd->scmd->use_sg, cmd->scmd->sc_data_direction); + return; + } + + if (!cmd->scmd->request_bufflen) + return; + + opcode = cmd->frame->hdr.cmd; + + if ((opcode == MFI_CMD_LD_READ) || (opcode == MFI_CMD_LD_WRITE)) { + if (IS_DMA64) + buf_h = cmd->frame->io.sgl.sge64[0].phys_addr; + else + buf_h = cmd->frame->io.sgl.sge32[0].phys_addr; + } else { + if (IS_DMA64) + buf_h = cmd->frame->pthru.sgl.sge64[0].phys_addr; + else + buf_h = cmd->frame->pthru.sgl.sge32[0].phys_addr; + } + + pci_unmap_single(instance->pdev, buf_h, cmd->scmd->request_bufflen, + cmd->scmd->sc_data_direction); + return; +} + +/** + * megasas_complete_cmd - Completes a command + * @instance: Adapter soft state + * @cmd: Command to be completed + * @alt_status: If non-zero, use this value as status to + * SCSI mid-layer instead of the value returned + * by the FW. This should be used if caller wants + * an alternate status (as in the case of aborted + * commands) + */ +static inline void +megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, + u8 alt_status) +{ + int exception = 0; + struct megasas_header *hdr = &cmd->frame->hdr; + unsigned long flags; + + if (cmd->scmd) { + cmd->scmd->SCp.ptr = (char *)0; + } + + switch (hdr->cmd) { + + case MFI_CMD_PD_SCSI_IO: + case MFI_CMD_LD_SCSI_IO: + + /* + * MFI_CMD_PD_SCSI_IO and MFI_CMD_LD_SCSI_IO could have been + * issued either through an IO path or an IOCTL path. If it + * was via IOCTL, we will send it to internal completion. + */ + if (cmd->sync_cmd) { + cmd->sync_cmd = 0; + megasas_complete_int_cmd(instance, cmd); + break; + } + + /* + * Don't export physical disk devices to mid-layer. + */ + if (!MEGASAS_IS_LOGICAL(cmd->scmd) && + (hdr->cmd_status == MFI_STAT_OK) && + (cmd->scmd->cmnd[0] == INQUIRY)) { + + if (((*(u8 *) cmd->scmd->request_buffer) & 0x1F) == + TYPE_DISK) { + cmd->scmd->result = DID_BAD_TARGET << 16; + exception = 1; + } + } + + case MFI_CMD_LD_READ: + case MFI_CMD_LD_WRITE: + + if (alt_status) { + cmd->scmd->result = alt_status << 16; + exception = 1; + } + + if (exception) { + + spin_lock_irqsave(&instance->instance_lock, flags); + instance->fw_outstanding--; + spin_unlock_irqrestore(&instance->instance_lock, flags); + + megasas_unmap_sgbuf(instance, cmd); + cmd->scmd->scsi_done(cmd->scmd); + megasas_return_cmd(instance, cmd); + + break; + } + + switch (hdr->cmd_status) { + + case MFI_STAT_OK: + cmd->scmd->result = DID_OK << 16; + break; + + case MFI_STAT_SCSI_IO_FAILED: + case MFI_STAT_LD_INIT_IN_PROGRESS: + cmd->scmd->result = + (DID_ERROR << 16) | hdr->scsi_status; + break; + + case MFI_STAT_SCSI_DONE_WITH_ERROR: + + cmd->scmd->result = (DID_OK << 16) | hdr->scsi_status; + + if (hdr->scsi_status == SAM_STAT_CHECK_CONDITION) { + memset(cmd->scmd->sense_buffer, 0, + SCSI_SENSE_BUFFERSIZE); + memcpy(cmd->scmd->sense_buffer, cmd->sense, + hdr->sense_len); + + cmd->scmd->result |= DRIVER_SENSE << 24; + } + + break; + + case MFI_STAT_LD_OFFLINE: + case MFI_STAT_DEVICE_NOT_FOUND: + cmd->scmd->result = DID_BAD_TARGET << 16; + break; + + default: + printk(KERN_DEBUG "megasas: MFI FW status %#x\n", + hdr->cmd_status); + cmd->scmd->result = DID_ERROR << 16; + break; + } + + spin_lock_irqsave(&instance->instance_lock, flags); + instance->fw_outstanding--; + spin_unlock_irqrestore(&instance->instance_lock, flags); + + megasas_unmap_sgbuf(instance, cmd); + cmd->scmd->scsi_done(cmd->scmd); + megasas_return_cmd(instance, cmd); + + break; + + case MFI_CMD_SMP: + case MFI_CMD_STP: + case MFI_CMD_DCMD: + + /* + * See if got an event notification + */ + if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_WAIT) + megasas_service_aen(instance, cmd); + else + megasas_complete_int_cmd(instance, cmd); + + break; + + case MFI_CMD_ABORT: + /* + * Cmd issued to abort another cmd returned + */ + megasas_complete_abort(instance, cmd); + break; + + default: + printk("megasas: Unknown command completed! [0x%X]\n", + hdr->cmd); + break; + } +} + +/** + * megasas_deplete_reply_queue - Processes all completed commands + * @instance: Adapter soft state + * @alt_status: Alternate status to be returned to + * SCSI mid-layer instead of the status + * returned by the FW + */ +static inline int +megasas_deplete_reply_queue(struct megasas_instance *instance, u8 alt_status) +{ + u32 status; + u32 producer; + u32 consumer; + u32 context; + struct megasas_cmd *cmd; + + /* + * Check if it is our interrupt + */ + status = readl(&instance->reg_set->outbound_intr_status); + + if (!(status & MFI_OB_INTR_STATUS_MASK)) { + return IRQ_NONE; + } + + /* + * Clear the interrupt by writing back the same value + */ + writel(status, &instance->reg_set->outbound_intr_status); + + producer = *instance->producer; + consumer = *instance->consumer; + + while (consumer != producer) { + context = instance->reply_queue[consumer]; + + cmd = instance->cmd_list[context]; + + megasas_complete_cmd(instance, cmd, alt_status); + + consumer++; + if (consumer == (instance->max_fw_cmds + 1)) { + consumer = 0; + } + } + + *instance->consumer = producer; + + return IRQ_HANDLED; +} + +/** + * megasas_isr - isr entry point + */ +static irqreturn_t megasas_isr(int irq, void *devp, struct pt_regs *regs) +{ + return megasas_deplete_reply_queue((struct megasas_instance *)devp, + DID_OK); +} + +/** + * megasas_transition_to_ready - Move the FW to READY state + * @reg_set: MFI register set + * + * During the initialization, FW passes can potentially be in any one of + * several possible states. If the FW in operational, waiting-for-handshake + * states, driver must take steps to bring it to ready state. Otherwise, it + * has to wait for the ready state. + */ +static int +megasas_transition_to_ready(struct megasas_register_set __iomem * reg_set) +{ + int i; + u8 max_wait; + u32 fw_state; + u32 cur_state; + + fw_state = readl(®_set->outbound_msg_0) & MFI_STATE_MASK; + + while (fw_state != MFI_STATE_READY) { + + printk(KERN_INFO "megasas: Waiting for FW to come to ready" + " state\n"); + switch (fw_state) { + + case MFI_STATE_FAULT: + + printk(KERN_DEBUG "megasas: FW in FAULT state!!\n"); + return -ENODEV; + + case MFI_STATE_WAIT_HANDSHAKE: + /* + * Set the CLR bit in inbound doorbell + */ + writel(MFI_INIT_CLEAR_HANDSHAKE, + ®_set->inbound_doorbell); + + max_wait = 2; + cur_state = MFI_STATE_WAIT_HANDSHAKE; + break; + + case MFI_STATE_OPERATIONAL: + /* + * Bring it to READY state; assuming max wait 2 secs + */ + megasas_disable_intr(reg_set); + writel(MFI_INIT_READY, ®_set->inbound_doorbell); + + max_wait = 10; + cur_state = MFI_STATE_OPERATIONAL; + break; + + case MFI_STATE_UNDEFINED: + /* + * This state should not last for more than 2 seconds + */ + max_wait = 2; + cur_state = MFI_STATE_UNDEFINED; + break; + + case MFI_STATE_BB_INIT: + max_wait = 2; + cur_state = MFI_STATE_BB_INIT; + break; + + case MFI_STATE_FW_INIT: + max_wait = 20; + cur_state = MFI_STATE_FW_INIT; + break; + + case MFI_STATE_FW_INIT_2: + max_wait = 20; + cur_state = MFI_STATE_FW_INIT_2; + break; + + case MFI_STATE_DEVICE_SCAN: + max_wait = 20; + cur_state = MFI_STATE_DEVICE_SCAN; + break; + + case MFI_STATE_FLUSH_CACHE: + max_wait = 20; + cur_state = MFI_STATE_FLUSH_CACHE; + break; + + default: + printk(KERN_DEBUG "megasas: Unknown state 0x%x\n", + fw_state); + return -ENODEV; + } + + /* + * The cur_state should not last for more than max_wait secs + */ + for (i = 0; i < (max_wait * 1000); i++) { + fw_state = MFI_STATE_MASK & + readl(®_set->outbound_msg_0); + + if (fw_state == cur_state) { + msleep(1); + } else + break; + } + + /* + * Return error if fw_state hasn't changed after max_wait + */ + if (fw_state == cur_state) { + printk(KERN_DEBUG "FW state [%d] hasn't changed " + "in %d secs\n", fw_state, max_wait); + return -ENODEV; + } + }; + + return 0; +} + +/** + * megasas_teardown_frame_pool - Destroy the cmd frame DMA pool + * @instance: Adapter soft state + */ +static void megasas_teardown_frame_pool(struct megasas_instance *instance) +{ + int i; + u32 max_cmd = instance->max_fw_cmds; + struct megasas_cmd *cmd; + + if (!instance->frame_dma_pool) + return; + + /* + * Return all frames to pool + */ + for (i = 0; i < max_cmd; i++) { + + cmd = instance->cmd_list[i]; + + if (cmd->frame) + pci_pool_free(instance->frame_dma_pool, cmd->frame, + cmd->frame_phys_addr); + + if (cmd->sense) + pci_pool_free(instance->sense_dma_pool, cmd->frame, + cmd->sense_phys_addr); + } + + /* + * Now destroy the pool itself + */ + pci_pool_destroy(instance->frame_dma_pool); + pci_pool_destroy(instance->sense_dma_pool); + + instance->frame_dma_pool = NULL; + instance->sense_dma_pool = NULL; +} + +/** + * megasas_create_frame_pool - Creates DMA pool for cmd frames + * @instance: Adapter soft state + * + * Each command packet has an embedded DMA memory buffer that is used for + * filling MFI frame and the SG list that immediately follows the frame. This + * function creates those DMA memory buffers for each command packet by using + * PCI pool facility. + */ +static int megasas_create_frame_pool(struct megasas_instance *instance) +{ + int i; + u32 max_cmd; + u32 sge_sz; + u32 sgl_sz; + u32 total_sz; + u32 frame_count; + struct megasas_cmd *cmd; + + max_cmd = instance->max_fw_cmds; + + /* + * Size of our frame is 64 bytes for MFI frame, followed by max SG + * elements and finally SCSI_SENSE_BUFFERSIZE bytes for sense buffer + */ + sge_sz = (IS_DMA64) ? sizeof(struct megasas_sge64) : + sizeof(struct megasas_sge32); + + /* + * Calculated the number of 64byte frames required for SGL + */ + sgl_sz = sge_sz * instance->max_num_sge; + frame_count = (sgl_sz + MEGAMFI_FRAME_SIZE - 1) / MEGAMFI_FRAME_SIZE; + + /* + * We need one extra frame for the MFI command + */ + frame_count++; + + total_sz = MEGAMFI_FRAME_SIZE * frame_count; + /* + * Use DMA pool facility provided by PCI layer + */ + instance->frame_dma_pool = pci_pool_create("megasas frame pool", + instance->pdev, total_sz, 64, + 0); + + if (!instance->frame_dma_pool) { + printk(KERN_DEBUG "megasas: failed to setup frame pool\n"); + return -ENOMEM; + } + + instance->sense_dma_pool = pci_pool_create("megasas sense pool", + instance->pdev, 128, 4, 0); + + if (!instance->sense_dma_pool) { + printk(KERN_DEBUG "megasas: failed to setup sense pool\n"); + + pci_pool_destroy(instance->frame_dma_pool); + instance->frame_dma_pool = NULL; + + return -ENOMEM; + } + + /* + * Allocate and attach a frame to each of the commands in cmd_list. + * By making cmd->index as the context instead of the &cmd, we can + * always use 32bit context regardless of the architecture + */ + for (i = 0; i < max_cmd; i++) { + + cmd = instance->cmd_list[i]; + + cmd->frame = pci_pool_alloc(instance->frame_dma_pool, + GFP_KERNEL, &cmd->frame_phys_addr); + + cmd->sense = pci_pool_alloc(instance->sense_dma_pool, + GFP_KERNEL, &cmd->sense_phys_addr); + + /* + * megasas_teardown_frame_pool() takes care of freeing + * whatever has been allocated + */ + if (!cmd->frame || !cmd->sense) { + printk(KERN_DEBUG "megasas: pci_pool_alloc failed \n"); + megasas_teardown_frame_pool(instance); + return -ENOMEM; + } + + cmd->frame->io.context = cmd->index; + } + + return 0; +} + +/** + * megasas_free_cmds - Free all the cmds in the free cmd pool + * @instance: Adapter soft state + */ +static void megasas_free_cmds(struct megasas_instance *instance) +{ + int i; + /* First free the MFI frame pool */ + megasas_teardown_frame_pool(instance); + + /* Free all the commands in the cmd_list */ + for (i = 0; i < instance->max_fw_cmds; i++) + kfree(instance->cmd_list[i]); + + /* Free the cmd_list buffer itself */ + kfree(instance->cmd_list); + instance->cmd_list = NULL; + + INIT_LIST_HEAD(&instance->cmd_pool); +} + +/** + * megasas_alloc_cmds - Allocates the command packets + * @instance: Adapter soft state + * + * Each command that is issued to the FW, whether IO commands from the OS or + * internal commands like IOCTLs, are wrapped in local data structure called + * megasas_cmd. The frame embedded in this megasas_cmd is actually issued to + * the FW. + * + * Each frame has a 32-bit field called context (tag). This context is used + * to get back the megasas_cmd from the frame when a frame gets completed in + * the ISR. Typically the address of the megasas_cmd itself would be used as + * the context. But we wanted to keep the differences between 32 and 64 bit + * systems to the mininum. We always use 32 bit integers for the context. In + * this driver, the 32 bit values are the indices into an array cmd_list. + * This array is used only to look up the megasas_cmd given the context. The + * free commands themselves are maintained in a linked list called cmd_pool. + */ +static int megasas_alloc_cmds(struct megasas_instance *instance) +{ + int i; + int j; + u32 max_cmd; + struct megasas_cmd *cmd; + + max_cmd = instance->max_fw_cmds; + + /* + * instance->cmd_list is an array of struct megasas_cmd pointers. + * Allocate the dynamic array first and then allocate individual + * commands. + */ + instance->cmd_list = kmalloc(sizeof(struct megasas_cmd *) * max_cmd, + GFP_KERNEL); + + if (!instance->cmd_list) { + printk(KERN_DEBUG "megasas: out of memory\n"); + return -ENOMEM; + } + + memset(instance->cmd_list, 0, sizeof(struct megasas_cmd *) * max_cmd); + + for (i = 0; i < max_cmd; i++) { + instance->cmd_list[i] = kmalloc(sizeof(struct megasas_cmd), + GFP_KERNEL); + + if (!instance->cmd_list[i]) { + + for (j = 0; j < i; j++) + kfree(instance->cmd_list[j]); + + kfree(instance->cmd_list); + instance->cmd_list = NULL; + + return -ENOMEM; + } + } + + /* + * Add all the commands to command pool (instance->cmd_pool) + */ + for (i = 0; i < max_cmd; i++) { + cmd = instance->cmd_list[i]; + memset(cmd, 0, sizeof(struct megasas_cmd)); + cmd->index = i; + cmd->instance = instance; + + list_add_tail(&cmd->list, &instance->cmd_pool); + } + + /* + * Create a frame pool and assign one frame to each cmd + */ + if (megasas_create_frame_pool(instance)) { + printk(KERN_DEBUG "megasas: Error creating frame DMA pool\n"); + megasas_free_cmds(instance); + } + + return 0; +} + +/** + * megasas_get_controller_info - Returns FW's controller structure + * @instance: Adapter soft state + * @ctrl_info: Controller information structure + * + * Issues an internal command (DCMD) to get the FW's controller structure. + * This information is mainly used to find out the maximum IO transfer per + * command supported by the FW. + */ +static int +megasas_get_ctrl_info(struct megasas_instance *instance, + struct megasas_ctrl_info *ctrl_info) +{ + int ret = 0; + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + struct megasas_ctrl_info *ci; + dma_addr_t ci_h = 0; + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + printk(KERN_DEBUG "megasas: Failed to get a free cmd\n"); + return -ENOMEM; + } + + dcmd = &cmd->frame->dcmd; + + ci = pci_alloc_consistent(instance->pdev, + sizeof(struct megasas_ctrl_info), &ci_h); + + if (!ci) { + printk(KERN_DEBUG "Failed to alloc mem for ctrl info\n"); + megasas_return_cmd(instance, cmd); + return -ENOMEM; + } + + memset(ci, 0, sizeof(*ci)); + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0xFF; + dcmd->sge_count = 1; + dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->timeout = 0; + dcmd->data_xfer_len = sizeof(struct megasas_ctrl_info); + dcmd->opcode = MR_DCMD_CTRL_GET_INFO; + dcmd->sgl.sge32[0].phys_addr = ci_h; + dcmd->sgl.sge32[0].length = sizeof(struct megasas_ctrl_info); + + if (!megasas_issue_polled(instance, cmd)) { + ret = 0; + memcpy(ctrl_info, ci, sizeof(struct megasas_ctrl_info)); + } else { + ret = -1; + } + + pci_free_consistent(instance->pdev, sizeof(struct megasas_ctrl_info), + ci, ci_h); + + megasas_return_cmd(instance, cmd); + return ret; +} + +/** + * megasas_init_mfi - Initializes the FW + * @instance: Adapter soft state + * + * This is the main function for initializing MFI firmware. + */ +static int megasas_init_mfi(struct megasas_instance *instance) +{ + u32 context_sz; + u32 reply_q_sz; + u32 max_sectors_1; + u32 max_sectors_2; + struct megasas_register_set __iomem *reg_set; + + struct megasas_cmd *cmd; + struct megasas_ctrl_info *ctrl_info; + + struct megasas_init_frame *init_frame; + struct megasas_init_queue_info *initq_info; + dma_addr_t init_frame_h; + dma_addr_t initq_info_h; + + /* + * Map the message registers + */ + instance->base_addr = pci_resource_start(instance->pdev, 0); + + if (pci_request_regions(instance->pdev, "megasas: LSI Logic")) { + printk(KERN_DEBUG "megasas: IO memory region busy!\n"); + return -EBUSY; + } + + instance->reg_set = ioremap_nocache(instance->base_addr, 8192); + + if (!instance->reg_set) { + printk(KERN_DEBUG "megasas: Failed to map IO mem\n"); + goto fail_ioremap; + } + + reg_set = instance->reg_set; + + /* + * We expect the FW state to be READY + */ + if (megasas_transition_to_ready(instance->reg_set)) + goto fail_ready_state; + + /* + * Get various operational parameters from status register + */ + instance->max_fw_cmds = readl(®_set->outbound_msg_0) & 0x00FFFF; + instance->max_num_sge = (readl(®_set->outbound_msg_0) & 0xFF0000) >> + 0x10; + /* + * Create a pool of commands + */ + if (megasas_alloc_cmds(instance)) + goto fail_alloc_cmds; + + /* + * Allocate memory for reply queue. Length of reply queue should + * be _one_ more than the maximum commands handled by the firmware. + * + * Note: When FW completes commands, it places corresponding contex + * values in this circular reply queue. This circular queue is a fairly + * typical producer-consumer queue. FW is the producer (of completed + * commands) and the driver is the consumer. + */ + context_sz = sizeof(u32); + reply_q_sz = context_sz * (instance->max_fw_cmds + 1); + + instance->reply_queue = pci_alloc_consistent(instance->pdev, + reply_q_sz, + &instance->reply_queue_h); + + if (!instance->reply_queue) { + printk(KERN_DEBUG "megasas: Out of DMA mem for reply queue\n"); + goto fail_reply_queue; + } + + /* + * Prepare a init frame. Note the init frame points to queue info + * structure. Each frame has SGL allocated after first 64 bytes. For + * this frame - since we don't need any SGL - we use SGL's space as + * queue info structure + * + * We will not get a NULL command below. We just created the pool. + */ + cmd = megasas_get_cmd(instance); + + init_frame = (struct megasas_init_frame *)cmd->frame; + initq_info = (struct megasas_init_queue_info *) + ((unsigned long)init_frame + 64); + + init_frame_h = cmd->frame_phys_addr; + initq_info_h = init_frame_h + 64; + + memset(init_frame, 0, MEGAMFI_FRAME_SIZE); + memset(initq_info, 0, sizeof(struct megasas_init_queue_info)); + + initq_info->reply_queue_entries = instance->max_fw_cmds + 1; + initq_info->reply_queue_start_phys_addr_lo = instance->reply_queue_h; + + initq_info->producer_index_phys_addr_lo = instance->producer_h; + initq_info->consumer_index_phys_addr_lo = instance->consumer_h; + + init_frame->cmd = MFI_CMD_INIT; + init_frame->cmd_status = 0xFF; + init_frame->queue_info_new_phys_addr_lo = initq_info_h; + + init_frame->data_xfer_len = sizeof(struct megasas_init_queue_info); + + /* + * Issue the init frame in polled mode + */ + if (megasas_issue_polled(instance, cmd)) { + printk(KERN_DEBUG "megasas: Failed to init firmware\n"); + goto fail_fw_init; + } + + megasas_return_cmd(instance, cmd); + + ctrl_info = kmalloc(sizeof(struct megasas_ctrl_info), GFP_KERNEL); + + /* + * Compute the max allowed sectors per IO: The controller info has two + * limits on max sectors. Driver should use the minimum of these two. + * + * 1 << stripe_sz_ops.min = max sectors per strip + * + * Note that older firmwares ( < FW ver 30) didn't report information + * to calculate max_sectors_1. So the number ended up as zero always. + */ + if (ctrl_info && !megasas_get_ctrl_info(instance, ctrl_info)) { + + max_sectors_1 = (1 << ctrl_info->stripe_sz_ops.min) * + ctrl_info->max_strips_per_io; + max_sectors_2 = ctrl_info->max_request_size; + + instance->max_sectors_per_req = (max_sectors_1 < max_sectors_2) + ? max_sectors_1 : max_sectors_2; + } else + instance->max_sectors_per_req = instance->max_num_sge * + PAGE_SIZE / 512; + + kfree(ctrl_info); + + return 0; + + fail_fw_init: + megasas_return_cmd(instance, cmd); + + pci_free_consistent(instance->pdev, reply_q_sz, + instance->reply_queue, instance->reply_queue_h); + fail_reply_queue: + megasas_free_cmds(instance); + + fail_alloc_cmds: + fail_ready_state: + iounmap(instance->reg_set); + + fail_ioremap: + pci_release_regions(instance->pdev); + + return -EINVAL; +} + +/** + * megasas_release_mfi - Reverses the FW initialization + * @intance: Adapter soft state + */ +static void megasas_release_mfi(struct megasas_instance *instance) +{ + u32 reply_q_sz = sizeof(u32) * (instance->max_fw_cmds + 1); + + pci_free_consistent(instance->pdev, reply_q_sz, + instance->reply_queue, instance->reply_queue_h); + + megasas_free_cmds(instance); + + iounmap(instance->reg_set); + + pci_release_regions(instance->pdev); +} + +/** + * megasas_get_seq_num - Gets latest event sequence numbers + * @instance: Adapter soft state + * @eli: FW event log sequence numbers information + * + * FW maintains a log of all events in a non-volatile area. Upper layers would + * usually find out the latest sequence number of the events, the seq number at + * the boot etc. They would "read" all the events below the latest seq number + * by issuing a direct fw cmd (DCMD). For the future events (beyond latest seq + * number), they would subsribe to AEN (asynchronous event notification) and + * wait for the events to happen. + */ +static int +megasas_get_seq_num(struct megasas_instance *instance, + struct megasas_evt_log_info *eli) +{ + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + struct megasas_evt_log_info *el_info; + dma_addr_t el_info_h = 0; + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + return -ENOMEM; + } + + dcmd = &cmd->frame->dcmd; + el_info = pci_alloc_consistent(instance->pdev, + sizeof(struct megasas_evt_log_info), + &el_info_h); + + if (!el_info) { + megasas_return_cmd(instance, cmd); + return -ENOMEM; + } + + memset(el_info, 0, sizeof(*el_info)); + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0x0; + dcmd->sge_count = 1; + dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->timeout = 0; + dcmd->data_xfer_len = sizeof(struct megasas_evt_log_info); + dcmd->opcode = MR_DCMD_CTRL_EVENT_GET_INFO; + dcmd->sgl.sge32[0].phys_addr = el_info_h; + dcmd->sgl.sge32[0].length = sizeof(struct megasas_evt_log_info); + + megasas_issue_blocked_cmd(instance, cmd); + + /* + * Copy the data back into callers buffer + */ + memcpy(eli, el_info, sizeof(struct megasas_evt_log_info)); + + pci_free_consistent(instance->pdev, sizeof(struct megasas_evt_log_info), + el_info, el_info_h); + + megasas_return_cmd(instance, cmd); + + return 0; +} + +/** + * megasas_register_aen - Registers for asynchronous event notification + * @instance: Adapter soft state + * @seq_num: The starting sequence number + * @class_locale: Class of the event + * + * This function subscribes for AEN for events beyond the @seq_num. It requests + * to be notified if and only if the event is of type @class_locale + */ +static int +megasas_register_aen(struct megasas_instance *instance, u32 seq_num, + u32 class_locale_word) +{ + int ret_val; + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + union megasas_evt_class_locale curr_aen; + union megasas_evt_class_locale prev_aen; + + /* + * If there an AEN pending already (aen_cmd), check if the + * class_locale of that pending AEN is inclusive of the new + * AEN request we currently have. If it is, then we don't have + * to do anything. In other words, whichever events the current + * AEN request is subscribing to, have already been subscribed + * to. + * + * If the old_cmd is _not_ inclusive, then we have to abort + * that command, form a class_locale that is superset of both + * old and current and re-issue to the FW + */ + + curr_aen.word = class_locale_word; + + if (instance->aen_cmd) { + + prev_aen.word = instance->aen_cmd->frame->dcmd.mbox.w[1]; + + /* + * A class whose enum value is smaller is inclusive of all + * higher values. If a PROGRESS (= -1) was previously + * registered, then a new registration requests for higher + * classes need not be sent to FW. They are automatically + * included. + * + * Locale numbers don't have such hierarchy. They are bitmap + * values + */ + if ((prev_aen.members.class <= curr_aen.members.class) && + !((prev_aen.members.locale & curr_aen.members.locale) ^ + curr_aen.members.locale)) { + /* + * Previously issued event registration includes + * current request. Nothing to do. + */ + return 0; + } else { + curr_aen.members.locale |= prev_aen.members.locale; + + if (prev_aen.members.class < curr_aen.members.class) + curr_aen.members.class = prev_aen.members.class; + + instance->aen_cmd->abort_aen = 1; + ret_val = megasas_issue_blocked_abort_cmd(instance, + instance-> + aen_cmd); + + if (ret_val) { + printk(KERN_DEBUG "megasas: Failed to abort " + "previous AEN command\n"); + return ret_val; + } + } + } + + cmd = megasas_get_cmd(instance); + + if (!cmd) + return -ENOMEM; + + dcmd = &cmd->frame->dcmd; + + memset(instance->evt_detail, 0, sizeof(struct megasas_evt_detail)); + + /* + * Prepare DCMD for aen registration + */ + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0x0; + dcmd->sge_count = 1; + dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->timeout = 0; + dcmd->data_xfer_len = sizeof(struct megasas_evt_detail); + dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT; + dcmd->mbox.w[0] = seq_num; + dcmd->mbox.w[1] = curr_aen.word; + dcmd->sgl.sge32[0].phys_addr = (u32) instance->evt_detail_h; + dcmd->sgl.sge32[0].length = sizeof(struct megasas_evt_detail); + + /* + * Store reference to the cmd used to register for AEN. When an + * application wants us to register for AEN, we have to abort this + * cmd and re-register with a new EVENT LOCALE supplied by that app + */ + instance->aen_cmd = cmd; + + /* + * Issue the aen registration frame + */ + writel(cmd->frame_phys_addr >> 3, + &instance->reg_set->inbound_queue_port); + + return 0; +} + +/** + * megasas_start_aen - Subscribes to AEN during driver load time + * @instance: Adapter soft state + */ +static int megasas_start_aen(struct megasas_instance *instance) +{ + struct megasas_evt_log_info eli; + union megasas_evt_class_locale class_locale; + + /* + * Get the latest sequence number from FW + */ + memset(&eli, 0, sizeof(eli)); + + if (megasas_get_seq_num(instance, &eli)) + return -1; + + /* + * Register AEN with FW for latest sequence number plus 1 + */ + class_locale.members.reserved = 0; + class_locale.members.locale = MR_EVT_LOCALE_ALL; + class_locale.members.class = MR_EVT_CLASS_DEBUG; + + return megasas_register_aen(instance, eli.newest_seq_num + 1, + class_locale.word); +} + +/** + * megasas_io_attach - Attaches this driver to SCSI mid-layer + * @instance: Adapter soft state + */ +static int megasas_io_attach(struct megasas_instance *instance) +{ + struct Scsi_Host *host = instance->host; + + /* + * Export parameters required by SCSI mid-layer + */ + host->irq = instance->pdev->irq; + host->unique_id = instance->unique_id; + host->can_queue = instance->max_fw_cmds - MEGASAS_INT_CMDS; + host->this_id = instance->init_id; + host->sg_tablesize = instance->max_num_sge; + host->max_sectors = instance->max_sectors_per_req; + host->cmd_per_lun = 128; + host->max_channel = MEGASAS_MAX_CHANNELS - 1; + host->max_id = MEGASAS_MAX_DEV_PER_CHANNEL; + host->max_lun = MEGASAS_MAX_LUN; + + /* + * Notify the mid-layer about the new controller + */ + if (scsi_add_host(host, &instance->pdev->dev)) { + printk(KERN_DEBUG "megasas: scsi_add_host failed\n"); + return -ENODEV; + } + + /* + * Trigger SCSI to scan our drives + */ + scsi_scan_host(host); + return 0; +} + +/** + * megasas_probe_one - PCI hotplug entry point + * @pdev: PCI device structure + * @id: PCI ids of supported hotplugged adapter + */ +static int __devinit +megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int rval; + struct Scsi_Host *host; + struct megasas_instance *instance; + + /* + * Announce PCI information + */ + printk(KERN_INFO "megasas: %#4.04x:%#4.04x:%#4.04x:%#4.04x: ", + pdev->vendor, pdev->device, pdev->subsystem_vendor, + pdev->subsystem_device); + + printk("bus %d:slot %d:func %d\n", + pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* + * PCI prepping: enable device set bus mastering and dma mask + */ + rval = pci_enable_device(pdev); + + if (rval) { + return rval; + } + + pci_set_master(pdev); + + /* + * All our contollers are capable of performing 64-bit DMA + */ + if (IS_DMA64) { + if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) != 0) { + + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) + goto fail_set_dma_mask; + } + } else { + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) + goto fail_set_dma_mask; + } + + host = scsi_host_alloc(&megasas_template, + sizeof(struct megasas_instance)); + + if (!host) { + printk(KERN_DEBUG "megasas: scsi_host_alloc failed\n"); + goto fail_alloc_instance; + } + + instance = (struct megasas_instance *)host->hostdata; + memset(instance, 0, sizeof(*instance)); + + instance->producer = pci_alloc_consistent(pdev, sizeof(u32), + &instance->producer_h); + instance->consumer = pci_alloc_consistent(pdev, sizeof(u32), + &instance->consumer_h); + + if (!instance->producer || !instance->consumer) { + printk(KERN_DEBUG "megasas: Failed to allocate memory for " + "producer, consumer\n"); + goto fail_alloc_dma_buf; + } + + *instance->producer = 0; + *instance->consumer = 0; + + instance->evt_detail = pci_alloc_consistent(pdev, + sizeof(struct + megasas_evt_detail), + &instance->evt_detail_h); + + if (!instance->evt_detail) { + printk(KERN_DEBUG "megasas: Failed to allocate memory for " + "event detail structure\n"); + goto fail_alloc_dma_buf; + } + + /* + * Initialize locks and queues + */ + INIT_LIST_HEAD(&instance->cmd_pool); + + init_waitqueue_head(&instance->int_cmd_wait_q); + init_waitqueue_head(&instance->abort_cmd_wait_q); + + spin_lock_init(&instance->cmd_pool_lock); + spin_lock_init(&instance->instance_lock); + + sema_init(&instance->aen_mutex, 1); + sema_init(&instance->ioctl_sem, MEGASAS_INT_CMDS); + + /* + * Initialize PCI related and misc parameters + */ + instance->pdev = pdev; + instance->host = host; + instance->unique_id = pdev->bus->number << 8 | pdev->devfn; + instance->init_id = MEGASAS_DEFAULT_INIT_ID; + + /* + * Initialize MFI Firmware + */ + if (megasas_init_mfi(instance)) + goto fail_init_mfi; + + /* + * Register IRQ + */ + if (request_irq(pdev->irq, megasas_isr, SA_SHIRQ, "megasas", instance)) { + printk(KERN_DEBUG "megasas: Failed to register IRQ\n"); + goto fail_irq; + } + + megasas_enable_intr(instance->reg_set); + + /* + * Store instance in PCI softstate + */ + pci_set_drvdata(pdev, instance); + + /* + * Add this controller to megasas_mgmt_info structure so that it + * can be exported to management applications + */ + megasas_mgmt_info.count++; + megasas_mgmt_info.instance[megasas_mgmt_info.max_index] = instance; + megasas_mgmt_info.max_index++; + + /* + * Initiate AEN (Asynchronous Event Notification) + */ + if (megasas_start_aen(instance)) { + printk(KERN_DEBUG "megasas: start aen failed\n"); + goto fail_start_aen; + } + + /* + * Register with SCSI mid-layer + */ + if (megasas_io_attach(instance)) + goto fail_io_attach; + + return 0; + + fail_start_aen: + fail_io_attach: + megasas_mgmt_info.count--; + megasas_mgmt_info.instance[megasas_mgmt_info.max_index] = NULL; + megasas_mgmt_info.max_index--; + + pci_set_drvdata(pdev, NULL); + megasas_disable_intr(instance->reg_set); + free_irq(instance->pdev->irq, instance); + + megasas_release_mfi(instance); + + fail_irq: + fail_init_mfi: + fail_alloc_dma_buf: + if (instance->evt_detail) + pci_free_consistent(pdev, sizeof(struct megasas_evt_detail), + instance->evt_detail, + instance->evt_detail_h); + + if (instance->producer) + pci_free_consistent(pdev, sizeof(u32), instance->producer, + instance->producer_h); + if (instance->consumer) + pci_free_consistent(pdev, sizeof(u32), instance->consumer, + instance->consumer_h); + scsi_host_put(host); + + fail_alloc_instance: + fail_set_dma_mask: + pci_disable_device(pdev); + + return -ENODEV; +} + +/** + * megasas_flush_cache - Requests FW to flush all its caches + * @instance: Adapter soft state + */ +static void megasas_flush_cache(struct megasas_instance *instance) +{ + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + + cmd = megasas_get_cmd(instance); + + if (!cmd) + return; + + dcmd = &cmd->frame->dcmd; + + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0x0; + dcmd->sge_count = 0; + dcmd->flags = MFI_FRAME_DIR_NONE; + dcmd->timeout = 0; + dcmd->data_xfer_len = 0; + dcmd->opcode = MR_DCMD_CTRL_CACHE_FLUSH; + dcmd->mbox.b[0] = MR_FLUSH_CTRL_CACHE | MR_FLUSH_DISK_CACHE; + + megasas_issue_blocked_cmd(instance, cmd); + + megasas_return_cmd(instance, cmd); + + return; +} + +/** + * megasas_shutdown_controller - Instructs FW to shutdown the controller + * @instance: Adapter soft state + */ +static void megasas_shutdown_controller(struct megasas_instance *instance) +{ + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + + cmd = megasas_get_cmd(instance); + + if (!cmd) + return; + + if (instance->aen_cmd) + megasas_issue_blocked_abort_cmd(instance, instance->aen_cmd); + + dcmd = &cmd->frame->dcmd; + + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0x0; + dcmd->sge_count = 0; + dcmd->flags = MFI_FRAME_DIR_NONE; + dcmd->timeout = 0; + dcmd->data_xfer_len = 0; + dcmd->opcode = MR_DCMD_CTRL_SHUTDOWN; + + megasas_issue_blocked_cmd(instance, cmd); + + megasas_return_cmd(instance, cmd); + + return; +} + +/** + * megasas_detach_one - PCI hot"un"plug entry point + * @pdev: PCI device structure + */ +static void megasas_detach_one(struct pci_dev *pdev) +{ + int i; + struct Scsi_Host *host; + struct megasas_instance *instance; + + instance = pci_get_drvdata(pdev); + host = instance->host; + + scsi_remove_host(instance->host); + megasas_flush_cache(instance); + megasas_shutdown_controller(instance); + + /* + * Take the instance off the instance array. Note that we will not + * decrement the max_index. We let this array be sparse array + */ + for (i = 0; i < megasas_mgmt_info.max_index; i++) { + if (megasas_mgmt_info.instance[i] == instance) { + megasas_mgmt_info.count--; + megasas_mgmt_info.instance[i] = NULL; + + break; + } + } + + pci_set_drvdata(instance->pdev, NULL); + + megasas_disable_intr(instance->reg_set); + + free_irq(instance->pdev->irq, instance); + + megasas_release_mfi(instance); + + pci_free_consistent(pdev, sizeof(struct megasas_evt_detail), + instance->evt_detail, instance->evt_detail_h); + + pci_free_consistent(pdev, sizeof(u32), instance->producer, + instance->producer_h); + + pci_free_consistent(pdev, sizeof(u32), instance->consumer, + instance->consumer_h); + + scsi_host_put(host); + + pci_set_drvdata(pdev, NULL); + + pci_disable_device(pdev); + + return; +} + +/** + * megasas_shutdown - Shutdown entry point + * @device: Generic device structure + */ +static void megasas_shutdown(struct pci_dev *pdev) +{ + struct megasas_instance *instance = pci_get_drvdata(pdev); + megasas_flush_cache(instance); +} + +/** + * megasas_mgmt_open - char node "open" entry point + */ +static int megasas_mgmt_open(struct inode *inode, struct file *filep) +{ + /* + * Allow only those users with admin rights + */ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + return 0; +} + +/** + * megasas_mgmt_release - char node "release" entry point + */ +static int megasas_mgmt_release(struct inode *inode, struct file *filep) +{ + filep->private_data = NULL; + fasync_helper(-1, filep, 0, &megasas_async_queue); + + return 0; +} + +/** + * megasas_mgmt_fasync - Async notifier registration from applications + * + * This function adds the calling process to a driver global queue. When an + * event occurs, SIGIO will be sent to all processes in this queue. + */ +static int megasas_mgmt_fasync(int fd, struct file *filep, int mode) +{ + int rc; + + down(&megasas_async_queue_mutex); + + rc = fasync_helper(fd, filep, mode, &megasas_async_queue); + + up(&megasas_async_queue_mutex); + + if (rc >= 0) { + /* For sanity check when we get ioctl */ + filep->private_data = filep; + return 0; + } + + printk(KERN_DEBUG "megasas: fasync_helper failed [%d]\n", rc); + + return rc; +} + +/** + * megasas_mgmt_fw_ioctl - Issues management ioctls to FW + * @instance: Adapter soft state + * @argp: User's ioctl packet + */ +static int +megasas_mgmt_fw_ioctl(struct megasas_instance *instance, + struct megasas_iocpacket __user * user_ioc, + struct megasas_iocpacket *ioc) +{ + struct megasas_sge32 *kern_sge32; + struct megasas_cmd *cmd; + void *kbuff_arr[MAX_IOCTL_SGE]; + dma_addr_t buf_handle = 0; + int error = 0, i; + void *sense = NULL; + dma_addr_t sense_handle; + u32 *sense_ptr; + + memset(kbuff_arr, 0, sizeof(kbuff_arr)); + + if (ioc->sge_count > MAX_IOCTL_SGE) { + printk(KERN_DEBUG "megasas: SGE count [%d] > max limit [%d]\n", + ioc->sge_count, MAX_IOCTL_SGE); + return -EINVAL; + } + + cmd = megasas_get_cmd(instance); + if (!cmd) { + printk(KERN_DEBUG "megasas: Failed to get a cmd packet\n"); + return -ENOMEM; + } + + /* + * User's IOCTL packet has 2 frames (maximum). Copy those two + * frames into our cmd's frames. cmd->frame's context will get + * overwritten when we copy from user's frames. So set that value + * alone separately + */ + memcpy(cmd->frame, ioc->frame.raw, 2 * MEGAMFI_FRAME_SIZE); + cmd->frame->hdr.context = cmd->index; + + /* + * The management interface between applications and the fw uses + * MFI frames. E.g, RAID configuration changes, LD property changes + * etc are accomplishes through different kinds of MFI frames. The + * driver needs to care only about substituting user buffers with + * kernel buffers in SGLs. The location of SGL is embedded in the + * struct iocpacket itself. + */ + kern_sge32 = (struct megasas_sge32 *) + ((unsigned long)cmd->frame + ioc->sgl_off); + + /* + * For each user buffer, create a mirror buffer and copy in + */ + for (i = 0; i < ioc->sge_count; i++) { + kbuff_arr[i] = pci_alloc_consistent(instance->pdev, + ioc->sgl[i].iov_len, + &buf_handle); + if (!kbuff_arr[i]) { + printk(KERN_DEBUG "megasas: Failed to alloc " + "kernel SGL buffer for IOCTL \n"); + error = -ENOMEM; + goto out; + } + + /* + * We don't change the dma_coherent_mask, so + * pci_alloc_consistent only returns 32bit addresses + */ + kern_sge32[i].phys_addr = (u32) buf_handle; + kern_sge32[i].length = ioc->sgl[i].iov_len; + + /* + * We created a kernel buffer corresponding to the + * user buffer. Now copy in from the user buffer + */ + if (copy_from_user(kbuff_arr[i], ioc->sgl[i].iov_base, + (u32) (ioc->sgl[i].iov_len))) { + error = -EFAULT; + goto out; + } + } + + if (ioc->sense_len) { + sense = pci_alloc_consistent(instance->pdev, ioc->sense_len, + &sense_handle); + if (!sense) { + error = -ENOMEM; + goto out; + } + + sense_ptr = + (u32 *) ((unsigned long)cmd->frame + ioc->sense_off); + *sense_ptr = sense_handle; + } + + /* + * Set the sync_cmd flag so that the ISR knows not to complete this + * cmd to the SCSI mid-layer + */ + cmd->sync_cmd = 1; + megasas_issue_blocked_cmd(instance, cmd); + cmd->sync_cmd = 0; + + /* + * copy out the kernel buffers to user buffers + */ + for (i = 0; i < ioc->sge_count; i++) { + if (copy_to_user(ioc->sgl[i].iov_base, kbuff_arr[i], + ioc->sgl[i].iov_len)) { + error = -EFAULT; + goto out; + } + } + + /* + * copy out the sense + */ + if (ioc->sense_len) { + /* + * sense_ptr points to the location that has the user + * sense buffer address + */ + sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw + + ioc->sense_off); + + if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)), + sense, ioc->sense_len)) { + error = -EFAULT; + goto out; + } + } + + /* + * copy the status codes returned by the fw + */ + if (copy_to_user(&user_ioc->frame.hdr.cmd_status, + &cmd->frame->hdr.cmd_status, sizeof(u8))) { + printk(KERN_DEBUG "megasas: Error copying out cmd_status\n"); + error = -EFAULT; + } + + out: + if (sense) { + pci_free_consistent(instance->pdev, ioc->sense_len, + sense, sense_handle); + } + + for (i = 0; i < ioc->sge_count && kbuff_arr[i]; i++) { + pci_free_consistent(instance->pdev, + kern_sge32[i].length, + kbuff_arr[i], kern_sge32[i].phys_addr); + } + + megasas_return_cmd(instance, cmd); + return error; +} + +static struct megasas_instance *megasas_lookup_instance(u16 host_no) +{ + int i; + + for (i = 0; i < megasas_mgmt_info.max_index; i++) { + + if ((megasas_mgmt_info.instance[i]) && + (megasas_mgmt_info.instance[i]->host->host_no == host_no)) + return megasas_mgmt_info.instance[i]; + } + + return NULL; +} + +static int megasas_mgmt_ioctl_fw(struct file *file, unsigned long arg) +{ + struct megasas_iocpacket __user *user_ioc = + (struct megasas_iocpacket __user *)arg; + struct megasas_iocpacket *ioc; + struct megasas_instance *instance; + int error; + + ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); + if (!ioc) + return -ENOMEM; + + if (copy_from_user(ioc, user_ioc, sizeof(*ioc))) { + error = -EFAULT; + goto out_kfree_ioc; + } + + instance = megasas_lookup_instance(ioc->host_no); + if (!instance) { + error = -ENODEV; + goto out_kfree_ioc; + } + + /* + * We will allow only MEGASAS_INT_CMDS number of parallel ioctl cmds + */ + if (down_interruptible(&instance->ioctl_sem)) { + error = -ERESTARTSYS; + goto out_kfree_ioc; + } + error = megasas_mgmt_fw_ioctl(instance, user_ioc, ioc); + up(&instance->ioctl_sem); + + out_kfree_ioc: + kfree(ioc); + return error; +} + +static int megasas_mgmt_ioctl_aen(struct file *file, unsigned long arg) +{ + struct megasas_instance *instance; + struct megasas_aen aen; + int error; + + if (file->private_data != file) { + printk(KERN_DEBUG "megasas: fasync_helper was not " + "called first\n"); + return -EINVAL; + } + + if (copy_from_user(&aen, (void __user *)arg, sizeof(aen))) + return -EFAULT; + + instance = megasas_lookup_instance(aen.host_no); + + if (!instance) + return -ENODEV; + + down(&instance->aen_mutex); + error = megasas_register_aen(instance, aen.seq_num, + aen.class_locale_word); + up(&instance->aen_mutex); + return error; +} + +/** + * megasas_mgmt_ioctl - char node ioctl entry point + */ +static long +megasas_mgmt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case MEGASAS_IOC_FIRMWARE: + return megasas_mgmt_ioctl_fw(file, arg); + + case MEGASAS_IOC_GET_AEN: + return megasas_mgmt_ioctl_aen(file, arg); + } + + return -ENOTTY; +} + +#ifdef CONFIG_COMPAT +static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg) +{ + struct compat_megasas_iocpacket __user *cioc = + (struct compat_megasas_iocpacket __user *)arg; + struct megasas_iocpacket __user *ioc = + compat_alloc_user_space(sizeof(struct megasas_iocpacket)); + int i; + int error = 0; + + clear_user(ioc, sizeof(*ioc)); + + if (copy_in_user(&ioc->host_no, &cioc->host_no, sizeof(u16)) || + copy_in_user(&ioc->sgl_off, &cioc->sgl_off, sizeof(u32)) || + copy_in_user(&ioc->sense_off, &cioc->sense_off, sizeof(u32)) || + copy_in_user(&ioc->sense_len, &cioc->sense_len, sizeof(u32)) || + copy_in_user(ioc->frame.raw, cioc->frame.raw, 128) || + copy_in_user(&ioc->sge_count, &cioc->sge_count, sizeof(u32))) + return -EFAULT; + + for (i = 0; i < MAX_IOCTL_SGE; i++) { + compat_uptr_t ptr; + + if (get_user(ptr, &cioc->sgl[i].iov_base) || + put_user(compat_ptr(ptr), &ioc->sgl[i].iov_base) || + copy_in_user(&ioc->sgl[i].iov_len, + &cioc->sgl[i].iov_len, sizeof(compat_size_t))) + return -EFAULT; + } + + error = megasas_mgmt_ioctl_fw(file, (unsigned long)ioc); + + if (copy_in_user(&cioc->frame.hdr.cmd_status, + &ioc->frame.hdr.cmd_status, sizeof(u8))) { + printk(KERN_DEBUG "megasas: error copy_in_user cmd_status\n"); + return -EFAULT; + } + return error; +} + +static long +megasas_mgmt_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case MEGASAS_IOC_FIRMWARE:{ + return megasas_mgmt_compat_ioctl_fw(file, arg); + } + case MEGASAS_IOC_GET_AEN: + return megasas_mgmt_ioctl_aen(file, arg); + } + + return -ENOTTY; +} +#endif + +/* + * File operations structure for management interface + */ +static struct file_operations megasas_mgmt_fops = { + .owner = THIS_MODULE, + .open = megasas_mgmt_open, + .release = megasas_mgmt_release, + .fasync = megasas_mgmt_fasync, + .unlocked_ioctl = megasas_mgmt_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = megasas_mgmt_compat_ioctl, +#endif +}; + +/* + * PCI hotplug support registration structure + */ +static struct pci_driver megasas_pci_driver = { + + .name = "megaraid_sas", + .id_table = megasas_pci_table, + .probe = megasas_probe_one, + .remove = __devexit_p(megasas_detach_one), + .shutdown = megasas_shutdown, +}; + +/* + * Sysfs driver attributes + */ +static ssize_t megasas_sysfs_show_version(struct device_driver *dd, char *buf) +{ + return snprintf(buf, strlen(MEGASAS_VERSION) + 2, "%s\n", + MEGASAS_VERSION); +} + +static DRIVER_ATTR(version, S_IRUGO, megasas_sysfs_show_version, NULL); + +static ssize_t +megasas_sysfs_show_release_date(struct device_driver *dd, char *buf) +{ + return snprintf(buf, strlen(MEGASAS_RELDATE) + 2, "%s\n", + MEGASAS_RELDATE); +} + +static DRIVER_ATTR(release_date, S_IRUGO, megasas_sysfs_show_release_date, + NULL); + +/** + * megasas_init - Driver load entry point + */ +static int __init megasas_init(void) +{ + int rval; + + /* + * Announce driver version and other information + */ + printk(KERN_INFO "megasas: %s %s\n", MEGASAS_VERSION, + MEGASAS_EXT_VERSION); + + memset(&megasas_mgmt_info, 0, sizeof(megasas_mgmt_info)); + + /* + * Register character device node + */ + rval = register_chrdev(0, "megaraid_sas_ioctl", &megasas_mgmt_fops); + + if (rval < 0) { + printk(KERN_DEBUG "megasas: failed to open device node\n"); + return rval; + } + + megasas_mgmt_majorno = rval; + + /* + * Register ourselves as PCI hotplug module + */ + rval = pci_module_init(&megasas_pci_driver); + + if (rval) { + printk(KERN_DEBUG "megasas: PCI hotplug regisration failed \n"); + unregister_chrdev(megasas_mgmt_majorno, "megaraid_sas_ioctl"); + } + + driver_create_file(&megasas_pci_driver.driver, &driver_attr_version); + driver_create_file(&megasas_pci_driver.driver, + &driver_attr_release_date); + + return rval; +} + +/** + * megasas_exit - Driver unload entry point + */ +static void __exit megasas_exit(void) +{ + driver_remove_file(&megasas_pci_driver.driver, &driver_attr_version); + driver_remove_file(&megasas_pci_driver.driver, + &driver_attr_release_date); + + pci_unregister_driver(&megasas_pci_driver); + unregister_chrdev(megasas_mgmt_majorno, "megaraid_sas_ioctl"); +} + +module_init(megasas_init); +module_exit(megasas_exit); diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h new file mode 100644 index 000000000000..eaec9d531424 --- /dev/null +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -0,0 +1,1142 @@ +/* + * + * Linux MegaRAID driver for SAS based RAID controllers + * + * Copyright (c) 2003-2005 LSI Logic Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * FILE : megaraid_sas.h + */ + +#ifndef LSI_MEGARAID_SAS_H +#define LSI_MEGARAID_SAS_H + +/** + * MegaRAID SAS Driver meta data + */ +#define MEGASAS_VERSION "00.00.02.00-rc4" +#define MEGASAS_RELDATE "Sep 16, 2005" +#define MEGASAS_EXT_VERSION "Fri Sep 16 12:37:08 EDT 2005" + +/* + * ===================================== + * MegaRAID SAS MFI firmware definitions + * ===================================== + */ + +/* + * MFI stands for MegaRAID SAS FW Interface. This is just a moniker for + * protocol between the software and firmware. Commands are issued using + * "message frames" + */ + +/** + * FW posts its state in upper 4 bits of outbound_msg_0 register + */ +#define MFI_STATE_MASK 0xF0000000 +#define MFI_STATE_UNDEFINED 0x00000000 +#define MFI_STATE_BB_INIT 0x10000000 +#define MFI_STATE_FW_INIT 0x40000000 +#define MFI_STATE_WAIT_HANDSHAKE 0x60000000 +#define MFI_STATE_FW_INIT_2 0x70000000 +#define MFI_STATE_DEVICE_SCAN 0x80000000 +#define MFI_STATE_FLUSH_CACHE 0xA0000000 +#define MFI_STATE_READY 0xB0000000 +#define MFI_STATE_OPERATIONAL 0xC0000000 +#define MFI_STATE_FAULT 0xF0000000 + +#define MEGAMFI_FRAME_SIZE 64 + +/** + * During FW init, clear pending cmds & reset state using inbound_msg_0 + * + * ABORT : Abort all pending cmds + * READY : Move from OPERATIONAL to READY state; discard queue info + * MFIMODE : Discard (possible) low MFA posted in 64-bit mode (??) + * CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver + */ +#define MFI_INIT_ABORT 0x00000000 +#define MFI_INIT_READY 0x00000002 +#define MFI_INIT_MFIMODE 0x00000004 +#define MFI_INIT_CLEAR_HANDSHAKE 0x00000008 +#define MFI_RESET_FLAGS MFI_INIT_READY|MFI_INIT_MFIMODE + +/** + * MFI frame flags + */ +#define MFI_FRAME_POST_IN_REPLY_QUEUE 0x0000 +#define MFI_FRAME_DONT_POST_IN_REPLY_QUEUE 0x0001 +#define MFI_FRAME_SGL32 0x0000 +#define MFI_FRAME_SGL64 0x0002 +#define MFI_FRAME_SENSE32 0x0000 +#define MFI_FRAME_SENSE64 0x0004 +#define MFI_FRAME_DIR_NONE 0x0000 +#define MFI_FRAME_DIR_WRITE 0x0008 +#define MFI_FRAME_DIR_READ 0x0010 +#define MFI_FRAME_DIR_BOTH 0x0018 + +/** + * Definition for cmd_status + */ +#define MFI_CMD_STATUS_POLL_MODE 0xFF + +/** + * MFI command opcodes + */ +#define MFI_CMD_INIT 0x00 +#define MFI_CMD_LD_READ 0x01 +#define MFI_CMD_LD_WRITE 0x02 +#define MFI_CMD_LD_SCSI_IO 0x03 +#define MFI_CMD_PD_SCSI_IO 0x04 +#define MFI_CMD_DCMD 0x05 +#define MFI_CMD_ABORT 0x06 +#define MFI_CMD_SMP 0x07 +#define MFI_CMD_STP 0x08 + +#define MR_DCMD_CTRL_GET_INFO 0x01010000 + +#define MR_DCMD_CTRL_CACHE_FLUSH 0x01101000 +#define MR_FLUSH_CTRL_CACHE 0x01 +#define MR_FLUSH_DISK_CACHE 0x02 + +#define MR_DCMD_CTRL_SHUTDOWN 0x01050000 +#define MR_ENABLE_DRIVE_SPINDOWN 0x01 + +#define MR_DCMD_CTRL_EVENT_GET_INFO 0x01040100 +#define MR_DCMD_CTRL_EVENT_GET 0x01040300 +#define MR_DCMD_CTRL_EVENT_WAIT 0x01040500 +#define MR_DCMD_LD_GET_PROPERTIES 0x03030000 + +#define MR_DCMD_CLUSTER 0x08000000 +#define MR_DCMD_CLUSTER_RESET_ALL 0x08010100 +#define MR_DCMD_CLUSTER_RESET_LD 0x08010200 + +/** + * MFI command completion codes + */ +enum MFI_STAT { + MFI_STAT_OK = 0x00, + MFI_STAT_INVALID_CMD = 0x01, + MFI_STAT_INVALID_DCMD = 0x02, + MFI_STAT_INVALID_PARAMETER = 0x03, + MFI_STAT_INVALID_SEQUENCE_NUMBER = 0x04, + MFI_STAT_ABORT_NOT_POSSIBLE = 0x05, + MFI_STAT_APP_HOST_CODE_NOT_FOUND = 0x06, + MFI_STAT_APP_IN_USE = 0x07, + MFI_STAT_APP_NOT_INITIALIZED = 0x08, + MFI_STAT_ARRAY_INDEX_INVALID = 0x09, + MFI_STAT_ARRAY_ROW_NOT_EMPTY = 0x0a, + MFI_STAT_CONFIG_RESOURCE_CONFLICT = 0x0b, + MFI_STAT_DEVICE_NOT_FOUND = 0x0c, + MFI_STAT_DRIVE_TOO_SMALL = 0x0d, + MFI_STAT_FLASH_ALLOC_FAIL = 0x0e, + MFI_STAT_FLASH_BUSY = 0x0f, + MFI_STAT_FLASH_ERROR = 0x10, + MFI_STAT_FLASH_IMAGE_BAD = 0x11, + MFI_STAT_FLASH_IMAGE_INCOMPLETE = 0x12, + MFI_STAT_FLASH_NOT_OPEN = 0x13, + MFI_STAT_FLASH_NOT_STARTED = 0x14, + MFI_STAT_FLUSH_FAILED = 0x15, + MFI_STAT_HOST_CODE_NOT_FOUNT = 0x16, + MFI_STAT_LD_CC_IN_PROGRESS = 0x17, + MFI_STAT_LD_INIT_IN_PROGRESS = 0x18, + MFI_STAT_LD_LBA_OUT_OF_RANGE = 0x19, + MFI_STAT_LD_MAX_CONFIGURED = 0x1a, + MFI_STAT_LD_NOT_OPTIMAL = 0x1b, + MFI_STAT_LD_RBLD_IN_PROGRESS = 0x1c, + MFI_STAT_LD_RECON_IN_PROGRESS = 0x1d, + MFI_STAT_LD_WRONG_RAID_LEVEL = 0x1e, + MFI_STAT_MAX_SPARES_EXCEEDED = 0x1f, + MFI_STAT_MEMORY_NOT_AVAILABLE = 0x20, + MFI_STAT_MFC_HW_ERROR = 0x21, + MFI_STAT_NO_HW_PRESENT = 0x22, + MFI_STAT_NOT_FOUND = 0x23, + MFI_STAT_NOT_IN_ENCL = 0x24, + MFI_STAT_PD_CLEAR_IN_PROGRESS = 0x25, + MFI_STAT_PD_TYPE_WRONG = 0x26, + MFI_STAT_PR_DISABLED = 0x27, + MFI_STAT_ROW_INDEX_INVALID = 0x28, + MFI_STAT_SAS_CONFIG_INVALID_ACTION = 0x29, + MFI_STAT_SAS_CONFIG_INVALID_DATA = 0x2a, + MFI_STAT_SAS_CONFIG_INVALID_PAGE = 0x2b, + MFI_STAT_SAS_CONFIG_INVALID_TYPE = 0x2c, + MFI_STAT_SCSI_DONE_WITH_ERROR = 0x2d, + MFI_STAT_SCSI_IO_FAILED = 0x2e, + MFI_STAT_SCSI_RESERVATION_CONFLICT = 0x2f, + MFI_STAT_SHUTDOWN_FAILED = 0x30, + MFI_STAT_TIME_NOT_SET = 0x31, + MFI_STAT_WRONG_STATE = 0x32, + MFI_STAT_LD_OFFLINE = 0x33, + MFI_STAT_PEER_NOTIFICATION_REJECTED = 0x34, + MFI_STAT_PEER_NOTIFICATION_FAILED = 0x35, + MFI_STAT_RESERVATION_IN_PROGRESS = 0x36, + MFI_STAT_I2C_ERRORS_DETECTED = 0x37, + MFI_STAT_PCI_ERRORS_DETECTED = 0x38, + + MFI_STAT_INVALID_STATUS = 0xFF +}; + +/* + * Number of mailbox bytes in DCMD message frame + */ +#define MFI_MBOX_SIZE 12 + +enum MR_EVT_CLASS { + + MR_EVT_CLASS_DEBUG = -2, + MR_EVT_CLASS_PROGRESS = -1, + MR_EVT_CLASS_INFO = 0, + MR_EVT_CLASS_WARNING = 1, + MR_EVT_CLASS_CRITICAL = 2, + MR_EVT_CLASS_FATAL = 3, + MR_EVT_CLASS_DEAD = 4, + +}; + +enum MR_EVT_LOCALE { + + MR_EVT_LOCALE_LD = 0x0001, + MR_EVT_LOCALE_PD = 0x0002, + MR_EVT_LOCALE_ENCL = 0x0004, + MR_EVT_LOCALE_BBU = 0x0008, + MR_EVT_LOCALE_SAS = 0x0010, + MR_EVT_LOCALE_CTRL = 0x0020, + MR_EVT_LOCALE_CONFIG = 0x0040, + MR_EVT_LOCALE_CLUSTER = 0x0080, + MR_EVT_LOCALE_ALL = 0xffff, + +}; + +enum MR_EVT_ARGS { + + MR_EVT_ARGS_NONE, + MR_EVT_ARGS_CDB_SENSE, + MR_EVT_ARGS_LD, + MR_EVT_ARGS_LD_COUNT, + MR_EVT_ARGS_LD_LBA, + MR_EVT_ARGS_LD_OWNER, + MR_EVT_ARGS_LD_LBA_PD_LBA, + MR_EVT_ARGS_LD_PROG, + MR_EVT_ARGS_LD_STATE, + MR_EVT_ARGS_LD_STRIP, + MR_EVT_ARGS_PD, + MR_EVT_ARGS_PD_ERR, + MR_EVT_ARGS_PD_LBA, + MR_EVT_ARGS_PD_LBA_LD, + MR_EVT_ARGS_PD_PROG, + MR_EVT_ARGS_PD_STATE, + MR_EVT_ARGS_PCI, + MR_EVT_ARGS_RATE, + MR_EVT_ARGS_STR, + MR_EVT_ARGS_TIME, + MR_EVT_ARGS_ECC, + +}; + +/* + * SAS controller properties + */ +struct megasas_ctrl_prop { + + u16 seq_num; + u16 pred_fail_poll_interval; + u16 intr_throttle_count; + u16 intr_throttle_timeouts; + u8 rebuild_rate; + u8 patrol_read_rate; + u8 bgi_rate; + u8 cc_rate; + u8 recon_rate; + u8 cache_flush_interval; + u8 spinup_drv_count; + u8 spinup_delay; + u8 cluster_enable; + u8 coercion_mode; + u8 alarm_enable; + u8 disable_auto_rebuild; + u8 disable_battery_warn; + u8 ecc_bucket_size; + u16 ecc_bucket_leak_rate; + u8 restore_hotspare_on_insertion; + u8 expose_encl_devices; + u8 reserved[38]; + +} __attribute__ ((packed)); + +/* + * SAS controller information + */ +struct megasas_ctrl_info { + + /* + * PCI device information + */ + struct { + + u16 vendor_id; + u16 device_id; + u16 sub_vendor_id; + u16 sub_device_id; + u8 reserved[24]; + + } __attribute__ ((packed)) pci; + + /* + * Host interface information + */ + struct { + + u8 PCIX:1; + u8 PCIE:1; + u8 iSCSI:1; + u8 SAS_3G:1; + u8 reserved_0:4; + u8 reserved_1[6]; + u8 port_count; + u64 port_addr[8]; + + } __attribute__ ((packed)) host_interface; + + /* + * Device (backend) interface information + */ + struct { + + u8 SPI:1; + u8 SAS_3G:1; + u8 SATA_1_5G:1; + u8 SATA_3G:1; + u8 reserved_0:4; + u8 reserved_1[6]; + u8 port_count; + u64 port_addr[8]; + + } __attribute__ ((packed)) device_interface; + + /* + * List of components residing in flash. All str are null terminated + */ + u32 image_check_word; + u32 image_component_count; + + struct { + + char name[8]; + char version[32]; + char build_date[16]; + char built_time[16]; + + } __attribute__ ((packed)) image_component[8]; + + /* + * List of flash components that have been flashed on the card, but + * are not in use, pending reset of the adapter. This list will be + * empty if a flash operation has not occurred. All stings are null + * terminated + */ + u32 pending_image_component_count; + + struct { + + char name[8]; + char version[32]; + char build_date[16]; + char build_time[16]; + + } __attribute__ ((packed)) pending_image_component[8]; + + u8 max_arms; + u8 max_spans; + u8 max_arrays; + u8 max_lds; + + char product_name[80]; + char serial_no[32]; + + /* + * Other physical/controller/operation information. Indicates the + * presence of the hardware + */ + struct { + + u32 bbu:1; + u32 alarm:1; + u32 nvram:1; + u32 uart:1; + u32 reserved:28; + + } __attribute__ ((packed)) hw_present; + + u32 current_fw_time; + + /* + * Maximum data transfer sizes + */ + u16 max_concurrent_cmds; + u16 max_sge_count; + u32 max_request_size; + + /* + * Logical and physical device counts + */ + u16 ld_present_count; + u16 ld_degraded_count; + u16 ld_offline_count; + + u16 pd_present_count; + u16 pd_disk_present_count; + u16 pd_disk_pred_failure_count; + u16 pd_disk_failed_count; + + /* + * Memory size information + */ + u16 nvram_size; + u16 memory_size; + u16 flash_size; + + /* + * Error counters + */ + u16 mem_correctable_error_count; + u16 mem_uncorrectable_error_count; + + /* + * Cluster information + */ + u8 cluster_permitted; + u8 cluster_active; + + /* + * Additional max data transfer sizes + */ + u16 max_strips_per_io; + + /* + * Controller capabilities structures + */ + struct { + + u32 raid_level_0:1; + u32 raid_level_1:1; + u32 raid_level_5:1; + u32 raid_level_1E:1; + u32 raid_level_6:1; + u32 reserved:27; + + } __attribute__ ((packed)) raid_levels; + + struct { + + u32 rbld_rate:1; + u32 cc_rate:1; + u32 bgi_rate:1; + u32 recon_rate:1; + u32 patrol_rate:1; + u32 alarm_control:1; + u32 cluster_supported:1; + u32 bbu:1; + u32 spanning_allowed:1; + u32 dedicated_hotspares:1; + u32 revertible_hotspares:1; + u32 foreign_config_import:1; + u32 self_diagnostic:1; + u32 mixed_redundancy_arr:1; + u32 global_hot_spares:1; + u32 reserved:17; + + } __attribute__ ((packed)) adapter_operations; + + struct { + + u32 read_policy:1; + u32 write_policy:1; + u32 io_policy:1; + u32 access_policy:1; + u32 disk_cache_policy:1; + u32 reserved:27; + + } __attribute__ ((packed)) ld_operations; + + struct { + + u8 min; + u8 max; + u8 reserved[2]; + + } __attribute__ ((packed)) stripe_sz_ops; + + struct { + + u32 force_online:1; + u32 force_offline:1; + u32 force_rebuild:1; + u32 reserved:29; + + } __attribute__ ((packed)) pd_operations; + + struct { + + u32 ctrl_supports_sas:1; + u32 ctrl_supports_sata:1; + u32 allow_mix_in_encl:1; + u32 allow_mix_in_ld:1; + u32 allow_sata_in_cluster:1; + u32 reserved:27; + + } __attribute__ ((packed)) pd_mix_support; + + /* + * Define ECC single-bit-error bucket information + */ + u8 ecc_bucket_count; + u8 reserved_2[11]; + + /* + * Include the controller properties (changeable items) + */ + struct megasas_ctrl_prop properties; + + /* + * Define FW pkg version (set in envt v'bles on OEM basis) + */ + char package_version[0x60]; + + u8 pad[0x800 - 0x6a0]; + +} __attribute__ ((packed)); + +/* + * =============================== + * MegaRAID SAS driver definitions + * =============================== + */ +#define MEGASAS_MAX_PD_CHANNELS 2 +#define MEGASAS_MAX_LD_CHANNELS 2 +#define MEGASAS_MAX_CHANNELS (MEGASAS_MAX_PD_CHANNELS + \ + MEGASAS_MAX_LD_CHANNELS) +#define MEGASAS_MAX_DEV_PER_CHANNEL 128 +#define MEGASAS_DEFAULT_INIT_ID -1 +#define MEGASAS_MAX_LUN 8 +#define MEGASAS_MAX_LD 64 + +/* + * When SCSI mid-layer calls driver's reset routine, driver waits for + * MEGASAS_RESET_WAIT_TIME seconds for all outstanding IO to complete. Note + * that the driver cannot _actually_ abort or reset pending commands. While + * it is waiting for the commands to complete, it prints a diagnostic message + * every MEGASAS_RESET_NOTICE_INTERVAL seconds + */ +#define MEGASAS_RESET_WAIT_TIME 180 +#define MEGASAS_RESET_NOTICE_INTERVAL 5 + +#define MEGASAS_IOCTL_CMD 0 + +/* + * FW reports the maximum of number of commands that it can accept (maximum + * commands that can be outstanding) at any time. The driver must report a + * lower number to the mid layer because it can issue a few internal commands + * itself (E.g, AEN, abort cmd, IOCTLs etc). The number of commands it needs + * is shown below + */ +#define MEGASAS_INT_CMDS 32 + +/* + * FW can accept both 32 and 64 bit SGLs. We want to allocate 32/64 bit + * SGLs based on the size of dma_addr_t + */ +#define IS_DMA64 (sizeof(dma_addr_t) == 8) + +#define MFI_OB_INTR_STATUS_MASK 0x00000002 +#define MFI_POLL_TIMEOUT_SECS 10 + +struct megasas_register_set { + + u32 reserved_0[4]; /*0000h */ + + u32 inbound_msg_0; /*0010h */ + u32 inbound_msg_1; /*0014h */ + u32 outbound_msg_0; /*0018h */ + u32 outbound_msg_1; /*001Ch */ + + u32 inbound_doorbell; /*0020h */ + u32 inbound_intr_status; /*0024h */ + u32 inbound_intr_mask; /*0028h */ + + u32 outbound_doorbell; /*002Ch */ + u32 outbound_intr_status; /*0030h */ + u32 outbound_intr_mask; /*0034h */ + + u32 reserved_1[2]; /*0038h */ + + u32 inbound_queue_port; /*0040h */ + u32 outbound_queue_port; /*0044h */ + + u32 reserved_2; /*004Ch */ + + u32 index_registers[1004]; /*0050h */ + +} __attribute__ ((packed)); + +struct megasas_sge32 { + + u32 phys_addr; + u32 length; + +} __attribute__ ((packed)); + +struct megasas_sge64 { + + u64 phys_addr; + u32 length; + +} __attribute__ ((packed)); + +union megasas_sgl { + + struct megasas_sge32 sge32[1]; + struct megasas_sge64 sge64[1]; + +} __attribute__ ((packed)); + +struct megasas_header { + + u8 cmd; /*00h */ + u8 sense_len; /*01h */ + u8 cmd_status; /*02h */ + u8 scsi_status; /*03h */ + + u8 target_id; /*04h */ + u8 lun; /*05h */ + u8 cdb_len; /*06h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + u32 data_xferlen; /*14h */ + +} __attribute__ ((packed)); + +union megasas_sgl_frame { + + struct megasas_sge32 sge32[8]; + struct megasas_sge64 sge64[5]; + +} __attribute__ ((packed)); + +struct megasas_init_frame { + + u8 cmd; /*00h */ + u8 reserved_0; /*01h */ + u8 cmd_status; /*02h */ + + u8 reserved_1; /*03h */ + u32 reserved_2; /*04h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 reserved_3; /*12h */ + u32 data_xfer_len; /*14h */ + + u32 queue_info_new_phys_addr_lo; /*18h */ + u32 queue_info_new_phys_addr_hi; /*1Ch */ + u32 queue_info_old_phys_addr_lo; /*20h */ + u32 queue_info_old_phys_addr_hi; /*24h */ + + u32 reserved_4[6]; /*28h */ + +} __attribute__ ((packed)); + +struct megasas_init_queue_info { + + u32 init_flags; /*00h */ + u32 reply_queue_entries; /*04h */ + + u32 reply_queue_start_phys_addr_lo; /*08h */ + u32 reply_queue_start_phys_addr_hi; /*0Ch */ + u32 producer_index_phys_addr_lo; /*10h */ + u32 producer_index_phys_addr_hi; /*14h */ + u32 consumer_index_phys_addr_lo; /*18h */ + u32 consumer_index_phys_addr_hi; /*1Ch */ + +} __attribute__ ((packed)); + +struct megasas_io_frame { + + u8 cmd; /*00h */ + u8 sense_len; /*01h */ + u8 cmd_status; /*02h */ + u8 scsi_status; /*03h */ + + u8 target_id; /*04h */ + u8 access_byte; /*05h */ + u8 reserved_0; /*06h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + u32 lba_count; /*14h */ + + u32 sense_buf_phys_addr_lo; /*18h */ + u32 sense_buf_phys_addr_hi; /*1Ch */ + + u32 start_lba_lo; /*20h */ + u32 start_lba_hi; /*24h */ + + union megasas_sgl sgl; /*28h */ + +} __attribute__ ((packed)); + +struct megasas_pthru_frame { + + u8 cmd; /*00h */ + u8 sense_len; /*01h */ + u8 cmd_status; /*02h */ + u8 scsi_status; /*03h */ + + u8 target_id; /*04h */ + u8 lun; /*05h */ + u8 cdb_len; /*06h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + u32 data_xfer_len; /*14h */ + + u32 sense_buf_phys_addr_lo; /*18h */ + u32 sense_buf_phys_addr_hi; /*1Ch */ + + u8 cdb[16]; /*20h */ + union megasas_sgl sgl; /*30h */ + +} __attribute__ ((packed)); + +struct megasas_dcmd_frame { + + u8 cmd; /*00h */ + u8 reserved_0; /*01h */ + u8 cmd_status; /*02h */ + u8 reserved_1[4]; /*03h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + + u32 data_xfer_len; /*14h */ + u32 opcode; /*18h */ + + union { /*1Ch */ + u8 b[12]; + u16 s[6]; + u32 w[3]; + } mbox; + + union megasas_sgl sgl; /*28h */ + +} __attribute__ ((packed)); + +struct megasas_abort_frame { + + u8 cmd; /*00h */ + u8 reserved_0; /*01h */ + u8 cmd_status; /*02h */ + + u8 reserved_1; /*03h */ + u32 reserved_2; /*04h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 reserved_3; /*12h */ + u32 reserved_4; /*14h */ + + u32 abort_context; /*18h */ + u32 pad_1; /*1Ch */ + + u32 abort_mfi_phys_addr_lo; /*20h */ + u32 abort_mfi_phys_addr_hi; /*24h */ + + u32 reserved_5[6]; /*28h */ + +} __attribute__ ((packed)); + +struct megasas_smp_frame { + + u8 cmd; /*00h */ + u8 reserved_1; /*01h */ + u8 cmd_status; /*02h */ + u8 connection_status; /*03h */ + + u8 reserved_2[3]; /*04h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + + u32 data_xfer_len; /*14h */ + u64 sas_addr; /*18h */ + + union { + struct megasas_sge32 sge32[2]; /* [0]: resp [1]: req */ + struct megasas_sge64 sge64[2]; /* [0]: resp [1]: req */ + } sgl; + +} __attribute__ ((packed)); + +struct megasas_stp_frame { + + u8 cmd; /*00h */ + u8 reserved_1; /*01h */ + u8 cmd_status; /*02h */ + u8 reserved_2; /*03h */ + + u8 target_id; /*04h */ + u8 reserved_3[2]; /*05h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + + u32 data_xfer_len; /*14h */ + + u16 fis[10]; /*18h */ + u32 stp_flags; + + union { + struct megasas_sge32 sge32[2]; /* [0]: resp [1]: data */ + struct megasas_sge64 sge64[2]; /* [0]: resp [1]: data */ + } sgl; + +} __attribute__ ((packed)); + +union megasas_frame { + + struct megasas_header hdr; + struct megasas_init_frame init; + struct megasas_io_frame io; + struct megasas_pthru_frame pthru; + struct megasas_dcmd_frame dcmd; + struct megasas_abort_frame abort; + struct megasas_smp_frame smp; + struct megasas_stp_frame stp; + + u8 raw_bytes[64]; +}; + +struct megasas_cmd; + +union megasas_evt_class_locale { + + struct { + u16 locale; + u8 reserved; + s8 class; + } __attribute__ ((packed)) members; + + u32 word; + +} __attribute__ ((packed)); + +struct megasas_evt_log_info { + u32 newest_seq_num; + u32 oldest_seq_num; + u32 clear_seq_num; + u32 shutdown_seq_num; + u32 boot_seq_num; + +} __attribute__ ((packed)); + +struct megasas_progress { + + u16 progress; + u16 elapsed_seconds; + +} __attribute__ ((packed)); + +struct megasas_evtarg_ld { + + u16 target_id; + u8 ld_index; + u8 reserved; + +} __attribute__ ((packed)); + +struct megasas_evtarg_pd { + u16 device_id; + u8 encl_index; + u8 slot_number; + +} __attribute__ ((packed)); + +struct megasas_evt_detail { + + u32 seq_num; + u32 time_stamp; + u32 code; + union megasas_evt_class_locale cl; + u8 arg_type; + u8 reserved1[15]; + + union { + struct { + struct megasas_evtarg_pd pd; + u8 cdb_length; + u8 sense_length; + u8 reserved[2]; + u8 cdb[16]; + u8 sense[64]; + } __attribute__ ((packed)) cdbSense; + + struct megasas_evtarg_ld ld; + + struct { + struct megasas_evtarg_ld ld; + u64 count; + } __attribute__ ((packed)) ld_count; + + struct { + u64 lba; + struct megasas_evtarg_ld ld; + } __attribute__ ((packed)) ld_lba; + + struct { + struct megasas_evtarg_ld ld; + u32 prevOwner; + u32 newOwner; + } __attribute__ ((packed)) ld_owner; + + struct { + u64 ld_lba; + u64 pd_lba; + struct megasas_evtarg_ld ld; + struct megasas_evtarg_pd pd; + } __attribute__ ((packed)) ld_lba_pd_lba; + + struct { + struct megasas_evtarg_ld ld; + struct megasas_progress prog; + } __attribute__ ((packed)) ld_prog; + + struct { + struct megasas_evtarg_ld ld; + u32 prev_state; + u32 new_state; + } __attribute__ ((packed)) ld_state; + + struct { + u64 strip; + struct megasas_evtarg_ld ld; + } __attribute__ ((packed)) ld_strip; + + struct megasas_evtarg_pd pd; + + struct { + struct megasas_evtarg_pd pd; + u32 err; + } __attribute__ ((packed)) pd_err; + + struct { + u64 lba; + struct megasas_evtarg_pd pd; + } __attribute__ ((packed)) pd_lba; + + struct { + u64 lba; + struct megasas_evtarg_pd pd; + struct megasas_evtarg_ld ld; + } __attribute__ ((packed)) pd_lba_ld; + + struct { + struct megasas_evtarg_pd pd; + struct megasas_progress prog; + } __attribute__ ((packed)) pd_prog; + + struct { + struct megasas_evtarg_pd pd; + u32 prevState; + u32 newState; + } __attribute__ ((packed)) pd_state; + + struct { + u16 vendorId; + u16 deviceId; + u16 subVendorId; + u16 subDeviceId; + } __attribute__ ((packed)) pci; + + u32 rate; + char str[96]; + + struct { + u32 rtc; + u32 elapsedSeconds; + } __attribute__ ((packed)) time; + + struct { + u32 ecar; + u32 elog; + char str[64]; + } __attribute__ ((packed)) ecc; + + u8 b[96]; + u16 s[48]; + u32 w[24]; + u64 d[12]; + } args; + + char description[128]; + +} __attribute__ ((packed)); + +struct megasas_instance { + + u32 *producer; + dma_addr_t producer_h; + u32 *consumer; + dma_addr_t consumer_h; + + u32 *reply_queue; + dma_addr_t reply_queue_h; + + unsigned long base_addr; + struct megasas_register_set __iomem *reg_set; + + s8 init_id; + u8 reserved[3]; + + u16 max_num_sge; + u16 max_fw_cmds; + u32 max_sectors_per_req; + + struct megasas_cmd **cmd_list; + struct list_head cmd_pool; + spinlock_t cmd_pool_lock; + struct dma_pool *frame_dma_pool; + struct dma_pool *sense_dma_pool; + + struct megasas_evt_detail *evt_detail; + dma_addr_t evt_detail_h; + struct megasas_cmd *aen_cmd; + struct semaphore aen_mutex; + struct semaphore ioctl_sem; + + struct Scsi_Host *host; + + wait_queue_head_t int_cmd_wait_q; + wait_queue_head_t abort_cmd_wait_q; + + struct pci_dev *pdev; + u32 unique_id; + + u32 fw_outstanding; + u32 hw_crit_error; + spinlock_t instance_lock; +}; + +#define MEGASAS_IS_LOGICAL(scp) \ + (scp->device->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1 + +#define MEGASAS_DEV_INDEX(inst, scp) \ + ((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ + scp->device->id + +struct megasas_cmd { + + union megasas_frame *frame; + dma_addr_t frame_phys_addr; + u8 *sense; + dma_addr_t sense_phys_addr; + + u32 index; + u8 sync_cmd; + u8 cmd_status; + u16 abort_aen; + + struct list_head list; + struct scsi_cmnd *scmd; + struct megasas_instance *instance; + u32 frame_count; +}; + +#define MAX_MGMT_ADAPTERS 1024 +#define MAX_IOCTL_SGE 16 + +struct megasas_iocpacket { + + u16 host_no; + u16 __pad1; + u32 sgl_off; + u32 sge_count; + u32 sense_off; + u32 sense_len; + union { + u8 raw[128]; + struct megasas_header hdr; + } frame; + + struct iovec sgl[MAX_IOCTL_SGE]; + +} __attribute__ ((packed)); + +struct megasas_aen { + u16 host_no; + u16 __pad1; + u32 seq_num; + u32 class_locale_word; +} __attribute__ ((packed)); + +#ifdef CONFIG_COMPAT +struct compat_megasas_iocpacket { + u16 host_no; + u16 __pad1; + u32 sgl_off; + u32 sge_count; + u32 sense_off; + u32 sense_len; + union { + u8 raw[128]; + struct megasas_header hdr; + } frame; + struct compat_iovec sgl[MAX_IOCTL_SGE]; +} __attribute__ ((packed)); + +#define MEGASAS_IOC_FIRMWARE _IOWR('M', 1, struct compat_megasas_iocpacket) +#else +#define MEGASAS_IOC_FIRMWARE _IOWR('M', 1, struct megasas_iocpacket) +#endif + +#define MEGASAS_IOC_GET_AEN _IOW('M', 3, struct megasas_aen) + +struct megasas_mgmt_info { + + u16 count; + struct megasas_instance *instance[MAX_MGMT_ADAPTERS]; + int max_index; +}; + +#endif /*LSI_MEGARAID_SAS_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c49d28eca561..20fb79810c3c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -185,6 +185,7 @@ #define PCI_DEVICE_ID_LSI_61C102 0x0901 #define PCI_DEVICE_ID_LSI_63C815 0x1000 #define PCI_DEVICE_ID_LSI_SAS1064 0x0050 +#define PCI_DEVICE_ID_LSI_SAS1064R 0x0411 #define PCI_DEVICE_ID_LSI_SAS1066 0x005E #define PCI_DEVICE_ID_LSI_SAS1068 0x0054 #define PCI_DEVICE_ID_LSI_SAS1064A 0x005C @@ -559,6 +560,7 @@ #define PCI_VENDOR_ID_DELL 0x1028 #define PCI_DEVICE_ID_DELL_RACIII 0x0008 #define PCI_DEVICE_ID_DELL_RAC4 0x0012 +#define PCI_DEVICE_ID_DELL_PERC5 0x0015 #define PCI_VENDOR_ID_MATROX 0x102B #define PCI_DEVICE_ID_MATROX_MGA_2 0x0518 -- cgit v1.2.3 From 9356b8fc07dc126cd91d2b12f314d760ab48996e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Sep 2005 15:23:16 -0700 Subject: [NET]: Reorder some hot fields of struct net_device Place them on separate cache lines in SMP to lower memory bouncing between multiple CPU accessing the device. - One part is mostly used on receive path (including eth_type_trans()) (poll_list, poll, quota, weight, last_rx, dev_addr, broadcast) - One part is mostly used on queue transmit path (qdisc) (queue_lock, qdisc, qdisc_sleeping, qdisc_list, tx_queue_len) - One part is mostly used on xmit path (device) (xmit_lock, xmit_lock_owner, priv, hard_start_xmit, trans_start) 'features' is placed outside of these hot points, in a location that may be shared by all cpus (because mostly read) name_hlist is moved close to name[IFNAMSIZ] to speedup __dev_get_by_name() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 89 +++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c717907896d..368e4c825ff1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -265,6 +265,8 @@ struct net_device * the interface. */ char name[IFNAMSIZ]; + /* device name hash chain */ + struct hlist_node name_hlist; /* * I/O specific fields @@ -292,6 +294,21 @@ struct net_device /* ------- Fields preinitialized in Space.c finish here ------- */ + /* Net device features */ + unsigned long features; +#define NETIF_F_SG 1 /* Scatter/gather IO. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ +#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ +#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ +#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ +#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ +#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ +#define NETIF_F_LLTX 4096 /* LockLess TX */ + struct net_device *next_sched; /* Interface index. Unique device identifier */ @@ -316,9 +333,6 @@ struct net_device * will (read: may be cleaned up at will). */ - /* These may be needed for future network-power-down code. */ - unsigned long trans_start; /* Time (in jiffies) of last Tx */ - unsigned long last_rx; /* Time of last Rx */ unsigned short flags; /* interface flags (a la BSD) */ unsigned short gflags; @@ -328,15 +342,12 @@ struct net_device unsigned mtu; /* interface MTU value */ unsigned short type; /* interface hardware type */ unsigned short hard_header_len; /* hardware hdr length */ - void *priv; /* pointer to private data */ struct net_device *master; /* Pointer to master device of a group, * which this device is member of. */ /* Interface address info. */ - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ - unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ @@ -346,8 +357,6 @@ struct net_device int promiscuity; int allmulti; - int watchdog_timeo; - struct timer_list watchdog_timer; /* Protocol specific pointers */ @@ -358,32 +367,62 @@ struct net_device void *ec_ptr; /* Econet specific data */ void *ax25_ptr; /* AX.25 specific data */ - struct list_head poll_list; /* Link to poll list */ +/* + * Cache line mostly used on receive path (including eth_type_trans()) + */ + struct list_head poll_list ____cacheline_aligned_in_smp; + /* Link to poll list */ + + int (*poll) (struct net_device *dev, int *quota); int quota; int weight; + unsigned long last_rx; /* Time of last Rx */ + /* Interface address info used in eth_type_trans() */ + unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast + because most packets are unicast) */ + + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ +/* + * Cache line mostly used on queue transmit path (qdisc) + */ + /* device queue lock */ + spinlock_t queue_lock ____cacheline_aligned_in_smp; struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; - struct Qdisc *qdisc_ingress; struct list_head qdisc_list; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* ingress path synchronizer */ spinlock_t ingress_lock; + struct Qdisc *qdisc_ingress; + +/* + * One part is mostly used on xmit path (device) + */ /* hard_start_xmit synchronizer */ - spinlock_t xmit_lock; + spinlock_t xmit_lock ____cacheline_aligned_in_smp; /* cpu id of processor entered to hard_start_xmit or -1, if nobody entered there. */ int xmit_lock_owner; - /* device queue lock */ - spinlock_t queue_lock; + void *priv; /* pointer to private data */ + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + /* These may be needed for future network-power-down code. */ + unsigned long trans_start; /* Time (in jiffies) of last Tx */ + + int watchdog_timeo; /* used by dev_watchdog() */ + struct timer_list watchdog_timer; + +/* + * refcnt is a very hot point, so align it on SMP + */ /* Number of references to this device */ - atomic_t refcnt; + atomic_t refcnt ____cacheline_aligned_in_smp; + /* delayed register/unregister */ struct list_head todo_list; - /* device name hash chain */ - struct hlist_node name_hlist; /* device index hash chain */ struct hlist_node index_hlist; @@ -396,21 +435,6 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Net device features */ - unsigned long features; -#define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ -#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ -#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ -#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ -#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ -#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ -#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ -#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ -#define NETIF_F_LLTX 4096 /* LockLess TX */ - /* Called after device is detached from network. */ void (*uninit)(struct net_device *dev); /* Called after last user reference disappears. */ @@ -419,10 +443,7 @@ struct net_device /* Pointers to interface service routines. */ int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); #define HAVE_NETDEV_POLL - int (*poll) (struct net_device *dev, int *quota); int (*hard_header) (struct sk_buff *skb, struct net_device *dev, unsigned short type, -- cgit v1.2.3 From 1f26dac32057baaf67d10b45c6b5277db862911d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Sep 2005 15:24:13 -0700 Subject: [NET]: Add Sun Cassini driver. Written by Adrian Sun (asun@darksunrising.com). Ported to 2.6.x by Tom 'spot' Callaway . Further cleaned up and integrated by David S. Miller Signed-off-by: David S. Miller --- drivers/net/Kconfig | 8 + drivers/net/Makefile | 1 + drivers/net/cassini.c | 5311 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/net/cassini.h | 4425 +++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 2 + 5 files changed, 9747 insertions(+) create mode 100644 drivers/net/cassini.c create mode 100644 drivers/net/cassini.h (limited to 'include/linux') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 96f14ab1c1f5..2a908c4690a7 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -548,6 +548,14 @@ config SUNGEM Support for the Sun GEM chip, aka Sun GigabitEthernet/P 2.0. See also . +config CASSINI + tristate "Sun Cassini support" + depends on NET_ETHERNET && PCI + select CRC32 + help + Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also + + config NET_VENDOR_3COM bool "3COM cards" depends on NET_ETHERNET && (ISA || EISA || MCA || PCI) diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 8645c843cf4d..8aeec9f2495b 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_SUNQE) += sunqe.o obj-$(CONFIG_SUNBMAC) += sunbmac.o obj-$(CONFIG_MYRI_SBUS) += myri_sbus.o obj-$(CONFIG_SUNGEM) += sungem.o sungem_phy.o +obj-$(CONFIG_CASSINI) += cassini.o obj-$(CONFIG_MACE) += mace.o obj-$(CONFIG_BMAC) += bmac.o diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c new file mode 100644 index 000000000000..69cb368247e7 --- /dev/null +++ b/drivers/net/cassini.c @@ -0,0 +1,5311 @@ +/* cassini.c: Sun Microsystems Cassini(+) ethernet driver. + * + * Copyright (C) 2004 Sun Microsystems Inc. + * Copyright (C) 2003 Adrian Sun (asun@darksunrising.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + * This driver uses the sungem driver (c) David Miller + * (davem@redhat.com) as its basis. + * + * The cassini chip has a number of features that distinguish it from + * the gem chip: + * 4 transmit descriptor rings that are used for either QoS (VLAN) or + * load balancing (non-VLAN mode) + * batching of multiple packets + * multiple CPU dispatching + * page-based RX descriptor engine with separate completion rings + * Gigabit support (GMII and PCS interface) + * MIF link up/down detection works + * + * RX is handled by page sized buffers that are attached as fragments to + * the skb. here's what's done: + * -- driver allocates pages at a time and keeps reference counts + * on them. + * -- the upper protocol layers assume that the header is in the skb + * itself. as a result, cassini will copy a small amount (64 bytes) + * to make them happy. + * -- driver appends the rest of the data pages as frags to skbuffs + * and increments the reference count + * -- on page reclamation, the driver swaps the page with a spare page. + * if that page is still in use, it frees its reference to that page, + * and allocates a new page for use. otherwise, it just recycles the + * the page. + * + * NOTE: cassini can parse the header. however, it's not worth it + * as long as the network stack requires a header copy. + * + * TX has 4 queues. currently these queues are used in a round-robin + * fashion for load balancing. They can also be used for QoS. for that + * to work, however, QoS information needs to be exposed down to the driver + * level so that subqueues get targetted to particular transmit rings. + * alternatively, the queues can be configured via use of the all-purpose + * ioctl. + * + * RX DATA: the rx completion ring has all the info, but the rx desc + * ring has all of the data. RX can conceivably come in under multiple + * interrupts, but the INT# assignment needs to be set up properly by + * the BIOS and conveyed to the driver. PCI BIOSes don't know how to do + * that. also, the two descriptor rings are designed to distinguish between + * encrypted and non-encrypted packets, but we use them for buffering + * instead. + * + * by default, the selective clear mask is set up to process rx packets. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#define cas_page_map(x) kmap_atomic((x), KM_SKB_DATA_SOFTIRQ) +#define cas_page_unmap(x) kunmap_atomic((x), KM_SKB_DATA_SOFTIRQ) +#define CAS_NCPUS num_online_cpus() + +#if defined(CONFIG_CASSINI_NAPI) && defined(HAVE_NETDEV_POLL) +#define USE_NAPI +#define cas_skb_release(x) netif_receive_skb(x) +#else +#define cas_skb_release(x) netif_rx(x) +#endif + +/* select which firmware to use */ +#define USE_HP_WORKAROUND +#define HP_WORKAROUND_DEFAULT /* select which firmware to use as default */ +#define CAS_HP_ALT_FIRMWARE cas_prog_null /* alternate firmware */ + +#include "cassini.h" + +#define USE_TX_COMPWB /* use completion writeback registers */ +#define USE_CSMA_CD_PROTO /* standard CSMA/CD */ +#define USE_RX_BLANK /* hw interrupt mitigation */ +#undef USE_ENTROPY_DEV /* don't test for entropy device */ + +/* NOTE: these aren't useable unless PCI interrupts can be assigned. + * also, we need to make cp->lock finer-grained. + */ +#undef USE_PCI_INTB +#undef USE_PCI_INTC +#undef USE_PCI_INTD +#undef USE_QOS + +#undef USE_VPD_DEBUG /* debug vpd information if defined */ + +/* rx processing options */ +#define USE_PAGE_ORDER /* specify to allocate large rx pages */ +#define RX_DONT_BATCH 0 /* if 1, don't batch flows */ +#define RX_COPY_ALWAYS 0 /* if 0, use frags */ +#define RX_COPY_MIN 64 /* copy a little to make upper layers happy */ +#undef RX_COUNT_BUFFERS /* define to calculate RX buffer stats */ + +#define DRV_MODULE_NAME "cassini" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "1.4" +#define DRV_MODULE_RELDATE "1 July 2004" + +#define CAS_DEF_MSG_ENABLE \ + (NETIF_MSG_DRV | \ + NETIF_MSG_PROBE | \ + NETIF_MSG_LINK | \ + NETIF_MSG_TIMER | \ + NETIF_MSG_IFDOWN | \ + NETIF_MSG_IFUP | \ + NETIF_MSG_RX_ERR | \ + NETIF_MSG_TX_ERR) + +/* length of time before we decide the hardware is borked, + * and dev->tx_timeout() should be called to fix the problem + */ +#define CAS_TX_TIMEOUT (HZ) +#define CAS_LINK_TIMEOUT (22*HZ/10) +#define CAS_LINK_FAST_TIMEOUT (1) + +/* timeout values for state changing. these specify the number + * of 10us delays to be used before giving up. + */ +#define STOP_TRIES_PHY 1000 +#define STOP_TRIES 5000 + +/* specify a minimum frame size to deal with some fifo issues + * max mtu == 2 * page size - ethernet header - 64 - swivel = + * 2 * page_size - 0x50 + */ +#define CAS_MIN_FRAME 97 +#define CAS_1000MB_MIN_FRAME 255 +#define CAS_MIN_MTU 60 +#define CAS_MAX_MTU min(((cp->page_size << 1) - 0x50), 9000) + +#if 1 +/* + * Eliminate these and use separate atomic counters for each, to + * avoid a race condition. + */ +#else +#define CAS_RESET_MTU 1 +#define CAS_RESET_ALL 2 +#define CAS_RESET_SPARE 3 +#endif + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + +MODULE_AUTHOR("Adrian Sun (asun@darksunrising.com)"); +MODULE_DESCRIPTION("Sun Cassini(+) ethernet driver"); +MODULE_LICENSE("GPL"); +MODULE_PARM(cassini_debug, "i"); +MODULE_PARM_DESC(cassini_debug, "Cassini bitmapped debugging message enable value"); +MODULE_PARM(link_mode, "i"); +MODULE_PARM_DESC(link_mode, "default link mode"); + +/* + * Work around for a PCS bug in which the link goes down due to the chip + * being confused and never showing a link status of "up." + */ +#define DEFAULT_LINKDOWN_TIMEOUT 5 +/* + * Value in seconds, for user input. + */ +static int linkdown_timeout = DEFAULT_LINKDOWN_TIMEOUT; +MODULE_PARM(linkdown_timeout, "i"); +MODULE_PARM_DESC(linkdown_timeout, +"min reset interval in sec. for PCS linkdown issue; disabled if not positive"); + +/* + * value in 'ticks' (units used by jiffies). Set when we init the + * module because 'HZ' in actually a function call on some flavors of + * Linux. This will default to DEFAULT_LINKDOWN_TIMEOUT * HZ. + */ +static int link_transition_timeout; + + +static int cassini_debug = -1; /* -1 == use CAS_DEF_MSG_ENABLE as value */ +static int link_mode; + +static u16 link_modes[] __devinitdata = { + BMCR_ANENABLE, /* 0 : autoneg */ + 0, /* 1 : 10bt half duplex */ + BMCR_SPEED100, /* 2 : 100bt half duplex */ + BMCR_FULLDPLX, /* 3 : 10bt full duplex */ + BMCR_SPEED100|BMCR_FULLDPLX, /* 4 : 100bt full duplex */ + CAS_BMCR_SPEED1000|BMCR_FULLDPLX /* 5 : 1000bt full duplex */ +}; + +static struct pci_device_id cas_pci_tbl[] __devinitdata = { + { PCI_VENDOR_ID_SUN, PCI_DEVICE_ID_SUN_CASSINI, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SATURN, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, cas_pci_tbl); + +static void cas_set_link_modes(struct cas *cp); + +static inline void cas_lock_tx(struct cas *cp) +{ + int i; + + for (i = 0; i < N_TX_RINGS; i++) + spin_lock(&cp->tx_lock[i]); +} + +static inline void cas_lock_all(struct cas *cp) +{ + spin_lock_irq(&cp->lock); + cas_lock_tx(cp); +} + +/* WTZ: QA was finding deadlock problems with the previous + * versions after long test runs with multiple cards per machine. + * See if replacing cas_lock_all with safer versions helps. The + * symptoms QA is reporting match those we'd expect if interrupts + * aren't being properly restored, and we fixed a previous deadlock + * with similar symptoms by using save/restore versions in other + * places. + */ +#define cas_lock_all_save(cp, flags) \ +do { \ + struct cas *xxxcp = (cp); \ + spin_lock_irqsave(&xxxcp->lock, flags); \ + cas_lock_tx(xxxcp); \ +} while (0) + +static inline void cas_unlock_tx(struct cas *cp) +{ + int i; + + for (i = N_TX_RINGS; i > 0; i--) + spin_unlock(&cp->tx_lock[i - 1]); +} + +static inline void cas_unlock_all(struct cas *cp) +{ + cas_unlock_tx(cp); + spin_unlock_irq(&cp->lock); +} + +#define cas_unlock_all_restore(cp, flags) \ +do { \ + struct cas *xxxcp = (cp); \ + cas_unlock_tx(xxxcp); \ + spin_unlock_irqrestore(&xxxcp->lock, flags); \ +} while (0) + +static void cas_disable_irq(struct cas *cp, const int ring) +{ + /* Make sure we won't get any more interrupts */ + if (ring == 0) { + writel(0xFFFFFFFF, cp->regs + REG_INTR_MASK); + return; + } + + /* disable completion interrupts and selectively mask */ + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + switch (ring) { +#if defined (USE_PCI_INTB) || defined(USE_PCI_INTC) || defined(USE_PCI_INTD) +#ifdef USE_PCI_INTB + case 1: +#endif +#ifdef USE_PCI_INTC + case 2: +#endif +#ifdef USE_PCI_INTD + case 3: +#endif + writel(INTRN_MASK_CLEAR_ALL | INTRN_MASK_RX_EN, + cp->regs + REG_PLUS_INTRN_MASK(ring)); + break; +#endif + default: + writel(INTRN_MASK_CLEAR_ALL, cp->regs + + REG_PLUS_INTRN_MASK(ring)); + break; + } + } +} + +static inline void cas_mask_intr(struct cas *cp) +{ + int i; + + for (i = 0; i < N_RX_COMP_RINGS; i++) + cas_disable_irq(cp, i); +} + +static void cas_enable_irq(struct cas *cp, const int ring) +{ + if (ring == 0) { /* all but TX_DONE */ + writel(INTR_TX_DONE, cp->regs + REG_INTR_MASK); + return; + } + + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + switch (ring) { +#if defined (USE_PCI_INTB) || defined(USE_PCI_INTC) || defined(USE_PCI_INTD) +#ifdef USE_PCI_INTB + case 1: +#endif +#ifdef USE_PCI_INTC + case 2: +#endif +#ifdef USE_PCI_INTD + case 3: +#endif + writel(INTRN_MASK_RX_EN, cp->regs + + REG_PLUS_INTRN_MASK(ring)); + break; +#endif + default: + break; + } + } +} + +static inline void cas_unmask_intr(struct cas *cp) +{ + int i; + + for (i = 0; i < N_RX_COMP_RINGS; i++) + cas_enable_irq(cp, i); +} + +static inline void cas_entropy_gather(struct cas *cp) +{ +#ifdef USE_ENTROPY_DEV + if ((cp->cas_flags & CAS_FLAG_ENTROPY_DEV) == 0) + return; + + batch_entropy_store(readl(cp->regs + REG_ENTROPY_IV), + readl(cp->regs + REG_ENTROPY_IV), + sizeof(uint64_t)*8); +#endif +} + +static inline void cas_entropy_reset(struct cas *cp) +{ +#ifdef USE_ENTROPY_DEV + if ((cp->cas_flags & CAS_FLAG_ENTROPY_DEV) == 0) + return; + + writel(BIM_LOCAL_DEV_PAD | BIM_LOCAL_DEV_PROM | BIM_LOCAL_DEV_EXT, + cp->regs + REG_BIM_LOCAL_DEV_EN); + writeb(ENTROPY_RESET_STC_MODE, cp->regs + REG_ENTROPY_RESET); + writeb(0x55, cp->regs + REG_ENTROPY_RAND_REG); + + /* if we read back 0x0, we don't have an entropy device */ + if (readb(cp->regs + REG_ENTROPY_RAND_REG) == 0) + cp->cas_flags &= ~CAS_FLAG_ENTROPY_DEV; +#endif +} + +/* access to the phy. the following assumes that we've initialized the MIF to + * be in frame rather than bit-bang mode + */ +static u16 cas_phy_read(struct cas *cp, int reg) +{ + u32 cmd; + int limit = STOP_TRIES_PHY; + + cmd = MIF_FRAME_ST | MIF_FRAME_OP_READ; + cmd |= CAS_BASE(MIF_FRAME_PHY_ADDR, cp->phy_addr); + cmd |= CAS_BASE(MIF_FRAME_REG_ADDR, reg); + cmd |= MIF_FRAME_TURN_AROUND_MSB; + writel(cmd, cp->regs + REG_MIF_FRAME); + + /* poll for completion */ + while (limit-- > 0) { + udelay(10); + cmd = readl(cp->regs + REG_MIF_FRAME); + if (cmd & MIF_FRAME_TURN_AROUND_LSB) + return (cmd & MIF_FRAME_DATA_MASK); + } + return 0xFFFF; /* -1 */ +} + +static int cas_phy_write(struct cas *cp, int reg, u16 val) +{ + int limit = STOP_TRIES_PHY; + u32 cmd; + + cmd = MIF_FRAME_ST | MIF_FRAME_OP_WRITE; + cmd |= CAS_BASE(MIF_FRAME_PHY_ADDR, cp->phy_addr); + cmd |= CAS_BASE(MIF_FRAME_REG_ADDR, reg); + cmd |= MIF_FRAME_TURN_AROUND_MSB; + cmd |= val & MIF_FRAME_DATA_MASK; + writel(cmd, cp->regs + REG_MIF_FRAME); + + /* poll for completion */ + while (limit-- > 0) { + udelay(10); + cmd = readl(cp->regs + REG_MIF_FRAME); + if (cmd & MIF_FRAME_TURN_AROUND_LSB) + return 0; + } + return -1; +} + +static void cas_phy_powerup(struct cas *cp) +{ + u16 ctl = cas_phy_read(cp, MII_BMCR); + + if ((ctl & BMCR_PDOWN) == 0) + return; + ctl &= ~BMCR_PDOWN; + cas_phy_write(cp, MII_BMCR, ctl); +} + +static void cas_phy_powerdown(struct cas *cp) +{ + u16 ctl = cas_phy_read(cp, MII_BMCR); + + if (ctl & BMCR_PDOWN) + return; + ctl |= BMCR_PDOWN; + cas_phy_write(cp, MII_BMCR, ctl); +} + +/* cp->lock held. note: the last put_page will free the buffer */ +static int cas_page_free(struct cas *cp, cas_page_t *page) +{ + pci_unmap_page(cp->pdev, page->dma_addr, cp->page_size, + PCI_DMA_FROMDEVICE); + __free_pages(page->buffer, cp->page_order); + kfree(page); + return 0; +} + +#ifdef RX_COUNT_BUFFERS +#define RX_USED_ADD(x, y) ((x)->used += (y)) +#define RX_USED_SET(x, y) ((x)->used = (y)) +#else +#define RX_USED_ADD(x, y) +#define RX_USED_SET(x, y) +#endif + +/* local page allocation routines for the receive buffers. jumbo pages + * require at least 8K contiguous and 8K aligned buffers. + */ +static cas_page_t *cas_page_alloc(struct cas *cp, const int flags) +{ + cas_page_t *page; + + page = kmalloc(sizeof(cas_page_t), flags); + if (!page) + return NULL; + + INIT_LIST_HEAD(&page->list); + RX_USED_SET(page, 0); + page->buffer = alloc_pages(flags, cp->page_order); + if (!page->buffer) + goto page_err; + page->dma_addr = pci_map_page(cp->pdev, page->buffer, 0, + cp->page_size, PCI_DMA_FROMDEVICE); + return page; + +page_err: + kfree(page); + return NULL; +} + +/* initialize spare pool of rx buffers, but allocate during the open */ +static void cas_spare_init(struct cas *cp) +{ + spin_lock(&cp->rx_inuse_lock); + INIT_LIST_HEAD(&cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + + spin_lock(&cp->rx_spare_lock); + INIT_LIST_HEAD(&cp->rx_spare_list); + cp->rx_spares_needed = RX_SPARE_COUNT; + spin_unlock(&cp->rx_spare_lock); +} + +/* used on close. free all the spare buffers. */ +static void cas_spare_free(struct cas *cp) +{ + struct list_head list, *elem, *tmp; + + /* free spare buffers */ + INIT_LIST_HEAD(&list); + spin_lock(&cp->rx_spare_lock); + list_splice(&cp->rx_spare_list, &list); + INIT_LIST_HEAD(&cp->rx_spare_list); + spin_unlock(&cp->rx_spare_lock); + list_for_each_safe(elem, tmp, &list) { + cas_page_free(cp, list_entry(elem, cas_page_t, list)); + } + + INIT_LIST_HEAD(&list); +#if 1 + /* + * Looks like Adrian had protected this with a different + * lock than used everywhere else to manipulate this list. + */ + spin_lock(&cp->rx_inuse_lock); + list_splice(&cp->rx_inuse_list, &list); + INIT_LIST_HEAD(&cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); +#else + spin_lock(&cp->rx_spare_lock); + list_splice(&cp->rx_inuse_list, &list); + INIT_LIST_HEAD(&cp->rx_inuse_list); + spin_unlock(&cp->rx_spare_lock); +#endif + list_for_each_safe(elem, tmp, &list) { + cas_page_free(cp, list_entry(elem, cas_page_t, list)); + } +} + +/* replenish spares if needed */ +static void cas_spare_recover(struct cas *cp, const int flags) +{ + struct list_head list, *elem, *tmp; + int needed, i; + + /* check inuse list. if we don't need any more free buffers, + * just free it + */ + + /* make a local copy of the list */ + INIT_LIST_HEAD(&list); + spin_lock(&cp->rx_inuse_lock); + list_splice(&cp->rx_inuse_list, &list); + INIT_LIST_HEAD(&cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + + list_for_each_safe(elem, tmp, &list) { + cas_page_t *page = list_entry(elem, cas_page_t, list); + + if (page_count(page->buffer) > 1) + continue; + + list_del(elem); + spin_lock(&cp->rx_spare_lock); + if (cp->rx_spares_needed > 0) { + list_add(elem, &cp->rx_spare_list); + cp->rx_spares_needed--; + spin_unlock(&cp->rx_spare_lock); + } else { + spin_unlock(&cp->rx_spare_lock); + cas_page_free(cp, page); + } + } + + /* put any inuse buffers back on the list */ + if (!list_empty(&list)) { + spin_lock(&cp->rx_inuse_lock); + list_splice(&list, &cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + } + + spin_lock(&cp->rx_spare_lock); + needed = cp->rx_spares_needed; + spin_unlock(&cp->rx_spare_lock); + if (!needed) + return; + + /* we still need spares, so try to allocate some */ + INIT_LIST_HEAD(&list); + i = 0; + while (i < needed) { + cas_page_t *spare = cas_page_alloc(cp, flags); + if (!spare) + break; + list_add(&spare->list, &list); + i++; + } + + spin_lock(&cp->rx_spare_lock); + list_splice(&list, &cp->rx_spare_list); + cp->rx_spares_needed -= i; + spin_unlock(&cp->rx_spare_lock); +} + +/* pull a page from the list. */ +static cas_page_t *cas_page_dequeue(struct cas *cp) +{ + struct list_head *entry; + int recover; + + spin_lock(&cp->rx_spare_lock); + if (list_empty(&cp->rx_spare_list)) { + /* try to do a quick recovery */ + spin_unlock(&cp->rx_spare_lock); + cas_spare_recover(cp, GFP_ATOMIC); + spin_lock(&cp->rx_spare_lock); + if (list_empty(&cp->rx_spare_list)) { + if (netif_msg_rx_err(cp)) + printk(KERN_ERR "%s: no spare buffers " + "available.\n", cp->dev->name); + spin_unlock(&cp->rx_spare_lock); + return NULL; + } + } + + entry = cp->rx_spare_list.next; + list_del(entry); + recover = ++cp->rx_spares_needed; + spin_unlock(&cp->rx_spare_lock); + + /* trigger the timer to do the recovery */ + if ((recover & (RX_SPARE_RECOVER_VAL - 1)) == 0) { +#if 1 + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_spare); + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, CAS_RESET_SPARE); + schedule_work(&cp->reset_task); +#endif + } + return list_entry(entry, cas_page_t, list); +} + + +static void cas_mif_poll(struct cas *cp, const int enable) +{ + u32 cfg; + + cfg = readl(cp->regs + REG_MIF_CFG); + cfg &= (MIF_CFG_MDIO_0 | MIF_CFG_MDIO_1); + + if (cp->phy_type & CAS_PHY_MII_MDIO1) + cfg |= MIF_CFG_PHY_SELECT; + + /* poll and interrupt on link status change. */ + if (enable) { + cfg |= MIF_CFG_POLL_EN; + cfg |= CAS_BASE(MIF_CFG_POLL_REG, MII_BMSR); + cfg |= CAS_BASE(MIF_CFG_POLL_PHY, cp->phy_addr); + } + writel((enable) ? ~(BMSR_LSTATUS | BMSR_ANEGCOMPLETE) : 0xFFFF, + cp->regs + REG_MIF_MASK); + writel(cfg, cp->regs + REG_MIF_CFG); +} + +/* Must be invoked under cp->lock */ +static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep) +{ + u16 ctl; +#if 1 + int lcntl; + int changed = 0; + int oldstate = cp->lstate; + int link_was_not_down = !(oldstate == link_down); +#endif + /* Setup link parameters */ + if (!ep) + goto start_aneg; + lcntl = cp->link_cntl; + if (ep->autoneg == AUTONEG_ENABLE) + cp->link_cntl = BMCR_ANENABLE; + else { + cp->link_cntl = 0; + if (ep->speed == SPEED_100) + cp->link_cntl |= BMCR_SPEED100; + else if (ep->speed == SPEED_1000) + cp->link_cntl |= CAS_BMCR_SPEED1000; + if (ep->duplex == DUPLEX_FULL) + cp->link_cntl |= BMCR_FULLDPLX; + } +#if 1 + changed = (lcntl != cp->link_cntl); +#endif +start_aneg: + if (cp->lstate == link_up) { + printk(KERN_INFO "%s: PCS link down.\n", + cp->dev->name); + } else { + if (changed) { + printk(KERN_INFO "%s: link configuration changed\n", + cp->dev->name); + } + } + cp->lstate = link_down; + cp->link_transition = LINK_TRANSITION_LINK_DOWN; + if (!cp->hw_running) + return; +#if 1 + /* + * WTZ: If the old state was link_up, we turn off the carrier + * to replicate everything we do elsewhere on a link-down + * event when we were already in a link-up state.. + */ + if (oldstate == link_up) + netif_carrier_off(cp->dev); + if (changed && link_was_not_down) { + /* + * WTZ: This branch will simply schedule a full reset after + * we explicitly changed link modes in an ioctl. See if this + * fixes the link-problems we were having for forced mode. + */ + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_all); + schedule_work(&cp->reset_task); + cp->timer_ticks = 0; + mod_timer(&cp->link_timer, jiffies + CAS_LINK_TIMEOUT); + return; + } +#endif + if (cp->phy_type & CAS_PHY_SERDES) { + u32 val = readl(cp->regs + REG_PCS_MII_CTRL); + + if (cp->link_cntl & BMCR_ANENABLE) { + val |= (PCS_MII_RESTART_AUTONEG | PCS_MII_AUTONEG_EN); + cp->lstate = link_aneg; + } else { + if (cp->link_cntl & BMCR_FULLDPLX) + val |= PCS_MII_CTRL_DUPLEX; + val &= ~PCS_MII_AUTONEG_EN; + cp->lstate = link_force_ok; + } + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + writel(val, cp->regs + REG_PCS_MII_CTRL); + + } else { + cas_mif_poll(cp, 0); + ctl = cas_phy_read(cp, MII_BMCR); + ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 | + CAS_BMCR_SPEED1000 | BMCR_ANENABLE); + ctl |= cp->link_cntl; + if (ctl & BMCR_ANENABLE) { + ctl |= BMCR_ANRESTART; + cp->lstate = link_aneg; + } else { + cp->lstate = link_force_ok; + } + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + cas_phy_write(cp, MII_BMCR, ctl); + cas_mif_poll(cp, 1); + } + + cp->timer_ticks = 0; + mod_timer(&cp->link_timer, jiffies + CAS_LINK_TIMEOUT); +} + +/* Must be invoked under cp->lock. */ +static int cas_reset_mii_phy(struct cas *cp) +{ + int limit = STOP_TRIES_PHY; + u16 val; + + cas_phy_write(cp, MII_BMCR, BMCR_RESET); + udelay(100); + while (limit--) { + val = cas_phy_read(cp, MII_BMCR); + if ((val & BMCR_RESET) == 0) + break; + udelay(10); + } + return (limit <= 0); +} + +static void cas_saturn_firmware_load(struct cas *cp) +{ + cas_saturn_patch_t *patch = cas_saturn_patch; + + cas_phy_powerdown(cp); + + /* expanded memory access mode */ + cas_phy_write(cp, DP83065_MII_MEM, 0x0); + + /* pointer configuration for new firmware */ + cas_phy_write(cp, DP83065_MII_REGE, 0x8ff9); + cas_phy_write(cp, DP83065_MII_REGD, 0xbd); + cas_phy_write(cp, DP83065_MII_REGE, 0x8ffa); + cas_phy_write(cp, DP83065_MII_REGD, 0x82); + cas_phy_write(cp, DP83065_MII_REGE, 0x8ffb); + cas_phy_write(cp, DP83065_MII_REGD, 0x0); + cas_phy_write(cp, DP83065_MII_REGE, 0x8ffc); + cas_phy_write(cp, DP83065_MII_REGD, 0x39); + + /* download new firmware */ + cas_phy_write(cp, DP83065_MII_MEM, 0x1); + cas_phy_write(cp, DP83065_MII_REGE, patch->addr); + while (patch->addr) { + cas_phy_write(cp, DP83065_MII_REGD, patch->val); + patch++; + } + + /* enable firmware */ + cas_phy_write(cp, DP83065_MII_REGE, 0x8ff8); + cas_phy_write(cp, DP83065_MII_REGD, 0x1); +} + + +/* phy initialization */ +static void cas_phy_init(struct cas *cp) +{ + u16 val; + + /* if we're in MII/GMII mode, set up phy */ + if (CAS_PHY_MII(cp->phy_type)) { + writel(PCS_DATAPATH_MODE_MII, + cp->regs + REG_PCS_DATAPATH_MODE); + + cas_mif_poll(cp, 0); + cas_reset_mii_phy(cp); /* take out of isolate mode */ + + if (PHY_LUCENT_B0 == cp->phy_id) { + /* workaround link up/down issue with lucent */ + cas_phy_write(cp, LUCENT_MII_REG, 0x8000); + cas_phy_write(cp, MII_BMCR, 0x00f1); + cas_phy_write(cp, LUCENT_MII_REG, 0x0); + + } else if (PHY_BROADCOM_B0 == (cp->phy_id & 0xFFFFFFFC)) { + /* workarounds for broadcom phy */ + cas_phy_write(cp, BROADCOM_MII_REG8, 0x0C20); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x0012); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x1804); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x0013); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x1204); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x8006); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x0132); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x8006); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x0232); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x201F); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x0A20); + + } else if (PHY_BROADCOM_5411 == cp->phy_id) { + val = cas_phy_read(cp, BROADCOM_MII_REG4); + val = cas_phy_read(cp, BROADCOM_MII_REG4); + if (val & 0x0080) { + /* link workaround */ + cas_phy_write(cp, BROADCOM_MII_REG4, + val & ~0x0080); + } + + } else if (cp->cas_flags & CAS_FLAG_SATURN) { + writel((cp->phy_type & CAS_PHY_MII_MDIO0) ? + SATURN_PCFG_FSI : 0x0, + cp->regs + REG_SATURN_PCFG); + + /* load firmware to address 10Mbps auto-negotiation + * issue. NOTE: this will need to be changed if the + * default firmware gets fixed. + */ + if (PHY_NS_DP83065 == cp->phy_id) { + cas_saturn_firmware_load(cp); + } + cas_phy_powerup(cp); + } + + /* advertise capabilities */ + val = cas_phy_read(cp, MII_BMCR); + val &= ~BMCR_ANENABLE; + cas_phy_write(cp, MII_BMCR, val); + udelay(10); + + cas_phy_write(cp, MII_ADVERTISE, + cas_phy_read(cp, MII_ADVERTISE) | + (ADVERTISE_10HALF | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_100FULL | + CAS_ADVERTISE_PAUSE | + CAS_ADVERTISE_ASYM_PAUSE)); + + if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { + /* make sure that we don't advertise half + * duplex to avoid a chip issue + */ + val = cas_phy_read(cp, CAS_MII_1000_CTRL); + val &= ~CAS_ADVERTISE_1000HALF; + val |= CAS_ADVERTISE_1000FULL; + cas_phy_write(cp, CAS_MII_1000_CTRL, val); + } + + } else { + /* reset pcs for serdes */ + u32 val; + int limit; + + writel(PCS_DATAPATH_MODE_SERDES, + cp->regs + REG_PCS_DATAPATH_MODE); + + /* enable serdes pins on saturn */ + if (cp->cas_flags & CAS_FLAG_SATURN) + writel(0, cp->regs + REG_SATURN_PCFG); + + /* Reset PCS unit. */ + val = readl(cp->regs + REG_PCS_MII_CTRL); + val |= PCS_MII_RESET; + writel(val, cp->regs + REG_PCS_MII_CTRL); + + limit = STOP_TRIES; + while (limit-- > 0) { + udelay(10); + if ((readl(cp->regs + REG_PCS_MII_CTRL) & + PCS_MII_RESET) == 0) + break; + } + if (limit <= 0) + printk(KERN_WARNING "%s: PCS reset bit would not " + "clear [%08x].\n", cp->dev->name, + readl(cp->regs + REG_PCS_STATE_MACHINE)); + + /* Make sure PCS is disabled while changing advertisement + * configuration. + */ + writel(0x0, cp->regs + REG_PCS_CFG); + + /* Advertise all capabilities except half-duplex. */ + val = readl(cp->regs + REG_PCS_MII_ADVERT); + val &= ~PCS_MII_ADVERT_HD; + val |= (PCS_MII_ADVERT_FD | PCS_MII_ADVERT_SYM_PAUSE | + PCS_MII_ADVERT_ASYM_PAUSE); + writel(val, cp->regs + REG_PCS_MII_ADVERT); + + /* enable PCS */ + writel(PCS_CFG_EN, cp->regs + REG_PCS_CFG); + + /* pcs workaround: enable sync detect */ + writel(PCS_SERDES_CTRL_SYNCD_EN, + cp->regs + REG_PCS_SERDES_CTRL); + } +} + + +static int cas_pcs_link_check(struct cas *cp) +{ + u32 stat, state_machine; + int retval = 0; + + /* The link status bit latches on zero, so you must + * read it twice in such a case to see a transition + * to the link being up. + */ + stat = readl(cp->regs + REG_PCS_MII_STATUS); + if ((stat & PCS_MII_STATUS_LINK_STATUS) == 0) + stat = readl(cp->regs + REG_PCS_MII_STATUS); + + /* The remote-fault indication is only valid + * when autoneg has completed. + */ + if ((stat & (PCS_MII_STATUS_AUTONEG_COMP | + PCS_MII_STATUS_REMOTE_FAULT)) == + (PCS_MII_STATUS_AUTONEG_COMP | PCS_MII_STATUS_REMOTE_FAULT)) { + if (netif_msg_link(cp)) + printk(KERN_INFO "%s: PCS RemoteFault\n", + cp->dev->name); + } + + /* work around link detection issue by querying the PCS state + * machine directly. + */ + state_machine = readl(cp->regs + REG_PCS_STATE_MACHINE); + if ((state_machine & PCS_SM_LINK_STATE_MASK) != SM_LINK_STATE_UP) { + stat &= ~PCS_MII_STATUS_LINK_STATUS; + } else if (state_machine & PCS_SM_WORD_SYNC_STATE_MASK) { + stat |= PCS_MII_STATUS_LINK_STATUS; + } + + if (stat & PCS_MII_STATUS_LINK_STATUS) { + if (cp->lstate != link_up) { + if (cp->opened) { + cp->lstate = link_up; + cp->link_transition = LINK_TRANSITION_LINK_UP; + + cas_set_link_modes(cp); + netif_carrier_on(cp->dev); + } + } + } else if (cp->lstate == link_up) { + cp->lstate = link_down; + if (link_transition_timeout != 0 && + cp->link_transition != LINK_TRANSITION_REQUESTED_RESET && + !cp->link_transition_jiffies_valid) { + /* + * force a reset, as a workaround for the + * link-failure problem. May want to move this to a + * point a bit earlier in the sequence. If we had + * generated a reset a short time ago, we'll wait for + * the link timer to check the status until a + * timer expires (link_transistion_jiffies_valid is + * true when the timer is running.) Instead of using + * a system timer, we just do a check whenever the + * link timer is running - this clears the flag after + * a suitable delay. + */ + retval = 1; + cp->link_transition = LINK_TRANSITION_REQUESTED_RESET; + cp->link_transition_jiffies = jiffies; + cp->link_transition_jiffies_valid = 1; + } else { + cp->link_transition = LINK_TRANSITION_ON_FAILURE; + } + netif_carrier_off(cp->dev); + if (cp->opened && netif_msg_link(cp)) { + printk(KERN_INFO "%s: PCS link down.\n", + cp->dev->name); + } + + /* Cassini only: if you force a mode, there can be + * sync problems on link down. to fix that, the following + * things need to be checked: + * 1) read serialink state register + * 2) read pcs status register to verify link down. + * 3) if link down and serial link == 0x03, then you need + * to global reset the chip. + */ + if ((cp->cas_flags & CAS_FLAG_REG_PLUS) == 0) { + /* should check to see if we're in a forced mode */ + stat = readl(cp->regs + REG_PCS_SERDES_STATE); + if (stat == 0x03) + return 1; + } + } else if (cp->lstate == link_down) { + if (link_transition_timeout != 0 && + cp->link_transition != LINK_TRANSITION_REQUESTED_RESET && + !cp->link_transition_jiffies_valid) { + /* force a reset, as a workaround for the + * link-failure problem. May want to move + * this to a point a bit earlier in the + * sequence. + */ + retval = 1; + cp->link_transition = LINK_TRANSITION_REQUESTED_RESET; + cp->link_transition_jiffies = jiffies; + cp->link_transition_jiffies_valid = 1; + } else { + cp->link_transition = LINK_TRANSITION_STILL_FAILED; + } + } + + return retval; +} + +static int cas_pcs_interrupt(struct net_device *dev, + struct cas *cp, u32 status) +{ + u32 stat = readl(cp->regs + REG_PCS_INTR_STATUS); + + if ((stat & PCS_INTR_STATUS_LINK_CHANGE) == 0) + return 0; + return cas_pcs_link_check(cp); +} + +static int cas_txmac_interrupt(struct net_device *dev, + struct cas *cp, u32 status) +{ + u32 txmac_stat = readl(cp->regs + REG_MAC_TX_STATUS); + + if (!txmac_stat) + return 0; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: txmac interrupt, txmac_stat: 0x%x\n", + cp->dev->name, txmac_stat); + + /* Defer timer expiration is quite normal, + * don't even log the event. + */ + if ((txmac_stat & MAC_TX_DEFER_TIMER) && + !(txmac_stat & ~MAC_TX_DEFER_TIMER)) + return 0; + + spin_lock(&cp->stat_lock[0]); + if (txmac_stat & MAC_TX_UNDERRUN) { + printk(KERN_ERR "%s: TX MAC xmit underrun.\n", + dev->name); + cp->net_stats[0].tx_fifo_errors++; + } + + if (txmac_stat & MAC_TX_MAX_PACKET_ERR) { + printk(KERN_ERR "%s: TX MAC max packet size error.\n", + dev->name); + cp->net_stats[0].tx_errors++; + } + + /* The rest are all cases of one of the 16-bit TX + * counters expiring. + */ + if (txmac_stat & MAC_TX_COLL_NORMAL) + cp->net_stats[0].collisions += 0x10000; + + if (txmac_stat & MAC_TX_COLL_EXCESS) { + cp->net_stats[0].tx_aborted_errors += 0x10000; + cp->net_stats[0].collisions += 0x10000; + } + + if (txmac_stat & MAC_TX_COLL_LATE) { + cp->net_stats[0].tx_aborted_errors += 0x10000; + cp->net_stats[0].collisions += 0x10000; + } + spin_unlock(&cp->stat_lock[0]); + + /* We do not keep track of MAC_TX_COLL_FIRST and + * MAC_TX_PEAK_ATTEMPTS events. + */ + return 0; +} + +static void cas_load_firmware(struct cas *cp, cas_hp_inst_t *firmware) +{ + cas_hp_inst_t *inst; + u32 val; + int i; + + i = 0; + while ((inst = firmware) && inst->note) { + writel(i, cp->regs + REG_HP_INSTR_RAM_ADDR); + + val = CAS_BASE(HP_INSTR_RAM_HI_VAL, inst->val); + val |= CAS_BASE(HP_INSTR_RAM_HI_MASK, inst->mask); + writel(val, cp->regs + REG_HP_INSTR_RAM_DATA_HI); + + val = CAS_BASE(HP_INSTR_RAM_MID_OUTARG, inst->outarg >> 10); + val |= CAS_BASE(HP_INSTR_RAM_MID_OUTOP, inst->outop); + val |= CAS_BASE(HP_INSTR_RAM_MID_FNEXT, inst->fnext); + val |= CAS_BASE(HP_INSTR_RAM_MID_FOFF, inst->foff); + val |= CAS_BASE(HP_INSTR_RAM_MID_SNEXT, inst->snext); + val |= CAS_BASE(HP_INSTR_RAM_MID_SOFF, inst->soff); + val |= CAS_BASE(HP_INSTR_RAM_MID_OP, inst->op); + writel(val, cp->regs + REG_HP_INSTR_RAM_DATA_MID); + + val = CAS_BASE(HP_INSTR_RAM_LOW_OUTMASK, inst->outmask); + val |= CAS_BASE(HP_INSTR_RAM_LOW_OUTSHIFT, inst->outshift); + val |= CAS_BASE(HP_INSTR_RAM_LOW_OUTEN, inst->outenab); + val |= CAS_BASE(HP_INSTR_RAM_LOW_OUTARG, inst->outarg); + writel(val, cp->regs + REG_HP_INSTR_RAM_DATA_LOW); + ++firmware; + ++i; + } +} + +static void cas_init_rx_dma(struct cas *cp) +{ + u64 desc_dma = cp->block_dvma; + u32 val; + int i, size; + + /* rx free descriptors */ + val = CAS_BASE(RX_CFG_SWIVEL, RX_SWIVEL_OFF_VAL); + val |= CAS_BASE(RX_CFG_DESC_RING, RX_DESC_RINGN_INDEX(0)); + val |= CAS_BASE(RX_CFG_COMP_RING, RX_COMP_RINGN_INDEX(0)); + if ((N_RX_DESC_RINGS > 1) && + (cp->cas_flags & CAS_FLAG_REG_PLUS)) /* do desc 2 */ + val |= CAS_BASE(RX_CFG_DESC_RING1, RX_DESC_RINGN_INDEX(1)); + writel(val, cp->regs + REG_RX_CFG); + + val = (unsigned long) cp->init_rxds[0] - + (unsigned long) cp->init_block; + writel((desc_dma + val) >> 32, cp->regs + REG_RX_DB_HI); + writel((desc_dma + val) & 0xffffffff, cp->regs + REG_RX_DB_LOW); + writel(RX_DESC_RINGN_SIZE(0) - 4, cp->regs + REG_RX_KICK); + + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + /* rx desc 2 is for IPSEC packets. however, + * we don't it that for that purpose. + */ + val = (unsigned long) cp->init_rxds[1] - + (unsigned long) cp->init_block; + writel((desc_dma + val) >> 32, cp->regs + REG_PLUS_RX_DB1_HI); + writel((desc_dma + val) & 0xffffffff, cp->regs + + REG_PLUS_RX_DB1_LOW); + writel(RX_DESC_RINGN_SIZE(1) - 4, cp->regs + + REG_PLUS_RX_KICK1); + } + + /* rx completion registers */ + val = (unsigned long) cp->init_rxcs[0] - + (unsigned long) cp->init_block; + writel((desc_dma + val) >> 32, cp->regs + REG_RX_CB_HI); + writel((desc_dma + val) & 0xffffffff, cp->regs + REG_RX_CB_LOW); + + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + /* rx comp 2-4 */ + for (i = 1; i < MAX_RX_COMP_RINGS; i++) { + val = (unsigned long) cp->init_rxcs[i] - + (unsigned long) cp->init_block; + writel((desc_dma + val) >> 32, cp->regs + + REG_PLUS_RX_CBN_HI(i)); + writel((desc_dma + val) & 0xffffffff, cp->regs + + REG_PLUS_RX_CBN_LOW(i)); + } + } + + /* read selective clear regs to prevent spurious interrupts + * on reset because complete == kick. + * selective clear set up to prevent interrupts on resets + */ + readl(cp->regs + REG_INTR_STATUS_ALIAS); + writel(INTR_RX_DONE | INTR_RX_BUF_UNAVAIL, cp->regs + REG_ALIAS_CLEAR); + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + for (i = 1; i < N_RX_COMP_RINGS; i++) + readl(cp->regs + REG_PLUS_INTRN_STATUS_ALIAS(i)); + + /* 2 is different from 3 and 4 */ + if (N_RX_COMP_RINGS > 1) + writel(INTR_RX_DONE_ALT | INTR_RX_BUF_UNAVAIL_1, + cp->regs + REG_PLUS_ALIASN_CLEAR(1)); + + for (i = 2; i < N_RX_COMP_RINGS; i++) + writel(INTR_RX_DONE_ALT, + cp->regs + REG_PLUS_ALIASN_CLEAR(i)); + } + + /* set up pause thresholds */ + val = CAS_BASE(RX_PAUSE_THRESH_OFF, + cp->rx_pause_off / RX_PAUSE_THRESH_QUANTUM); + val |= CAS_BASE(RX_PAUSE_THRESH_ON, + cp->rx_pause_on / RX_PAUSE_THRESH_QUANTUM); + writel(val, cp->regs + REG_RX_PAUSE_THRESH); + + /* zero out dma reassembly buffers */ + for (i = 0; i < 64; i++) { + writel(i, cp->regs + REG_RX_TABLE_ADDR); + writel(0x0, cp->regs + REG_RX_TABLE_DATA_LOW); + writel(0x0, cp->regs + REG_RX_TABLE_DATA_MID); + writel(0x0, cp->regs + REG_RX_TABLE_DATA_HI); + } + + /* make sure address register is 0 for normal operation */ + writel(0x0, cp->regs + REG_RX_CTRL_FIFO_ADDR); + writel(0x0, cp->regs + REG_RX_IPP_FIFO_ADDR); + + /* interrupt mitigation */ +#ifdef USE_RX_BLANK + val = CAS_BASE(RX_BLANK_INTR_TIME, RX_BLANK_INTR_TIME_VAL); + val |= CAS_BASE(RX_BLANK_INTR_PKT, RX_BLANK_INTR_PKT_VAL); + writel(val, cp->regs + REG_RX_BLANK); +#else + writel(0x0, cp->regs + REG_RX_BLANK); +#endif + + /* interrupt generation as a function of low water marks for + * free desc and completion entries. these are used to trigger + * housekeeping for rx descs. we don't use the free interrupt + * as it's not very useful + */ + /* val = CAS_BASE(RX_AE_THRESH_FREE, RX_AE_FREEN_VAL(0)); */ + val = CAS_BASE(RX_AE_THRESH_COMP, RX_AE_COMP_VAL); + writel(val, cp->regs + REG_RX_AE_THRESH); + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + val = CAS_BASE(RX_AE1_THRESH_FREE, RX_AE_FREEN_VAL(1)); + writel(val, cp->regs + REG_PLUS_RX_AE1_THRESH); + } + + /* Random early detect registers. useful for congestion avoidance. + * this should be tunable. + */ + writel(0x0, cp->regs + REG_RX_RED); + + /* receive page sizes. default == 2K (0x800) */ + val = 0; + if (cp->page_size == 0x1000) + val = 0x1; + else if (cp->page_size == 0x2000) + val = 0x2; + else if (cp->page_size == 0x4000) + val = 0x3; + + /* round mtu + offset. constrain to page size. */ + size = cp->dev->mtu + 64; + if (size > cp->page_size) + size = cp->page_size; + + if (size <= 0x400) + i = 0x0; + else if (size <= 0x800) + i = 0x1; + else if (size <= 0x1000) + i = 0x2; + else + i = 0x3; + + cp->mtu_stride = 1 << (i + 10); + val = CAS_BASE(RX_PAGE_SIZE, val); + val |= CAS_BASE(RX_PAGE_SIZE_MTU_STRIDE, i); + val |= CAS_BASE(RX_PAGE_SIZE_MTU_COUNT, cp->page_size >> (i + 10)); + val |= CAS_BASE(RX_PAGE_SIZE_MTU_OFF, 0x1); + writel(val, cp->regs + REG_RX_PAGE_SIZE); + + /* enable the header parser if desired */ + if (CAS_HP_FIRMWARE == cas_prog_null) + return; + + val = CAS_BASE(HP_CFG_NUM_CPU, CAS_NCPUS > 63 ? 0 : CAS_NCPUS); + val |= HP_CFG_PARSE_EN | HP_CFG_SYN_INC_MASK; + val |= CAS_BASE(HP_CFG_TCP_THRESH, HP_TCP_THRESH_VAL); + writel(val, cp->regs + REG_HP_CFG); +} + +static inline void cas_rxc_init(struct cas_rx_comp *rxc) +{ + memset(rxc, 0, sizeof(*rxc)); + rxc->word4 = cpu_to_le64(RX_COMP4_ZERO); +} + +/* NOTE: we use the ENC RX DESC ring for spares. the rx_page[0,1] + * flipping is protected by the fact that the chip will not + * hand back the same page index while it's being processed. + */ +static inline cas_page_t *cas_page_spare(struct cas *cp, const int index) +{ + cas_page_t *page = cp->rx_pages[1][index]; + cas_page_t *new; + + if (page_count(page->buffer) == 1) + return page; + + new = cas_page_dequeue(cp); + if (new) { + spin_lock(&cp->rx_inuse_lock); + list_add(&page->list, &cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + } + return new; +} + +/* this needs to be changed if we actually use the ENC RX DESC ring */ +static cas_page_t *cas_page_swap(struct cas *cp, const int ring, + const int index) +{ + cas_page_t **page0 = cp->rx_pages[0]; + cas_page_t **page1 = cp->rx_pages[1]; + + /* swap if buffer is in use */ + if (page_count(page0[index]->buffer) > 1) { + cas_page_t *new = cas_page_spare(cp, index); + if (new) { + page1[index] = page0[index]; + page0[index] = new; + } + } + RX_USED_SET(page0[index], 0); + return page0[index]; +} + +static void cas_clean_rxds(struct cas *cp) +{ + /* only clean ring 0 as ring 1 is used for spare buffers */ + struct cas_rx_desc *rxd = cp->init_rxds[0]; + int i, size; + + /* release all rx flows */ + for (i = 0; i < N_RX_FLOWS; i++) { + struct sk_buff *skb; + while ((skb = __skb_dequeue(&cp->rx_flows[i]))) { + cas_skb_release(skb); + } + } + + /* initialize descriptors */ + size = RX_DESC_RINGN_SIZE(0); + for (i = 0; i < size; i++) { + cas_page_t *page = cas_page_swap(cp, 0, i); + rxd[i].buffer = cpu_to_le64(page->dma_addr); + rxd[i].index = cpu_to_le64(CAS_BASE(RX_INDEX_NUM, i) | + CAS_BASE(RX_INDEX_RING, 0)); + } + + cp->rx_old[0] = RX_DESC_RINGN_SIZE(0) - 4; + cp->rx_last[0] = 0; + cp->cas_flags &= ~CAS_FLAG_RXD_POST(0); +} + +static void cas_clean_rxcs(struct cas *cp) +{ + int i, j; + + /* take ownership of rx comp descriptors */ + memset(cp->rx_cur, 0, sizeof(*cp->rx_cur)*N_RX_COMP_RINGS); + memset(cp->rx_new, 0, sizeof(*cp->rx_new)*N_RX_COMP_RINGS); + for (i = 0; i < N_RX_COMP_RINGS; i++) { + struct cas_rx_comp *rxc = cp->init_rxcs[i]; + for (j = 0; j < RX_COMP_RINGN_SIZE(i); j++) { + cas_rxc_init(rxc + j); + } + } +} + +#if 0 +/* When we get a RX fifo overflow, the RX unit is probably hung + * so we do the following. + * + * If any part of the reset goes wrong, we return 1 and that causes the + * whole chip to be reset. + */ +static int cas_rxmac_reset(struct cas *cp) +{ + struct net_device *dev = cp->dev; + int limit; + u32 val; + + /* First, reset MAC RX. */ + writel(cp->mac_rx_cfg & ~MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG); + for (limit = 0; limit < STOP_TRIES; limit++) { + if (!(readl(cp->regs + REG_MAC_RX_CFG) & MAC_RX_CFG_EN)) + break; + udelay(10); + } + if (limit == STOP_TRIES) { + printk(KERN_ERR "%s: RX MAC will not disable, resetting whole " + "chip.\n", dev->name); + return 1; + } + + /* Second, disable RX DMA. */ + writel(0, cp->regs + REG_RX_CFG); + for (limit = 0; limit < STOP_TRIES; limit++) { + if (!(readl(cp->regs + REG_RX_CFG) & RX_CFG_DMA_EN)) + break; + udelay(10); + } + if (limit == STOP_TRIES) { + printk(KERN_ERR "%s: RX DMA will not disable, resetting whole " + "chip.\n", dev->name); + return 1; + } + + mdelay(5); + + /* Execute RX reset command. */ + writel(SW_RESET_RX, cp->regs + REG_SW_RESET); + for (limit = 0; limit < STOP_TRIES; limit++) { + if (!(readl(cp->regs + REG_SW_RESET) & SW_RESET_RX)) + break; + udelay(10); + } + if (limit == STOP_TRIES) { + printk(KERN_ERR "%s: RX reset command will not execute, " + "resetting whole chip.\n", dev->name); + return 1; + } + + /* reset driver rx state */ + cas_clean_rxds(cp); + cas_clean_rxcs(cp); + + /* Now, reprogram the rest of RX unit. */ + cas_init_rx_dma(cp); + + /* re-enable */ + val = readl(cp->regs + REG_RX_CFG); + writel(val | RX_CFG_DMA_EN, cp->regs + REG_RX_CFG); + writel(MAC_RX_FRAME_RECV, cp->regs + REG_MAC_RX_MASK); + val = readl(cp->regs + REG_MAC_RX_CFG); + writel(val | MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG); + return 0; +} +#endif + +static int cas_rxmac_interrupt(struct net_device *dev, struct cas *cp, + u32 status) +{ + u32 stat = readl(cp->regs + REG_MAC_RX_STATUS); + + if (!stat) + return 0; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: rxmac interrupt, stat: 0x%x\n", + cp->dev->name, stat); + + /* these are all rollovers */ + spin_lock(&cp->stat_lock[0]); + if (stat & MAC_RX_ALIGN_ERR) + cp->net_stats[0].rx_frame_errors += 0x10000; + + if (stat & MAC_RX_CRC_ERR) + cp->net_stats[0].rx_crc_errors += 0x10000; + + if (stat & MAC_RX_LEN_ERR) + cp->net_stats[0].rx_length_errors += 0x10000; + + if (stat & MAC_RX_OVERFLOW) { + cp->net_stats[0].rx_over_errors++; + cp->net_stats[0].rx_fifo_errors++; + } + + /* We do not track MAC_RX_FRAME_COUNT and MAC_RX_VIOL_ERR + * events. + */ + spin_unlock(&cp->stat_lock[0]); + return 0; +} + +static int cas_mac_interrupt(struct net_device *dev, struct cas *cp, + u32 status) +{ + u32 stat = readl(cp->regs + REG_MAC_CTRL_STATUS); + + if (!stat) + return 0; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: mac interrupt, stat: 0x%x\n", + cp->dev->name, stat); + + /* This interrupt is just for pause frame and pause + * tracking. It is useful for diagnostics and debug + * but probably by default we will mask these events. + */ + if (stat & MAC_CTRL_PAUSE_STATE) + cp->pause_entered++; + + if (stat & MAC_CTRL_PAUSE_RECEIVED) + cp->pause_last_time_recvd = (stat >> 16); + + return 0; +} + + +/* Must be invoked under cp->lock. */ +static inline int cas_mdio_link_not_up(struct cas *cp) +{ + u16 val; + + switch (cp->lstate) { + case link_force_ret: + if (netif_msg_link(cp)) + printk(KERN_INFO "%s: Autoneg failed again, keeping" + " forced mode\n", cp->dev->name); + cas_phy_write(cp, MII_BMCR, cp->link_fcntl); + cp->timer_ticks = 5; + cp->lstate = link_force_ok; + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + break; + + case link_aneg: + val = cas_phy_read(cp, MII_BMCR); + + /* Try forced modes. we try things in the following order: + * 1000 full -> 100 full/half -> 10 half + */ + val &= ~(BMCR_ANRESTART | BMCR_ANENABLE); + val |= BMCR_FULLDPLX; + val |= (cp->cas_flags & CAS_FLAG_1000MB_CAP) ? + CAS_BMCR_SPEED1000 : BMCR_SPEED100; + cas_phy_write(cp, MII_BMCR, val); + cp->timer_ticks = 5; + cp->lstate = link_force_try; + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + break; + + case link_force_try: + /* Downgrade from 1000 to 100 to 10 Mbps if necessary. */ + val = cas_phy_read(cp, MII_BMCR); + cp->timer_ticks = 5; + if (val & CAS_BMCR_SPEED1000) { /* gigabit */ + val &= ~CAS_BMCR_SPEED1000; + val |= (BMCR_SPEED100 | BMCR_FULLDPLX); + cas_phy_write(cp, MII_BMCR, val); + break; + } + + if (val & BMCR_SPEED100) { + if (val & BMCR_FULLDPLX) /* fd failed */ + val &= ~BMCR_FULLDPLX; + else { /* 100Mbps failed */ + val &= ~BMCR_SPEED100; + } + cas_phy_write(cp, MII_BMCR, val); + break; + } + default: + break; + } + return 0; +} + + +/* must be invoked with cp->lock held */ +static int cas_mii_link_check(struct cas *cp, const u16 bmsr) +{ + int restart; + + if (bmsr & BMSR_LSTATUS) { + /* Ok, here we got a link. If we had it due to a forced + * fallback, and we were configured for autoneg, we + * retry a short autoneg pass. If you know your hub is + * broken, use ethtool ;) + */ + if ((cp->lstate == link_force_try) && + (cp->link_cntl & BMCR_ANENABLE)) { + cp->lstate = link_force_ret; + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + cas_mif_poll(cp, 0); + cp->link_fcntl = cas_phy_read(cp, MII_BMCR); + cp->timer_ticks = 5; + if (cp->opened && netif_msg_link(cp)) + printk(KERN_INFO "%s: Got link after fallback, retrying" + " autoneg once...\n", cp->dev->name); + cas_phy_write(cp, MII_BMCR, + cp->link_fcntl | BMCR_ANENABLE | + BMCR_ANRESTART); + cas_mif_poll(cp, 1); + + } else if (cp->lstate != link_up) { + cp->lstate = link_up; + cp->link_transition = LINK_TRANSITION_LINK_UP; + + if (cp->opened) { + cas_set_link_modes(cp); + netif_carrier_on(cp->dev); + } + } + return 0; + } + + /* link not up. if the link was previously up, we restart the + * whole process + */ + restart = 0; + if (cp->lstate == link_up) { + cp->lstate = link_down; + cp->link_transition = LINK_TRANSITION_LINK_DOWN; + + netif_carrier_off(cp->dev); + if (cp->opened && netif_msg_link(cp)) + printk(KERN_INFO "%s: Link down\n", + cp->dev->name); + restart = 1; + + } else if (++cp->timer_ticks > 10) + cas_mdio_link_not_up(cp); + + return restart; +} + +static int cas_mif_interrupt(struct net_device *dev, struct cas *cp, + u32 status) +{ + u32 stat = readl(cp->regs + REG_MIF_STATUS); + u16 bmsr; + + /* check for a link change */ + if (CAS_VAL(MIF_STATUS_POLL_STATUS, stat) == 0) + return 0; + + bmsr = CAS_VAL(MIF_STATUS_POLL_DATA, stat); + return cas_mii_link_check(cp, bmsr); +} + +static int cas_pci_interrupt(struct net_device *dev, struct cas *cp, + u32 status) +{ + u32 stat = readl(cp->regs + REG_PCI_ERR_STATUS); + + if (!stat) + return 0; + + printk(KERN_ERR "%s: PCI error [%04x:%04x] ", dev->name, stat, + readl(cp->regs + REG_BIM_DIAG)); + + /* cassini+ has this reserved */ + if ((stat & PCI_ERR_BADACK) && + ((cp->cas_flags & CAS_FLAG_REG_PLUS) == 0)) + printk(" "); + + if (stat & PCI_ERR_DTRTO) + printk(" "); + if (stat & PCI_ERR_OTHER) + printk(" "); + if (stat & PCI_ERR_BIM_DMA_WRITE) + printk(" "); + if (stat & PCI_ERR_BIM_DMA_READ) + printk(" "); + printk("\n"); + + if (stat & PCI_ERR_OTHER) { + u16 cfg; + + /* Interrogate PCI config space for the + * true cause. + */ + pci_read_config_word(cp->pdev, PCI_STATUS, &cfg); + printk(KERN_ERR "%s: Read PCI cfg space status [%04x]\n", + dev->name, cfg); + if (cfg & PCI_STATUS_PARITY) + printk(KERN_ERR "%s: PCI parity error detected.\n", + dev->name); + if (cfg & PCI_STATUS_SIG_TARGET_ABORT) + printk(KERN_ERR "%s: PCI target abort.\n", + dev->name); + if (cfg & PCI_STATUS_REC_TARGET_ABORT) + printk(KERN_ERR "%s: PCI master acks target abort.\n", + dev->name); + if (cfg & PCI_STATUS_REC_MASTER_ABORT) + printk(KERN_ERR "%s: PCI master abort.\n", dev->name); + if (cfg & PCI_STATUS_SIG_SYSTEM_ERROR) + printk(KERN_ERR "%s: PCI system error SERR#.\n", + dev->name); + if (cfg & PCI_STATUS_DETECTED_PARITY) + printk(KERN_ERR "%s: PCI parity error.\n", + dev->name); + + /* Write the error bits back to clear them. */ + cfg &= (PCI_STATUS_PARITY | + PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT | + PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR | + PCI_STATUS_DETECTED_PARITY); + pci_write_config_word(cp->pdev, PCI_STATUS, cfg); + } + + /* For all PCI errors, we should reset the chip. */ + return 1; +} + +/* All non-normal interrupt conditions get serviced here. + * Returns non-zero if we should just exit the interrupt + * handler right now (ie. if we reset the card which invalidates + * all of the other original irq status bits). + */ +static int cas_abnormal_irq(struct net_device *dev, struct cas *cp, + u32 status) +{ + if (status & INTR_RX_TAG_ERROR) { + /* corrupt RX tag framing */ + if (netif_msg_rx_err(cp)) + printk(KERN_DEBUG "%s: corrupt rx tag framing\n", + cp->dev->name); + spin_lock(&cp->stat_lock[0]); + cp->net_stats[0].rx_errors++; + spin_unlock(&cp->stat_lock[0]); + goto do_reset; + } + + if (status & INTR_RX_LEN_MISMATCH) { + /* length mismatch. */ + if (netif_msg_rx_err(cp)) + printk(KERN_DEBUG "%s: length mismatch for rx frame\n", + cp->dev->name); + spin_lock(&cp->stat_lock[0]); + cp->net_stats[0].rx_errors++; + spin_unlock(&cp->stat_lock[0]); + goto do_reset; + } + + if (status & INTR_PCS_STATUS) { + if (cas_pcs_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_TX_MAC_STATUS) { + if (cas_txmac_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_RX_MAC_STATUS) { + if (cas_rxmac_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_MAC_CTRL_STATUS) { + if (cas_mac_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_MIF_STATUS) { + if (cas_mif_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_PCI_ERROR_STATUS) { + if (cas_pci_interrupt(dev, cp, status)) + goto do_reset; + } + return 0; + +do_reset: +#if 1 + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_all); + printk(KERN_ERR "%s:reset called in cas_abnormal_irq [0x%x]\n", + dev->name, status); + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, CAS_RESET_ALL); + printk(KERN_ERR "reset called in cas_abnormal_irq\n"); + schedule_work(&cp->reset_task); +#endif + return 1; +} + +/* NOTE: CAS_TABORT returns 1 or 2 so that it can be used when + * determining whether to do a netif_stop/wakeup + */ +#define CAS_TABORT(x) (((x)->cas_flags & CAS_FLAG_TARGET_ABORT) ? 2 : 1) +#define CAS_ROUND_PAGE(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK) +static inline int cas_calc_tabort(struct cas *cp, const unsigned long addr, + const int len) +{ + unsigned long off = addr + len; + + if (CAS_TABORT(cp) == 1) + return 0; + if ((CAS_ROUND_PAGE(off) - off) > TX_TARGET_ABORT_LEN) + return 0; + return TX_TARGET_ABORT_LEN; +} + +static inline void cas_tx_ringN(struct cas *cp, int ring, int limit) +{ + struct cas_tx_desc *txds; + struct sk_buff **skbs; + struct net_device *dev = cp->dev; + int entry, count; + + spin_lock(&cp->tx_lock[ring]); + txds = cp->init_txds[ring]; + skbs = cp->tx_skbs[ring]; + entry = cp->tx_old[ring]; + + count = TX_BUFF_COUNT(ring, entry, limit); + while (entry != limit) { + struct sk_buff *skb = skbs[entry]; + dma_addr_t daddr; + u32 dlen; + int frag; + + if (!skb) { + /* this should never occur */ + entry = TX_DESC_NEXT(ring, entry); + continue; + } + + /* however, we might get only a partial skb release. */ + count -= skb_shinfo(skb)->nr_frags + + + cp->tx_tiny_use[ring][entry].nbufs + 1; + if (count < 0) + break; + + if (netif_msg_tx_done(cp)) + printk(KERN_DEBUG "%s: tx[%d] done, slot %d\n", + cp->dev->name, ring, entry); + + skbs[entry] = NULL; + cp->tx_tiny_use[ring][entry].nbufs = 0; + + for (frag = 0; frag <= skb_shinfo(skb)->nr_frags; frag++) { + struct cas_tx_desc *txd = txds + entry; + + daddr = le64_to_cpu(txd->buffer); + dlen = CAS_VAL(TX_DESC_BUFLEN, + le64_to_cpu(txd->control)); + pci_unmap_page(cp->pdev, daddr, dlen, + PCI_DMA_TODEVICE); + entry = TX_DESC_NEXT(ring, entry); + + /* tiny buffer may follow */ + if (cp->tx_tiny_use[ring][entry].used) { + cp->tx_tiny_use[ring][entry].used = 0; + entry = TX_DESC_NEXT(ring, entry); + } + } + + spin_lock(&cp->stat_lock[ring]); + cp->net_stats[ring].tx_packets++; + cp->net_stats[ring].tx_bytes += skb->len; + spin_unlock(&cp->stat_lock[ring]); + dev_kfree_skb_irq(skb); + } + cp->tx_old[ring] = entry; + + /* this is wrong for multiple tx rings. the net device needs + * multiple queues for this to do the right thing. we wait + * for 2*packets to be available when using tiny buffers + */ + if (netif_queue_stopped(dev) && + (TX_BUFFS_AVAIL(cp, ring) > CAS_TABORT(cp)*(MAX_SKB_FRAGS + 1))) + netif_wake_queue(dev); + spin_unlock(&cp->tx_lock[ring]); +} + +static void cas_tx(struct net_device *dev, struct cas *cp, + u32 status) +{ + int limit, ring; +#ifdef USE_TX_COMPWB + u64 compwb = le64_to_cpu(cp->init_block->tx_compwb); +#endif + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: tx interrupt, status: 0x%x, %lx\n", + cp->dev->name, status, compwb); + /* process all the rings */ + for (ring = 0; ring < N_TX_RINGS; ring++) { +#ifdef USE_TX_COMPWB + /* use the completion writeback registers */ + limit = (CAS_VAL(TX_COMPWB_MSB, compwb) << 8) | + CAS_VAL(TX_COMPWB_LSB, compwb); + compwb = TX_COMPWB_NEXT(compwb); +#else + limit = readl(cp->regs + REG_TX_COMPN(ring)); +#endif + if (cp->tx_old[ring] != limit) + cas_tx_ringN(cp, ring, limit); + } +} + + +static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, + int entry, const u64 *words, + struct sk_buff **skbref) +{ + int dlen, hlen, len, i, alloclen; + int off, swivel = RX_SWIVEL_OFF_VAL; + struct cas_page *page; + struct sk_buff *skb; + void *addr, *crcaddr; + char *p; + + hlen = CAS_VAL(RX_COMP2_HDR_SIZE, words[1]); + dlen = CAS_VAL(RX_COMP1_DATA_SIZE, words[0]); + len = hlen + dlen; + + if (RX_COPY_ALWAYS || (words[2] & RX_COMP3_SMALL_PKT)) + alloclen = len; + else + alloclen = max(hlen, RX_COPY_MIN); + + skb = dev_alloc_skb(alloclen + swivel + cp->crc_size); + if (skb == NULL) + return -1; + + *skbref = skb; + skb->dev = cp->dev; + skb_reserve(skb, swivel); + + p = skb->data; + addr = crcaddr = NULL; + if (hlen) { /* always copy header pages */ + i = CAS_VAL(RX_COMP2_HDR_INDEX, words[1]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + off = CAS_VAL(RX_COMP2_HDR_OFF, words[1]) * 0x100 + + swivel; + + i = hlen; + if (!dlen) /* attach FCS */ + i += cp->crc_size; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + addr = cas_page_map(page->buffer); + memcpy(p, addr + off, i); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + cas_page_unmap(addr); + RX_USED_ADD(page, 0x100); + p += hlen; + swivel = 0; + } + + + if (alloclen < (hlen + dlen)) { + skb_frag_t *frag = skb_shinfo(skb)->frags; + + /* normal or jumbo packets. we use frags */ + i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + off = CAS_VAL(RX_COMP1_DATA_OFF, words[0]) + swivel; + + hlen = min(cp->page_size - off, dlen); + if (hlen < 0) { + if (netif_msg_rx_err(cp)) { + printk(KERN_DEBUG "%s: rx page overflow: " + "%d\n", cp->dev->name, hlen); + } + dev_kfree_skb_irq(skb); + return -1; + } + i = hlen; + if (i == dlen) /* attach FCS */ + i += cp->crc_size; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + + /* make sure we always copy a header */ + swivel = 0; + if (p == (char *) skb->data) { /* not split */ + addr = cas_page_map(page->buffer); + memcpy(p, addr + off, RX_COPY_MIN); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + cas_page_unmap(addr); + off += RX_COPY_MIN; + swivel = RX_COPY_MIN; + RX_USED_ADD(page, cp->mtu_stride); + } else { + RX_USED_ADD(page, hlen); + } + skb_put(skb, alloclen); + + skb_shinfo(skb)->nr_frags++; + skb->data_len += hlen - swivel; + skb->len += hlen - swivel; + + get_page(page->buffer); + frag->page = page->buffer; + frag->page_offset = off; + frag->size = hlen - swivel; + + /* any more data? */ + if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { + hlen = dlen; + off = 0; + + i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr, + hlen + cp->crc_size, + PCI_DMA_FROMDEVICE); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr, + hlen + cp->crc_size, + PCI_DMA_FROMDEVICE); + + skb_shinfo(skb)->nr_frags++; + skb->data_len += hlen; + skb->len += hlen; + frag++; + + get_page(page->buffer); + frag->page = page->buffer; + frag->page_offset = 0; + frag->size = hlen; + RX_USED_ADD(page, hlen + cp->crc_size); + } + + if (cp->crc_size) { + addr = cas_page_map(page->buffer); + crcaddr = addr + off + hlen; + } + + } else { + /* copying packet */ + if (!dlen) + goto end_copy_pkt; + + i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + off = CAS_VAL(RX_COMP1_DATA_OFF, words[0]) + swivel; + hlen = min(cp->page_size - off, dlen); + if (hlen < 0) { + if (netif_msg_rx_err(cp)) { + printk(KERN_DEBUG "%s: rx page overflow: " + "%d\n", cp->dev->name, hlen); + } + dev_kfree_skb_irq(skb); + return -1; + } + i = hlen; + if (i == dlen) /* attach FCS */ + i += cp->crc_size; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + addr = cas_page_map(page->buffer); + memcpy(p, addr + off, i); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + cas_page_unmap(addr); + if (p == (char *) skb->data) /* not split */ + RX_USED_ADD(page, cp->mtu_stride); + else + RX_USED_ADD(page, i); + + /* any more data? */ + if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { + p += hlen; + i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr, + dlen + cp->crc_size, + PCI_DMA_FROMDEVICE); + addr = cas_page_map(page->buffer); + memcpy(p, addr, dlen + cp->crc_size); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr, + dlen + cp->crc_size, + PCI_DMA_FROMDEVICE); + cas_page_unmap(addr); + RX_USED_ADD(page, dlen + cp->crc_size); + } +end_copy_pkt: + if (cp->crc_size) { + addr = NULL; + crcaddr = skb->data + alloclen; + } + skb_put(skb, alloclen); + } + + i = CAS_VAL(RX_COMP4_TCP_CSUM, words[3]); + if (cp->crc_size) { + /* checksum includes FCS. strip it out. */ + i = csum_fold(csum_partial(crcaddr, cp->crc_size, i)); + if (addr) + cas_page_unmap(addr); + } + skb->csum = ntohs(i ^ 0xffff); + skb->ip_summed = CHECKSUM_HW; + skb->protocol = eth_type_trans(skb, cp->dev); + return len; +} + + +/* we can handle up to 64 rx flows at a time. we do the same thing + * as nonreassm except that we batch up the buffers. + * NOTE: we currently just treat each flow as a bunch of packets that + * we pass up. a better way would be to coalesce the packets + * into a jumbo packet. to do that, we need to do the following: + * 1) the first packet will have a clean split between header and + * data. save both. + * 2) each time the next flow packet comes in, extend the + * data length and merge the checksums. + * 3) on flow release, fix up the header. + * 4) make sure the higher layer doesn't care. + * because packets get coalesced, we shouldn't run into fragment count + * issues. + */ +static inline void cas_rx_flow_pkt(struct cas *cp, const u64 *words, + struct sk_buff *skb) +{ + int flowid = CAS_VAL(RX_COMP3_FLOWID, words[2]) & (N_RX_FLOWS - 1); + struct sk_buff_head *flow = &cp->rx_flows[flowid]; + + /* this is protected at a higher layer, so no need to + * do any additional locking here. stick the buffer + * at the end. + */ + __skb_insert(skb, flow->prev, (struct sk_buff *) flow, flow); + if (words[0] & RX_COMP1_RELEASE_FLOW) { + while ((skb = __skb_dequeue(flow))) { + cas_skb_release(skb); + } + } +} + +/* put rx descriptor back on ring. if a buffer is in use by a higher + * layer, this will need to put in a replacement. + */ +static void cas_post_page(struct cas *cp, const int ring, const int index) +{ + cas_page_t *new; + int entry; + + entry = cp->rx_old[ring]; + + new = cas_page_swap(cp, ring, index); + cp->init_rxds[ring][entry].buffer = cpu_to_le64(new->dma_addr); + cp->init_rxds[ring][entry].index = + cpu_to_le64(CAS_BASE(RX_INDEX_NUM, index) | + CAS_BASE(RX_INDEX_RING, ring)); + + entry = RX_DESC_ENTRY(ring, entry + 1); + cp->rx_old[ring] = entry; + + if (entry % 4) + return; + + if (ring == 0) + writel(entry, cp->regs + REG_RX_KICK); + else if ((N_RX_DESC_RINGS > 1) && + (cp->cas_flags & CAS_FLAG_REG_PLUS)) + writel(entry, cp->regs + REG_PLUS_RX_KICK1); +} + + +/* only when things are bad */ +static int cas_post_rxds_ringN(struct cas *cp, int ring, int num) +{ + unsigned int entry, last, count, released; + int cluster; + cas_page_t **page = cp->rx_pages[ring]; + + entry = cp->rx_old[ring]; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: rxd[%d] interrupt, done: %d\n", + cp->dev->name, ring, entry); + + cluster = -1; + count = entry & 0x3; + last = RX_DESC_ENTRY(ring, num ? entry + num - 4: entry - 4); + released = 0; + while (entry != last) { + /* make a new buffer if it's still in use */ + if (page_count(page[entry]->buffer) > 1) { + cas_page_t *new = cas_page_dequeue(cp); + if (!new) { + /* let the timer know that we need to + * do this again + */ + cp->cas_flags |= CAS_FLAG_RXD_POST(ring); + if (!timer_pending(&cp->link_timer)) + mod_timer(&cp->link_timer, jiffies + + CAS_LINK_FAST_TIMEOUT); + cp->rx_old[ring] = entry; + cp->rx_last[ring] = num ? num - released : 0; + return -ENOMEM; + } + spin_lock(&cp->rx_inuse_lock); + list_add(&page[entry]->list, &cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + cp->init_rxds[ring][entry].buffer = + cpu_to_le64(new->dma_addr); + page[entry] = new; + + } + + if (++count == 4) { + cluster = entry; + count = 0; + } + released++; + entry = RX_DESC_ENTRY(ring, entry + 1); + } + cp->rx_old[ring] = entry; + + if (cluster < 0) + return 0; + + if (ring == 0) + writel(cluster, cp->regs + REG_RX_KICK); + else if ((N_RX_DESC_RINGS > 1) && + (cp->cas_flags & CAS_FLAG_REG_PLUS)) + writel(cluster, cp->regs + REG_PLUS_RX_KICK1); + return 0; +} + + +/* process a completion ring. packets are set up in three basic ways: + * small packets: should be copied header + data in single buffer. + * large packets: header and data in a single buffer. + * split packets: header in a separate buffer from data. + * data may be in multiple pages. data may be > 256 + * bytes but in a single page. + * + * NOTE: RX page posting is done in this routine as well. while there's + * the capability of using multiple RX completion rings, it isn't + * really worthwhile due to the fact that the page posting will + * force serialization on the single descriptor ring. + */ +static int cas_rx_ringN(struct cas *cp, int ring, int budget) +{ + struct cas_rx_comp *rxcs = cp->init_rxcs[ring]; + int entry, drops; + int npackets = 0; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: rx[%d] interrupt, done: %d/%d\n", + cp->dev->name, ring, + readl(cp->regs + REG_RX_COMP_HEAD), + cp->rx_new[ring]); + + entry = cp->rx_new[ring]; + drops = 0; + while (1) { + struct cas_rx_comp *rxc = rxcs + entry; + struct sk_buff *skb; + int type, len; + u64 words[4]; + int i, dring; + + words[0] = le64_to_cpu(rxc->word1); + words[1] = le64_to_cpu(rxc->word2); + words[2] = le64_to_cpu(rxc->word3); + words[3] = le64_to_cpu(rxc->word4); + + /* don't touch if still owned by hw */ + type = CAS_VAL(RX_COMP1_TYPE, words[0]); + if (type == 0) + break; + + /* hw hasn't cleared the zero bit yet */ + if (words[3] & RX_COMP4_ZERO) { + break; + } + + /* get info on the packet */ + if (words[3] & (RX_COMP4_LEN_MISMATCH | RX_COMP4_BAD)) { + spin_lock(&cp->stat_lock[ring]); + cp->net_stats[ring].rx_errors++; + if (words[3] & RX_COMP4_LEN_MISMATCH) + cp->net_stats[ring].rx_length_errors++; + if (words[3] & RX_COMP4_BAD) + cp->net_stats[ring].rx_crc_errors++; + spin_unlock(&cp->stat_lock[ring]); + + /* We'll just return it to Cassini. */ + drop_it: + spin_lock(&cp->stat_lock[ring]); + ++cp->net_stats[ring].rx_dropped; + spin_unlock(&cp->stat_lock[ring]); + goto next; + } + + len = cas_rx_process_pkt(cp, rxc, entry, words, &skb); + if (len < 0) { + ++drops; + goto drop_it; + } + + /* see if it's a flow re-assembly or not. the driver + * itself handles release back up. + */ + if (RX_DONT_BATCH || (type == 0x2)) { + /* non-reassm: these always get released */ + cas_skb_release(skb); + } else { + cas_rx_flow_pkt(cp, words, skb); + } + + spin_lock(&cp->stat_lock[ring]); + cp->net_stats[ring].rx_packets++; + cp->net_stats[ring].rx_bytes += len; + spin_unlock(&cp->stat_lock[ring]); + cp->dev->last_rx = jiffies; + + next: + npackets++; + + /* should it be released? */ + if (words[0] & RX_COMP1_RELEASE_HDR) { + i = CAS_VAL(RX_COMP2_HDR_INDEX, words[1]); + dring = CAS_VAL(RX_INDEX_RING, i); + i = CAS_VAL(RX_INDEX_NUM, i); + cas_post_page(cp, dring, i); + } + + if (words[0] & RX_COMP1_RELEASE_DATA) { + i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]); + dring = CAS_VAL(RX_INDEX_RING, i); + i = CAS_VAL(RX_INDEX_NUM, i); + cas_post_page(cp, dring, i); + } + + if (words[0] & RX_COMP1_RELEASE_NEXT) { + i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]); + dring = CAS_VAL(RX_INDEX_RING, i); + i = CAS_VAL(RX_INDEX_NUM, i); + cas_post_page(cp, dring, i); + } + + /* skip to the next entry */ + entry = RX_COMP_ENTRY(ring, entry + 1 + + CAS_VAL(RX_COMP1_SKIP, words[0])); +#ifdef USE_NAPI + if (budget && (npackets >= budget)) + break; +#endif + } + cp->rx_new[ring] = entry; + + if (drops) + printk(KERN_INFO "%s: Memory squeeze, deferring packet.\n", + cp->dev->name); + return npackets; +} + + +/* put completion entries back on the ring */ +static void cas_post_rxcs_ringN(struct net_device *dev, + struct cas *cp, int ring) +{ + struct cas_rx_comp *rxc = cp->init_rxcs[ring]; + int last, entry; + + last = cp->rx_cur[ring]; + entry = cp->rx_new[ring]; + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: rxc[%d] interrupt, done: %d/%d\n", + dev->name, ring, readl(cp->regs + REG_RX_COMP_HEAD), + entry); + + /* zero and re-mark descriptors */ + while (last != entry) { + cas_rxc_init(rxc + last); + last = RX_COMP_ENTRY(ring, last + 1); + } + cp->rx_cur[ring] = last; + + if (ring == 0) + writel(last, cp->regs + REG_RX_COMP_TAIL); + else if (cp->cas_flags & CAS_FLAG_REG_PLUS) + writel(last, cp->regs + REG_PLUS_RX_COMPN_TAIL(ring)); +} + + + +/* cassini can use all four PCI interrupts for the completion ring. + * rings 3 and 4 are identical + */ +#if defined(USE_PCI_INTC) || defined(USE_PCI_INTD) +static inline void cas_handle_irqN(struct net_device *dev, + struct cas *cp, const u32 status, + const int ring) +{ + if (status & (INTR_RX_COMP_FULL_ALT | INTR_RX_COMP_AF_ALT)) + cas_post_rxcs_ringN(dev, cp, ring); +} + +static irqreturn_t cas_interruptN(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct cas *cp = netdev_priv(dev); + unsigned long flags; + int ring; + u32 status = readl(cp->regs + REG_PLUS_INTRN_STATUS(ring)); + + /* check for shared irq */ + if (status == 0) + return IRQ_NONE; + + ring = (irq == cp->pci_irq_INTC) ? 2 : 3; + spin_lock_irqsave(&cp->lock, flags); + if (status & INTR_RX_DONE_ALT) { /* handle rx separately */ +#ifdef USE_NAPI + cas_mask_intr(cp); + netif_rx_schedule(dev); +#else + cas_rx_ringN(cp, ring, 0); +#endif + status &= ~INTR_RX_DONE_ALT; + } + + if (status) + cas_handle_irqN(dev, cp, status, ring); + spin_unlock_irqrestore(&cp->lock, flags); + return IRQ_HANDLED; +} +#endif + +#ifdef USE_PCI_INTB +/* everything but rx packets */ +static inline void cas_handle_irq1(struct cas *cp, const u32 status) +{ + if (status & INTR_RX_BUF_UNAVAIL_1) { + /* Frame arrived, no free RX buffers available. + * NOTE: we can get this on a link transition. */ + cas_post_rxds_ringN(cp, 1, 0); + spin_lock(&cp->stat_lock[1]); + cp->net_stats[1].rx_dropped++; + spin_unlock(&cp->stat_lock[1]); + } + + if (status & INTR_RX_BUF_AE_1) + cas_post_rxds_ringN(cp, 1, RX_DESC_RINGN_SIZE(1) - + RX_AE_FREEN_VAL(1)); + + if (status & (INTR_RX_COMP_AF | INTR_RX_COMP_FULL)) + cas_post_rxcs_ringN(cp, 1); +} + +/* ring 2 handles a few more events than 3 and 4 */ +static irqreturn_t cas_interrupt1(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct cas *cp = netdev_priv(dev); + unsigned long flags; + u32 status = readl(cp->regs + REG_PLUS_INTRN_STATUS(1)); + + /* check for shared interrupt */ + if (status == 0) + return IRQ_NONE; + + spin_lock_irqsave(&cp->lock, flags); + if (status & INTR_RX_DONE_ALT) { /* handle rx separately */ +#ifdef USE_NAPI + cas_mask_intr(cp); + netif_rx_schedule(dev); +#else + cas_rx_ringN(cp, 1, 0); +#endif + status &= ~INTR_RX_DONE_ALT; + } + if (status) + cas_handle_irq1(cp, status); + spin_unlock_irqrestore(&cp->lock, flags); + return IRQ_HANDLED; +} +#endif + +static inline void cas_handle_irq(struct net_device *dev, + struct cas *cp, const u32 status) +{ + /* housekeeping interrupts */ + if (status & INTR_ERROR_MASK) + cas_abnormal_irq(dev, cp, status); + + if (status & INTR_RX_BUF_UNAVAIL) { + /* Frame arrived, no free RX buffers available. + * NOTE: we can get this on a link transition. + */ + cas_post_rxds_ringN(cp, 0, 0); + spin_lock(&cp->stat_lock[0]); + cp->net_stats[0].rx_dropped++; + spin_unlock(&cp->stat_lock[0]); + } else if (status & INTR_RX_BUF_AE) { + cas_post_rxds_ringN(cp, 0, RX_DESC_RINGN_SIZE(0) - + RX_AE_FREEN_VAL(0)); + } + + if (status & (INTR_RX_COMP_AF | INTR_RX_COMP_FULL)) + cas_post_rxcs_ringN(dev, cp, 0); +} + +static irqreturn_t cas_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct cas *cp = netdev_priv(dev); + unsigned long flags; + u32 status = readl(cp->regs + REG_INTR_STATUS); + + if (status == 0) + return IRQ_NONE; + + spin_lock_irqsave(&cp->lock, flags); + if (status & (INTR_TX_ALL | INTR_TX_INTME)) { + cas_tx(dev, cp, status); + status &= ~(INTR_TX_ALL | INTR_TX_INTME); + } + + if (status & INTR_RX_DONE) { +#ifdef USE_NAPI + cas_mask_intr(cp); + netif_rx_schedule(dev); +#else + cas_rx_ringN(cp, 0, 0); +#endif + status &= ~INTR_RX_DONE; + } + + if (status) + cas_handle_irq(dev, cp, status); + spin_unlock_irqrestore(&cp->lock, flags); + return IRQ_HANDLED; +} + + +#ifdef USE_NAPI +static int cas_poll(struct net_device *dev, int *budget) +{ + struct cas *cp = netdev_priv(dev); + int i, enable_intr, todo, credits; + u32 status = readl(cp->regs + REG_INTR_STATUS); + unsigned long flags; + + spin_lock_irqsave(&cp->lock, flags); + cas_tx(dev, cp, status); + spin_unlock_irqrestore(&cp->lock, flags); + + /* NAPI rx packets. we spread the credits across all of the + * rxc rings + */ + todo = min(*budget, dev->quota); + + /* to make sure we're fair with the work we loop through each + * ring N_RX_COMP_RING times with a request of + * todo / N_RX_COMP_RINGS + */ + enable_intr = 1; + credits = 0; + for (i = 0; i < N_RX_COMP_RINGS; i++) { + int j; + for (j = 0; j < N_RX_COMP_RINGS; j++) { + credits += cas_rx_ringN(cp, j, todo / N_RX_COMP_RINGS); + if (credits >= todo) { + enable_intr = 0; + goto rx_comp; + } + } + } + +rx_comp: + *budget -= credits; + dev->quota -= credits; + + /* final rx completion */ + spin_lock_irqsave(&cp->lock, flags); + if (status) + cas_handle_irq(dev, cp, status); + +#ifdef USE_PCI_INTB + if (N_RX_COMP_RINGS > 1) { + status = readl(cp->regs + REG_PLUS_INTRN_STATUS(1)); + if (status) + cas_handle_irq1(dev, cp, status); + } +#endif + +#ifdef USE_PCI_INTC + if (N_RX_COMP_RINGS > 2) { + status = readl(cp->regs + REG_PLUS_INTRN_STATUS(2)); + if (status) + cas_handle_irqN(dev, cp, status, 2); + } +#endif + +#ifdef USE_PCI_INTD + if (N_RX_COMP_RINGS > 3) { + status = readl(cp->regs + REG_PLUS_INTRN_STATUS(3)); + if (status) + cas_handle_irqN(dev, cp, status, 3); + } +#endif + spin_unlock_irqrestore(&cp->lock, flags); + if (enable_intr) { + netif_rx_complete(dev); + cas_unmask_intr(cp); + return 0; + } + return 1; +} +#endif + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void cas_netpoll(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + + cas_disable_irq(cp, 0); + cas_interrupt(cp->pdev->irq, dev, NULL); + cas_enable_irq(cp, 0); + +#ifdef USE_PCI_INTB + if (N_RX_COMP_RINGS > 1) { + /* cas_interrupt1(); */ + } +#endif +#ifdef USE_PCI_INTC + if (N_RX_COMP_RINGS > 2) { + /* cas_interruptN(); */ + } +#endif +#ifdef USE_PCI_INTD + if (N_RX_COMP_RINGS > 3) { + /* cas_interruptN(); */ + } +#endif +} +#endif + +static void cas_tx_timeout(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + + printk(KERN_ERR "%s: transmit timed out, resetting\n", dev->name); + if (!cp->hw_running) { + printk("%s: hrm.. hw not running!\n", dev->name); + return; + } + + printk(KERN_ERR "%s: MIF_STATE[%08x]\n", + dev->name, readl(cp->regs + REG_MIF_STATE_MACHINE)); + + printk(KERN_ERR "%s: MAC_STATE[%08x]\n", + dev->name, readl(cp->regs + REG_MAC_STATE_MACHINE)); + + printk(KERN_ERR "%s: TX_STATE[%08x:%08x:%08x] " + "FIFO[%08x:%08x:%08x] SM1[%08x] SM2[%08x]\n", + dev->name, + readl(cp->regs + REG_TX_CFG), + readl(cp->regs + REG_MAC_TX_STATUS), + readl(cp->regs + REG_MAC_TX_CFG), + readl(cp->regs + REG_TX_FIFO_PKT_CNT), + readl(cp->regs + REG_TX_FIFO_WRITE_PTR), + readl(cp->regs + REG_TX_FIFO_READ_PTR), + readl(cp->regs + REG_TX_SM_1), + readl(cp->regs + REG_TX_SM_2)); + + printk(KERN_ERR "%s: RX_STATE[%08x:%08x:%08x]\n", + dev->name, + readl(cp->regs + REG_RX_CFG), + readl(cp->regs + REG_MAC_RX_STATUS), + readl(cp->regs + REG_MAC_RX_CFG)); + + printk(KERN_ERR "%s: HP_STATE[%08x:%08x:%08x:%08x]\n", + dev->name, + readl(cp->regs + REG_HP_STATE_MACHINE), + readl(cp->regs + REG_HP_STATUS0), + readl(cp->regs + REG_HP_STATUS1), + readl(cp->regs + REG_HP_STATUS2)); + +#if 1 + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_all); + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, CAS_RESET_ALL); + schedule_work(&cp->reset_task); +#endif +} + +static inline int cas_intme(int ring, int entry) +{ + /* Algorithm: IRQ every 1/2 of descriptors. */ + if (!(entry & ((TX_DESC_RINGN_SIZE(ring) >> 1) - 1))) + return 1; + return 0; +} + + +static void cas_write_txd(struct cas *cp, int ring, int entry, + dma_addr_t mapping, int len, u64 ctrl, int last) +{ + struct cas_tx_desc *txd = cp->init_txds[ring] + entry; + + ctrl |= CAS_BASE(TX_DESC_BUFLEN, len); + if (cas_intme(ring, entry)) + ctrl |= TX_DESC_INTME; + if (last) + ctrl |= TX_DESC_EOF; + txd->control = cpu_to_le64(ctrl); + txd->buffer = cpu_to_le64(mapping); +} + +static inline void *tx_tiny_buf(struct cas *cp, const int ring, + const int entry) +{ + return cp->tx_tiny_bufs[ring] + TX_TINY_BUF_LEN*entry; +} + +static inline dma_addr_t tx_tiny_map(struct cas *cp, const int ring, + const int entry, const int tentry) +{ + cp->tx_tiny_use[ring][tentry].nbufs++; + cp->tx_tiny_use[ring][entry].used = 1; + return cp->tx_tiny_dvma[ring] + TX_TINY_BUF_LEN*entry; +} + +static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, + struct sk_buff *skb) +{ + struct net_device *dev = cp->dev; + int entry, nr_frags, frag, tabort, tentry; + dma_addr_t mapping; + unsigned long flags; + u64 ctrl; + u32 len; + + spin_lock_irqsave(&cp->tx_lock[ring], flags); + + /* This is a hard error, log it. */ + if (TX_BUFFS_AVAIL(cp, ring) <= + CAS_TABORT(cp)*(skb_shinfo(skb)->nr_frags + 1)) { + netif_stop_queue(dev); + spin_unlock_irqrestore(&cp->tx_lock[ring], flags); + printk(KERN_ERR PFX "%s: BUG! Tx Ring full when " + "queue awake!\n", dev->name); + return 1; + } + + ctrl = 0; + if (skb->ip_summed == CHECKSUM_HW) { + u64 csum_start_off, csum_stuff_off; + + csum_start_off = (u64) (skb->h.raw - skb->data); + csum_stuff_off = (u64) ((skb->h.raw + skb->csum) - skb->data); + + ctrl = TX_DESC_CSUM_EN | + CAS_BASE(TX_DESC_CSUM_START, csum_start_off) | + CAS_BASE(TX_DESC_CSUM_STUFF, csum_stuff_off); + } + + entry = cp->tx_new[ring]; + cp->tx_skbs[ring][entry] = skb; + + nr_frags = skb_shinfo(skb)->nr_frags; + len = skb_headlen(skb); + mapping = pci_map_page(cp->pdev, virt_to_page(skb->data), + offset_in_page(skb->data), len, + PCI_DMA_TODEVICE); + + tentry = entry; + tabort = cas_calc_tabort(cp, (unsigned long) skb->data, len); + if (unlikely(tabort)) { + /* NOTE: len is always > tabort */ + cas_write_txd(cp, ring, entry, mapping, len - tabort, + ctrl | TX_DESC_SOF, 0); + entry = TX_DESC_NEXT(ring, entry); + + memcpy(tx_tiny_buf(cp, ring, entry), skb->data + + len - tabort, tabort); + mapping = tx_tiny_map(cp, ring, entry, tentry); + cas_write_txd(cp, ring, entry, mapping, tabort, ctrl, + (nr_frags == 0)); + } else { + cas_write_txd(cp, ring, entry, mapping, len, ctrl | + TX_DESC_SOF, (nr_frags == 0)); + } + entry = TX_DESC_NEXT(ring, entry); + + for (frag = 0; frag < nr_frags; frag++) { + skb_frag_t *fragp = &skb_shinfo(skb)->frags[frag]; + + len = fragp->size; + mapping = pci_map_page(cp->pdev, fragp->page, + fragp->page_offset, len, + PCI_DMA_TODEVICE); + + tabort = cas_calc_tabort(cp, fragp->page_offset, len); + if (unlikely(tabort)) { + void *addr; + + /* NOTE: len is always > tabort */ + cas_write_txd(cp, ring, entry, mapping, len - tabort, + ctrl, 0); + entry = TX_DESC_NEXT(ring, entry); + + addr = cas_page_map(fragp->page); + memcpy(tx_tiny_buf(cp, ring, entry), + addr + fragp->page_offset + len - tabort, + tabort); + cas_page_unmap(addr); + mapping = tx_tiny_map(cp, ring, entry, tentry); + len = tabort; + } + + cas_write_txd(cp, ring, entry, mapping, len, ctrl, + (frag + 1 == nr_frags)); + entry = TX_DESC_NEXT(ring, entry); + } + + cp->tx_new[ring] = entry; + if (TX_BUFFS_AVAIL(cp, ring) <= CAS_TABORT(cp)*(MAX_SKB_FRAGS + 1)) + netif_stop_queue(dev); + + if (netif_msg_tx_queued(cp)) + printk(KERN_DEBUG "%s: tx[%d] queued, slot %d, skblen %d, " + "avail %d\n", + dev->name, ring, entry, skb->len, + TX_BUFFS_AVAIL(cp, ring)); + writel(entry, cp->regs + REG_TX_KICKN(ring)); + spin_unlock_irqrestore(&cp->tx_lock[ring], flags); + return 0; +} + +static int cas_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + + /* this is only used as a load-balancing hint, so it doesn't + * need to be SMP safe + */ + static int ring; + + skb = skb_padto(skb, cp->min_frame_size); + if (!skb) + return 0; + + /* XXX: we need some higher-level QoS hooks to steer packets to + * individual queues. + */ + if (cas_xmit_tx_ringN(cp, ring++ & N_TX_RINGS_MASK, skb)) + return 1; + dev->trans_start = jiffies; + return 0; +} + +static void cas_init_tx_dma(struct cas *cp) +{ + u64 desc_dma = cp->block_dvma; + unsigned long off; + u32 val; + int i; + + /* set up tx completion writeback registers. must be 8-byte aligned */ +#ifdef USE_TX_COMPWB + off = offsetof(struct cas_init_block, tx_compwb); + writel((desc_dma + off) >> 32, cp->regs + REG_TX_COMPWB_DB_HI); + writel((desc_dma + off) & 0xffffffff, cp->regs + REG_TX_COMPWB_DB_LOW); +#endif + + /* enable completion writebacks, enable paced mode, + * disable read pipe, and disable pre-interrupt compwbs + */ + val = TX_CFG_COMPWB_Q1 | TX_CFG_COMPWB_Q2 | + TX_CFG_COMPWB_Q3 | TX_CFG_COMPWB_Q4 | + TX_CFG_DMA_RDPIPE_DIS | TX_CFG_PACED_MODE | + TX_CFG_INTR_COMPWB_DIS; + + /* write out tx ring info and tx desc bases */ + for (i = 0; i < MAX_TX_RINGS; i++) { + off = (unsigned long) cp->init_txds[i] - + (unsigned long) cp->init_block; + + val |= CAS_TX_RINGN_BASE(i); + writel((desc_dma + off) >> 32, cp->regs + REG_TX_DBN_HI(i)); + writel((desc_dma + off) & 0xffffffff, cp->regs + + REG_TX_DBN_LOW(i)); + /* don't zero out the kick register here as the system + * will wedge + */ + } + writel(val, cp->regs + REG_TX_CFG); + + /* program max burst sizes. these numbers should be different + * if doing QoS. + */ +#ifdef USE_QOS + writel(0x800, cp->regs + REG_TX_MAXBURST_0); + writel(0x1600, cp->regs + REG_TX_MAXBURST_1); + writel(0x2400, cp->regs + REG_TX_MAXBURST_2); + writel(0x4800, cp->regs + REG_TX_MAXBURST_3); +#else + writel(0x800, cp->regs + REG_TX_MAXBURST_0); + writel(0x800, cp->regs + REG_TX_MAXBURST_1); + writel(0x800, cp->regs + REG_TX_MAXBURST_2); + writel(0x800, cp->regs + REG_TX_MAXBURST_3); +#endif +} + +/* Must be invoked under cp->lock. */ +static inline void cas_init_dma(struct cas *cp) +{ + cas_init_tx_dma(cp); + cas_init_rx_dma(cp); +} + +/* Must be invoked under cp->lock. */ +static u32 cas_setup_multicast(struct cas *cp) +{ + u32 rxcfg = 0; + int i; + + if (cp->dev->flags & IFF_PROMISC) { + rxcfg |= MAC_RX_CFG_PROMISC_EN; + + } else if (cp->dev->flags & IFF_ALLMULTI) { + for (i=0; i < 16; i++) + writel(0xFFFF, cp->regs + REG_MAC_HASH_TABLEN(i)); + rxcfg |= MAC_RX_CFG_HASH_FILTER_EN; + + } else { + u16 hash_table[16]; + u32 crc; + struct dev_mc_list *dmi = cp->dev->mc_list; + int i; + + /* use the alternate mac address registers for the + * first 15 multicast addresses + */ + for (i = 1; i <= CAS_MC_EXACT_MATCH_SIZE; i++) { + if (!dmi) { + writel(0x0, cp->regs + REG_MAC_ADDRN(i*3 + 0)); + writel(0x0, cp->regs + REG_MAC_ADDRN(i*3 + 1)); + writel(0x0, cp->regs + REG_MAC_ADDRN(i*3 + 2)); + continue; + } + writel((dmi->dmi_addr[4] << 8) | dmi->dmi_addr[5], + cp->regs + REG_MAC_ADDRN(i*3 + 0)); + writel((dmi->dmi_addr[2] << 8) | dmi->dmi_addr[3], + cp->regs + REG_MAC_ADDRN(i*3 + 1)); + writel((dmi->dmi_addr[0] << 8) | dmi->dmi_addr[1], + cp->regs + REG_MAC_ADDRN(i*3 + 2)); + dmi = dmi->next; + } + + /* use hw hash table for the next series of + * multicast addresses + */ + memset(hash_table, 0, sizeof(hash_table)); + while (dmi) { + crc = ether_crc_le(ETH_ALEN, dmi->dmi_addr); + crc >>= 24; + hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf)); + dmi = dmi->next; + } + for (i=0; i < 16; i++) + writel(hash_table[i], cp->regs + + REG_MAC_HASH_TABLEN(i)); + rxcfg |= MAC_RX_CFG_HASH_FILTER_EN; + } + + return rxcfg; +} + +/* must be invoked under cp->stat_lock[N_TX_RINGS] */ +static void cas_clear_mac_err(struct cas *cp) +{ + writel(0, cp->regs + REG_MAC_COLL_NORMAL); + writel(0, cp->regs + REG_MAC_COLL_FIRST); + writel(0, cp->regs + REG_MAC_COLL_EXCESS); + writel(0, cp->regs + REG_MAC_COLL_LATE); + writel(0, cp->regs + REG_MAC_TIMER_DEFER); + writel(0, cp->regs + REG_MAC_ATTEMPTS_PEAK); + writel(0, cp->regs + REG_MAC_RECV_FRAME); + writel(0, cp->regs + REG_MAC_LEN_ERR); + writel(0, cp->regs + REG_MAC_ALIGN_ERR); + writel(0, cp->regs + REG_MAC_FCS_ERR); + writel(0, cp->regs + REG_MAC_RX_CODE_ERR); +} + + +static void cas_mac_reset(struct cas *cp) +{ + int i; + + /* do both TX and RX reset */ + writel(0x1, cp->regs + REG_MAC_TX_RESET); + writel(0x1, cp->regs + REG_MAC_RX_RESET); + + /* wait for TX */ + i = STOP_TRIES; + while (i-- > 0) { + if (readl(cp->regs + REG_MAC_TX_RESET) == 0) + break; + udelay(10); + } + + /* wait for RX */ + i = STOP_TRIES; + while (i-- > 0) { + if (readl(cp->regs + REG_MAC_RX_RESET) == 0) + break; + udelay(10); + } + + if (readl(cp->regs + REG_MAC_TX_RESET) | + readl(cp->regs + REG_MAC_RX_RESET)) + printk(KERN_ERR "%s: mac tx[%d]/rx[%d] reset failed [%08x]\n", + cp->dev->name, readl(cp->regs + REG_MAC_TX_RESET), + readl(cp->regs + REG_MAC_RX_RESET), + readl(cp->regs + REG_MAC_STATE_MACHINE)); +} + + +/* Must be invoked under cp->lock. */ +static void cas_init_mac(struct cas *cp) +{ + unsigned char *e = &cp->dev->dev_addr[0]; + int i; +#ifdef CONFIG_CASSINI_MULTICAST_REG_WRITE + u32 rxcfg; +#endif + cas_mac_reset(cp); + + /* setup core arbitration weight register */ + writel(CAWR_RR_DIS, cp->regs + REG_CAWR); + + /* XXX Use pci_dma_burst_advice() */ +#if !defined(CONFIG_SPARC64) && !defined(CONFIG_ALPHA) + /* set the infinite burst register for chips that don't have + * pci issues. + */ + if ((cp->cas_flags & CAS_FLAG_TARGET_ABORT) == 0) + writel(INF_BURST_EN, cp->regs + REG_INF_BURST); +#endif + + writel(0x1BF0, cp->regs + REG_MAC_SEND_PAUSE); + + writel(0x00, cp->regs + REG_MAC_IPG0); + writel(0x08, cp->regs + REG_MAC_IPG1); + writel(0x04, cp->regs + REG_MAC_IPG2); + + /* change later for 802.3z */ + writel(0x40, cp->regs + REG_MAC_SLOT_TIME); + + /* min frame + FCS */ + writel(ETH_ZLEN + 4, cp->regs + REG_MAC_FRAMESIZE_MIN); + + /* Ethernet payload + header + FCS + optional VLAN tag. NOTE: we + * specify the maximum frame size to prevent RX tag errors on + * oversized frames. + */ + writel(CAS_BASE(MAC_FRAMESIZE_MAX_BURST, 0x2000) | + CAS_BASE(MAC_FRAMESIZE_MAX_FRAME, + (CAS_MAX_MTU + ETH_HLEN + 4 + 4)), + cp->regs + REG_MAC_FRAMESIZE_MAX); + + /* NOTE: crc_size is used as a surrogate for half-duplex. + * workaround saturn half-duplex issue by increasing preamble + * size to 65 bytes. + */ + if ((cp->cas_flags & CAS_FLAG_SATURN) && cp->crc_size) + writel(0x41, cp->regs + REG_MAC_PA_SIZE); + else + writel(0x07, cp->regs + REG_MAC_PA_SIZE); + writel(0x04, cp->regs + REG_MAC_JAM_SIZE); + writel(0x10, cp->regs + REG_MAC_ATTEMPT_LIMIT); + writel(0x8808, cp->regs + REG_MAC_CTRL_TYPE); + + writel((e[5] | (e[4] << 8)) & 0x3ff, cp->regs + REG_MAC_RANDOM_SEED); + + writel(0, cp->regs + REG_MAC_ADDR_FILTER0); + writel(0, cp->regs + REG_MAC_ADDR_FILTER1); + writel(0, cp->regs + REG_MAC_ADDR_FILTER2); + writel(0, cp->regs + REG_MAC_ADDR_FILTER2_1_MASK); + writel(0, cp->regs + REG_MAC_ADDR_FILTER0_MASK); + + /* setup mac address in perfect filter array */ + for (i = 0; i < 45; i++) + writel(0x0, cp->regs + REG_MAC_ADDRN(i)); + + writel((e[4] << 8) | e[5], cp->regs + REG_MAC_ADDRN(0)); + writel((e[2] << 8) | e[3], cp->regs + REG_MAC_ADDRN(1)); + writel((e[0] << 8) | e[1], cp->regs + REG_MAC_ADDRN(2)); + + writel(0x0001, cp->regs + REG_MAC_ADDRN(42)); + writel(0xc200, cp->regs + REG_MAC_ADDRN(43)); + writel(0x0180, cp->regs + REG_MAC_ADDRN(44)); + +#ifndef CONFIG_CASSINI_MULTICAST_REG_WRITE + cp->mac_rx_cfg = cas_setup_multicast(cp); +#else + /* WTZ: Do what Adrian did in cas_set_multicast. Doing + * a writel does not seem to be necessary because Cassini + * seems to preserve the configuration when we do the reset. + * If the chip is in trouble, though, it is not clear if we + * can really count on this behavior. cas_set_multicast uses + * spin_lock_irqsave, but we are called only in cas_init_hw and + * cas_init_hw is protected by cas_lock_all, which calls + * spin_lock_irq (so it doesn't need to save the flags, and + * we should be OK for the writel, as that is the only + * difference). + */ + cp->mac_rx_cfg = rxcfg = cas_setup_multicast(cp); + writel(rxcfg, cp->regs + REG_MAC_RX_CFG); +#endif + spin_lock(&cp->stat_lock[N_TX_RINGS]); + cas_clear_mac_err(cp); + spin_unlock(&cp->stat_lock[N_TX_RINGS]); + + /* Setup MAC interrupts. We want to get all of the interesting + * counter expiration events, but we do not want to hear about + * normal rx/tx as the DMA engine tells us that. + */ + writel(MAC_TX_FRAME_XMIT, cp->regs + REG_MAC_TX_MASK); + writel(MAC_RX_FRAME_RECV, cp->regs + REG_MAC_RX_MASK); + + /* Don't enable even the PAUSE interrupts for now, we + * make no use of those events other than to record them. + */ + writel(0xffffffff, cp->regs + REG_MAC_CTRL_MASK); +} + +/* Must be invoked under cp->lock. */ +static void cas_init_pause_thresholds(struct cas *cp) +{ + /* Calculate pause thresholds. Setting the OFF threshold to the + * full RX fifo size effectively disables PAUSE generation + */ + if (cp->rx_fifo_size <= (2 * 1024)) { + cp->rx_pause_off = cp->rx_pause_on = cp->rx_fifo_size; + } else { + int max_frame = (cp->dev->mtu + ETH_HLEN + 4 + 4 + 64) & ~63; + if (max_frame * 3 > cp->rx_fifo_size) { + cp->rx_pause_off = 7104; + cp->rx_pause_on = 960; + } else { + int off = (cp->rx_fifo_size - (max_frame * 2)); + int on = off - max_frame; + cp->rx_pause_off = off; + cp->rx_pause_on = on; + } + } +} + +static int cas_vpd_match(const void __iomem *p, const char *str) +{ + int len = strlen(str) + 1; + int i; + + for (i = 0; i < len; i++) { + if (readb(p + i) != str[i]) + return 0; + } + return 1; +} + + +/* get the mac address by reading the vpd information in the rom. + * also get the phy type and determine if there's an entropy generator. + * NOTE: this is a bit convoluted for the following reasons: + * 1) vpd info has order-dependent mac addresses for multinic cards + * 2) the only way to determine the nic order is to use the slot + * number. + * 3) fiber cards don't have bridges, so their slot numbers don't + * mean anything. + * 4) we don't actually know we have a fiber card until after + * the mac addresses are parsed. + */ +static int cas_get_vpd_info(struct cas *cp, unsigned char *dev_addr, + const int offset) +{ + void __iomem *p = cp->regs + REG_EXPANSION_ROM_RUN_START; + void __iomem *base, *kstart; + int i, len; + int found = 0; +#define VPD_FOUND_MAC 0x01 +#define VPD_FOUND_PHY 0x02 + + int phy_type = CAS_PHY_MII_MDIO0; /* default phy type */ + int mac_off = 0; + + /* give us access to the PROM */ + writel(BIM_LOCAL_DEV_PROM | BIM_LOCAL_DEV_PAD, + cp->regs + REG_BIM_LOCAL_DEV_EN); + + /* check for an expansion rom */ + if (readb(p) != 0x55 || readb(p + 1) != 0xaa) + goto use_random_mac_addr; + + /* search for beginning of vpd */ + base = 0; + for (i = 2; i < EXPANSION_ROM_SIZE; i++) { + /* check for PCIR */ + if ((readb(p + i + 0) == 0x50) && + (readb(p + i + 1) == 0x43) && + (readb(p + i + 2) == 0x49) && + (readb(p + i + 3) == 0x52)) { + base = p + (readb(p + i + 8) | + (readb(p + i + 9) << 8)); + break; + } + } + + if (!base || (readb(base) != 0x82)) + goto use_random_mac_addr; + + i = (readb(base + 1) | (readb(base + 2) << 8)) + 3; + while (i < EXPANSION_ROM_SIZE) { + if (readb(base + i) != 0x90) /* no vpd found */ + goto use_random_mac_addr; + + /* found a vpd field */ + len = readb(base + i + 1) | (readb(base + i + 2) << 8); + + /* extract keywords */ + kstart = base + i + 3; + p = kstart; + while ((p - kstart) < len) { + int klen = readb(p + 2); + int j; + char type; + + p += 3; + + /* look for the following things: + * -- correct length == 29 + * 3 (type) + 2 (size) + + * 18 (strlen("local-mac-address") + 1) + + * 6 (mac addr) + * -- VPD Instance 'I' + * -- VPD Type Bytes 'B' + * -- VPD data length == 6 + * -- property string == local-mac-address + * + * -- correct length == 24 + * 3 (type) + 2 (size) + + * 12 (strlen("entropy-dev") + 1) + + * 7 (strlen("vms110") + 1) + * -- VPD Instance 'I' + * -- VPD Type String 'B' + * -- VPD data length == 7 + * -- property string == entropy-dev + * + * -- correct length == 18 + * 3 (type) + 2 (size) + + * 9 (strlen("phy-type") + 1) + + * 4 (strlen("pcs") + 1) + * -- VPD Instance 'I' + * -- VPD Type String 'S' + * -- VPD data length == 4 + * -- property string == phy-type + * + * -- correct length == 23 + * 3 (type) + 2 (size) + + * 14 (strlen("phy-interface") + 1) + + * 4 (strlen("pcs") + 1) + * -- VPD Instance 'I' + * -- VPD Type String 'S' + * -- VPD data length == 4 + * -- property string == phy-interface + */ + if (readb(p) != 'I') + goto next; + + /* finally, check string and length */ + type = readb(p + 3); + if (type == 'B') { + if ((klen == 29) && readb(p + 4) == 6 && + cas_vpd_match(p + 5, + "local-mac-address")) { + if (mac_off++ > offset) + goto next; + + /* set mac address */ + for (j = 0; j < 6; j++) + dev_addr[j] = + readb(p + 23 + j); + goto found_mac; + } + } + + if (type != 'S') + goto next; + +#ifdef USE_ENTROPY_DEV + if ((klen == 24) && + cas_vpd_match(p + 5, "entropy-dev") && + cas_vpd_match(p + 17, "vms110")) { + cp->cas_flags |= CAS_FLAG_ENTROPY_DEV; + goto next; + } +#endif + + if (found & VPD_FOUND_PHY) + goto next; + + if ((klen == 18) && readb(p + 4) == 4 && + cas_vpd_match(p + 5, "phy-type")) { + if (cas_vpd_match(p + 14, "pcs")) { + phy_type = CAS_PHY_SERDES; + goto found_phy; + } + } + + if ((klen == 23) && readb(p + 4) == 4 && + cas_vpd_match(p + 5, "phy-interface")) { + if (cas_vpd_match(p + 19, "pcs")) { + phy_type = CAS_PHY_SERDES; + goto found_phy; + } + } +found_mac: + found |= VPD_FOUND_MAC; + goto next; + +found_phy: + found |= VPD_FOUND_PHY; + +next: + p += klen; + } + i += len + 3; + } + +use_random_mac_addr: + if (found & VPD_FOUND_MAC) + goto done; + + /* Sun MAC prefix then 3 random bytes. */ + printk(PFX "MAC address not found in ROM VPD\n"); + dev_addr[0] = 0x08; + dev_addr[1] = 0x00; + dev_addr[2] = 0x20; + get_random_bytes(dev_addr + 3, 3); + +done: + writel(0, cp->regs + REG_BIM_LOCAL_DEV_EN); + return phy_type; +} + +/* check pci invariants */ +static void cas_check_pci_invariants(struct cas *cp) +{ + struct pci_dev *pdev = cp->pdev; + u8 rev; + + cp->cas_flags = 0; + pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); + if ((pdev->vendor == PCI_VENDOR_ID_SUN) && + (pdev->device == PCI_DEVICE_ID_SUN_CASSINI)) { + if (rev >= CAS_ID_REVPLUS) + cp->cas_flags |= CAS_FLAG_REG_PLUS; + if (rev < CAS_ID_REVPLUS02u) + cp->cas_flags |= CAS_FLAG_TARGET_ABORT; + + /* Original Cassini supports HW CSUM, but it's not + * enabled by default as it can trigger TX hangs. + */ + if (rev < CAS_ID_REV2) + cp->cas_flags |= CAS_FLAG_NO_HW_CSUM; + } else { + /* Only sun has original cassini chips. */ + cp->cas_flags |= CAS_FLAG_REG_PLUS; + + /* We use a flag because the same phy might be externally + * connected. + */ + if ((pdev->vendor == PCI_VENDOR_ID_NS) && + (pdev->device == PCI_DEVICE_ID_NS_SATURN)) + cp->cas_flags |= CAS_FLAG_SATURN; + } +} + + +static int cas_check_invariants(struct cas *cp) +{ + struct pci_dev *pdev = cp->pdev; + u32 cfg; + int i; + + /* get page size for rx buffers. */ + cp->page_order = 0; +#ifdef USE_PAGE_ORDER + if (PAGE_SHIFT < CAS_JUMBO_PAGE_SHIFT) { + /* see if we can allocate larger pages */ + struct page *page = alloc_pages(GFP_ATOMIC, + CAS_JUMBO_PAGE_SHIFT - + PAGE_SHIFT); + if (page) { + __free_pages(page, CAS_JUMBO_PAGE_SHIFT - PAGE_SHIFT); + cp->page_order = CAS_JUMBO_PAGE_SHIFT - PAGE_SHIFT; + } else { + printk(PFX "MTU limited to %d bytes\n", CAS_MAX_MTU); + } + } +#endif + cp->page_size = (PAGE_SIZE << cp->page_order); + + /* Fetch the FIFO configurations. */ + cp->tx_fifo_size = readl(cp->regs + REG_TX_FIFO_SIZE) * 64; + cp->rx_fifo_size = RX_FIFO_SIZE; + + /* finish phy determination. MDIO1 takes precedence over MDIO0 if + * they're both connected. + */ + cp->phy_type = cas_get_vpd_info(cp, cp->dev->dev_addr, + PCI_SLOT(pdev->devfn)); + if (cp->phy_type & CAS_PHY_SERDES) { + cp->cas_flags |= CAS_FLAG_1000MB_CAP; + return 0; /* no more checking needed */ + } + + /* MII */ + cfg = readl(cp->regs + REG_MIF_CFG); + if (cfg & MIF_CFG_MDIO_1) { + cp->phy_type = CAS_PHY_MII_MDIO1; + } else if (cfg & MIF_CFG_MDIO_0) { + cp->phy_type = CAS_PHY_MII_MDIO0; + } + + cas_mif_poll(cp, 0); + writel(PCS_DATAPATH_MODE_MII, cp->regs + REG_PCS_DATAPATH_MODE); + + for (i = 0; i < 32; i++) { + u32 phy_id; + int j; + + for (j = 0; j < 3; j++) { + cp->phy_addr = i; + phy_id = cas_phy_read(cp, MII_PHYSID1) << 16; + phy_id |= cas_phy_read(cp, MII_PHYSID2); + if (phy_id && (phy_id != 0xFFFFFFFF)) { + cp->phy_id = phy_id; + goto done; + } + } + } + printk(KERN_ERR PFX "MII phy did not respond [%08x]\n", + readl(cp->regs + REG_MIF_STATE_MACHINE)); + return -1; + +done: + /* see if we can do gigabit */ + cfg = cas_phy_read(cp, MII_BMSR); + if ((cfg & CAS_BMSR_1000_EXTEND) && + cas_phy_read(cp, CAS_MII_1000_EXTEND)) + cp->cas_flags |= CAS_FLAG_1000MB_CAP; + return 0; +} + +/* Must be invoked under cp->lock. */ +static inline void cas_start_dma(struct cas *cp) +{ + int i; + u32 val; + int txfailed = 0; + + /* enable dma */ + val = readl(cp->regs + REG_TX_CFG) | TX_CFG_DMA_EN; + writel(val, cp->regs + REG_TX_CFG); + val = readl(cp->regs + REG_RX_CFG) | RX_CFG_DMA_EN; + writel(val, cp->regs + REG_RX_CFG); + + /* enable the mac */ + val = readl(cp->regs + REG_MAC_TX_CFG) | MAC_TX_CFG_EN; + writel(val, cp->regs + REG_MAC_TX_CFG); + val = readl(cp->regs + REG_MAC_RX_CFG) | MAC_RX_CFG_EN; + writel(val, cp->regs + REG_MAC_RX_CFG); + + i = STOP_TRIES; + while (i-- > 0) { + val = readl(cp->regs + REG_MAC_TX_CFG); + if ((val & MAC_TX_CFG_EN)) + break; + udelay(10); + } + if (i < 0) txfailed = 1; + i = STOP_TRIES; + while (i-- > 0) { + val = readl(cp->regs + REG_MAC_RX_CFG); + if ((val & MAC_RX_CFG_EN)) { + if (txfailed) { + printk(KERN_ERR + "%s: enabling mac failed [tx:%08x:%08x].\n", + cp->dev->name, + readl(cp->regs + REG_MIF_STATE_MACHINE), + readl(cp->regs + REG_MAC_STATE_MACHINE)); + } + goto enable_rx_done; + } + udelay(10); + } + printk(KERN_ERR "%s: enabling mac failed [%s:%08x:%08x].\n", + cp->dev->name, + (txfailed? "tx,rx":"rx"), + readl(cp->regs + REG_MIF_STATE_MACHINE), + readl(cp->regs + REG_MAC_STATE_MACHINE)); + +enable_rx_done: + cas_unmask_intr(cp); /* enable interrupts */ + writel(RX_DESC_RINGN_SIZE(0) - 4, cp->regs + REG_RX_KICK); + writel(0, cp->regs + REG_RX_COMP_TAIL); + + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + if (N_RX_DESC_RINGS > 1) + writel(RX_DESC_RINGN_SIZE(1) - 4, + cp->regs + REG_PLUS_RX_KICK1); + + for (i = 1; i < N_RX_COMP_RINGS; i++) + writel(0, cp->regs + REG_PLUS_RX_COMPN_TAIL(i)); + } +} + +/* Must be invoked under cp->lock. */ +static void cas_read_pcs_link_mode(struct cas *cp, int *fd, int *spd, + int *pause) +{ + u32 val = readl(cp->regs + REG_PCS_MII_LPA); + *fd = (val & PCS_MII_LPA_FD) ? 1 : 0; + *pause = (val & PCS_MII_LPA_SYM_PAUSE) ? 0x01 : 0x00; + if (val & PCS_MII_LPA_ASYM_PAUSE) + *pause |= 0x10; + *spd = 1000; +} + +/* Must be invoked under cp->lock. */ +static void cas_read_mii_link_mode(struct cas *cp, int *fd, int *spd, + int *pause) +{ + u32 val; + + *fd = 0; + *spd = 10; + *pause = 0; + + /* use GMII registers */ + val = cas_phy_read(cp, MII_LPA); + if (val & CAS_LPA_PAUSE) + *pause = 0x01; + + if (val & CAS_LPA_ASYM_PAUSE) + *pause |= 0x10; + + if (val & LPA_DUPLEX) + *fd = 1; + if (val & LPA_100) + *spd = 100; + + if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { + val = cas_phy_read(cp, CAS_MII_1000_STATUS); + if (val & (CAS_LPA_1000FULL | CAS_LPA_1000HALF)) + *spd = 1000; + if (val & CAS_LPA_1000FULL) + *fd = 1; + } +} + +/* A link-up condition has occurred, initialize and enable the + * rest of the chip. + * + * Must be invoked under cp->lock. + */ +static void cas_set_link_modes(struct cas *cp) +{ + u32 val; + int full_duplex, speed, pause; + + full_duplex = 0; + speed = 10; + pause = 0; + + if (CAS_PHY_MII(cp->phy_type)) { + cas_mif_poll(cp, 0); + val = cas_phy_read(cp, MII_BMCR); + if (val & BMCR_ANENABLE) { + cas_read_mii_link_mode(cp, &full_duplex, &speed, + &pause); + } else { + if (val & BMCR_FULLDPLX) + full_duplex = 1; + + if (val & BMCR_SPEED100) + speed = 100; + else if (val & CAS_BMCR_SPEED1000) + speed = (cp->cas_flags & CAS_FLAG_1000MB_CAP) ? + 1000 : 100; + } + cas_mif_poll(cp, 1); + + } else { + val = readl(cp->regs + REG_PCS_MII_CTRL); + cas_read_pcs_link_mode(cp, &full_duplex, &speed, &pause); + if ((val & PCS_MII_AUTONEG_EN) == 0) { + if (val & PCS_MII_CTRL_DUPLEX) + full_duplex = 1; + } + } + + if (netif_msg_link(cp)) + printk(KERN_INFO "%s: Link up at %d Mbps, %s-duplex.\n", + cp->dev->name, speed, (full_duplex ? "full" : "half")); + + val = MAC_XIF_TX_MII_OUTPUT_EN | MAC_XIF_LINK_LED; + if (CAS_PHY_MII(cp->phy_type)) { + val |= MAC_XIF_MII_BUFFER_OUTPUT_EN; + if (!full_duplex) + val |= MAC_XIF_DISABLE_ECHO; + } + if (full_duplex) + val |= MAC_XIF_FDPLX_LED; + if (speed == 1000) + val |= MAC_XIF_GMII_MODE; + writel(val, cp->regs + REG_MAC_XIF_CFG); + + /* deal with carrier and collision detect. */ + val = MAC_TX_CFG_IPG_EN; + if (full_duplex) { + val |= MAC_TX_CFG_IGNORE_CARRIER; + val |= MAC_TX_CFG_IGNORE_COLL; + } else { +#ifndef USE_CSMA_CD_PROTO + val |= MAC_TX_CFG_NEVER_GIVE_UP_EN; + val |= MAC_TX_CFG_NEVER_GIVE_UP_LIM; +#endif + } + /* val now set up for REG_MAC_TX_CFG */ + + /* If gigabit and half-duplex, enable carrier extension + * mode. increase slot time to 512 bytes as well. + * else, disable it and make sure slot time is 64 bytes. + * also activate checksum bug workaround + */ + if ((speed == 1000) && !full_duplex) { + writel(val | MAC_TX_CFG_CARRIER_EXTEND, + cp->regs + REG_MAC_TX_CFG); + + val = readl(cp->regs + REG_MAC_RX_CFG); + val &= ~MAC_RX_CFG_STRIP_FCS; /* checksum workaround */ + writel(val | MAC_RX_CFG_CARRIER_EXTEND, + cp->regs + REG_MAC_RX_CFG); + + writel(0x200, cp->regs + REG_MAC_SLOT_TIME); + + cp->crc_size = 4; + /* minimum size gigabit frame at half duplex */ + cp->min_frame_size = CAS_1000MB_MIN_FRAME; + + } else { + writel(val, cp->regs + REG_MAC_TX_CFG); + + /* checksum bug workaround. don't strip FCS when in + * half-duplex mode + */ + val = readl(cp->regs + REG_MAC_RX_CFG); + if (full_duplex) { + val |= MAC_RX_CFG_STRIP_FCS; + cp->crc_size = 0; + cp->min_frame_size = CAS_MIN_MTU; + } else { + val &= ~MAC_RX_CFG_STRIP_FCS; + cp->crc_size = 4; + cp->min_frame_size = CAS_MIN_FRAME; + } + writel(val & ~MAC_RX_CFG_CARRIER_EXTEND, + cp->regs + REG_MAC_RX_CFG); + writel(0x40, cp->regs + REG_MAC_SLOT_TIME); + } + + if (netif_msg_link(cp)) { + if (pause & 0x01) { + printk(KERN_INFO "%s: Pause is enabled " + "(rxfifo: %d off: %d on: %d)\n", + cp->dev->name, + cp->rx_fifo_size, + cp->rx_pause_off, + cp->rx_pause_on); + } else if (pause & 0x10) { + printk(KERN_INFO "%s: TX pause enabled\n", + cp->dev->name); + } else { + printk(KERN_INFO "%s: Pause is disabled\n", + cp->dev->name); + } + } + + val = readl(cp->regs + REG_MAC_CTRL_CFG); + val &= ~(MAC_CTRL_CFG_SEND_PAUSE_EN | MAC_CTRL_CFG_RECV_PAUSE_EN); + if (pause) { /* symmetric or asymmetric pause */ + val |= MAC_CTRL_CFG_SEND_PAUSE_EN; + if (pause & 0x01) { /* symmetric pause */ + val |= MAC_CTRL_CFG_RECV_PAUSE_EN; + } + } + writel(val, cp->regs + REG_MAC_CTRL_CFG); + cas_start_dma(cp); +} + +/* Must be invoked under cp->lock. */ +static void cas_init_hw(struct cas *cp, int restart_link) +{ + if (restart_link) + cas_phy_init(cp); + + cas_init_pause_thresholds(cp); + cas_init_mac(cp); + cas_init_dma(cp); + + if (restart_link) { + /* Default aneg parameters */ + cp->timer_ticks = 0; + cas_begin_auto_negotiation(cp, NULL); + } else if (cp->lstate == link_up) { + cas_set_link_modes(cp); + netif_carrier_on(cp->dev); + } +} + +/* Must be invoked under cp->lock. on earlier cassini boards, + * SOFT_0 is tied to PCI reset. we use this to force a pci reset, + * let it settle out, and then restore pci state. + */ +static void cas_hard_reset(struct cas *cp) +{ + writel(BIM_LOCAL_DEV_SOFT_0, cp->regs + REG_BIM_LOCAL_DEV_EN); + udelay(20); + pci_restore_state(cp->pdev); +} + + +static void cas_global_reset(struct cas *cp, int blkflag) +{ + int limit; + + /* issue a global reset. don't use RSTOUT. */ + if (blkflag && !CAS_PHY_MII(cp->phy_type)) { + /* For PCS, when the blkflag is set, we should set the + * SW_REST_BLOCK_PCS_SLINK bit to prevent the results of + * the last autonegotiation from being cleared. We'll + * need some special handling if the chip is set into a + * loopback mode. + */ + writel((SW_RESET_TX | SW_RESET_RX | SW_RESET_BLOCK_PCS_SLINK), + cp->regs + REG_SW_RESET); + } else { + writel(SW_RESET_TX | SW_RESET_RX, cp->regs + REG_SW_RESET); + } + + /* need to wait at least 3ms before polling register */ + mdelay(3); + + limit = STOP_TRIES; + while (limit-- > 0) { + u32 val = readl(cp->regs + REG_SW_RESET); + if ((val & (SW_RESET_TX | SW_RESET_RX)) == 0) + goto done; + udelay(10); + } + printk(KERN_ERR "%s: sw reset failed.\n", cp->dev->name); + +done: + /* enable various BIM interrupts */ + writel(BIM_CFG_DPAR_INTR_ENABLE | BIM_CFG_RMA_INTR_ENABLE | + BIM_CFG_RTA_INTR_ENABLE, cp->regs + REG_BIM_CFG); + + /* clear out pci error status mask for handled errors. + * we don't deal with DMA counter overflows as they happen + * all the time. + */ + writel(0xFFFFFFFFU & ~(PCI_ERR_BADACK | PCI_ERR_DTRTO | + PCI_ERR_OTHER | PCI_ERR_BIM_DMA_WRITE | + PCI_ERR_BIM_DMA_READ), cp->regs + + REG_PCI_ERR_STATUS_MASK); + + /* set up for MII by default to address mac rx reset timeout + * issue + */ + writel(PCS_DATAPATH_MODE_MII, cp->regs + REG_PCS_DATAPATH_MODE); +} + +static void cas_reset(struct cas *cp, int blkflag) +{ + u32 val; + + cas_mask_intr(cp); + cas_global_reset(cp, blkflag); + cas_mac_reset(cp); + cas_entropy_reset(cp); + + /* disable dma engines. */ + val = readl(cp->regs + REG_TX_CFG); + val &= ~TX_CFG_DMA_EN; + writel(val, cp->regs + REG_TX_CFG); + + val = readl(cp->regs + REG_RX_CFG); + val &= ~RX_CFG_DMA_EN; + writel(val, cp->regs + REG_RX_CFG); + + /* program header parser */ + if ((cp->cas_flags & CAS_FLAG_TARGET_ABORT) || + (CAS_HP_ALT_FIRMWARE == cas_prog_null)) { + cas_load_firmware(cp, CAS_HP_FIRMWARE); + } else { + cas_load_firmware(cp, CAS_HP_ALT_FIRMWARE); + } + + /* clear out error registers */ + spin_lock(&cp->stat_lock[N_TX_RINGS]); + cas_clear_mac_err(cp); + spin_unlock(&cp->stat_lock[N_TX_RINGS]); +} + +/* Shut down the chip, must be called with pm_sem held. */ +static void cas_shutdown(struct cas *cp) +{ + unsigned long flags; + + /* Make us not-running to avoid timers respawning */ + cp->hw_running = 0; + + del_timer_sync(&cp->link_timer); + + /* Stop the reset task */ +#if 0 + while (atomic_read(&cp->reset_task_pending_mtu) || + atomic_read(&cp->reset_task_pending_spare) || + atomic_read(&cp->reset_task_pending_all)) + schedule(); + +#else + while (atomic_read(&cp->reset_task_pending)) + schedule(); +#endif + /* Actually stop the chip */ + cas_lock_all_save(cp, flags); + cas_reset(cp, 0); + if (cp->cas_flags & CAS_FLAG_SATURN) + cas_phy_powerdown(cp); + cas_unlock_all_restore(cp, flags); +} + +static int cas_change_mtu(struct net_device *dev, int new_mtu) +{ + struct cas *cp = netdev_priv(dev); + + if (new_mtu < CAS_MIN_MTU || new_mtu > CAS_MAX_MTU) + return -EINVAL; + + dev->mtu = new_mtu; + if (!netif_running(dev) || !netif_device_present(dev)) + return 0; + + /* let the reset task handle it */ +#if 1 + atomic_inc(&cp->reset_task_pending); + if ((cp->phy_type & CAS_PHY_SERDES)) { + atomic_inc(&cp->reset_task_pending_all); + } else { + atomic_inc(&cp->reset_task_pending_mtu); + } + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, (cp->phy_type & CAS_PHY_SERDES) ? + CAS_RESET_ALL : CAS_RESET_MTU); + printk(KERN_ERR "reset called in cas_change_mtu\n"); + schedule_work(&cp->reset_task); +#endif + + flush_scheduled_work(); + return 0; +} + +static void cas_clean_txd(struct cas *cp, int ring) +{ + struct cas_tx_desc *txd = cp->init_txds[ring]; + struct sk_buff *skb, **skbs = cp->tx_skbs[ring]; + u64 daddr, dlen; + int i, size; + + size = TX_DESC_RINGN_SIZE(ring); + for (i = 0; i < size; i++) { + int frag; + + if (skbs[i] == NULL) + continue; + + skb = skbs[i]; + skbs[i] = NULL; + + for (frag = 0; frag <= skb_shinfo(skb)->nr_frags; frag++) { + int ent = i & (size - 1); + + /* first buffer is never a tiny buffer and so + * needs to be unmapped. + */ + daddr = le64_to_cpu(txd[ent].buffer); + dlen = CAS_VAL(TX_DESC_BUFLEN, + le64_to_cpu(txd[ent].control)); + pci_unmap_page(cp->pdev, daddr, dlen, + PCI_DMA_TODEVICE); + + if (frag != skb_shinfo(skb)->nr_frags) { + i++; + + /* next buffer might by a tiny buffer. + * skip past it. + */ + ent = i & (size - 1); + if (cp->tx_tiny_use[ring][ent].used) + i++; + } + } + dev_kfree_skb_any(skb); + } + + /* zero out tiny buf usage */ + memset(cp->tx_tiny_use[ring], 0, size*sizeof(*cp->tx_tiny_use[ring])); +} + +/* freed on close */ +static inline void cas_free_rx_desc(struct cas *cp, int ring) +{ + cas_page_t **page = cp->rx_pages[ring]; + int i, size; + + size = RX_DESC_RINGN_SIZE(ring); + for (i = 0; i < size; i++) { + if (page[i]) { + cas_page_free(cp, page[i]); + page[i] = NULL; + } + } +} + +static void cas_free_rxds(struct cas *cp) +{ + int i; + + for (i = 0; i < N_RX_DESC_RINGS; i++) + cas_free_rx_desc(cp, i); +} + +/* Must be invoked under cp->lock. */ +static void cas_clean_rings(struct cas *cp) +{ + int i; + + /* need to clean all tx rings */ + memset(cp->tx_old, 0, sizeof(*cp->tx_old)*N_TX_RINGS); + memset(cp->tx_new, 0, sizeof(*cp->tx_new)*N_TX_RINGS); + for (i = 0; i < N_TX_RINGS; i++) + cas_clean_txd(cp, i); + + /* zero out init block */ + memset(cp->init_block, 0, sizeof(struct cas_init_block)); + cas_clean_rxds(cp); + cas_clean_rxcs(cp); +} + +/* allocated on open */ +static inline int cas_alloc_rx_desc(struct cas *cp, int ring) +{ + cas_page_t **page = cp->rx_pages[ring]; + int size, i = 0; + + size = RX_DESC_RINGN_SIZE(ring); + for (i = 0; i < size; i++) { + if ((page[i] = cas_page_alloc(cp, GFP_KERNEL)) == NULL) + return -1; + } + return 0; +} + +static int cas_alloc_rxds(struct cas *cp) +{ + int i; + + for (i = 0; i < N_RX_DESC_RINGS; i++) { + if (cas_alloc_rx_desc(cp, i) < 0) { + cas_free_rxds(cp); + return -1; + } + } + return 0; +} + +static void cas_reset_task(void *data) +{ + struct cas *cp = (struct cas *) data; +#if 0 + int pending = atomic_read(&cp->reset_task_pending); +#else + int pending_all = atomic_read(&cp->reset_task_pending_all); + int pending_spare = atomic_read(&cp->reset_task_pending_spare); + int pending_mtu = atomic_read(&cp->reset_task_pending_mtu); + + if (pending_all == 0 && pending_spare == 0 && pending_mtu == 0) { + /* We can have more tasks scheduled than actually + * needed. + */ + atomic_dec(&cp->reset_task_pending); + return; + } +#endif + /* The link went down, we reset the ring, but keep + * DMA stopped. Use this function for reset + * on error as well. + */ + if (cp->hw_running) { + unsigned long flags; + + /* Make sure we don't get interrupts or tx packets */ + netif_device_detach(cp->dev); + cas_lock_all_save(cp, flags); + + if (cp->opened) { + /* We call cas_spare_recover when we call cas_open. + * but we do not initialize the lists cas_spare_recover + * uses until cas_open is called. + */ + cas_spare_recover(cp, GFP_ATOMIC); + } +#if 1 + /* test => only pending_spare set */ + if (!pending_all && !pending_mtu) + goto done; +#else + if (pending == CAS_RESET_SPARE) + goto done; +#endif + /* when pending == CAS_RESET_ALL, the following + * call to cas_init_hw will restart auto negotiation. + * Setting the second argument of cas_reset to + * !(pending == CAS_RESET_ALL) will set this argument + * to 1 (avoiding reinitializing the PHY for the normal + * PCS case) when auto negotiation is not restarted. + */ +#if 1 + cas_reset(cp, !(pending_all > 0)); + if (cp->opened) + cas_clean_rings(cp); + cas_init_hw(cp, (pending_all > 0)); +#else + cas_reset(cp, !(pending == CAS_RESET_ALL)); + if (cp->opened) + cas_clean_rings(cp); + cas_init_hw(cp, pending == CAS_RESET_ALL); +#endif + +done: + cas_unlock_all_restore(cp, flags); + netif_device_attach(cp->dev); + } +#if 1 + atomic_sub(pending_all, &cp->reset_task_pending_all); + atomic_sub(pending_spare, &cp->reset_task_pending_spare); + atomic_sub(pending_mtu, &cp->reset_task_pending_mtu); + atomic_dec(&cp->reset_task_pending); +#else + atomic_set(&cp->reset_task_pending, 0); +#endif +} + +static void cas_link_timer(unsigned long data) +{ + struct cas *cp = (struct cas *) data; + int mask, pending = 0, reset = 0; + unsigned long flags; + + if (link_transition_timeout != 0 && + cp->link_transition_jiffies_valid && + ((jiffies - cp->link_transition_jiffies) > + (link_transition_timeout))) { + /* One-second counter so link-down workaround doesn't + * cause resets to occur so fast as to fool the switch + * into thinking the link is down. + */ + cp->link_transition_jiffies_valid = 0; + } + + if (!cp->hw_running) + return; + + spin_lock_irqsave(&cp->lock, flags); + cas_lock_tx(cp); + cas_entropy_gather(cp); + + /* If the link task is still pending, we just + * reschedule the link timer + */ +#if 1 + if (atomic_read(&cp->reset_task_pending_all) || + atomic_read(&cp->reset_task_pending_spare) || + atomic_read(&cp->reset_task_pending_mtu)) + goto done; +#else + if (atomic_read(&cp->reset_task_pending)) + goto done; +#endif + + /* check for rx cleaning */ + if ((mask = (cp->cas_flags & CAS_FLAG_RXD_POST_MASK))) { + int i, rmask; + + for (i = 0; i < MAX_RX_DESC_RINGS; i++) { + rmask = CAS_FLAG_RXD_POST(i); + if ((mask & rmask) == 0) + continue; + + /* post_rxds will do a mod_timer */ + if (cas_post_rxds_ringN(cp, i, cp->rx_last[i]) < 0) { + pending = 1; + continue; + } + cp->cas_flags &= ~rmask; + } + } + + if (CAS_PHY_MII(cp->phy_type)) { + u16 bmsr; + cas_mif_poll(cp, 0); + bmsr = cas_phy_read(cp, MII_BMSR); + /* WTZ: Solaris driver reads this twice, but that + * may be due to the PCS case and the use of a + * common implementation. Read it twice here to be + * safe. + */ + bmsr = cas_phy_read(cp, MII_BMSR); + cas_mif_poll(cp, 1); + readl(cp->regs + REG_MIF_STATUS); /* avoid dups */ + reset = cas_mii_link_check(cp, bmsr); + } else { + reset = cas_pcs_link_check(cp); + } + + if (reset) + goto done; + + /* check for tx state machine confusion */ + if ((readl(cp->regs + REG_MAC_TX_STATUS) & MAC_TX_FRAME_XMIT) == 0) { + u32 val = readl(cp->regs + REG_MAC_STATE_MACHINE); + u32 wptr, rptr; + int tlm = CAS_VAL(MAC_SM_TLM, val); + + if (((tlm == 0x5) || (tlm == 0x3)) && + (CAS_VAL(MAC_SM_ENCAP_SM, val) == 0)) { + if (netif_msg_tx_err(cp)) + printk(KERN_DEBUG "%s: tx err: " + "MAC_STATE[%08x]\n", + cp->dev->name, val); + reset = 1; + goto done; + } + + val = readl(cp->regs + REG_TX_FIFO_PKT_CNT); + wptr = readl(cp->regs + REG_TX_FIFO_WRITE_PTR); + rptr = readl(cp->regs + REG_TX_FIFO_READ_PTR); + if ((val == 0) && (wptr != rptr)) { + if (netif_msg_tx_err(cp)) + printk(KERN_DEBUG "%s: tx err: " + "TX_FIFO[%08x:%08x:%08x]\n", + cp->dev->name, val, wptr, rptr); + reset = 1; + } + + if (reset) + cas_hard_reset(cp); + } + +done: + if (reset) { +#if 1 + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_all); + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, CAS_RESET_ALL); + printk(KERN_ERR "reset called in cas_link_timer\n"); + schedule_work(&cp->reset_task); +#endif + } + + if (!pending) + mod_timer(&cp->link_timer, jiffies + CAS_LINK_TIMEOUT); + cas_unlock_tx(cp); + spin_unlock_irqrestore(&cp->lock, flags); +} + +/* tiny buffers are used to avoid target abort issues with + * older cassini's + */ +static void cas_tx_tiny_free(struct cas *cp) +{ + struct pci_dev *pdev = cp->pdev; + int i; + + for (i = 0; i < N_TX_RINGS; i++) { + if (!cp->tx_tiny_bufs[i]) + continue; + + pci_free_consistent(pdev, TX_TINY_BUF_BLOCK, + cp->tx_tiny_bufs[i], + cp->tx_tiny_dvma[i]); + cp->tx_tiny_bufs[i] = NULL; + } +} + +static int cas_tx_tiny_alloc(struct cas *cp) +{ + struct pci_dev *pdev = cp->pdev; + int i; + + for (i = 0; i < N_TX_RINGS; i++) { + cp->tx_tiny_bufs[i] = + pci_alloc_consistent(pdev, TX_TINY_BUF_BLOCK, + &cp->tx_tiny_dvma[i]); + if (!cp->tx_tiny_bufs[i]) { + cas_tx_tiny_free(cp); + return -1; + } + } + return 0; +} + + +static int cas_open(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + int hw_was_up, err; + unsigned long flags; + + down(&cp->pm_sem); + + hw_was_up = cp->hw_running; + + /* The power-management semaphore protects the hw_running + * etc. state so it is safe to do this bit without cp->lock + */ + if (!cp->hw_running) { + /* Reset the chip */ + cas_lock_all_save(cp, flags); + /* We set the second arg to cas_reset to zero + * because cas_init_hw below will have its second + * argument set to non-zero, which will force + * autonegotiation to start. + */ + cas_reset(cp, 0); + cp->hw_running = 1; + cas_unlock_all_restore(cp, flags); + } + + if (cas_tx_tiny_alloc(cp) < 0) + return -ENOMEM; + + /* alloc rx descriptors */ + err = -ENOMEM; + if (cas_alloc_rxds(cp) < 0) + goto err_tx_tiny; + + /* allocate spares */ + cas_spare_init(cp); + cas_spare_recover(cp, GFP_KERNEL); + + /* We can now request the interrupt as we know it's masked + * on the controller. cassini+ has up to 4 interrupts + * that can be used, but you need to do explicit pci interrupt + * mapping to expose them + */ + if (request_irq(cp->pdev->irq, cas_interrupt, + SA_SHIRQ, dev->name, (void *) dev)) { + printk(KERN_ERR "%s: failed to request irq !\n", + cp->dev->name); + err = -EAGAIN; + goto err_spare; + } + + /* init hw */ + cas_lock_all_save(cp, flags); + cas_clean_rings(cp); + cas_init_hw(cp, !hw_was_up); + cp->opened = 1; + cas_unlock_all_restore(cp, flags); + + netif_start_queue(dev); + up(&cp->pm_sem); + return 0; + +err_spare: + cas_spare_free(cp); + cas_free_rxds(cp); +err_tx_tiny: + cas_tx_tiny_free(cp); + up(&cp->pm_sem); + return err; +} + +static int cas_close(struct net_device *dev) +{ + unsigned long flags; + struct cas *cp = netdev_priv(dev); + + /* Make sure we don't get distracted by suspend/resume */ + down(&cp->pm_sem); + + netif_stop_queue(dev); + + /* Stop traffic, mark us closed */ + cas_lock_all_save(cp, flags); + cp->opened = 0; + cas_reset(cp, 0); + cas_phy_init(cp); + cas_begin_auto_negotiation(cp, NULL); + cas_clean_rings(cp); + cas_unlock_all_restore(cp, flags); + + free_irq(cp->pdev->irq, (void *) dev); + cas_spare_free(cp); + cas_free_rxds(cp); + cas_tx_tiny_free(cp); + up(&cp->pm_sem); + return 0; +} + +static struct { + const char name[ETH_GSTRING_LEN]; +} ethtool_cassini_statnames[] = { + {"collisions"}, + {"rx_bytes"}, + {"rx_crc_errors"}, + {"rx_dropped"}, + {"rx_errors"}, + {"rx_fifo_errors"}, + {"rx_frame_errors"}, + {"rx_length_errors"}, + {"rx_over_errors"}, + {"rx_packets"}, + {"tx_aborted_errors"}, + {"tx_bytes"}, + {"tx_dropped"}, + {"tx_errors"}, + {"tx_fifo_errors"}, + {"tx_packets"} +}; +#define CAS_NUM_STAT_KEYS (sizeof(ethtool_cassini_statnames)/ETH_GSTRING_LEN) + +static struct { + const int offsets; /* neg. values for 2nd arg to cas_read_phy */ +} ethtool_register_table[] = { + {-MII_BMSR}, + {-MII_BMCR}, + {REG_CAWR}, + {REG_INF_BURST}, + {REG_BIM_CFG}, + {REG_RX_CFG}, + {REG_HP_CFG}, + {REG_MAC_TX_CFG}, + {REG_MAC_RX_CFG}, + {REG_MAC_CTRL_CFG}, + {REG_MAC_XIF_CFG}, + {REG_MIF_CFG}, + {REG_PCS_CFG}, + {REG_SATURN_PCFG}, + {REG_PCS_MII_STATUS}, + {REG_PCS_STATE_MACHINE}, + {REG_MAC_COLL_EXCESS}, + {REG_MAC_COLL_LATE} +}; +#define CAS_REG_LEN (sizeof(ethtool_register_table)/sizeof(int)) +#define CAS_MAX_REGS (sizeof (u32)*CAS_REG_LEN) + +static u8 *cas_get_regs(struct cas *cp) +{ + u8 *ptr = kmalloc(CAS_MAX_REGS, GFP_KERNEL); + u8 *p; + int i; + unsigned long flags; + + if (!ptr) + return NULL; + + spin_lock_irqsave(&cp->lock, flags); + for (i = 0, p = ptr; i < CAS_REG_LEN ; i ++, p += sizeof(u32)) { + u16 hval; + u32 val; + if (ethtool_register_table[i].offsets < 0) { + hval = cas_phy_read(cp, + -ethtool_register_table[i].offsets); + val = hval; + } else { + val= readl(cp->regs+ethtool_register_table[i].offsets); + } + memcpy(p, (u8 *)&val, sizeof(u32)); + } + spin_unlock_irqrestore(&cp->lock, flags); + + return ptr; +} + +static struct net_device_stats *cas_get_stats(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + struct net_device_stats *stats = cp->net_stats; + unsigned long flags; + int i; + unsigned long tmp; + + /* we collate all of the stats into net_stats[N_TX_RING] */ + if (!cp->hw_running) + return stats + N_TX_RINGS; + + /* collect outstanding stats */ + /* WTZ: the Cassini spec gives these as 16 bit counters but + * stored in 32-bit words. Added a mask of 0xffff to be safe, + * in case the chip somehow puts any garbage in the other bits. + * Also, counter usage didn't seem to mach what Adrian did + * in the parts of the code that set these quantities. Made + * that consistent. + */ + spin_lock_irqsave(&cp->stat_lock[N_TX_RINGS], flags); + stats[N_TX_RINGS].rx_crc_errors += + readl(cp->regs + REG_MAC_FCS_ERR) & 0xffff; + stats[N_TX_RINGS].rx_frame_errors += + readl(cp->regs + REG_MAC_ALIGN_ERR) &0xffff; + stats[N_TX_RINGS].rx_length_errors += + readl(cp->regs + REG_MAC_LEN_ERR) & 0xffff; +#if 1 + tmp = (readl(cp->regs + REG_MAC_COLL_EXCESS) & 0xffff) + + (readl(cp->regs + REG_MAC_COLL_LATE) & 0xffff); + stats[N_TX_RINGS].tx_aborted_errors += tmp; + stats[N_TX_RINGS].collisions += + tmp + (readl(cp->regs + REG_MAC_COLL_NORMAL) & 0xffff); +#else + stats[N_TX_RINGS].tx_aborted_errors += + readl(cp->regs + REG_MAC_COLL_EXCESS); + stats[N_TX_RINGS].collisions += readl(cp->regs + REG_MAC_COLL_EXCESS) + + readl(cp->regs + REG_MAC_COLL_LATE); +#endif + cas_clear_mac_err(cp); + + /* saved bits that are unique to ring 0 */ + spin_lock(&cp->stat_lock[0]); + stats[N_TX_RINGS].collisions += stats[0].collisions; + stats[N_TX_RINGS].rx_over_errors += stats[0].rx_over_errors; + stats[N_TX_RINGS].rx_frame_errors += stats[0].rx_frame_errors; + stats[N_TX_RINGS].rx_fifo_errors += stats[0].rx_fifo_errors; + stats[N_TX_RINGS].tx_aborted_errors += stats[0].tx_aborted_errors; + stats[N_TX_RINGS].tx_fifo_errors += stats[0].tx_fifo_errors; + spin_unlock(&cp->stat_lock[0]); + + for (i = 0; i < N_TX_RINGS; i++) { + spin_lock(&cp->stat_lock[i]); + stats[N_TX_RINGS].rx_length_errors += + stats[i].rx_length_errors; + stats[N_TX_RINGS].rx_crc_errors += stats[i].rx_crc_errors; + stats[N_TX_RINGS].rx_packets += stats[i].rx_packets; + stats[N_TX_RINGS].tx_packets += stats[i].tx_packets; + stats[N_TX_RINGS].rx_bytes += stats[i].rx_bytes; + stats[N_TX_RINGS].tx_bytes += stats[i].tx_bytes; + stats[N_TX_RINGS].rx_errors += stats[i].rx_errors; + stats[N_TX_RINGS].tx_errors += stats[i].tx_errors; + stats[N_TX_RINGS].rx_dropped += stats[i].rx_dropped; + stats[N_TX_RINGS].tx_dropped += stats[i].tx_dropped; + memset(stats + i, 0, sizeof(struct net_device_stats)); + spin_unlock(&cp->stat_lock[i]); + } + spin_unlock_irqrestore(&cp->stat_lock[N_TX_RINGS], flags); + return stats + N_TX_RINGS; +} + + +static void cas_set_multicast(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + u32 rxcfg, rxcfg_new; + unsigned long flags; + int limit = STOP_TRIES; + + if (!cp->hw_running) + return; + + spin_lock_irqsave(&cp->lock, flags); + rxcfg = readl(cp->regs + REG_MAC_RX_CFG); + + /* disable RX MAC and wait for completion */ + writel(rxcfg & ~MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG); + while (readl(cp->regs + REG_MAC_RX_CFG) & MAC_RX_CFG_EN) { + if (!limit--) + break; + udelay(10); + } + + /* disable hash filter and wait for completion */ + limit = STOP_TRIES; + rxcfg &= ~(MAC_RX_CFG_PROMISC_EN | MAC_RX_CFG_HASH_FILTER_EN); + writel(rxcfg & ~MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG); + while (readl(cp->regs + REG_MAC_RX_CFG) & MAC_RX_CFG_HASH_FILTER_EN) { + if (!limit--) + break; + udelay(10); + } + + /* program hash filters */ + cp->mac_rx_cfg = rxcfg_new = cas_setup_multicast(cp); + rxcfg |= rxcfg_new; + writel(rxcfg, cp->regs + REG_MAC_RX_CFG); + spin_unlock_irqrestore(&cp->lock, flags); +} + +/* Eventually add support for changing the advertisement + * on autoneg. + */ +static int cas_ethtool_ioctl(struct net_device *dev, void *ep_user) +{ + struct cas *cp = netdev_priv(dev); + u16 bmcr; + int full_duplex, speed, pause; + struct ethtool_cmd ecmd; + unsigned long flags; + enum link_state linkstate = link_up; + + if (copy_from_user(&ecmd, ep_user, sizeof(ecmd))) + return -EFAULT; + + switch(ecmd.cmd) { + case ETHTOOL_GDRVINFO: { + struct ethtool_drvinfo info = { cmd: ETHTOOL_GDRVINFO }; + + strncpy(info.driver, DRV_MODULE_NAME, + ETHTOOL_BUSINFO_LEN); + strncpy(info.version, DRV_MODULE_VERSION, + ETHTOOL_BUSINFO_LEN); + info.fw_version[0] = '\0'; + strncpy(info.bus_info, pci_name(cp->pdev), + ETHTOOL_BUSINFO_LEN); + info.regdump_len = cp->casreg_len < CAS_MAX_REGS ? + cp->casreg_len : CAS_MAX_REGS; + info.n_stats = CAS_NUM_STAT_KEYS; + if (copy_to_user(ep_user, &info, sizeof(info))) + return -EFAULT; + + return 0; + } + + case ETHTOOL_GSET: + ecmd.advertising = 0; + ecmd.supported = SUPPORTED_Autoneg; + if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { + ecmd.supported |= SUPPORTED_1000baseT_Full; + ecmd.advertising |= ADVERTISED_1000baseT_Full; + } + + /* Record PHY settings if HW is on. */ + spin_lock_irqsave(&cp->lock, flags); + bmcr = 0; + linkstate = cp->lstate; + if (CAS_PHY_MII(cp->phy_type)) { + ecmd.port = PORT_MII; + ecmd.transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ? + XCVR_INTERNAL : XCVR_EXTERNAL; + ecmd.phy_address = cp->phy_addr; + ecmd.advertising |= ADVERTISED_TP | ADVERTISED_MII | + ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full; + + ecmd.supported |= + (SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_TP | SUPPORTED_MII); + + if (cp->hw_running) { + cas_mif_poll(cp, 0); + bmcr = cas_phy_read(cp, MII_BMCR); + cas_read_mii_link_mode(cp, &full_duplex, + &speed, &pause); + cas_mif_poll(cp, 1); + } + + } else { + ecmd.port = PORT_FIBRE; + ecmd.transceiver = XCVR_INTERNAL; + ecmd.phy_address = 0; + ecmd.supported |= SUPPORTED_FIBRE; + ecmd.advertising |= ADVERTISED_FIBRE; + + if (cp->hw_running) { + /* pcs uses the same bits as mii */ + bmcr = readl(cp->regs + REG_PCS_MII_CTRL); + cas_read_pcs_link_mode(cp, &full_duplex, + &speed, &pause); + } + } + spin_unlock_irqrestore(&cp->lock, flags); + + if (bmcr & BMCR_ANENABLE) { + ecmd.advertising |= ADVERTISED_Autoneg; + ecmd.autoneg = AUTONEG_ENABLE; + ecmd.speed = ((speed == 10) ? + SPEED_10 : + ((speed == 1000) ? + SPEED_1000 : SPEED_100)); + ecmd.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; + } else { + ecmd.autoneg = AUTONEG_DISABLE; + ecmd.speed = + (bmcr & CAS_BMCR_SPEED1000) ? + SPEED_1000 : + ((bmcr & BMCR_SPEED100) ? SPEED_100: + SPEED_10); + ecmd.duplex = + (bmcr & BMCR_FULLDPLX) ? + DUPLEX_FULL : DUPLEX_HALF; + } + if (linkstate != link_up) { + /* Force these to "unknown" if the link is not up and + * autonogotiation in enabled. We can set the link + * speed to 0, but not ecmd.duplex, + * because its legal values are 0 and 1. Ethtool will + * print the value reported in parentheses after the + * word "Unknown" for unrecognized values. + * + * If in forced mode, we report the speed and duplex + * settings that we configured. + */ + if (cp->link_cntl & BMCR_ANENABLE) { + ecmd.speed = 0; + ecmd.duplex = 0xff; + } else { + ecmd.speed = SPEED_10; + if (cp->link_cntl & BMCR_SPEED100) { + ecmd.speed = SPEED_100; + } else if (cp->link_cntl & CAS_BMCR_SPEED1000) { + ecmd.speed = SPEED_1000; + } + ecmd.duplex = (cp->link_cntl & BMCR_FULLDPLX)? + DUPLEX_FULL : DUPLEX_HALF; + } + } + if (copy_to_user(ep_user, &ecmd, sizeof(ecmd))) + return -EFAULT; + return 0; + + case ETHTOOL_SSET: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Verify the settings we care about. */ + if (ecmd.autoneg != AUTONEG_ENABLE && + ecmd.autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (ecmd.autoneg == AUTONEG_DISABLE && + ((ecmd.speed != SPEED_1000 && + ecmd.speed != SPEED_100 && + ecmd.speed != SPEED_10) || + (ecmd.duplex != DUPLEX_HALF && + ecmd.duplex != DUPLEX_FULL))) + return -EINVAL; + + /* Apply settings and restart link process. */ + spin_lock_irqsave(&cp->lock, flags); + cas_begin_auto_negotiation(cp, &ecmd); + spin_unlock_irqrestore(&cp->lock, flags); + return 0; + + case ETHTOOL_NWAY_RST: + if ((cp->link_cntl & BMCR_ANENABLE) == 0) + return -EINVAL; + + /* Restart link process. */ + spin_lock_irqsave(&cp->lock, flags); + cas_begin_auto_negotiation(cp, NULL); + spin_unlock_irqrestore(&cp->lock, flags); + + return 0; + + case ETHTOOL_GWOL: + case ETHTOOL_SWOL: + break; /* doesn't exist */ + + /* get link status */ + case ETHTOOL_GLINK: { + struct ethtool_value edata = { cmd: ETHTOOL_GLINK }; + + edata.data = (cp->lstate == link_up); + if (copy_to_user(ep_user, &edata, sizeof(edata))) + return -EFAULT; + return 0; + } + + /* get message-level */ + case ETHTOOL_GMSGLVL: { + struct ethtool_value edata = { cmd: ETHTOOL_GMSGLVL }; + + edata.data = cp->msg_enable; + if (copy_to_user(ep_user, &edata, sizeof(edata))) + return -EFAULT; + return 0; + } + + /* set message-level */ + case ETHTOOL_SMSGLVL: { + struct ethtool_value edata; + + if (!capable(CAP_NET_ADMIN)) { + return (-EPERM); + } + if (copy_from_user(&edata, ep_user, sizeof(edata))) + return -EFAULT; + cp->msg_enable = edata.data; + return 0; + } + + case ETHTOOL_GREGS: { + struct ethtool_regs edata; + u8 *ptr; + int len = cp->casreg_len < CAS_MAX_REGS ? + cp->casreg_len: CAS_MAX_REGS; + + if (copy_from_user(&edata, ep_user, sizeof (edata))) + return -EFAULT; + + if (edata.len > len) + edata.len = len; + edata.version = 0; + if (copy_to_user (ep_user, &edata, sizeof(edata))) + return -EFAULT; + + /* cas_get_regs handles locks (cp->lock). */ + ptr = cas_get_regs(cp); + if (ptr == NULL) + return -ENOMEM; + if (copy_to_user(ep_user + sizeof (edata), ptr, edata.len)) + return -EFAULT; + + kfree(ptr); + return (0); + } + case ETHTOOL_GSTRINGS: { + struct ethtool_gstrings edata; + int len; + + if (copy_from_user(&edata, ep_user, sizeof(edata))) + return -EFAULT; + + len = edata.len; + switch(edata.string_set) { + case ETH_SS_STATS: + edata.len = (len < CAS_NUM_STAT_KEYS) ? + len : CAS_NUM_STAT_KEYS; + if (copy_to_user(ep_user, &edata, sizeof(edata))) + return -EFAULT; + + if (copy_to_user(ep_user + sizeof(edata), + ðtool_cassini_statnames, + (edata.len * ETH_GSTRING_LEN))) + return -EFAULT; + return 0; + default: + return -EINVAL; + } + } + case ETHTOOL_GSTATS: { + int i = 0; + u64 *tmp; + struct ethtool_stats edata; + struct net_device_stats *stats; + int len; + + if (copy_from_user(&edata, ep_user, sizeof(edata))) + return -EFAULT; + + len = edata.n_stats; + stats = cas_get_stats(cp->dev); + edata.cmd = ETHTOOL_GSTATS; + edata.n_stats = (len < CAS_NUM_STAT_KEYS) ? + len : CAS_NUM_STAT_KEYS; + if (copy_to_user(ep_user, &edata, sizeof (edata))) + return -EFAULT; + + tmp = kmalloc(sizeof(u64)*CAS_NUM_STAT_KEYS, GFP_KERNEL); + if (tmp) { + tmp[i++] = stats->collisions; + tmp[i++] = stats->rx_bytes; + tmp[i++] = stats->rx_crc_errors; + tmp[i++] = stats->rx_dropped; + tmp[i++] = stats->rx_errors; + tmp[i++] = stats->rx_fifo_errors; + tmp[i++] = stats->rx_frame_errors; + tmp[i++] = stats->rx_length_errors; + tmp[i++] = stats->rx_over_errors; + tmp[i++] = stats->rx_packets; + tmp[i++] = stats->tx_aborted_errors; + tmp[i++] = stats->tx_bytes; + tmp[i++] = stats->tx_dropped; + tmp[i++] = stats->tx_errors; + tmp[i++] = stats->tx_fifo_errors; + tmp[i++] = stats->tx_packets; + BUG_ON(i != CAS_NUM_STAT_KEYS); + + i = copy_to_user(ep_user + sizeof(edata), + tmp, sizeof(u64)*edata.n_stats); + kfree(tmp); + } else { + return -ENOMEM; + } + if (i) + return -EFAULT; + return 0; + } + } + + return -EOPNOTSUPP; +} + +static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct cas *cp = netdev_priv(dev); + struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data; + unsigned long flags; + int rc = -EOPNOTSUPP; + + /* Hold the PM semaphore while doing ioctl's or we may collide + * with open/close and power management and oops. + */ + down(&cp->pm_sem); + switch (cmd) { + case SIOCETHTOOL: + rc = cas_ethtool_ioctl(dev, ifr->ifr_data); + break; + + case SIOCGMIIPHY: /* Get address of MII PHY in use. */ + data->phy_id = cp->phy_addr; + /* Fallthrough... */ + + case SIOCGMIIREG: /* Read MII PHY register. */ + spin_lock_irqsave(&cp->lock, flags); + cas_mif_poll(cp, 0); + data->val_out = cas_phy_read(cp, data->reg_num & 0x1f); + cas_mif_poll(cp, 1); + spin_unlock_irqrestore(&cp->lock, flags); + rc = 0; + break; + + case SIOCSMIIREG: /* Write MII PHY register. */ + if (!capable(CAP_NET_ADMIN)) { + rc = -EPERM; + break; + } + spin_lock_irqsave(&cp->lock, flags); + cas_mif_poll(cp, 0); + rc = cas_phy_write(cp, data->reg_num & 0x1f, data->val_in); + cas_mif_poll(cp, 1); + spin_unlock_irqrestore(&cp->lock, flags); + break; + default: + break; + }; + + up(&cp->pm_sem); + return rc; +} + +static int __devinit cas_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + static int cas_version_printed = 0; + unsigned long casreg_base, casreg_len; + struct net_device *dev; + struct cas *cp; + int i, err, pci_using_dac; + u16 pci_cmd; + u8 orig_cacheline_size = 0, cas_cacheline_size = 0; + + if (cas_version_printed++ == 0) + printk(KERN_INFO "%s", version); + + err = pci_enable_device(pdev); + if (err) { + printk(KERN_ERR PFX "Cannot enable PCI device, " + "aborting.\n"); + return err; + } + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + printk(KERN_ERR PFX "Cannot find proper PCI device " + "base address, aborting.\n"); + err = -ENODEV; + goto err_out_disable_pdev; + } + + dev = alloc_etherdev(sizeof(*cp)); + if (!dev) { + printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n"); + err = -ENOMEM; + goto err_out_disable_pdev; + } + SET_MODULE_OWNER(dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + err = pci_request_regions(pdev, dev->name); + if (err) { + printk(KERN_ERR PFX "Cannot obtain PCI resources, " + "aborting.\n"); + goto err_out_free_netdev; + } + pci_set_master(pdev); + + /* we must always turn on parity response or else parity + * doesn't get generated properly. disable SERR/PERR as well. + * in addition, we want to turn MWI on. + */ + pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); + pci_cmd &= ~PCI_COMMAND_SERR; + pci_cmd |= PCI_COMMAND_PARITY; + pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); + pci_set_mwi(pdev); + /* + * On some architectures, the default cache line size set + * by pci_set_mwi reduces perforamnce. We have to increase + * it for this case. To start, we'll print some configuration + * data. + */ +#if 1 + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, + &orig_cacheline_size); + if (orig_cacheline_size < CAS_PREF_CACHELINE_SIZE) { + cas_cacheline_size = + (CAS_PREF_CACHELINE_SIZE < SMP_CACHE_BYTES) ? + CAS_PREF_CACHELINE_SIZE : SMP_CACHE_BYTES; + if (pci_write_config_byte(pdev, + PCI_CACHE_LINE_SIZE, + cas_cacheline_size)) { + printk(KERN_ERR PFX "Could not set PCI cache " + "line size\n"); + goto err_write_cacheline; + } + } +#endif + + + /* Configure DMA attributes. */ + if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { + pci_using_dac = 1; + err = pci_set_consistent_dma_mask(pdev, + DMA_64BIT_MASK); + if (err < 0) { + printk(KERN_ERR PFX "Unable to obtain 64-bit DMA " + "for consistent allocations\n"); + goto err_out_free_res; + } + + } else { + err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + printk(KERN_ERR PFX "No usable DMA configuration, " + "aborting.\n"); + goto err_out_free_res; + } + pci_using_dac = 0; + } + + casreg_base = pci_resource_start(pdev, 0); + casreg_len = pci_resource_len(pdev, 0); + + cp = netdev_priv(dev); + cp->pdev = pdev; +#if 1 + /* A value of 0 indicates we never explicitly set it */ + cp->orig_cacheline_size = cas_cacheline_size ? orig_cacheline_size: 0; +#endif + cp->dev = dev; + cp->msg_enable = (cassini_debug < 0) ? CAS_DEF_MSG_ENABLE : + cassini_debug; + + cp->link_transition = LINK_TRANSITION_UNKNOWN; + cp->link_transition_jiffies_valid = 0; + + spin_lock_init(&cp->lock); + spin_lock_init(&cp->rx_inuse_lock); + spin_lock_init(&cp->rx_spare_lock); + for (i = 0; i < N_TX_RINGS; i++) { + spin_lock_init(&cp->stat_lock[i]); + spin_lock_init(&cp->tx_lock[i]); + } + spin_lock_init(&cp->stat_lock[N_TX_RINGS]); + init_MUTEX(&cp->pm_sem); + + init_timer(&cp->link_timer); + cp->link_timer.function = cas_link_timer; + cp->link_timer.data = (unsigned long) cp; + +#if 1 + /* Just in case the implementation of atomic operations + * change so that an explicit initialization is necessary. + */ + atomic_set(&cp->reset_task_pending, 0); + atomic_set(&cp->reset_task_pending_all, 0); + atomic_set(&cp->reset_task_pending_spare, 0); + atomic_set(&cp->reset_task_pending_mtu, 0); +#endif + INIT_WORK(&cp->reset_task, cas_reset_task, cp); + + /* Default link parameters */ + if (link_mode >= 0 && link_mode <= 6) + cp->link_cntl = link_modes[link_mode]; + else + cp->link_cntl = BMCR_ANENABLE; + cp->lstate = link_down; + cp->link_transition = LINK_TRANSITION_LINK_DOWN; + netif_carrier_off(cp->dev); + cp->timer_ticks = 0; + + /* give us access to cassini registers */ + cp->regs = ioremap(casreg_base, casreg_len); + if (cp->regs == 0UL) { + printk(KERN_ERR PFX "Cannot map device registers, " + "aborting.\n"); + goto err_out_free_res; + } + cp->casreg_len = casreg_len; + + pci_save_state(pdev); + cas_check_pci_invariants(cp); + cas_hard_reset(cp); + cas_reset(cp, 0); + if (cas_check_invariants(cp)) + goto err_out_iounmap; + + cp->init_block = (struct cas_init_block *) + pci_alloc_consistent(pdev, sizeof(struct cas_init_block), + &cp->block_dvma); + if (!cp->init_block) { + printk(KERN_ERR PFX "Cannot allocate init block, " + "aborting.\n"); + goto err_out_iounmap; + } + + for (i = 0; i < N_TX_RINGS; i++) + cp->init_txds[i] = cp->init_block->txds[i]; + + for (i = 0; i < N_RX_DESC_RINGS; i++) + cp->init_rxds[i] = cp->init_block->rxds[i]; + + for (i = 0; i < N_RX_COMP_RINGS; i++) + cp->init_rxcs[i] = cp->init_block->rxcs[i]; + + for (i = 0; i < N_RX_FLOWS; i++) + skb_queue_head_init(&cp->rx_flows[i]); + + dev->open = cas_open; + dev->stop = cas_close; + dev->hard_start_xmit = cas_start_xmit; + dev->get_stats = cas_get_stats; + dev->set_multicast_list = cas_set_multicast; + dev->do_ioctl = cas_ioctl; + dev->tx_timeout = cas_tx_timeout; + dev->watchdog_timeo = CAS_TX_TIMEOUT; + dev->change_mtu = cas_change_mtu; +#ifdef USE_NAPI + dev->poll = cas_poll; + dev->weight = 64; +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = cas_netpoll; +#endif + dev->irq = pdev->irq; + dev->dma = 0; + + /* Cassini features. */ + if ((cp->cas_flags & CAS_FLAG_NO_HW_CSUM) == 0) + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; + + if (pci_using_dac) + dev->features |= NETIF_F_HIGHDMA; + + if (register_netdev(dev)) { + printk(KERN_ERR PFX "Cannot register net device, " + "aborting.\n"); + goto err_out_free_consistent; + } + + i = readl(cp->regs + REG_BIM_CFG); + printk(KERN_INFO "%s: Sun Cassini%s (%sbit/%sMHz PCI/%s) " + "Ethernet[%d] ", dev->name, + (cp->cas_flags & CAS_FLAG_REG_PLUS) ? "+" : "", + (i & BIM_CFG_32BIT) ? "32" : "64", + (i & BIM_CFG_66MHZ) ? "66" : "33", + (cp->phy_type == CAS_PHY_SERDES) ? "Fi" : "Cu", pdev->irq); + + for (i = 0; i < 6; i++) + printk("%2.2x%c", dev->dev_addr[i], + i == 5 ? ' ' : ':'); + printk("\n"); + + pci_set_drvdata(pdev, dev); + cp->hw_running = 1; + cas_entropy_reset(cp); + cas_phy_init(cp); + cas_begin_auto_negotiation(cp, NULL); + return 0; + +err_out_free_consistent: + pci_free_consistent(pdev, sizeof(struct cas_init_block), + cp->init_block, cp->block_dvma); + +err_out_iounmap: + down(&cp->pm_sem); + if (cp->hw_running) + cas_shutdown(cp); + up(&cp->pm_sem); + + iounmap((void *) cp->regs); + + +err_out_free_res: + pci_release_regions(pdev); + +err_write_cacheline: + /* Try to restore it in case the error occured after we + * set it. + */ + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, orig_cacheline_size); + +err_out_free_netdev: + free_netdev(dev); + +err_out_disable_pdev: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return -ENODEV; +} + +static void __devexit cas_remove_one(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct cas *cp; + if (!dev) + return; + + cp = netdev_priv(dev); + unregister_netdev(dev); + + down(&cp->pm_sem); + flush_scheduled_work(); + if (cp->hw_running) + cas_shutdown(cp); + up(&cp->pm_sem); + +#if 1 + if (cp->orig_cacheline_size) { + /* Restore the cache line size if we had modified + * it. + */ + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, + cp->orig_cacheline_size); + } +#endif + pci_free_consistent(pdev, sizeof(struct cas_init_block), + cp->init_block, cp->block_dvma); + iounmap((void *) cp->regs); + free_netdev(dev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +#ifdef CONFIG_PM +static int cas_suspend(struct pci_dev *pdev, u32 state) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct cas *cp = netdev_priv(dev); + unsigned long flags; + + /* We hold the PM semaphore during entire driver + * sleep time + */ + down(&cp->pm_sem); + + /* If the driver is opened, we stop the DMA */ + if (cp->opened) { + netif_device_detach(dev); + + cas_lock_all_save(cp, flags); + + /* We can set the second arg of cas_reset to 0 + * because on resume, we'll call cas_init_hw with + * its second arg set so that autonegotiation is + * restarted. + */ + cas_reset(cp, 0); + cas_clean_rings(cp); + cas_unlock_all_restore(cp, flags); + } + + if (cp->hw_running) + cas_shutdown(cp); + + return 0; +} + +static int cas_resume(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct cas *cp = netdev_priv(dev); + + printk(KERN_INFO "%s: resuming\n", dev->name); + + cas_hard_reset(cp); + if (cp->opened) { + unsigned long flags; + cas_lock_all_save(cp, flags); + cas_reset(cp, 0); + cp->hw_running = 1; + cas_clean_rings(cp); + cas_init_hw(cp, 1); + cas_unlock_all_restore(cp, flags); + + netif_device_attach(dev); + } + up(&cp->pm_sem); + return 0; +} +#endif /* CONFIG_PM */ + +static struct pci_driver cas_driver = { + .name = DRV_MODULE_NAME, + .id_table = cas_pci_tbl, + .probe = cas_init_one, + .remove = __devexit_p(cas_remove_one), +#ifdef CONFIG_PM + .suspend = cas_suspend, + .resume = cas_resume +#endif +}; + +static int __init cas_init(void) +{ + if (linkdown_timeout > 0) + link_transition_timeout = linkdown_timeout * HZ; + else + link_transition_timeout = 0; + + return pci_module_init(&cas_driver); +} + +static void __exit cas_cleanup(void) +{ + pci_unregister_driver(&cas_driver); +} + +module_init(cas_init); +module_exit(cas_cleanup); diff --git a/drivers/net/cassini.h b/drivers/net/cassini.h new file mode 100644 index 000000000000..88063ef16cf6 --- /dev/null +++ b/drivers/net/cassini.h @@ -0,0 +1,4425 @@ +/* $Id: cassini.h,v 1.16 2004/08/17 21:15:16 zaumen Exp $ + * cassini.h: Definitions for Sun Microsystems Cassini(+) ethernet driver. + * + * Copyright (C) 2004 Sun Microsystems Inc. + * Copyright (c) 2003 Adrian Sun (asun@darksunrising.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + * vendor id: 0x108E (Sun Microsystems, Inc.) + * device id: 0xabba (Cassini) + * revision ids: 0x01 = Cassini + * 0x02 = Cassini rev 2 + * 0x10 = Cassini+ + * 0x11 = Cassini+ 0.2u + * + * vendor id: 0x100b (National Semiconductor) + * device id: 0x0035 (DP83065/Saturn) + * revision ids: 0x30 = Saturn B2 + * + * rings are all offset from 0. + * + * there are two clock domains: + * PCI: 33/66MHz clock + * chip: 125MHz clock + */ + +#ifndef _CASSINI_H +#define _CASSINI_H + +/* cassini register map: 2M memory mapped in 32-bit memory space accessible as + * 32-bit words. there is no i/o port access. REG_ addresses are + * shared between cassini and cassini+. REG_PLUS_ addresses only + * appear in cassini+. REG_MINUS_ addresses only appear in cassini. + */ +#define CAS_ID_REV2 0x02 +#define CAS_ID_REVPLUS 0x10 +#define CAS_ID_REVPLUS02u 0x11 +#define CAS_ID_REVSATURNB2 0x30 + +/** global resources **/ + +/* this register sets the weights for the weighted round robin arbiter. e.g., + * if rx weight == 1 and tx weight == 0, rx == 2x tx transfer credit + * for its next turn to access the pci bus. + * map: 0x0 = x1, 0x1 = x2, 0x2 = x4, 0x3 = x8 + * DEFAULT: 0x0, SIZE: 5 bits + */ +#define REG_CAWR 0x0004 /* core arbitration weight */ +#define CAWR_RX_DMA_WEIGHT_SHIFT 0 +#define CAWR_RX_DMA_WEIGHT_MASK 0x03 /* [0:1] */ +#define CAWR_TX_DMA_WEIGHT_SHIFT 2 +#define CAWR_TX_DMA_WEIGHT_MASK 0x0C /* [3:2] */ +#define CAWR_RR_DIS 0x10 /* [4] */ + +/* if enabled, BIM can send bursts across PCI bus > cacheline size. burst + * sizes determined by length of packet or descriptor transfer and the + * max length allowed by the target. + * DEFAULT: 0x0, SIZE: 1 bit + */ +#define REG_INF_BURST 0x0008 /* infinite burst enable reg */ +#define INF_BURST_EN 0x1 /* enable */ + +/* top level interrupts [0-9] are auto-cleared to 0 when the status + * register is read. second level interrupts [13 - 18] are cleared at + * the source. tx completion register 3 is replicated in [19 - 31] + * DEFAULT: 0x00000000, SIZE: 29 bits + */ +#define REG_INTR_STATUS 0x000C /* interrupt status register */ +#define INTR_TX_INTME 0x00000001 /* frame w/ INT ME desc bit set + xferred from host queue to + TX FIFO */ +#define INTR_TX_ALL 0x00000002 /* all xmit frames xferred into + TX FIFO. i.e., + TX Kick == TX complete. if + PACED_MODE set, then TX FIFO + also empty */ +#define INTR_TX_DONE 0x00000004 /* any frame xferred into tx + FIFO */ +#define INTR_TX_TAG_ERROR 0x00000008 /* TX FIFO tag framing + corrupted. FATAL ERROR */ +#define INTR_RX_DONE 0x00000010 /* at least 1 frame xferred + from RX FIFO to host mem. + RX completion reg updated. + may be delayed by recv + intr blanking. */ +#define INTR_RX_BUF_UNAVAIL 0x00000020 /* no more receive buffers. + RX Kick == RX complete */ +#define INTR_RX_TAG_ERROR 0x00000040 /* RX FIFO tag framing + corrupted. FATAL ERROR */ +#define INTR_RX_COMP_FULL 0x00000080 /* no more room in completion + ring to post descriptors. + RX complete head incr to + almost reach RX complete + tail */ +#define INTR_RX_BUF_AE 0x00000100 /* less than the + programmable threshold # + of free descr avail for + hw use */ +#define INTR_RX_COMP_AF 0x00000200 /* less than the + programmable threshold # + of descr spaces for hw + use in completion descr + ring */ +#define INTR_RX_LEN_MISMATCH 0x00000400 /* len field from MAC != + len of non-reassembly pkt + from fifo during DMA or + header parser provides TCP + header and payload size > + MAC packet size. + FATAL ERROR */ +#define INTR_SUMMARY 0x00001000 /* summary interrupt bit. this + bit will be set if an interrupt + generated on the pci bus. useful + when driver is polling for + interrupts */ +#define INTR_PCS_STATUS 0x00002000 /* PCS interrupt status register */ +#define INTR_TX_MAC_STATUS 0x00004000 /* TX MAC status register has at + least 1 unmasked interrupt set */ +#define INTR_RX_MAC_STATUS 0x00008000 /* RX MAC status register has at + least 1 unmasked interrupt set */ +#define INTR_MAC_CTRL_STATUS 0x00010000 /* MAC control status register has + at least 1 unmasked interrupt + set */ +#define INTR_MIF_STATUS 0x00020000 /* MIF status register has at least + 1 unmasked interrupt set */ +#define INTR_PCI_ERROR_STATUS 0x00040000 /* PCI error status register in the + BIF has at least 1 unmasked + interrupt set */ +#define INTR_TX_COMP_3_MASK 0xFFF80000 /* mask for TX completion + 3 reg data */ +#define INTR_TX_COMP_3_SHIFT 19 +#define INTR_ERROR_MASK (INTR_MIF_STATUS | INTR_PCI_ERROR_STATUS | \ + INTR_PCS_STATUS | INTR_RX_LEN_MISMATCH | \ + INTR_TX_MAC_STATUS | INTR_RX_MAC_STATUS | \ + INTR_TX_TAG_ERROR | INTR_RX_TAG_ERROR | \ + INTR_MAC_CTRL_STATUS) + +/* determines which status events will cause an interrupt. layout same + * as REG_INTR_STATUS. + * DEFAULT: 0xFFFFFFFF, SIZE: 16 bits + */ +#define REG_INTR_MASK 0x0010 /* Interrupt mask */ + +/* top level interrupt bits that are cleared during read of REG_INTR_STATUS_ALIAS. + * useful when driver is polling for interrupts. layout same as REG_INTR_MASK. + * DEFAULT: 0x00000000, SIZE: 12 bits + */ +#define REG_ALIAS_CLEAR 0x0014 /* alias clear mask + (used w/ status alias) */ +/* same as REG_INTR_STATUS except that only bits cleared are those selected by + * REG_ALIAS_CLEAR + * DEFAULT: 0x00000000, SIZE: 29 bits + */ +#define REG_INTR_STATUS_ALIAS 0x001C /* interrupt status alias + (selective clear) */ + +/* DEFAULT: 0x0, SIZE: 3 bits */ +#define REG_PCI_ERR_STATUS 0x1000 /* PCI error status */ +#define PCI_ERR_BADACK 0x01 /* reserved in Cassini+. + set if no ACK64# during ABS64 cycle + in Cassini. */ +#define PCI_ERR_DTRTO 0x02 /* delayed xaction timeout. set if + no read retry after 2^15 clocks */ +#define PCI_ERR_OTHER 0x04 /* other PCI errors */ +#define PCI_ERR_BIM_DMA_WRITE 0x08 /* BIM received 0 count DMA write req. + unused in Cassini. */ +#define PCI_ERR_BIM_DMA_READ 0x10 /* BIM received 0 count DMA read req. + unused in Cassini. */ +#define PCI_ERR_BIM_DMA_TIMEOUT 0x20 /* BIM received 255 retries during + DMA. unused in cassini. */ + +/* mask for PCI status events that will set PCI_ERR_STATUS. if cleared, event + * causes an interrupt to be generated. + * DEFAULT: 0x7, SIZE: 3 bits + */ +#define REG_PCI_ERR_STATUS_MASK 0x1004 /* PCI Error status mask */ + +/* used to configure PCI related parameters that are not in PCI config space. + * DEFAULT: 0bxx000, SIZE: 5 bits + */ +#define REG_BIM_CFG 0x1008 /* BIM Configuration */ +#define BIM_CFG_RESERVED0 0x001 /* reserved */ +#define BIM_CFG_RESERVED1 0x002 /* reserved */ +#define BIM_CFG_64BIT_DISABLE 0x004 /* disable 64-bit mode */ +#define BIM_CFG_66MHZ 0x008 /* (ro) 1 = 66MHz, 0 = < 66MHz */ +#define BIM_CFG_32BIT 0x010 /* (ro) 1 = 32-bit slot, 0 = 64-bit */ +#define BIM_CFG_DPAR_INTR_ENABLE 0x020 /* detected parity err enable */ +#define BIM_CFG_RMA_INTR_ENABLE 0x040 /* master abort intr enable */ +#define BIM_CFG_RTA_INTR_ENABLE 0x080 /* target abort intr enable */ +#define BIM_CFG_RESERVED2 0x100 /* reserved */ +#define BIM_CFG_BIM_DISABLE 0x200 /* stop BIM DMA. use before global + reset. reserved in Cassini. */ +#define BIM_CFG_BIM_STATUS 0x400 /* (ro) 1 = BIM DMA suspended. + reserved in Cassini. */ +#define BIM_CFG_PERROR_BLOCK 0x800 /* block PERR# to pci bus. def: 0. + reserved in Cassini. */ + +/* DEFAULT: 0x00000000, SIZE: 32 bits */ +#define REG_BIM_DIAG 0x100C /* BIM Diagnostic */ +#define BIM_DIAG_MSTR_SM_MASK 0x3FFFFF00 /* PCI master controller state + machine bits [21:0] */ +#define BIM_DIAG_BRST_SM_MASK 0x7F /* PCI burst controller state + machine bits [6:0] */ + +/* writing to SW_RESET_TX and SW_RESET_RX will issue a global + * reset. poll until TX and RX read back as 0's for completion. + */ +#define REG_SW_RESET 0x1010 /* Software reset */ +#define SW_RESET_TX 0x00000001 /* reset TX DMA engine. poll until + cleared to 0. */ +#define SW_RESET_RX 0x00000002 /* reset RX DMA engine. poll until + cleared to 0. */ +#define SW_RESET_RSTOUT 0x00000004 /* force RSTOUT# pin active (low). + resets PHY and anything else + connected to RSTOUT#. RSTOUT# + is also activated by local PCI + reset when hot-swap is being + done. */ +#define SW_RESET_BLOCK_PCS_SLINK 0x00000008 /* if a global reset is done with + this bit set, PCS and SLINK + modules won't be reset. + i.e., link won't drop. */ +#define SW_RESET_BREQ_SM_MASK 0x00007F00 /* breq state machine [6:0] */ +#define SW_RESET_PCIARB_SM_MASK 0x00070000 /* pci arbitration state bits: + 0b000: ARB_IDLE1 + 0b001: ARB_IDLE2 + 0b010: ARB_WB_ACK + 0b011: ARB_WB_WAT + 0b100: ARB_RB_ACK + 0b101: ARB_RB_WAT + 0b110: ARB_RB_END + 0b111: ARB_WB_END */ +#define SW_RESET_RDPCI_SM_MASK 0x00300000 /* read pci state bits: + 0b00: RD_PCI_WAT + 0b01: RD_PCI_RDY + 0b11: RD_PCI_ACK */ +#define SW_RESET_RDARB_SM_MASK 0x00C00000 /* read arbitration state bits: + 0b00: AD_IDL_RX + 0b01: AD_ACK_RX + 0b10: AD_ACK_TX + 0b11: AD_IDL_TX */ +#define SW_RESET_WRPCI_SM_MASK 0x06000000 /* write pci state bits + 0b00: WR_PCI_WAT + 0b01: WR_PCI_RDY + 0b11: WR_PCI_ACK */ +#define SW_RESET_WRARB_SM_MASK 0x38000000 /* write arbitration state bits: + 0b000: ARB_IDLE1 + 0b001: ARB_IDLE2 + 0b010: ARB_TX_ACK + 0b011: ARB_TX_WAT + 0b100: ARB_RX_ACK + 0b110: ARB_RX_WAT */ + +/* Cassini only. 64-bit register used to check PCI datapath. when read, + * value written has both lower and upper 32-bit halves rotated to the right + * one bit position. e.g., FFFFFFFF FFFFFFFF -> 7FFFFFFF 7FFFFFFF + */ +#define REG_MINUS_BIM_DATAPATH_TEST 0x1018 /* Cassini: BIM datapath test + Cassini+: reserved */ + +/* output enables are provided for each device's chip select and for the rest + * of the outputs from cassini to its local bus devices. two sw programmable + * bits are connected to general purpus control/status bits. + * DEFAULT: 0x7 + */ +#define REG_BIM_LOCAL_DEV_EN 0x1020 /* BIM local device + output EN. default: 0x7 */ +#define BIM_LOCAL_DEV_PAD 0x01 /* address bus, RW signal, and + OE signal output enable on the + local bus interface. these + are shared between both local + bus devices. tristate when 0. */ +#define BIM_LOCAL_DEV_PROM 0x02 /* PROM chip select */ +#define BIM_LOCAL_DEV_EXT 0x04 /* secondary local bus device chip + select output enable */ +#define BIM_LOCAL_DEV_SOFT_0 0x08 /* sw programmable ctrl bit 0 */ +#define BIM_LOCAL_DEV_SOFT_1 0x10 /* sw programmable ctrl bit 1 */ +#define BIM_LOCAL_DEV_HW_RESET 0x20 /* internal hw reset. Cassini+ only. */ + +/* access 24 entry BIM read and write buffers. put address in REG_BIM_BUFFER_ADDR + * and read/write from/to it REG_BIM_BUFFER_DATA_LOW and _DATA_HI. + * _DATA_HI should be the last access of the sequence. + * DEFAULT: undefined + */ +#define REG_BIM_BUFFER_ADDR 0x1024 /* BIM buffer address. for + purposes. */ +#define BIM_BUFFER_ADDR_MASK 0x3F /* index (0 - 23) of buffer */ +#define BIM_BUFFER_WR_SELECT 0x40 /* write buffer access = 1 + read buffer access = 0 */ +/* DEFAULT: undefined */ +#define REG_BIM_BUFFER_DATA_LOW 0x1028 /* BIM buffer data low */ +#define REG_BIM_BUFFER_DATA_HI 0x102C /* BIM buffer data high */ + +/* set BIM_RAM_BIST_START to start built-in self test for BIM read buffer. + * bit auto-clears when done with status read from _SUMMARY and _PASS bits. + */ +#define REG_BIM_RAM_BIST 0x102C /* BIM RAM (read buffer) BIST + control/status */ +#define BIM_RAM_BIST_RD_START 0x01 /* start BIST for BIM read buffer */ +#define BIM_RAM_BIST_WR_START 0x02 /* start BIST for BIM write buffer. + Cassini only. reserved in + Cassini+. */ +#define BIM_RAM_BIST_RD_PASS 0x04 /* summary BIST pass status for read + buffer. */ +#define BIM_RAM_BIST_WR_PASS 0x08 /* summary BIST pass status for write + buffer. Cassini only. reserved + in Cassini+. */ +#define BIM_RAM_BIST_RD_LOW_PASS 0x10 /* read low bank passes BIST */ +#define BIM_RAM_BIST_RD_HI_PASS 0x20 /* read high bank passes BIST */ +#define BIM_RAM_BIST_WR_LOW_PASS 0x40 /* write low bank passes BIST. + Cassini only. reserved in + Cassini+. */ +#define BIM_RAM_BIST_WR_HI_PASS 0x80 /* write high bank passes BIST. + Cassini only. reserved in + Cassini+. */ + +/* ASUN: i'm not sure what this does as it's not in the spec. + * DEFAULT: 0xFC + */ +#define REG_BIM_DIAG_MUX 0x1030 /* BIM diagnostic probe mux + select register */ + +/* enable probe monitoring mode and select data appearing on the P_A* bus. bit + * values for _SEL_HI_MASK and _SEL_LOW_MASK: + * 0x0: internal probe[7:0] (pci arb state, wtc empty w, wtc full w, wtc empty w, + * wtc empty r, post pci) + * 0x1: internal probe[15:8] (pci wbuf comp, pci wpkt comp, pci rbuf comp, + * pci rpkt comp, txdma wr req, txdma wr ack, + * txdma wr rdy, txdma wr xfr done) + * 0x2: internal probe[23:16] (txdma rd req, txdma rd ack, txdma rd rdy, rxdma rd, + * rd arb state, rd pci state) + * 0x3: internal probe[31:24] (rxdma req, rxdma ack, rxdma rdy, wrarb state, + * wrpci state) + * 0x4: pci io probe[7:0] 0x5: pci io probe[15:8] + * 0x6: pci io probe[23:16] 0x7: pci io probe[31:24] + * 0x8: pci io probe[39:32] 0x9: pci io probe[47:40] + * 0xa: pci io probe[55:48] 0xb: pci io probe[63:56] + * the following are not available in Cassini: + * 0xc: rx probe[7:0] 0xd: tx probe[7:0] + * 0xe: hp probe[7:0] 0xf: mac probe[7:0] + */ +#define REG_PLUS_PROBE_MUX_SELECT 0x1034 /* Cassini+: PROBE MUX SELECT */ +#define PROBE_MUX_EN 0x80000000 /* allow probe signals to be + driven on local bus P_A[15:0] + for debugging */ +#define PROBE_MUX_SUB_MUX_MASK 0x0000FF00 /* select sub module probe signals: + 0x03 = mac[1:0] + 0x0C = rx[1:0] + 0x30 = tx[1:0] + 0xC0 = hp[1:0] */ +#define PROBE_MUX_SEL_HI_MASK 0x000000F0 /* select which module to appear + on P_A[15:8]. see above for + values. */ +#define PROBE_MUX_SEL_LOW_MASK 0x0000000F /* select which module to appear + on P_A[7:0]. see above for + values. */ + +/* values mean the same thing as REG_INTR_MASK excep that it's for INTB. + DEFAULT: 0x1F */ +#define REG_PLUS_INTR_MASK_1 0x1038 /* Cassini+: interrupt mask + register 2 for INTB */ +#define REG_PLUS_INTRN_MASK(x) (REG_PLUS_INTR_MASK_1 + ((x) - 1)*16) +/* bits correspond to both _MASK and _STATUS registers. _ALT corresponds to + * all of the alternate (2-4) INTR registers while _1 corresponds to only + * _MASK_1 and _STATUS_1 registers. + * DEFAULT: 0x7 for MASK registers, 0x0 for ALIAS_CLEAR registers + */ +#define INTR_RX_DONE_ALT 0x01 +#define INTR_RX_COMP_FULL_ALT 0x02 +#define INTR_RX_COMP_AF_ALT 0x04 +#define INTR_RX_BUF_UNAVAIL_1 0x08 +#define INTR_RX_BUF_AE_1 0x10 /* almost empty */ +#define INTRN_MASK_RX_EN 0x80 +#define INTRN_MASK_CLEAR_ALL (INTR_RX_DONE_ALT | \ + INTR_RX_COMP_FULL_ALT | \ + INTR_RX_COMP_AF_ALT | \ + INTR_RX_BUF_UNAVAIL_1 | \ + INTR_RX_BUF_AE_1) +#define REG_PLUS_INTR_STATUS_1 0x103C /* Cassini+: interrupt status + register 2 for INTB. default: 0x1F */ +#define REG_PLUS_INTRN_STATUS(x) (REG_PLUS_INTR_STATUS_1 + ((x) - 1)*16) +#define INTR_STATUS_ALT_INTX_EN 0x80 /* generate INTX when one of the + flags are set. enables desc ring. */ + +#define REG_PLUS_ALIAS_CLEAR_1 0x1040 /* Cassini+: alias clear mask + register 2 for INTB */ +#define REG_PLUS_ALIASN_CLEAR(x) (REG_PLUS_ALIAS_CLEAR_1 + ((x) - 1)*16) + +#define REG_PLUS_INTR_STATUS_ALIAS_1 0x1044 /* Cassini+: interrupt status + register alias 2 for INTB */ +#define REG_PLUS_INTRN_STATUS_ALIAS(x) (REG_PLUS_INTR_STATUS_ALIAS_1 + ((x) - 1)*16) + +#define REG_SATURN_PCFG 0x106c /* pin configuration register for + integrated macphy */ + +#define SATURN_PCFG_TLA 0x00000001 /* 1 = phy actled */ +#define SATURN_PCFG_FLA 0x00000002 /* 1 = phy link10led */ +#define SATURN_PCFG_CLA 0x00000004 /* 1 = phy link100led */ +#define SATURN_PCFG_LLA 0x00000008 /* 1 = phy link1000led */ +#define SATURN_PCFG_RLA 0x00000010 /* 1 = phy duplexled */ +#define SATURN_PCFG_PDS 0x00000020 /* phy debug mode. + 0 = normal */ +#define SATURN_PCFG_MTP 0x00000080 /* test point select */ +#define SATURN_PCFG_GMO 0x00000100 /* GMII observe. 1 = + GMII on SERDES pins for + monitoring. */ +#define SATURN_PCFG_FSI 0x00000200 /* 1 = freeze serdes/gmii. all + pins configed as outputs. + for power saving when using + internal phy. */ +#define SATURN_PCFG_LAD 0x00000800 /* 0 = mac core led ctrl + polarity from strapping + value. + 1 = mac core led ctrl + polarity active low. */ + + +/** transmit dma registers **/ +#define MAX_TX_RINGS_SHIFT 2 +#define MAX_TX_RINGS (1 << MAX_TX_RINGS_SHIFT) +#define MAX_TX_RINGS_MASK (MAX_TX_RINGS - 1) + +/* TX configuration. + * descr ring sizes size = 32 * (1 << n), n < 9. e.g., 0x8 = 8k. default: 0x8 + * DEFAULT: 0x3F000001 + */ +#define REG_TX_CFG 0x2004 /* TX config */ +#define TX_CFG_DMA_EN 0x00000001 /* enable TX DMA. if cleared, DMA + will stop after xfer of current + buffer has been completed. */ +#define TX_CFG_FIFO_PIO_SEL 0x00000002 /* TX DMA FIFO can be + accessed w/ FIFO addr + and data registers. + TX DMA should be + disabled. */ +#define TX_CFG_DESC_RING0_MASK 0x0000003C /* # desc entries in + ring 1. */ +#define TX_CFG_DESC_RING0_SHIFT 2 +#define TX_CFG_DESC_RINGN_MASK(a) (TX_CFG_DESC_RING0_MASK << (a)*4) +#define TX_CFG_DESC_RINGN_SHIFT(a) (TX_CFG_DESC_RING0_SHIFT + (a)*4) +#define TX_CFG_PACED_MODE 0x00100000 /* TX_ALL only set after + TX FIFO becomes empty. + if 0, TX_ALL set + if descr queue empty. */ +#define TX_CFG_DMA_RDPIPE_DIS 0x01000000 /* always set to 1 */ +#define TX_CFG_COMPWB_Q1 0x02000000 /* completion writeback happens at + the end of every packet kicked + through Q1. */ +#define TX_CFG_COMPWB_Q2 0x04000000 /* completion writeback happens at + the end of every packet kicked + through Q2. */ +#define TX_CFG_COMPWB_Q3 0x08000000 /* completion writeback happens at + the end of every packet kicked + through Q3 */ +#define TX_CFG_COMPWB_Q4 0x10000000 /* completion writeback happens at + the end of every packet kicked + through Q4 */ +#define TX_CFG_INTR_COMPWB_DIS 0x20000000 /* disable pre-interrupt completion + writeback */ +#define TX_CFG_CTX_SEL_MASK 0xC0000000 /* selects tx test port + connection + 0b00: tx mac req, + tx mac retry req, + tx ack and tx tag. + 0b01: txdma rd req, + txdma rd ack, + txdma rd rdy, + txdma rd type0 + 0b11: txdma wr req, + txdma wr ack, + txdma wr rdy, + txdma wr xfr done. */ +#define TX_CFG_CTX_SEL_SHIFT 30 + +/* 11-bit counters that point to next location in FIFO to be loaded/retrieved. + * used for diagnostics only. + */ +#define REG_TX_FIFO_WRITE_PTR 0x2014 /* TX FIFO write pointer */ +#define REG_TX_FIFO_SHADOW_WRITE_PTR 0x2018 /* TX FIFO shadow write + pointer. temp hold reg. + diagnostics only. */ +#define REG_TX_FIFO_READ_PTR 0x201C /* TX FIFO read pointer */ +#define REG_TX_FIFO_SHADOW_READ_PTR 0x2020 /* TX FIFO shadow read + pointer */ + +/* (ro) 11-bit up/down counter w/ # of frames currently in TX FIFO */ +#define REG_TX_FIFO_PKT_CNT 0x2024 /* TX FIFO packet counter */ + +/* current state of all state machines in TX */ +#define REG_TX_SM_1 0x2028 /* TX state machine reg #1 */ +#define TX_SM_1_CHAIN_MASK 0x000003FF /* chaining state machine */ +#define TX_SM_1_CSUM_MASK 0x00000C00 /* checksum state machine */ +#define TX_SM_1_FIFO_LOAD_MASK 0x0003F000 /* FIFO load state machine. + = 0x01 when TX disabled. */ +#define TX_SM_1_FIFO_UNLOAD_MASK 0x003C0000 /* FIFO unload state machine */ +#define TX_SM_1_CACHE_MASK 0x03C00000 /* desc. prefetch cache controller + state machine */ +#define TX_SM_1_CBQ_ARB_MASK 0xF8000000 /* CBQ arbiter state machine */ + +#define REG_TX_SM_2 0x202C /* TX state machine reg #2 */ +#define TX_SM_2_COMP_WB_MASK 0x07 /* completion writeback sm */ +#define TX_SM_2_SUB_LOAD_MASK 0x38 /* sub load state machine */ +#define TX_SM_2_KICK_MASK 0xC0 /* kick state machine */ + +/* 64-bit pointer to the transmit data buffer. only the 50 LSB are incremented + * while the upper 23 bits are taken from the TX descriptor + */ +#define REG_TX_DATA_PTR_LOW 0x2030 /* TX data pointer low */ +#define REG_TX_DATA_PTR_HI 0x2034 /* TX data pointer high */ + +/* 13 bit registers written by driver w/ descriptor value that follows + * last valid xmit descriptor. kick # and complete # values are used by + * the xmit dma engine to control tx descr fetching. if > 1 valid + * tx descr is available within the cache line being read, cassini will + * internally cache up to 4 of them. 0 on reset. _KICK = rw, _COMP = ro. + */ +#define REG_TX_KICK0 0x2038 /* TX kick reg #1 */ +#define REG_TX_KICKN(x) (REG_TX_KICK0 + (x)*4) +#define REG_TX_COMP0 0x2048 /* TX completion reg #1 */ +#define REG_TX_COMPN(x) (REG_TX_COMP0 + (x)*4) + +/* values of TX_COMPLETE_1-4 are written. each completion register + * is 2bytes in size and contiguous. 8B allocation w/ 8B alignment. + * NOTE: completion reg values are only written back prior to TX_INTME and + * TX_ALL interrupts. at all other times, the most up-to-date index values + * should be obtained from the REG_TX_COMPLETE_# registers. + * here's the layout: + * offset from base addr completion # byte + * 0 TX_COMPLETE_1_MSB + * 1 TX_COMPLETE_1_LSB + * 2 TX_COMPLETE_2_MSB + * 3 TX_COMPLETE_2_LSB + * 4 TX_COMPLETE_3_MSB + * 5 TX_COMPLETE_3_LSB + * 6 TX_COMPLETE_4_MSB + * 7 TX_COMPLETE_4_LSB + */ +#define TX_COMPWB_SIZE 8 +#define REG_TX_COMPWB_DB_LOW 0x2058 /* TX completion write back + base low */ +#define REG_TX_COMPWB_DB_HI 0x205C /* TX completion write back + base high */ +#define TX_COMPWB_MSB_MASK 0x00000000000000FFULL +#define TX_COMPWB_MSB_SHIFT 0 +#define TX_COMPWB_LSB_MASK 0x000000000000FF00ULL +#define TX_COMPWB_LSB_SHIFT 8 +#define TX_COMPWB_NEXT(x) ((x) >> 16) + +/* 53 MSB used as base address. 11 LSB assumed to be 0. TX desc pointer must + * be 2KB-aligned. */ +#define REG_TX_DB0_LOW 0x2060 /* TX descriptor base low #1 */ +#define REG_TX_DB0_HI 0x2064 /* TX descriptor base hi #1 */ +#define REG_TX_DBN_LOW(x) (REG_TX_DB0_LOW + (x)*8) +#define REG_TX_DBN_HI(x) (REG_TX_DB0_HI + (x)*8) + +/* 16-bit registers hold weights for the weighted round-robin of the + * four CBQ TX descr rings. weights correspond to # bytes xferred from + * host to TXFIFO in a round of WRR arbitration. can be set + * dynamically with new weights set upon completion of the current + * packet transfer from host memory to TXFIFO. a dummy write to any of + * these registers causes a queue1 pre-emption with all historical bw + * deficit data reset to 0 (useful when congestion requires a + * pre-emption/re-allocation of network bandwidth + */ +#define REG_TX_MAXBURST_0 0x2080 /* TX MaxBurst #1 */ +#define REG_TX_MAXBURST_1 0x2084 /* TX MaxBurst #2 */ +#define REG_TX_MAXBURST_2 0x2088 /* TX MaxBurst #3 */ +#define REG_TX_MAXBURST_3 0x208C /* TX MaxBurst #4 */ + +/* diagnostics access to any TX FIFO location. every access is 65 + * bits. _DATA_LOW = 32 LSB, _DATA_HI_T1/T0 = 32 MSB. _TAG = tag bit. + * writing _DATA_HI_T0 sets tag bit low, writing _DATA_HI_T1 sets tag + * bit high. TX_FIFO_PIO_SEL must be set for TX FIFO PIO access. if + * TX FIFO data integrity is desired, TX DMA should be + * disabled. _DATA_HI_Tx should be the last access of the sequence. + */ +#define REG_TX_FIFO_ADDR 0x2104 /* TX FIFO address */ +#define REG_TX_FIFO_TAG 0x2108 /* TX FIFO tag */ +#define REG_TX_FIFO_DATA_LOW 0x210C /* TX FIFO data low */ +#define REG_TX_FIFO_DATA_HI_T1 0x2110 /* TX FIFO data high t1 */ +#define REG_TX_FIFO_DATA_HI_T0 0x2114 /* TX FIFO data high t0 */ +#define REG_TX_FIFO_SIZE 0x2118 /* (ro) TX FIFO size = 0x090 = 9KB */ + +/* 9-bit register controls BIST of TX FIFO. bit set indicates that the BIST + * passed for the specified memory + */ +#define REG_TX_RAMBIST 0x211C /* TX RAMBIST control/status */ +#define TX_RAMBIST_STATE 0x01C0 /* progress state of RAMBIST + controller state machine */ +#define TX_RAMBIST_RAM33A_PASS 0x0020 /* RAM33A passed */ +#define TX_RAMBIST_RAM32A_PASS 0x0010 /* RAM32A passed */ +#define TX_RAMBIST_RAM33B_PASS 0x0008 /* RAM33B passed */ +#define TX_RAMBIST_RAM32B_PASS 0x0004 /* RAM32B passed */ +#define TX_RAMBIST_SUMMARY 0x0002 /* all RAM passed */ +#define TX_RAMBIST_START 0x0001 /* write 1 to start BIST. self + clears on completion. */ + +/** receive dma registers **/ +#define MAX_RX_DESC_RINGS 2 +#define MAX_RX_COMP_RINGS 4 + +/* receive DMA channel configuration. default: 0x80910 + * free ring size = (1 << n)*32 -> [32 - 8k] + * completion ring size = (1 << n)*128 -> [128 - 32k], n < 9 + * DEFAULT: 0x80910 + */ +#define REG_RX_CFG 0x4000 /* RX config */ +#define RX_CFG_DMA_EN 0x00000001 /* enable RX DMA. 0 stops + channel as soon as current + frame xfer has completed. + driver should disable MAC + for 200ms before disabling + RX */ +#define RX_CFG_DESC_RING_MASK 0x0000001E /* # desc entries in RX + free desc ring. + def: 0x8 = 8k */ +#define RX_CFG_DESC_RING_SHIFT 1 +#define RX_CFG_COMP_RING_MASK 0x000001E0 /* # desc entries in RX complete + ring. def: 0x8 = 32k */ +#define RX_CFG_COMP_RING_SHIFT 5 +#define RX_CFG_BATCH_DIS 0x00000200 /* disable receive desc + batching. def: 0x0 = + enabled */ +#define RX_CFG_SWIVEL_MASK 0x00001C00 /* byte offset of the 1st + data byte of the packet + w/in 8 byte boundares. + this swivels the data + DMA'ed to header + buffers, jumbo buffers + when header split is not + requested and MTU sized + buffers. def: 0x2 */ +#define RX_CFG_SWIVEL_SHIFT 10 + +/* cassini+ only */ +#define RX_CFG_DESC_RING1_MASK 0x000F0000 /* # of desc entries in + RX free desc ring 2. + def: 0x8 = 8k */ +#define RX_CFG_DESC_RING1_SHIFT 16 + + +/* the page size register allows cassini chips to do the following with + * received data: + * [--------------------------------------------------------------] page + * [off][buf1][pad][off][buf2][pad][off][buf3][pad][off][buf4][pad] + * |--------------| = PAGE_SIZE_BUFFER_STRIDE + * page = PAGE_SIZE + * offset = PAGE_SIZE_MTU_OFF + * for the above example, MTU_BUFFER_COUNT = 4. + * NOTE: as is apparent, you need to ensure that the following holds: + * MTU_BUFFER_COUNT <= PAGE_SIZE/PAGE_SIZE_BUFFER_STRIDE + * DEFAULT: 0x48002002 (8k pages) + */ +#define REG_RX_PAGE_SIZE 0x4004 /* RX page size */ +#define RX_PAGE_SIZE_MASK 0x00000003 /* size of pages pointed to + by receive descriptors. + if jumbo buffers are + supported the page size + should not be < 8k. + 0b00 = 2k, 0b01 = 4k + 0b10 = 8k, 0b11 = 16k + DEFAULT: 8k */ +#define RX_PAGE_SIZE_SHIFT 0 +#define RX_PAGE_SIZE_MTU_COUNT_MASK 0x00007800 /* # of MTU buffers the hw + packs into a page. + DEFAULT: 4 */ +#define RX_PAGE_SIZE_MTU_COUNT_SHIFT 11 +#define RX_PAGE_SIZE_MTU_STRIDE_MASK 0x18000000 /* # of bytes that separate + each MTU buffer + + offset from each + other. + 0b00 = 1k, 0b01 = 2k + 0b10 = 4k, 0b11 = 8k + DEFAULT: 0x1 */ +#define RX_PAGE_SIZE_MTU_STRIDE_SHIFT 27 +#define RX_PAGE_SIZE_MTU_OFF_MASK 0xC0000000 /* offset in each page that + hw writes the MTU buffer + into. + 0b00 = 0, + 0b01 = 64 bytes + 0b10 = 96, 0b11 = 128 + DEFAULT: 0x1 */ +#define RX_PAGE_SIZE_MTU_OFF_SHIFT 30 + +/* 11-bit counter points to next location in RX FIFO to be loaded/read. + * shadow write pointers enable retries in case of early receive aborts. + * DEFAULT: 0x0. generated on 64-bit boundaries. + */ +#define REG_RX_FIFO_WRITE_PTR 0x4008 /* RX FIFO write pointer */ +#define REG_RX_FIFO_READ_PTR 0x400C /* RX FIFO read pointer */ +#define REG_RX_IPP_FIFO_SHADOW_WRITE_PTR 0x4010 /* RX IPP FIFO shadow write + pointer */ +#define REG_RX_IPP_FIFO_SHADOW_READ_PTR 0x4014 /* RX IPP FIFO shadow read + pointer */ +#define REG_RX_IPP_FIFO_READ_PTR 0x400C /* RX IPP FIFO read + pointer. (8-bit counter) */ + +/* current state of RX DMA state engines + other info + * DEFAULT: 0x0 + */ +#define REG_RX_DEBUG 0x401C /* RX debug */ +#define RX_DEBUG_LOAD_STATE_MASK 0x0000000F /* load state machine w/ MAC: + 0x0 = idle, 0x1 = load_bop + 0x2 = load 1, 0x3 = load 2 + 0x4 = load 3, 0x5 = load 4 + 0x6 = last detect + 0x7 = wait req + 0x8 = wait req statuss 1st + 0x9 = load st + 0xa = bubble mac + 0xb = error */ +#define RX_DEBUG_LM_STATE_MASK 0x00000070 /* load state machine w/ HP and + RX FIFO: + 0x0 = idle, 0x1 = hp xfr + 0x2 = wait hp ready + 0x3 = wait flow code + 0x4 = fifo xfer + 0x5 = make status + 0x6 = csum ready + 0x7 = error */ +#define RX_DEBUG_FC_STATE_MASK 0x000000180 /* flow control state machine + w/ MAC: + 0x0 = idle + 0x1 = wait xoff ack + 0x2 = wait xon + 0x3 = wait xon ack */ +#define RX_DEBUG_DATA_STATE_MASK 0x000001E00 /* unload data state machine + states: + 0x0 = idle data + 0x1 = header begin + 0x2 = xfer header + 0x3 = xfer header ld + 0x4 = mtu begin + 0x5 = xfer mtu + 0x6 = xfer mtu ld + 0x7 = jumbo begin + 0x8 = xfer jumbo + 0x9 = xfer jumbo ld + 0xa = reas begin + 0xb = xfer reas + 0xc = flush tag + 0xd = xfer reas ld + 0xe = error + 0xf = bubble idle */ +#define RX_DEBUG_DESC_STATE_MASK 0x0001E000 /* unload desc state machine + states: + 0x0 = idle desc + 0x1 = wait ack + 0x9 = wait ack 2 + 0x2 = fetch desc 1 + 0xa = fetch desc 2 + 0x3 = load ptrs + 0x4 = wait dma + 0x5 = wait ack batch + 0x6 = post batch + 0x7 = xfr done */ +#define RX_DEBUG_INTR_READ_PTR_MASK 0x30000000 /* interrupt read ptr of the + interrupt queue */ +#define RX_DEBUG_INTR_WRITE_PTR_MASK 0xC0000000 /* interrupt write pointer + of the interrupt queue */ + +/* flow control frames are emmitted using two PAUSE thresholds: + * XOFF PAUSE uses pause time value pre-programmed in the Send PAUSE MAC reg + * XON PAUSE uses a pause time of 0. granularity of threshold is 64bytes. + * PAUSE thresholds defined in terms of FIFO occupancy and may be translated + * into FIFO vacancy using RX_FIFO_SIZE. setting ON will trigger XON frames + * when FIFO reaches 0. OFF threshold should not be > size of RX FIFO. max + * value is is 0x6F. + * DEFAULT: 0x00078 + */ +#define REG_RX_PAUSE_THRESH 0x4020 /* RX pause thresholds */ +#define RX_PAUSE_THRESH_QUANTUM 64 +#define RX_PAUSE_THRESH_OFF_MASK 0x000001FF /* XOFF PAUSE emitted when + RX FIFO occupancy > + value*64B */ +#define RX_PAUSE_THRESH_OFF_SHIFT 0 +#define RX_PAUSE_THRESH_ON_MASK 0x001FF000 /* XON PAUSE emitted after + emitting XOFF PAUSE when RX + FIFO occupancy falls below + this value*64B. must be + < XOFF threshold. if = + RX_FIFO_SIZE< XON frames are + never emitted. */ +#define RX_PAUSE_THRESH_ON_SHIFT 12 + +/* 13-bit register used to control RX desc fetching and intr generation. if 4+ + * valid RX descriptors are available, Cassini will read 4 at a time. + * writing N means that all desc up to *but* excluding N are available. N must + * be a multiple of 4 (N % 4 = 0). first desc should be cache-line aligned. + * DEFAULT: 0 on reset + */ +#define REG_RX_KICK 0x4024 /* RX kick reg */ + +/* 8KB aligned 64-bit pointer to the base of the RX free/completion rings. + * lower 13 bits of the low register are hard-wired to 0. + */ +#define REG_RX_DB_LOW 0x4028 /* RX descriptor ring + base low */ +#define REG_RX_DB_HI 0x402C /* RX descriptor ring + base hi */ +#define REG_RX_CB_LOW 0x4030 /* RX completion ring + base low */ +#define REG_RX_CB_HI 0x4034 /* RX completion ring + base hi */ +/* 13-bit register indicate desc used by cassini for receive frames. used + * for diagnostic purposes. + * DEFAULT: 0 on reset + */ +#define REG_RX_COMP 0x4038 /* (ro) RX completion */ + +/* HEAD and TAIL are used to control RX desc posting and interrupt + * generation. hw moves the head register to pass ownership to sw. sw + * moves the tail register to pass ownership back to hw. to give all + * entries to hw, set TAIL = HEAD. if HEAD and TAIL indicate that no + * more entries are available, DMA will pause and an interrupt will be + * generated to indicate no more entries are available. sw can use + * this interrupt to reduce the # of times it must update the + * completion tail register. + * DEFAULT: 0 on reset + */ +#define REG_RX_COMP_HEAD 0x403C /* RX completion head */ +#define REG_RX_COMP_TAIL 0x4040 /* RX completion tail */ + +/* values used for receive interrupt blanking. loaded each time the ISR is read + * DEFAULT: 0x00000000 + */ +#define REG_RX_BLANK 0x4044 /* RX blanking register + for ISR read */ +#define RX_BLANK_INTR_PKT_MASK 0x000001FF /* RX_DONE intr asserted if + this many sets of completion + writebacks (up to 2 packets) + occur since the last time + the ISR was read. 0 = no + packet blanking */ +#define RX_BLANK_INTR_PKT_SHIFT 0 +#define RX_BLANK_INTR_TIME_MASK 0x3FFFF000 /* RX_DONE interrupt asserted + if that many clocks were + counted since last time the + ISR was read. + each count is 512 core + clocks (125MHz). 0 = no + time blanking */ +#define RX_BLANK_INTR_TIME_SHIFT 12 + +/* values used for interrupt generation based on threshold values of how + * many free desc and completion entries are available for hw use. + * DEFAULT: 0x00000000 + */ +#define REG_RX_AE_THRESH 0x4048 /* RX almost empty + thresholds */ +#define RX_AE_THRESH_FREE_MASK 0x00001FFF /* RX_BUF_AE will be + generated if # desc + avail for hw use <= + # */ +#define RX_AE_THRESH_FREE_SHIFT 0 +#define RX_AE_THRESH_COMP_MASK 0x0FFFE000 /* RX_COMP_AE will be + generated if # of + completion entries + avail for hw use <= + # */ +#define RX_AE_THRESH_COMP_SHIFT 13 + +/* probabilities for random early drop (RED) thresholds on a FIFO threshold + * basis. probability should increase when the FIFO level increases. control + * packets are never dropped and not counted in stats. probability programmed + * on a 12.5% granularity. e.g., 0x1 = 1/8 packets dropped. + * DEFAULT: 0x00000000 + */ +#define REG_RX_RED 0x404C /* RX random early detect enable */ +#define RX_RED_4K_6K_FIFO_MASK 0x000000FF /* 4KB < FIFO thresh < 6KB */ +#define RX_RED_6K_8K_FIFO_MASK 0x0000FF00 /* 6KB < FIFO thresh < 8KB */ +#define RX_RED_8K_10K_FIFO_MASK 0x00FF0000 /* 8KB < FIFO thresh < 10KB */ +#define RX_RED_10K_12K_FIFO_MASK 0xFF000000 /* 10KB < FIFO thresh < 12KB */ + +/* FIFO fullness levels for RX FIFO, RX control FIFO, and RX IPP FIFO. + * RX control FIFO = # of packets in RX FIFO. + * DEFAULT: 0x0 + */ +#define REG_RX_FIFO_FULLNESS 0x4050 /* (ro) RX FIFO fullness */ +#define RX_FIFO_FULLNESS_RX_FIFO_MASK 0x3FF80000 /* level w/ 8B granularity */ +#define RX_FIFO_FULLNESS_IPP_FIFO_MASK 0x0007FF00 /* level w/ 8B granularity */ +#define RX_FIFO_FULLNESS_RX_PKT_MASK 0x000000FF /* # packets in RX FIFO */ +#define REG_RX_IPP_PACKET_COUNT 0x4054 /* RX IPP packet counter */ +#define REG_RX_WORK_DMA_PTR_LOW 0x4058 /* RX working DMA ptr low */ +#define REG_RX_WORK_DMA_PTR_HI 0x405C /* RX working DMA ptr + high */ + +/* BIST testing ro RX FIFO, RX control FIFO, and RX IPP FIFO. only RX BIST + * START/COMPLETE is writeable. START will clear when the BIST has completed + * checking all 17 RAMS. + * DEFAULT: 0bxxxx xxxxx xxxx xxxx xxxx x000 0000 0000 00x0 + */ +#define REG_RX_BIST 0x4060 /* (ro) RX BIST */ +#define RX_BIST_32A_PASS 0x80000000 /* RX FIFO 32A passed */ +#define RX_BIST_33A_PASS 0x40000000 /* RX FIFO 33A passed */ +#define RX_BIST_32B_PASS 0x20000000 /* RX FIFO 32B passed */ +#define RX_BIST_33B_PASS 0x10000000 /* RX FIFO 33B passed */ +#define RX_BIST_32C_PASS 0x08000000 /* RX FIFO 32C passed */ +#define RX_BIST_33C_PASS 0x04000000 /* RX FIFO 33C passed */ +#define RX_BIST_IPP_32A_PASS 0x02000000 /* RX IPP FIFO 33B passed */ +#define RX_BIST_IPP_33A_PASS 0x01000000 /* RX IPP FIFO 33A passed */ +#define RX_BIST_IPP_32B_PASS 0x00800000 /* RX IPP FIFO 32B passed */ +#define RX_BIST_IPP_33B_PASS 0x00400000 /* RX IPP FIFO 33B passed */ +#define RX_BIST_IPP_32C_PASS 0x00200000 /* RX IPP FIFO 32C passed */ +#define RX_BIST_IPP_33C_PASS 0x00100000 /* RX IPP FIFO 33C passed */ +#define RX_BIST_CTRL_32_PASS 0x00800000 /* RX CTRL FIFO 32 passed */ +#define RX_BIST_CTRL_33_PASS 0x00400000 /* RX CTRL FIFO 33 passed */ +#define RX_BIST_REAS_26A_PASS 0x00200000 /* RX Reas 26A passed */ +#define RX_BIST_REAS_26B_PASS 0x00100000 /* RX Reas 26B passed */ +#define RX_BIST_REAS_27_PASS 0x00080000 /* RX Reas 27 passed */ +#define RX_BIST_STATE_MASK 0x00078000 /* BIST state machine */ +#define RX_BIST_SUMMARY 0x00000002 /* when BIST complete, + summary pass bit + contains AND of BIST + results of all 16 + RAMS */ +#define RX_BIST_START 0x00000001 /* write 1 to start + BIST. self clears + on completion. */ + +/* next location in RX CTRL FIFO that will be loaded w/ data from RX IPP/read + * from to retrieve packet control info. + * DEFAULT: 0 + */ +#define REG_RX_CTRL_FIFO_WRITE_PTR 0x4064 /* (ro) RX control FIFO + write ptr */ +#define REG_RX_CTRL_FIFO_READ_PTR 0x4068 /* (ro) RX control FIFO read + ptr */ + +/* receive interrupt blanking. loaded each time interrupt alias register is + * read. + * DEFAULT: 0x0 + */ +#define REG_RX_BLANK_ALIAS_READ 0x406C /* RX blanking register for + alias read */ +#define RX_BAR_INTR_PACKET_MASK 0x000001FF /* assert RX_DONE if # + completion writebacks + > # since last ISR + read. 0 = no + blanking. up to 2 + packets per + completion wb. */ +#define RX_BAR_INTR_TIME_MASK 0x3FFFF000 /* assert RX_DONE if # + clocks > # since last + ISR read. each count + is 512 core clocks + (125MHz). 0 = no + blanking. */ + +/* diagnostic access to RX FIFO. 32 LSB accessed via DATA_LOW. 32 MSB accessed + * via DATA_HI_T0 or DATA_HI_T1. TAG reads the tag bit. writing HI_T0 + * will unset the tag bit while writing HI_T1 will set the tag bit. to reset + * to normal operation after diagnostics, write to address location 0x0. + * RX_DMA_EN bit must be set to 0x0 for RX FIFO PIO access. DATA_HI should + * be the last write access of a write sequence. + * DEFAULT: undefined + */ +#define REG_RX_FIFO_ADDR 0x4080 /* RX FIFO address */ +#define REG_RX_FIFO_TAG 0x4084 /* RX FIFO tag */ +#define REG_RX_FIFO_DATA_LOW 0x4088 /* RX FIFO data low */ +#define REG_RX_FIFO_DATA_HI_T0 0x408C /* RX FIFO data high T0 */ +#define REG_RX_FIFO_DATA_HI_T1 0x4090 /* RX FIFO data high T1 */ + +/* diagnostic assess to RX CTRL FIFO. 8-bit FIFO_ADDR holds address of + * 81 bit control entry and 6 bit flow id. LOW and MID are both 32-bit + * accesses. HI is 7-bits with 6-bit flow id and 1 bit control + * word. RX_DMA_EN must be 0 for RX CTRL FIFO PIO access. DATA_HI + * should be last write access of the write sequence. + * DEFAULT: undefined + */ +#define REG_RX_CTRL_FIFO_ADDR 0x4094 /* RX Control FIFO and + Batching FIFO addr */ +#define REG_RX_CTRL_FIFO_DATA_LOW 0x4098 /* RX Control FIFO data + low */ +#define REG_RX_CTRL_FIFO_DATA_MID 0x409C /* RX Control FIFO data + mid */ +#define REG_RX_CTRL_FIFO_DATA_HI 0x4100 /* RX Control FIFO data + hi and flow id */ +#define RX_CTRL_FIFO_DATA_HI_CTRL 0x0001 /* upper bit of ctrl word */ +#define RX_CTRL_FIFO_DATA_HI_FLOW_MASK 0x007E /* flow id */ + +/* diagnostic access to RX IPP FIFO. same semantics as RX_FIFO. + * DEFAULT: undefined + */ +#define REG_RX_IPP_FIFO_ADDR 0x4104 /* RX IPP FIFO address */ +#define REG_RX_IPP_FIFO_TAG 0x4108 /* RX IPP FIFO tag */ +#define REG_RX_IPP_FIFO_DATA_LOW 0x410C /* RX IPP FIFO data low */ +#define REG_RX_IPP_FIFO_DATA_HI_T0 0x4110 /* RX IPP FIFO data high + T0 */ +#define REG_RX_IPP_FIFO_DATA_HI_T1 0x4114 /* RX IPP FIFO data high + T1 */ + +/* 64-bit pointer to receive data buffer in host memory used for headers and + * small packets. MSB in high register. loaded by DMA state machine and + * increments as DMA writes receive data. only 50 LSB are incremented. top + * 13 bits taken from RX descriptor. + * DEFAULT: undefined + */ +#define REG_RX_HEADER_PAGE_PTR_LOW 0x4118 /* (ro) RX header page ptr + low */ +#define REG_RX_HEADER_PAGE_PTR_HI 0x411C /* (ro) RX header page ptr + high */ +#define REG_RX_MTU_PAGE_PTR_LOW 0x4120 /* (ro) RX MTU page pointer + low */ +#define REG_RX_MTU_PAGE_PTR_HI 0x4124 /* (ro) RX MTU page pointer + high */ + +/* PIO diagnostic access to RX reassembly DMA Table RAM. 6-bit register holds + * one of 64 79-bit locations in the RX Reassembly DMA table and the addr of + * one of the 64 byte locations in the Batching table. LOW holds 32 LSB. + * MID holds the next 32 LSB. HIGH holds the 15 MSB. RX_DMA_EN must be set + * to 0 for PIO access. DATA_HIGH should be last write of write sequence. + * layout: + * reassmbl ptr [78:15] | reassmbl index [14:1] | reassmbl entry valid [0] + * DEFAULT: undefined + */ +#define REG_RX_TABLE_ADDR 0x4128 /* RX reassembly DMA table + address */ +#define RX_TABLE_ADDR_MASK 0x0000003F /* address mask */ + +#define REG_RX_TABLE_DATA_LOW 0x412C /* RX reassembly DMA table + data low */ +#define REG_RX_TABLE_DATA_MID 0x4130 /* RX reassembly DMA table + data mid */ +#define REG_RX_TABLE_DATA_HI 0x4134 /* RX reassembly DMA table + data high */ + +/* cassini+ only */ +/* 8KB aligned 64-bit pointer to base of RX rings. lower 13 bits hardwired to + * 0. same semantics as primary desc/complete rings. + */ +#define REG_PLUS_RX_DB1_LOW 0x4200 /* RX descriptor ring + 2 base low */ +#define REG_PLUS_RX_DB1_HI 0x4204 /* RX descriptor ring + 2 base high */ +#define REG_PLUS_RX_CB1_LOW 0x4208 /* RX completion ring + 2 base low. 4 total */ +#define REG_PLUS_RX_CB1_HI 0x420C /* RX completion ring + 2 base high. 4 total */ +#define REG_PLUS_RX_CBN_LOW(x) (REG_PLUS_RX_CB1_LOW + 8*((x) - 1)) +#define REG_PLUS_RX_CBN_HI(x) (REG_PLUS_RX_CB1_HI + 8*((x) - 1)) +#define REG_PLUS_RX_KICK1 0x4220 /* RX Kick 2 register */ +#define REG_PLUS_RX_COMP1 0x4224 /* (ro) RX completion 2 + reg */ +#define REG_PLUS_RX_COMP1_HEAD 0x4228 /* (ro) RX completion 2 + head reg. 4 total. */ +#define REG_PLUS_RX_COMP1_TAIL 0x422C /* RX completion 2 + tail reg. 4 total. */ +#define REG_PLUS_RX_COMPN_HEAD(x) (REG_PLUS_RX_COMP1_HEAD + 8*((x) - 1)) +#define REG_PLUS_RX_COMPN_TAIL(x) (REG_PLUS_RX_COMP1_TAIL + 8*((x) - 1)) +#define REG_PLUS_RX_AE1_THRESH 0x4240 /* RX almost empty 2 + thresholds */ +#define RX_AE1_THRESH_FREE_MASK RX_AE_THRESH_FREE_MASK +#define RX_AE1_THRESH_FREE_SHIFT RX_AE_THRESH_FREE_SHIFT + +/** header parser registers **/ + +/* RX parser configuration register. + * DEFAULT: 0x1651004 + */ +#define REG_HP_CFG 0x4140 /* header parser + configuration reg */ +#define HP_CFG_PARSE_EN 0x00000001 /* enab header parsing */ +#define HP_CFG_NUM_CPU_MASK 0x000000FC /* # processors + 0 = 64. 0x3f = 63 */ +#define HP_CFG_NUM_CPU_SHIFT 2 +#define HP_CFG_SYN_INC_MASK 0x00000100 /* SYN bit won't increment + TCP seq # by one when + stored in FDBM */ +#define HP_CFG_TCP_THRESH_MASK 0x000FFE00 /* # bytes of TCP data + needed to be considered + for reassembly */ +#define HP_CFG_TCP_THRESH_SHIFT 9 + +/* access to RX Instruction RAM. 5-bit register/counter holds addr + * of 39 bit entry to be read/written. 32 LSB in _DATA_LOW. 7 MSB in _DATA_HI. + * RX_DMA_EN must be 0 for RX instr PIO access. DATA_HI should be last access + * of sequence. + * DEFAULT: undefined + */ +#define REG_HP_INSTR_RAM_ADDR 0x4144 /* HP instruction RAM + address */ +#define HP_INSTR_RAM_ADDR_MASK 0x01F /* 5-bit mask */ +#define REG_HP_INSTR_RAM_DATA_LOW 0x4148 /* HP instruction RAM + data low */ +#define HP_INSTR_RAM_LOW_OUTMASK_MASK 0x0000FFFF +#define HP_INSTR_RAM_LOW_OUTMASK_SHIFT 0 +#define HP_INSTR_RAM_LOW_OUTSHIFT_MASK 0x000F0000 +#define HP_INSTR_RAM_LOW_OUTSHIFT_SHIFT 16 +#define HP_INSTR_RAM_LOW_OUTEN_MASK 0x00300000 +#define HP_INSTR_RAM_LOW_OUTEN_SHIFT 20 +#define HP_INSTR_RAM_LOW_OUTARG_MASK 0xFFC00000 +#define HP_INSTR_RAM_LOW_OUTARG_SHIFT 22 +#define REG_HP_INSTR_RAM_DATA_MID 0x414C /* HP instruction RAM + data mid */ +#define HP_INSTR_RAM_MID_OUTARG_MASK 0x00000003 +#define HP_INSTR_RAM_MID_OUTARG_SHIFT 0 +#define HP_INSTR_RAM_MID_OUTOP_MASK 0x0000003C +#define HP_INSTR_RAM_MID_OUTOP_SHIFT 2 +#define HP_INSTR_RAM_MID_FNEXT_MASK 0x000007C0 +#define HP_INSTR_RAM_MID_FNEXT_SHIFT 6 +#define HP_INSTR_RAM_MID_FOFF_MASK 0x0003F800 +#define HP_INSTR_RAM_MID_FOFF_SHIFT 11 +#define HP_INSTR_RAM_MID_SNEXT_MASK 0x007C0000 +#define HP_INSTR_RAM_MID_SNEXT_SHIFT 18 +#define HP_INSTR_RAM_MID_SOFF_MASK 0x3F800000 +#define HP_INSTR_RAM_MID_SOFF_SHIFT 23 +#define HP_INSTR_RAM_MID_OP_MASK 0xC0000000 +#define HP_INSTR_RAM_MID_OP_SHIFT 30 +#define REG_HP_INSTR_RAM_DATA_HI 0x4150 /* HP instruction RAM + data high */ +#define HP_INSTR_RAM_HI_VAL_MASK 0x0000FFFF +#define HP_INSTR_RAM_HI_VAL_SHIFT 0 +#define HP_INSTR_RAM_HI_MASK_MASK 0xFFFF0000 +#define HP_INSTR_RAM_HI_MASK_SHIFT 16 + +/* PIO access into RX Header parser data RAM and flow database. + * 11-bit register. Data fills the LSB portion of bus if less than 32 bits. + * DATA_RAM: write RAM_FDB_DATA with index to access DATA_RAM. + * RAM bytes = 4*(x - 1) + [3:0]. e.g., 0 -> [3:0], 31 -> [123:120] + * FLOWDB: write DATA_RAM_FDB register and then read/write FDB1-12 to access + * flow database. + * RX_DMA_EN must be 0 for RX parser RAM PIO access. RX Parser RAM data reg + * should be the last write access of the write sequence. + * DEFAULT: undefined + */ +#define REG_HP_DATA_RAM_FDB_ADDR 0x4154 /* HP data and FDB + RAM address */ +#define HP_DATA_RAM_FDB_DATA_MASK 0x001F /* select 1 of 86 byte + locations in header + parser data ram to + read/write */ +#define HP_DATA_RAM_FDB_FDB_MASK 0x3F00 /* 1 of 64 353-bit locations + in the flow database */ +#define REG_HP_DATA_RAM_DATA 0x4158 /* HP data RAM data */ + +/* HP flow database registers: 1 - 12, 0x415C - 0x4188, 4 8-bit bytes + * FLOW_DB(1) = IP_SA[127:96], FLOW_DB(2) = IP_SA[95:64] + * FLOW_DB(3) = IP_SA[63:32], FLOW_DB(4) = IP_SA[31:0] + * FLOW_DB(5) = IP_DA[127:96], FLOW_DB(6) = IP_DA[95:64] + * FLOW_DB(7) = IP_DA[63:32], FLOW_DB(8) = IP_DA[31:0] + * FLOW_DB(9) = {TCP_SP[15:0],TCP_DP[15:0]} + * FLOW_DB(10) = bit 0 has value for flow valid + * FLOW_DB(11) = TCP_SEQ[63:32], FLOW_DB(12) = TCP_SEQ[31:0] + */ +#define REG_HP_FLOW_DB0 0x415C /* HP flow database 1 reg */ +#define REG_HP_FLOW_DBN(x) (REG_HP_FLOW_DB0 + (x)*4) + +/* diagnostics for RX Header Parser block. + * ASUN: the header parser state machine register is used for diagnostics + * purposes. however, the spec doesn't have any details on it. + */ +#define REG_HP_STATE_MACHINE 0x418C /* (ro) HP state machine */ +#define REG_HP_STATUS0 0x4190 /* (ro) HP status 1 */ +#define HP_STATUS0_SAP_MASK 0xFFFF0000 /* SAP */ +#define HP_STATUS0_L3_OFF_MASK 0x0000FE00 /* L3 offset */ +#define HP_STATUS0_LB_CPUNUM_MASK 0x000001F8 /* load balancing CPU + number */ +#define HP_STATUS0_HRP_OPCODE_MASK 0x00000007 /* HRP opcode */ + +#define REG_HP_STATUS1 0x4194 /* (ro) HP status 2 */ +#define HP_STATUS1_ACCUR2_MASK 0xE0000000 /* accu R2[6:4] */ +#define HP_STATUS1_FLOWID_MASK 0x1F800000 /* flow id */ +#define HP_STATUS1_TCP_OFF_MASK 0x007F0000 /* tcp payload offset */ +#define HP_STATUS1_TCP_SIZE_MASK 0x0000FFFF /* tcp payload size */ + +#define REG_HP_STATUS2 0x4198 /* (ro) HP status 3 */ +#define HP_STATUS2_ACCUR2_MASK 0xF0000000 /* accu R2[3:0] */ +#define HP_STATUS2_CSUM_OFF_MASK 0x07F00000 /* checksum start + start offset */ +#define HP_STATUS2_ACCUR1_MASK 0x000FE000 /* accu R1 */ +#define HP_STATUS2_FORCE_DROP 0x00001000 /* force drop */ +#define HP_STATUS2_BWO_REASSM 0x00000800 /* batching w/o + reassembly */ +#define HP_STATUS2_JH_SPLIT_EN 0x00000400 /* jumbo header split + enable */ +#define HP_STATUS2_FORCE_TCP_NOCHECK 0x00000200 /* force tcp no payload + check */ +#define HP_STATUS2_DATA_MASK_ZERO 0x00000100 /* mask of data length + equal to zero */ +#define HP_STATUS2_FORCE_TCP_CHECK 0x00000080 /* force tcp payload + chk */ +#define HP_STATUS2_MASK_TCP_THRESH 0x00000040 /* mask of payload + threshold */ +#define HP_STATUS2_NO_ASSIST 0x00000020 /* no assist */ +#define HP_STATUS2_CTRL_PACKET_FLAG 0x00000010 /* control packet flag */ +#define HP_STATUS2_TCP_FLAG_CHECK 0x00000008 /* tcp flag check */ +#define HP_STATUS2_SYN_FLAG 0x00000004 /* syn flag */ +#define HP_STATUS2_TCP_CHECK 0x00000002 /* tcp payload chk */ +#define HP_STATUS2_TCP_NOCHECK 0x00000001 /* tcp no payload chk */ + +/* BIST for header parser(HP) and flow database memories (FDBM). set _START + * to start BIST. controller clears _START on completion. _START can also + * be cleared to force termination of BIST. a bit set indicates that that + * memory passed its BIST. + */ +#define REG_HP_RAM_BIST 0x419C /* HP RAM BIST reg */ +#define HP_RAM_BIST_HP_DATA_PASS 0x80000000 /* HP data ram */ +#define HP_RAM_BIST_HP_INSTR0_PASS 0x40000000 /* HP instr ram 0 */ +#define HP_RAM_BIST_HP_INSTR1_PASS 0x20000000 /* HP instr ram 1 */ +#define HP_RAM_BIST_HP_INSTR2_PASS 0x10000000 /* HP instr ram 2 */ +#define HP_RAM_BIST_FDBM_AGE0_PASS 0x08000000 /* FDBM aging RAM0 */ +#define HP_RAM_BIST_FDBM_AGE1_PASS 0x04000000 /* FDBM aging RAM1 */ +#define HP_RAM_BIST_FDBM_FLOWID00_PASS 0x02000000 /* FDBM flowid RAM0 + bank 0 */ +#define HP_RAM_BIST_FDBM_FLOWID10_PASS 0x01000000 /* FDBM flowid RAM1 + bank 0 */ +#define HP_RAM_BIST_FDBM_FLOWID20_PASS 0x00800000 /* FDBM flowid RAM2 + bank 0 */ +#define HP_RAM_BIST_FDBM_FLOWID30_PASS 0x00400000 /* FDBM flowid RAM3 + bank 0 */ +#define HP_RAM_BIST_FDBM_FLOWID01_PASS 0x00200000 /* FDBM flowid RAM0 + bank 1 */ +#define HP_RAM_BIST_FDBM_FLOWID11_PASS 0x00100000 /* FDBM flowid RAM1 + bank 2 */ +#define HP_RAM_BIST_FDBM_FLOWID21_PASS 0x00080000 /* FDBM flowid RAM2 + bank 1 */ +#define HP_RAM_BIST_FDBM_FLOWID31_PASS 0x00040000 /* FDBM flowid RAM3 + bank 1 */ +#define HP_RAM_BIST_FDBM_TCPSEQ_PASS 0x00020000 /* FDBM tcp sequence + RAM */ +#define HP_RAM_BIST_SUMMARY 0x00000002 /* all BIST tests */ +#define HP_RAM_BIST_START 0x00000001 /* start/stop BIST */ + + +/** MAC registers. **/ +/* reset bits are set using a PIO write and self-cleared after the command + * execution has completed. + */ +#define REG_MAC_TX_RESET 0x6000 /* TX MAC software reset + command (default: 0x0) */ +#define REG_MAC_RX_RESET 0x6004 /* RX MAC software reset + command (default: 0x0) */ +/* execute a pause flow control frame transmission + DEFAULT: 0x0XXXX */ +#define REG_MAC_SEND_PAUSE 0x6008 /* send pause command reg */ +#define MAC_SEND_PAUSE_TIME_MASK 0x0000FFFF /* value of pause time + to be sent on network + in units of slot + times */ +#define MAC_SEND_PAUSE_SEND 0x00010000 /* send pause flow ctrl + frame on network */ + +/* bit set indicates that event occurred. auto-cleared when status register + * is read and have corresponding mask bits in mask register. events will + * trigger an interrupt if the corresponding mask bit is 0. + * status register default: 0x00000000 + * mask register default = 0xFFFFFFFF on reset + */ +#define REG_MAC_TX_STATUS 0x6010 /* TX MAC status reg */ +#define MAC_TX_FRAME_XMIT 0x0001 /* successful frame + transmision */ +#define MAC_TX_UNDERRUN 0x0002 /* terminated frame + transmission due to + data starvation in the + xmit data path */ +#define MAC_TX_MAX_PACKET_ERR 0x0004 /* frame exceeds max allowed + length passed to TX MAC + by the DMA engine */ +#define MAC_TX_COLL_NORMAL 0x0008 /* rollover of the normal + collision counter */ +#define MAC_TX_COLL_EXCESS 0x0010 /* rollover of the excessive + collision counter */ +#define MAC_TX_COLL_LATE 0x0020 /* rollover of the late + collision counter */ +#define MAC_TX_COLL_FIRST 0x0040 /* rollover of the first + collision counter */ +#define MAC_TX_DEFER_TIMER 0x0080 /* rollover of the defer + timer */ +#define MAC_TX_PEAK_ATTEMPTS 0x0100 /* rollover of the peak + attempts counter */ + +#define REG_MAC_RX_STATUS 0x6014 /* RX MAC status reg */ +#define MAC_RX_FRAME_RECV 0x0001 /* successful receipt of + a frame */ +#define MAC_RX_OVERFLOW 0x0002 /* dropped frame due to + RX FIFO overflow */ +#define MAC_RX_FRAME_COUNT 0x0004 /* rollover of receive frame + counter */ +#define MAC_RX_ALIGN_ERR 0x0008 /* rollover of alignment + error counter */ +#define MAC_RX_CRC_ERR 0x0010 /* rollover of crc error + counter */ +#define MAC_RX_LEN_ERR 0x0020 /* rollover of length + error counter */ +#define MAC_RX_VIOL_ERR 0x0040 /* rollover of code + violation error */ + +/* DEFAULT: 0xXXXX0000 on reset */ +#define REG_MAC_CTRL_STATUS 0x6018 /* MAC control status reg */ +#define MAC_CTRL_PAUSE_RECEIVED 0x00000001 /* successful + reception of a + pause control + frame */ +#define MAC_CTRL_PAUSE_STATE 0x00000002 /* MAC has made a + transition from + "not paused" to + "paused" */ +#define MAC_CTRL_NOPAUSE_STATE 0x00000004 /* MAC has made a + transition from + "paused" to "not + paused" */ +#define MAC_CTRL_PAUSE_TIME_MASK 0xFFFF0000 /* value of pause time + operand that was + received in the last + pause flow control + frame */ + +/* layout identical to TX MAC[8:0] */ +#define REG_MAC_TX_MASK 0x6020 /* TX MAC mask reg */ +/* layout identical to RX MAC[6:0] */ +#define REG_MAC_RX_MASK 0x6024 /* RX MAC mask reg */ +/* layout identical to CTRL MAC[2:0] */ +#define REG_MAC_CTRL_MASK 0x6028 /* MAC control mask reg */ + +/* to ensure proper operation, CFG_EN must be cleared to 0 and a delay + * imposed before writes to other bits in the TX_MAC_CFG register or any of + * the MAC parameters is performed. delay dependent upon time required to + * transmit a maximum size frame (= MAC_FRAMESIZE_MAX*8/Mbps). e.g., + * the delay for a 1518-byte frame on a 100Mbps network is 125us. + * alternatively, just poll TX_CFG_EN until it reads back as 0. + * NOTE: on half-duplex 1Gbps, TX_CFG_CARRIER_EXTEND and + * RX_CFG_CARRIER_EXTEND should be set and the SLOT_TIME register should + * be 0x200 (slot time of 512 bytes) + */ +#define REG_MAC_TX_CFG 0x6030 /* TX MAC config reg */ +#define MAC_TX_CFG_EN 0x0001 /* enable TX MAC. 0 will + force TXMAC state + machine to remain in + idle state or to + transition to idle state + on completion of an + ongoing packet. */ +#define MAC_TX_CFG_IGNORE_CARRIER 0x0002 /* disable CSMA/CD deferral + process. set to 1 when + full duplex and 0 when + half duplex */ +#define MAC_TX_CFG_IGNORE_COLL 0x0004 /* disable CSMA/CD backoff + algorithm. set to 1 when + full duplex and 0 when + half duplex */ +#define MAC_TX_CFG_IPG_EN 0x0008 /* enable extension of the + Rx-to-TX IPG. after + receiving a frame, TX + MAC will reset its + deferral process to + carrier sense for the + amount of time = IPG0 + + IPG1 and commit to + transmission for time + specified in IPG2. when + 0 or when xmitting frames + back-to-pack (Tx-to-Tx + IPG), TX MAC ignores + IPG0 and will only use + IPG1 for deferral time. + IPG2 still used. */ +#define MAC_TX_CFG_NEVER_GIVE_UP_EN 0x0010 /* TX MAC will not easily + give up on frame + xmission. if backoff + algorithm reaches the + ATTEMPT_LIMIT, it will + clear attempts counter + and continue trying to + send the frame as + specified by + GIVE_UP_LIM. when 0, + TX MAC will execute + standard CSMA/CD prot. */ +#define MAC_TX_CFG_NEVER_GIVE_UP_LIM 0x0020 /* when set, TX MAC will + continue to try to xmit + until successful. when + 0, TX MAC will continue + to try xmitting until + successful or backoff + algorithm reaches + ATTEMPT_LIMIT*16 */ +#define MAC_TX_CFG_NO_BACKOFF 0x0040 /* modify CSMA/CD to disable + backoff algorithm. TX + MAC will not back off + after a xmission attempt + that resulted in a + collision. */ +#define MAC_TX_CFG_SLOW_DOWN 0x0080 /* modify CSMA/CD so that + deferral process is reset + in response to carrier + sense during the entire + duration of IPG. TX MAC + will only commit to frame + xmission after frame + xmission has actually + begun. */ +#define MAC_TX_CFG_NO_FCS 0x0100 /* TX MAC will not generate + CRC for all xmitted + packets. when clear, CRC + generation is dependent + upon NO_CRC bit in the + xmit control word from + TX DMA */ +#define MAC_TX_CFG_CARRIER_EXTEND 0x0200 /* enables xmit part of the + carrier extension + feature. this allows for + longer collision domains + by extending the carrier + and collision window + from the end of FCS until + the end of the slot time + if necessary. Required + for half-duplex at 1Gbps, + clear otherwise. */ + +/* when CRC is not stripped, reassembly packets will not contain the CRC. + * these will be stripped by HRP because it reassembles layer 4 data, and the + * CRC is layer 2. however, non-reassembly packets will still contain the CRC + * when passed to the host. to ensure proper operation, need to wait 3.2ms + * after clearing RX_CFG_EN before writing to any other RX MAC registers + * or other MAC parameters. alternatively, poll RX_CFG_EN until it clears + * to 0. similary, HASH_FILTER_EN and ADDR_FILTER_EN have the same + * restrictions as CFG_EN. + */ +#define REG_MAC_RX_CFG 0x6034 /* RX MAC config reg */ +#define MAC_RX_CFG_EN 0x0001 /* enable RX MAC */ +#define MAC_RX_CFG_STRIP_PAD 0x0002 /* always program to 0. + feature not supported */ +#define MAC_RX_CFG_STRIP_FCS 0x0004 /* RX MAC will strip the + last 4 bytes of a + received frame. */ +#define MAC_RX_CFG_PROMISC_EN 0x0008 /* promiscuous mode */ +#define MAC_RX_CFG_PROMISC_GROUP_EN 0x0010 /* accept all valid + multicast frames (group + bit in DA field set) */ +#define MAC_RX_CFG_HASH_FILTER_EN 0x0020 /* use hash table to filter + multicast addresses */ +#define MAC_RX_CFG_ADDR_FILTER_EN 0x0040 /* cause RX MAC to use + address filtering regs + to filter both unicast + and multicast + addresses */ +#define MAC_RX_CFG_DISABLE_DISCARD 0x0080 /* pass errored frames to + RX DMA by setting BAD + bit but not Abort bit + in the status. CRC, + framing, and length errs + will not increment + error counters. frames + which don't match dest + addr will be passed up + w/ BAD bit set. */ +#define MAC_RX_CFG_CARRIER_EXTEND 0x0100 /* enable reception of + packet bursts generated + by carrier extension + with packet bursting + senders. only applies + to half-duplex 1Gbps */ + +/* DEFAULT: 0x0 */ +#define REG_MAC_CTRL_CFG 0x6038 /* MAC control config reg */ +#define MAC_CTRL_CFG_SEND_PAUSE_EN 0x0001 /* respond to requests for + sending pause flow ctrl + frames */ +#define MAC_CTRL_CFG_RECV_PAUSE_EN 0x0002 /* respond to received + pause flow ctrl frames */ +#define MAC_CTRL_CFG_PASS_CTRL 0x0004 /* pass valid MAC ctrl + packets to RX DMA */ + +/* to ensure proper operation, a global initialization sequence should be + * performed when a loopback config is entered or exited. if programmed after + * a hw or global sw reset, RX/TX MAC software reset and initialization + * should be done to ensure stable clocking. + * DEFAULT: 0x0 + */ +#define REG_MAC_XIF_CFG 0x603C /* XIF config reg */ +#define MAC_XIF_TX_MII_OUTPUT_EN 0x0001 /* enable output drivers + on MII xmit bus */ +#define MAC_XIF_MII_INT_LOOPBACK 0x0002 /* loopback GMII xmit data + path to GMII recv data + path. phy mode register + clock selection must be + set to GMII mode and + GMII_MODE should be set + to 1. in loopback mode, + REFCLK will drive the + entire mac core. 0 for + normal operation. */ +#define MAC_XIF_DISABLE_ECHO 0x0004 /* disables receive data + path during packet + xmission. clear to 0 + in any full duplex mode, + in any loopback mode, + or in half-duplex SERDES + or SLINK modes. set when + in half-duplex when + using external phy. */ +#define MAC_XIF_GMII_MODE 0x0008 /* MAC operates with GMII + clocks and datapath */ +#define MAC_XIF_MII_BUFFER_OUTPUT_EN 0x0010 /* MII_BUF_EN pin. enable + external tristate buffer + on the MII receive + bus. */ +#define MAC_XIF_LINK_LED 0x0020 /* LINKLED# active (low) */ +#define MAC_XIF_FDPLX_LED 0x0040 /* FDPLXLED# active (low) */ + +#define REG_MAC_IPG0 0x6040 /* inter-packet gap0 reg. + recommended: 0x00 */ +#define REG_MAC_IPG1 0x6044 /* inter-packet gap1 reg + recommended: 0x08 */ +#define REG_MAC_IPG2 0x6048 /* inter-packet gap2 reg + recommended: 0x04 */ +#define REG_MAC_SLOT_TIME 0x604C /* slot time reg + recommended: 0x40 */ +#define REG_MAC_FRAMESIZE_MIN 0x6050 /* min frame size reg + recommended: 0x40 */ + +/* FRAMESIZE_MAX holds both the max frame size as well as the max burst size. + * recommended value: 0x2000.05EE + */ +#define REG_MAC_FRAMESIZE_MAX 0x6054 /* max frame size reg */ +#define MAC_FRAMESIZE_MAX_BURST_MASK 0x3FFF0000 /* max burst size */ +#define MAC_FRAMESIZE_MAX_BURST_SHIFT 16 +#define MAC_FRAMESIZE_MAX_FRAME_MASK 0x00007FFF /* max frame size */ +#define MAC_FRAMESIZE_MAX_FRAME_SHIFT 0 +#define REG_MAC_PA_SIZE 0x6058 /* PA size reg. number of + preamble bytes that the + TX MAC will xmit at the + beginning of each frame + value should be 2 or + greater. recommended + value: 0x07 */ +#define REG_MAC_JAM_SIZE 0x605C /* jam size reg. duration + of jam in units of media + byte time. recommended + value: 0x04 */ +#define REG_MAC_ATTEMPT_LIMIT 0x6060 /* attempt limit reg. # + of attempts TX MAC will + make to xmit a frame + before it resets its + attempts counter. after + the limit has been + reached, TX MAC may or + may not drop the frame + dependent upon value + in TX_MAC_CFG. + recommended + value: 0x10 */ +#define REG_MAC_CTRL_TYPE 0x6064 /* MAC control type reg. + type field of a MAC + ctrl frame. recommended + value: 0x8808 */ + +/* mac address registers: 0 - 44, 0x6080 - 0x6130, 4 8-bit bytes. + * register contains comparison + * 0 16 MSB of primary MAC addr [47:32] of DA field + * 1 16 middle bits "" [31:16] of DA field + * 2 16 LSB "" [15:0] of DA field + * 3*x 16MSB of alt MAC addr 1-15 [47:32] of DA field + * 4*x 16 middle bits "" [31:16] + * 5*x 16 LSB "" [15:0] + * 42 16 MSB of MAC CTRL addr [47:32] of DA. + * 43 16 middle bits "" [31:16] + * 44 16 LSB "" [15:0] + * MAC CTRL addr must be the reserved multicast addr for MAC CTRL frames. + * if there is a match, MAC will set the bit for alternative address + * filter pass [15] + + * here is the map of registers given MAC address notation: a:b:c:d:e:f + * ab cd ef + * primary addr reg 2 reg 1 reg 0 + * alt addr 1 reg 5 reg 4 reg 3 + * alt addr x reg 5*x reg 4*x reg 3*x + * ctrl addr reg 44 reg 43 reg 42 + */ +#define REG_MAC_ADDR0 0x6080 /* MAC address 0 reg */ +#define REG_MAC_ADDRN(x) (REG_MAC_ADDR0 + (x)*4) +#define REG_MAC_ADDR_FILTER0 0x614C /* address filter 0 reg + [47:32] */ +#define REG_MAC_ADDR_FILTER1 0x6150 /* address filter 1 reg + [31:16] */ +#define REG_MAC_ADDR_FILTER2 0x6154 /* address filter 2 reg + [15:0] */ +#define REG_MAC_ADDR_FILTER2_1_MASK 0x6158 /* address filter 2 and 1 + mask reg. 8-bit reg + contains nibble mask for + reg 2 and 1. */ +#define REG_MAC_ADDR_FILTER0_MASK 0x615C /* address filter 0 mask + reg */ + +/* hash table registers: 0 - 15, 0x6160 - 0x619C, 4 8-bit bytes + * 16-bit registers contain bits of the hash table. + * reg x -> [16*(15 - x) + 15 : 16*(15 - x)]. + * e.g., 15 -> [15:0], 0 -> [255:240] + */ +#define REG_MAC_HASH_TABLE0 0x6160 /* hash table 0 reg */ +#define REG_MAC_HASH_TABLEN(x) (REG_MAC_HASH_TABLE0 + (x)*4) + +/* statistics registers. these registers generate an interrupt on + * overflow. recommended initialization: 0x0000. most are 16-bits except + * for PEAK_ATTEMPTS register which is 8 bits. + */ +#define REG_MAC_COLL_NORMAL 0x61A0 /* normal collision + counter. */ +#define REG_MAC_COLL_FIRST 0x61A4 /* first attempt + successful collision + counter */ +#define REG_MAC_COLL_EXCESS 0x61A8 /* excessive collision + counter */ +#define REG_MAC_COLL_LATE 0x61AC /* late collision counter */ +#define REG_MAC_TIMER_DEFER 0x61B0 /* defer timer. time base + is the media byte + clock/256 */ +#define REG_MAC_ATTEMPTS_PEAK 0x61B4 /* peak attempts reg */ +#define REG_MAC_RECV_FRAME 0x61B8 /* receive frame counter */ +#define REG_MAC_LEN_ERR 0x61BC /* length error counter */ +#define REG_MAC_ALIGN_ERR 0x61C0 /* alignment error counter */ +#define REG_MAC_FCS_ERR 0x61C4 /* FCS error counter */ +#define REG_MAC_RX_CODE_ERR 0x61C8 /* RX code violation + error counter */ + +/* misc registers */ +#define REG_MAC_RANDOM_SEED 0x61CC /* random number seed reg. + 10-bit register used as a + seed for the random number + generator for the CSMA/CD + backoff algorithm. only + programmed after power-on + reset and should be a + random value which has a + high likelihood of being + unique for each MAC + attached to a network + segment (e.g., 10 LSB of + MAC address) */ + +/* ASUN: there's a PAUSE_TIMER (ro) described, but it's not in the address + * map + */ + +/* 27-bit register has the current state for key state machines in the MAC */ +#define REG_MAC_STATE_MACHINE 0x61D0 /* (ro) state machine reg */ +#define MAC_SM_RLM_MASK 0x07800000 +#define MAC_SM_RLM_SHIFT 23 +#define MAC_SM_RX_FC_MASK 0x00700000 +#define MAC_SM_RX_FC_SHIFT 20 +#define MAC_SM_TLM_MASK 0x000F0000 +#define MAC_SM_TLM_SHIFT 16 +#define MAC_SM_ENCAP_SM_MASK 0x0000F000 +#define MAC_SM_ENCAP_SM_SHIFT 12 +#define MAC_SM_TX_REQ_MASK 0x00000C00 +#define MAC_SM_TX_REQ_SHIFT 10 +#define MAC_SM_TX_FC_MASK 0x000003C0 +#define MAC_SM_TX_FC_SHIFT 6 +#define MAC_SM_FIFO_WRITE_SEL_MASK 0x00000038 +#define MAC_SM_FIFO_WRITE_SEL_SHIFT 3 +#define MAC_SM_TX_FIFO_EMPTY_MASK 0x00000007 +#define MAC_SM_TX_FIFO_EMPTY_SHIFT 0 + +/** MIF registers. the MIF can be programmed in either bit-bang or + * frame mode. + **/ +#define REG_MIF_BIT_BANG_CLOCK 0x6200 /* MIF bit-bang clock. + 1 -> 0 will generate a + rising edge. 0 -> 1 will + generate a falling edge. */ +#define REG_MIF_BIT_BANG_DATA 0x6204 /* MIF bit-bang data. 1-bit + register generates data */ +#define REG_MIF_BIT_BANG_OUTPUT_EN 0x6208 /* MIF bit-bang output + enable. enable when + xmitting data from MIF to + transceiver. */ + +/* 32-bit register serves as an instruction register when the MIF is + * programmed in frame mode. load this register w/ a valid instruction + * (as per IEEE 802.3u MII spec). poll this register to check for instruction + * execution completion. during a read operation, this register will also + * contain the 16-bit data returned by the tranceiver. unless specified + * otherwise, fields are considered "don't care" when polling for + * completion. + */ +#define REG_MIF_FRAME 0x620C /* MIF frame/output reg */ +#define MIF_FRAME_START_MASK 0xC0000000 /* start of frame. + load w/ 01 when + issuing an instr */ +#define MIF_FRAME_ST 0x40000000 /* STart of frame */ +#define MIF_FRAME_OPCODE_MASK 0x30000000 /* opcode. 01 for a + write. 10 for a + read */ +#define MIF_FRAME_OP_READ 0x20000000 /* read OPcode */ +#define MIF_FRAME_OP_WRITE 0x10000000 /* write OPcode */ +#define MIF_FRAME_PHY_ADDR_MASK 0x0F800000 /* phy address. when + issuing an instr, + this field should be + loaded w/ the XCVR + addr */ +#define MIF_FRAME_PHY_ADDR_SHIFT 23 +#define MIF_FRAME_REG_ADDR_MASK 0x007C0000 /* register address. + when issuing an instr, + addr of register + to be read/written */ +#define MIF_FRAME_REG_ADDR_SHIFT 18 +#define MIF_FRAME_TURN_AROUND_MSB 0x00020000 /* turn around, MSB. + when issuing an instr, + set this bit to 1 */ +#define MIF_FRAME_TURN_AROUND_LSB 0x00010000 /* turn around, LSB. + when issuing an instr, + set this bit to 0. + when polling for + completion, 1 means + that instr execution + has been completed */ +#define MIF_FRAME_DATA_MASK 0x0000FFFF /* instruction payload + load with 16-bit data + to be written in + transceiver reg for a + write. doesn't matter + in a read. when + polling for + completion, field is + "don't care" for write + and 16-bit data + returned by the + transceiver for a + read (if valid bit + is set) */ +#define REG_MIF_CFG 0x6210 /* MIF config reg */ +#define MIF_CFG_PHY_SELECT 0x0001 /* 1 -> select MDIO_1 + 0 -> select MDIO_0 */ +#define MIF_CFG_POLL_EN 0x0002 /* enable polling + mechanism. if set, + BB_MODE should be 0 */ +#define MIF_CFG_BB_MODE 0x0004 /* 1 -> bit-bang mode + 0 -> frame mode */ +#define MIF_CFG_POLL_REG_MASK 0x00F8 /* register address to be + used by polling mode. + only meaningful if POLL_EN + is set to 1 */ +#define MIF_CFG_POLL_REG_SHIFT 3 +#define MIF_CFG_MDIO_0 0x0100 /* (ro) dual purpose. + when MDIO_0 is idle, + 1 -> tranceiver is + connected to MDIO_0. + when MIF is communicating + w/ MDIO_0 in bit-bang + mode, this bit indicates + the incoming bit stream + during a read op */ +#define MIF_CFG_MDIO_1 0x0200 /* (ro) dual purpose. + when MDIO_1 is idle, + 1 -> transceiver is + connected to MDIO_1. + when MIF is communicating + w/ MDIO_1 in bit-bang + mode, this bit indicates + the incoming bit stream + during a read op */ +#define MIF_CFG_POLL_PHY_MASK 0x7C00 /* tranceiver address to + be polled */ +#define MIF_CFG_POLL_PHY_SHIFT 10 + +/* 16-bit register used to determine which bits in the POLL_STATUS portion of + * the MIF_STATUS register will cause an interrupt. if a mask bit is 0, + * corresponding bit of the POLL_STATUS will generate a MIF interrupt when + * set. DEFAULT: 0xFFFF + */ +#define REG_MIF_MASK 0x6214 /* MIF mask reg */ + +/* 32-bit register used when in poll mode. auto-cleared after being read */ +#define REG_MIF_STATUS 0x6218 /* MIF status reg */ +#define MIF_STATUS_POLL_DATA_MASK 0xFFFF0000 /* poll data contains + the "latest image" + update of the XCVR + reg being read */ +#define MIF_STATUS_POLL_DATA_SHIFT 16 +#define MIF_STATUS_POLL_STATUS_MASK 0x0000FFFF /* poll status indicates + which bits in the + POLL_DATA field have + changed since the + MIF_STATUS reg was + last read */ +#define MIF_STATUS_POLL_STATUS_SHIFT 0 + +/* 7-bit register has current state for all state machines in the MIF */ +#define REG_MIF_STATE_MACHINE 0x621C /* MIF state machine reg */ +#define MIF_SM_CONTROL_MASK 0x07 /* control state machine + state */ +#define MIF_SM_EXECUTION_MASK 0x60 /* execution state machine + state */ + +/** PCS/Serialink. the following registers are equivalent to the standard + * MII management registers except that they're directly mapped in + * Cassini's register space. + **/ + +/* the auto-negotiation enable bit should be programmed the same at + * the link partner as in the local device to enable auto-negotiation to + * complete. when that bit is reprogrammed, auto-neg/manual config is + * restarted automatically. + * DEFAULT: 0x1040 + */ +#define REG_PCS_MII_CTRL 0x9000 /* PCS MII control reg */ +#define PCS_MII_CTRL_1000_SEL 0x0040 /* reads 1. ignored on + writes */ +#define PCS_MII_CTRL_COLLISION_TEST 0x0080 /* COL signal at the PCS + to MAC interface is + activated regardless + of activity */ +#define PCS_MII_CTRL_DUPLEX 0x0100 /* forced 0x0. PCS + behaviour same for + half and full dplx */ +#define PCS_MII_RESTART_AUTONEG 0x0200 /* self clearing. + restart auto- + negotiation */ +#define PCS_MII_ISOLATE 0x0400 /* read as 0. ignored + on writes */ +#define PCS_MII_POWER_DOWN 0x0800 /* read as 0. ignored + on writes */ +#define PCS_MII_AUTONEG_EN 0x1000 /* default 1. PCS goes + through automatic + link config before it + can be used. when 0, + link can be used + w/out any link config + phase */ +#define PCS_MII_10_100_SEL 0x2000 /* read as 0. ignored on + writes */ +#define PCS_MII_RESET 0x8000 /* reset PCS. self-clears + when done */ + +/* DEFAULT: 0x0108 */ +#define REG_PCS_MII_STATUS 0x9004 /* PCS MII status reg */ +#define PCS_MII_STATUS_EXTEND_CAP 0x0001 /* reads 0 */ +#define PCS_MII_STATUS_JABBER_DETECT 0x0002 /* reads 0 */ +#define PCS_MII_STATUS_LINK_STATUS 0x0004 /* 1 -> link up. + 0 -> link down. 0 is + latched so that 0 is + kept until read. read + 2x to determine if the + link has gone up again */ +#define PCS_MII_STATUS_AUTONEG_ABLE 0x0008 /* reads 1 (able to perform + auto-neg) */ +#define PCS_MII_STATUS_REMOTE_FAULT 0x0010 /* 1 -> remote fault detected + from received link code + word. only valid after + auto-neg completed */ +#define PCS_MII_STATUS_AUTONEG_COMP 0x0020 /* 1 -> auto-negotiation + completed + 0 -> auto-negotiation not + completed */ +#define PCS_MII_STATUS_EXTEND_STATUS 0x0100 /* reads as 1. used as an + indication that this is + a 1000 Base-X PHY. writes + to it are ignored */ + +/* used during auto-negotiation. + * DEFAULT: 0x00E0 + */ +#define REG_PCS_MII_ADVERT 0x9008 /* PCS MII advertisement + reg */ +#define PCS_MII_ADVERT_FD 0x0020 /* advertise full duplex + 1000 Base-X */ +#define PCS_MII_ADVERT_HD 0x0040 /* advertise half-duplex + 1000 Base-X */ +#define PCS_MII_ADVERT_SYM_PAUSE 0x0080 /* advertise PAUSE + symmetric capability */ +#define PCS_MII_ADVERT_ASYM_PAUSE 0x0100 /* advertises PAUSE + asymmetric capability */ +#define PCS_MII_ADVERT_RF_MASK 0x3000 /* remote fault. write bit13 + to optionally indicate to + link partner that chip is + going off-line. bit12 will + get set when signal + detect == FAIL and will + remain set until + successful negotiation */ +#define PCS_MII_ADVERT_ACK 0x4000 /* (ro) */ +#define PCS_MII_ADVERT_NEXT_PAGE 0x8000 /* (ro) forced 0x0 */ + +/* contents updated as a result of autonegotiation. layout and definitions + * identical to PCS_MII_ADVERT + */ +#define REG_PCS_MII_LPA 0x900C /* PCS MII link partner + ability reg */ +#define PCS_MII_LPA_FD PCS_MII_ADVERT_FD +#define PCS_MII_LPA_HD PCS_MII_ADVERT_HD +#define PCS_MII_LPA_SYM_PAUSE PCS_MII_ADVERT_SYM_PAUSE +#define PCS_MII_LPA_ASYM_PAUSE PCS_MII_ADVERT_ASYM_PAUSE +#define PCS_MII_LPA_RF_MASK PCS_MII_ADVERT_RF_MASK +#define PCS_MII_LPA_ACK PCS_MII_ADVERT_ACK +#define PCS_MII_LPA_NEXT_PAGE PCS_MII_ADVERT_NEXT_PAGE + +/* DEFAULT: 0x0 */ +#define REG_PCS_CFG 0x9010 /* PCS config reg */ +#define PCS_CFG_EN 0x01 /* enable PCS. must be + 0 when modifying + PCS_MII_ADVERT */ +#define PCS_CFG_SD_OVERRIDE 0x02 /* sets signal detect to + OK. bit is + non-resettable */ +#define PCS_CFG_SD_ACTIVE_LOW 0x04 /* changes interpretation + of optical signal to make + signal detect okay when + signal is low */ +#define PCS_CFG_JITTER_STUDY_MASK 0x18 /* used to make jitter + measurements. a single + code group is xmitted + regularly. + 0x0 = normal operation + 0x1 = high freq test + pattern, D21.5 + 0x2 = low freq test + pattern, K28.7 + 0x3 = reserved */ +#define PCS_CFG_10MS_TIMER_OVERRIDE 0x20 /* shortens 10-20ms auto- + negotiation timer to + a few cycles for test + purposes */ + +/* used for diagnostic purposes. bits 20-22 autoclear on read */ +#define REG_PCS_STATE_MACHINE 0x9014 /* (ro) PCS state machine + and diagnostic reg */ +#define PCS_SM_TX_STATE_MASK 0x0000000F /* 0 and 1 indicate + xmission of idle. + otherwise, xmission of + a packet */ +#define PCS_SM_RX_STATE_MASK 0x000000F0 /* 0 indicates reception + of idle. otherwise, + reception of packet */ +#define PCS_SM_WORD_SYNC_STATE_MASK 0x00000700 /* 0 indicates loss of + sync */ +#define PCS_SM_SEQ_DETECT_STATE_MASK 0x00001800 /* cycling through 0-3 + indicates reception of + Config codes. cycling + through 0-1 indicates + reception of idles */ +#define PCS_SM_LINK_STATE_MASK 0x0001E000 +#define SM_LINK_STATE_UP 0x00016000 /* link state is up */ + +#define PCS_SM_LOSS_LINK_C 0x00100000 /* loss of link due to + recept of Config + codes */ +#define PCS_SM_LOSS_LINK_SYNC 0x00200000 /* loss of link due to + loss of sync */ +#define PCS_SM_LOSS_SIGNAL_DETECT 0x00400000 /* signal detect goes + from OK to FAIL. bit29 + will also be set if + this is set */ +#define PCS_SM_NO_LINK_BREAKLINK 0x01000000 /* link not up due to + receipt of breaklink + C codes from partner. + C codes w/ 0 content + received triggering + start/restart of + autonegotiation. + should be sent for + no longer than 20ms */ +#define PCS_SM_NO_LINK_SERDES 0x02000000 /* serdes being + initialized. see serdes + state reg */ +#define PCS_SM_NO_LINK_C 0x04000000 /* C codes not stable or + not received */ +#define PCS_SM_NO_LINK_SYNC 0x08000000 /* word sync not + achieved */ +#define PCS_SM_NO_LINK_WAIT_C 0x10000000 /* waiting for C codes + w/ ack bit set */ +#define PCS_SM_NO_LINK_NO_IDLE 0x20000000 /* link partner continues + to send C codes + instead of idle + symbols or pkt data */ + +/* this register indicates interrupt changes in specific PCS MII status bits. + * PCS_INT may be masked at the ISR level. only a single bit is implemented + * for link status change. + */ +#define REG_PCS_INTR_STATUS 0x9018 /* PCS interrupt status */ +#define PCS_INTR_STATUS_LINK_CHANGE 0x04 /* link status has changed + since last read */ + +/* control which network interface is used. no more than one bit should + * be set. + * DEFAULT: none + */ +#define REG_PCS_DATAPATH_MODE 0x9050 /* datapath mode reg */ +#define PCS_DATAPATH_MODE_MII 0x00 /* PCS is not used and + MII/GMII is selected. + selection between MII and + GMII is controlled by + XIF_CFG */ +#define PCS_DATAPATH_MODE_SERDES 0x02 /* PCS is used via the + 10-bit interface */ + +/* input to serdes chip or serialink block */ +#define REG_PCS_SERDES_CTRL 0x9054 /* serdes control reg */ +#define PCS_SERDES_CTRL_LOOPBACK 0x01 /* enable loopback on + serdes interface */ +#define PCS_SERDES_CTRL_SYNCD_EN 0x02 /* enable sync carrier + detection. should be + 0x0 for normal + operation */ +#define PCS_SERDES_CTRL_LOCKREF 0x04 /* frequency-lock RBC[0:1] + to REFCLK when set. + when clear, receiver + clock locks to incoming + serial data */ + +/* multiplex test outputs into the PROM address (PA_3 through PA_0) pins. + * should be 0x0 for normal operations. + * 0b000 normal operation, PROM address[3:0] selected + * 0b001 rxdma req, rxdma ack, rxdma ready, rxdma read + * 0b010 rxmac req, rx ack, rx tag, rx clk shared + * 0b011 txmac req, tx ack, tx tag, tx retry req + * 0b100 tx tp3, tx tp2, tx tp1, tx tp0 + * 0b101 R period RX, R period TX, R period HP, R period BIM + * DEFAULT: 0x0 + */ +#define REG_PCS_SHARED_OUTPUT_SEL 0x9058 /* shared output select */ +#define PCS_SOS_PROM_ADDR_MASK 0x0007 + +/* used for diagnostics. this register indicates progress of the SERDES + * boot up. + * 0b00 undergoing reset + * 0b01 waiting 500us while lockrefn is asserted + * 0b10 waiting for comma detect + * 0b11 receive data is synchronized + * DEFAULT: 0x0 + */ +#define REG_PCS_SERDES_STATE 0x905C /* (ro) serdes state */ +#define PCS_SERDES_STATE_MASK 0x03 + +/* used for diagnostics. indicates number of packets transmitted or received. + * counters rollover w/out generating an interrupt. + * DEFAULT: 0x0 + */ +#define REG_PCS_PACKET_COUNT 0x9060 /* (ro) PCS packet counter */ +#define PCS_PACKET_COUNT_TX 0x000007FF /* pkts xmitted by PCS */ +#define PCS_PACKET_COUNT_RX 0x07FF0000 /* pkts recvd by PCS + whether they + encountered an error + or not */ + +/** LocalBus Devices. the following provides run-time access to the + * Cassini's PROM + ***/ +#define REG_EXPANSION_ROM_RUN_START 0x100000 /* expansion rom run time + access */ +#define REG_EXPANSION_ROM_RUN_END 0x17FFFF + +#define REG_SECOND_LOCALBUS_START 0x180000 /* secondary local bus + device */ +#define REG_SECOND_LOCALBUS_END 0x1FFFFF + +/* entropy device */ +#define REG_ENTROPY_START REG_SECOND_LOCALBUS_START +#define REG_ENTROPY_DATA (REG_ENTROPY_START + 0x00) +#define REG_ENTROPY_STATUS (REG_ENTROPY_START + 0x04) +#define ENTROPY_STATUS_DRDY 0x01 +#define ENTROPY_STATUS_BUSY 0x02 +#define ENTROPY_STATUS_CIPHER 0x04 +#define ENTROPY_STATUS_BYPASS_MASK 0x18 +#define REG_ENTROPY_MODE (REG_ENTROPY_START + 0x05) +#define ENTROPY_MODE_KEY_MASK 0x07 +#define ENTROPY_MODE_ENCRYPT 0x40 +#define REG_ENTROPY_RAND_REG (REG_ENTROPY_START + 0x06) +#define REG_ENTROPY_RESET (REG_ENTROPY_START + 0x07) +#define ENTROPY_RESET_DES_IO 0x01 +#define ENTROPY_RESET_STC_MODE 0x02 +#define ENTROPY_RESET_KEY_CACHE 0x04 +#define ENTROPY_RESET_IV 0x08 +#define REG_ENTROPY_IV (REG_ENTROPY_START + 0x08) +#define REG_ENTROPY_KEY0 (REG_ENTROPY_START + 0x10) +#define REG_ENTROPY_KEYN(x) (REG_ENTROPY_KEY0 + 4*(x)) + +/* phys of interest w/ their special mii registers */ +#define PHY_LUCENT_B0 0x00437421 +#define LUCENT_MII_REG 0x1F + +#define PHY_NS_DP83065 0x20005c78 +#define DP83065_MII_MEM 0x16 +#define DP83065_MII_REGD 0x1D +#define DP83065_MII_REGE 0x1E + +#define PHY_BROADCOM_5411 0x00206071 +#define PHY_BROADCOM_B0 0x00206050 +#define BROADCOM_MII_REG4 0x14 +#define BROADCOM_MII_REG5 0x15 +#define BROADCOM_MII_REG7 0x17 +#define BROADCOM_MII_REG8 0x18 + +#define CAS_MII_ANNPTR 0x07 +#define CAS_MII_ANNPRR 0x08 +#define CAS_MII_1000_CTRL 0x09 +#define CAS_MII_1000_STATUS 0x0A +#define CAS_MII_1000_EXTEND 0x0F + +#define CAS_BMSR_1000_EXTEND 0x0100 /* supports 1000Base-T extended status */ +/* + * if autoneg is disabled, here's the table: + * BMCR_SPEED100 = 100Mbps + * BMCR_SPEED1000 = 1000Mbps + * ~(BMCR_SPEED100 | BMCR_SPEED1000) = 10Mbps + */ +#define CAS_BMCR_SPEED1000 0x0040 /* Select 1000Mbps */ + +#define CAS_ADVERTISE_1000HALF 0x0100 +#define CAS_ADVERTISE_1000FULL 0x0200 +#define CAS_ADVERTISE_PAUSE 0x0400 +#define CAS_ADVERTISE_ASYM_PAUSE 0x0800 + +/* regular lpa register */ +#define CAS_LPA_PAUSE CAS_ADVERTISE_PAUSE +#define CAS_LPA_ASYM_PAUSE CAS_ADVERTISE_ASYM_PAUSE + +/* 1000_STATUS register */ +#define CAS_LPA_1000HALF 0x0400 +#define CAS_LPA_1000FULL 0x0800 + +#define CAS_EXTEND_1000XFULL 0x8000 +#define CAS_EXTEND_1000XHALF 0x4000 +#define CAS_EXTEND_1000TFULL 0x2000 +#define CAS_EXTEND_1000THALF 0x1000 + +/* cassini header parser firmware */ +typedef struct cas_hp_inst { + const char *note; + + u16 mask, val; + + u8 op; + u8 soff, snext; /* if match succeeds, new offset and match */ + u8 foff, fnext; /* if match fails, new offset and match */ + /* output info */ + u8 outop; /* output opcode */ + + u16 outarg; /* output argument */ + u8 outenab; /* output enable: 0 = not, 1 = if match + 2 = if !match, 3 = always */ + u8 outshift; /* barrel shift right, 4 bits */ + u16 outmask; +} cas_hp_inst_t; + +/* comparison */ +#define OP_EQ 0 /* packet == value */ +#define OP_LT 1 /* packet < value */ +#define OP_GT 2 /* packet > value */ +#define OP_NP 3 /* new packet */ + +/* output opcodes */ +#define CL_REG 0 +#define LD_FID 1 +#define LD_SEQ 2 +#define LD_CTL 3 +#define LD_SAP 4 +#define LD_R1 5 +#define LD_L3 6 +#define LD_SUM 7 +#define LD_HDR 8 +#define IM_FID 9 +#define IM_SEQ 10 +#define IM_SAP 11 +#define IM_R1 12 +#define IM_CTL 13 +#define LD_LEN 14 +#define ST_FLG 15 + +/* match setp #s for IP4TCP4 */ +#define S1_PCKT 0 +#define S1_VLAN 1 +#define S1_CFI 2 +#define S1_8023 3 +#define S1_LLC 4 +#define S1_LLCc 5 +#define S1_IPV4 6 +#define S1_IPV4c 7 +#define S1_IPV4F 8 +#define S1_TCP44 9 +#define S1_IPV6 10 +#define S1_IPV6L 11 +#define S1_IPV6c 12 +#define S1_TCP64 13 +#define S1_TCPSQ 14 +#define S1_TCPFG 15 +#define S1_TCPHL 16 +#define S1_TCPHc 17 +#define S1_CLNP 18 +#define S1_CLNP2 19 +#define S1_DROP 20 +#define S2_HTTP 21 +#define S1_ESP4 22 +#define S1_AH4 23 +#define S1_ESP6 24 +#define S1_AH6 25 + +#define CAS_PROG_IP46TCP4_PREAMBLE \ +{ "packet arrival?", 0xffff, 0x0000, OP_NP, 6, S1_VLAN, 0, S1_PCKT, \ + CL_REG, 0x3ff, 1, 0x0, 0x0000}, \ +{ "VLAN?", 0xffff, 0x8100, OP_EQ, 1, S1_CFI, 0, S1_8023, \ + IM_CTL, 0x00a, 3, 0x0, 0xffff}, \ +{ "CFI?", 0x1000, 0x1000, OP_EQ, 0, S1_DROP, 1, S1_8023, \ + CL_REG, 0x000, 0, 0x0, 0x0000}, \ +{ "8023?", 0xffff, 0x0600, OP_LT, 1, S1_LLC, 0, S1_IPV4, \ + CL_REG, 0x000, 0, 0x0, 0x0000}, \ +{ "LLC?", 0xffff, 0xaaaa, OP_EQ, 1, S1_LLCc, 0, S1_CLNP, \ + CL_REG, 0x000, 0, 0x0, 0x0000}, \ +{ "LLCc?", 0xff00, 0x0300, OP_EQ, 2, S1_IPV4, 0, S1_CLNP, \ + CL_REG, 0x000, 0, 0x0, 0x0000}, \ +{ "IPV4?", 0xffff, 0x0800, OP_EQ, 1, S1_IPV4c, 0, S1_IPV6, \ + LD_SAP, 0x100, 3, 0x0, 0xffff}, \ +{ "IPV4 cont?", 0xff00, 0x4500, OP_EQ, 3, S1_IPV4F, 0, S1_CLNP, \ + LD_SUM, 0x00a, 1, 0x0, 0x0000}, \ +{ "IPV4 frag?", 0x3fff, 0x0000, OP_EQ, 1, S1_TCP44, 0, S1_CLNP, \ + LD_LEN, 0x03e, 1, 0x0, 0xffff}, \ +{ "TCP44?", 0x00ff, 0x0006, OP_EQ, 7, S1_TCPSQ, 0, S1_CLNP, \ + LD_FID, 0x182, 1, 0x0, 0xffff}, /* FID IP4&TCP src+dst */ \ +{ "IPV6?", 0xffff, 0x86dd, OP_EQ, 1, S1_IPV6L, 0, S1_CLNP, \ + LD_SUM, 0x015, 1, 0x0, 0x0000}, \ +{ "IPV6 len", 0xf000, 0x6000, OP_EQ, 0, S1_IPV6c, 0, S1_CLNP, \ + IM_R1, 0x128, 1, 0x0, 0xffff}, \ +{ "IPV6 cont?", 0x0000, 0x0000, OP_EQ, 3, S1_TCP64, 0, S1_CLNP, \ + LD_FID, 0x484, 1, 0x0, 0xffff}, /* FID IP6&TCP src+dst */ \ +{ "TCP64?", 0xff00, 0x0600, OP_EQ, 18, S1_TCPSQ, 0, S1_CLNP, \ + LD_LEN, 0x03f, 1, 0x0, 0xffff} + +#ifdef USE_HP_IP46TCP4 +static cas_hp_inst_t cas_prog_ip46tcp4tab[] = { + CAS_PROG_IP46TCP4_PREAMBLE, + { "TCP seq", /* DADDR should point to dest port */ + 0x0000, 0x0000, OP_EQ, 0, S1_TCPFG, 4, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHL, 0, + S1_TCPHL, ST_FLG, 0x045, 3, 0x0, 0x002f}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, + S1_TCPHc, LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP2, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { "Cleanup 2", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x000, 0, 0x0, 0x0000}, + { "Drop packet", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x080, 3, 0x0, 0xffff}, + { NULL }, +}; +#ifdef HP_IP46TCP4_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_ip46tcp4tab +#endif +#endif + +/* + * Alternate table load which excludes HTTP server traffic from reassembly. + * It is substantially similar to the basic table, with one extra state + * and a few extra compares. */ +#ifdef USE_HP_IP46TCP4NOHTTP +static cas_hp_inst_t cas_prog_ip46tcp4nohttptab[] = { + CAS_PROG_IP46TCP4_PREAMBLE, + { "TCP seq", /* DADDR should point to dest port */ + 0xFFFF, 0x0080, OP_EQ, 0, S2_HTTP, 0, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff} , /* Load TCP seq # */ + { "TCP control flags", 0xFFFF, 0x8080, OP_EQ, 0, S2_HTTP, 0, + S1_TCPHL, ST_FLG, 0x145, 2, 0x0, 0x002f, }, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, S1_TCPHc, + LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP2, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { "Cleanup 2", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + CL_REG, 0x002, 3, 0x0, 0x0000}, + { "Drop packet", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x080, 3, 0x0, 0xffff}, + { "No HTTP", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x044, 3, 0x0, 0xffff}, + { NULL }, +}; +#ifdef HP_IP46TCP4NOHTTP_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_ip46tcp4nohttptab +#endif +#endif + +/* match step #s for IP4FRAG */ +#define S3_IPV6c 11 +#define S3_TCP64 12 +#define S3_TCPSQ 13 +#define S3_TCPFG 14 +#define S3_TCPHL 15 +#define S3_TCPHc 16 +#define S3_FRAG 17 +#define S3_FOFF 18 +#define S3_CLNP 19 + +#ifdef USE_HP_IP4FRAG +static cas_hp_inst_t cas_prog_ip4fragtab[] = { + { "packet arrival?", 0xffff, 0x0000, OP_NP, 6, S1_VLAN, 0, S1_PCKT, + CL_REG, 0x3ff, 1, 0x0, 0x0000}, + { "VLAN?", 0xffff, 0x8100, OP_EQ, 1, S1_CFI, 0, S1_8023, + IM_CTL, 0x00a, 3, 0x0, 0xffff}, + { "CFI?", 0x1000, 0x1000, OP_EQ, 0, S3_CLNP, 1, S1_8023, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "8023?", 0xffff, 0x0600, OP_LT, 1, S1_LLC, 0, S1_IPV4, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLC?", 0xffff, 0xaaaa, OP_EQ, 1, S1_LLCc, 0, S3_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLCc?",0xff00, 0x0300, OP_EQ, 2, S1_IPV4, 0, S3_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "IPV4?", 0xffff, 0x0800, OP_EQ, 1, S1_IPV4c, 0, S1_IPV6, + LD_SAP, 0x100, 3, 0x0, 0xffff}, + { "IPV4 cont?", 0xff00, 0x4500, OP_EQ, 3, S1_IPV4F, 0, S3_CLNP, + LD_SUM, 0x00a, 1, 0x0, 0x0000}, + { "IPV4 frag?", 0x3fff, 0x0000, OP_EQ, 1, S1_TCP44, 0, S3_FRAG, + LD_LEN, 0x03e, 3, 0x0, 0xffff}, + { "TCP44?", 0x00ff, 0x0006, OP_EQ, 7, S3_TCPSQ, 0, S3_CLNP, + LD_FID, 0x182, 3, 0x0, 0xffff}, /* FID IP4&TCP src+dst */ + { "IPV6?", 0xffff, 0x86dd, OP_EQ, 1, S3_IPV6c, 0, S3_CLNP, + LD_SUM, 0x015, 1, 0x0, 0x0000}, + { "IPV6 cont?", 0xf000, 0x6000, OP_EQ, 3, S3_TCP64, 0, S3_CLNP, + LD_FID, 0x484, 1, 0x0, 0xffff}, /* FID IP6&TCP src+dst */ + { "TCP64?", 0xff00, 0x0600, OP_EQ, 18, S3_TCPSQ, 0, S3_CLNP, + LD_LEN, 0x03f, 1, 0x0, 0xffff}, + { "TCP seq", /* DADDR should point to dest port */ + 0x0000, 0x0000, OP_EQ, 0, S3_TCPFG, 4, S3_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0x0000, 0x0000, OP_EQ, 0, S3_TCPHL, 0, + S3_TCPHL, ST_FLG, 0x045, 3, 0x0, 0x002f}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S3_TCPHc, 0, S3_TCPHc, + LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "IP4 Fragment", 0x0000, 0x0000, OP_EQ, 0, S3_FOFF, 0, S3_FOFF, + LD_FID, 0x103, 3, 0x0, 0xffff}, /* FID IP4 src+dst */ + { "IP4 frag offset", 0x0000, 0x0000, OP_EQ, 0, S3_FOFF, 0, S3_FOFF, + LD_SEQ, 0x040, 1, 0xD, 0xfff8}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { NULL }, +}; +#ifdef HP_IP4FRAG_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_ip4fragtab +#endif +#endif + +/* + * Alternate table which does batching without reassembly + */ +#ifdef USE_HP_IP46TCP4BATCH +static cas_hp_inst_t cas_prog_ip46tcp4batchtab[] = { + CAS_PROG_IP46TCP4_PREAMBLE, + { "TCP seq", /* DADDR should point to dest port */ + 0x0000, 0x0000, OP_EQ, 0, S1_TCPFG, 0, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHL, 0, + S1_TCPHL, ST_FLG, 0x000, 3, 0x0, 0x0000}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, + S1_TCPHc, LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, IM_CTL, 0x040, 3, 0x0, 0xffff}, /* set batch bit */ + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { "Drop packet", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, IM_CTL, 0x080, 3, 0x0, 0xffff}, + { NULL }, +}; +#ifdef HP_IP46TCP4BATCH_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_ip46tcp4batchtab +#endif +#endif + +/* Workaround for Cassini rev2 descriptor corruption problem. + * Does batching without reassembly, and sets the SAP to a known + * data pattern for all packets. + */ +#ifdef USE_HP_WORKAROUND +static cas_hp_inst_t cas_prog_workaroundtab[] = { + { "packet arrival?", 0xffff, 0x0000, OP_NP, 6, S1_VLAN, 0, + S1_PCKT, CL_REG, 0x3ff, 1, 0x0, 0x0000} , + { "VLAN?", 0xffff, 0x8100, OP_EQ, 1, S1_CFI, 0, S1_8023, + IM_CTL, 0x04a, 3, 0x0, 0xffff}, + { "CFI?", 0x1000, 0x1000, OP_EQ, 0, S1_CLNP, 1, S1_8023, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "8023?", 0xffff, 0x0600, OP_LT, 1, S1_LLC, 0, S1_IPV4, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLC?", 0xffff, 0xaaaa, OP_EQ, 1, S1_LLCc, 0, S1_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLCc?", 0xff00, 0x0300, OP_EQ, 2, S1_IPV4, 0, S1_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "IPV4?", 0xffff, 0x0800, OP_EQ, 1, S1_IPV4c, 0, S1_IPV6, + IM_SAP, 0x6AE, 3, 0x0, 0xffff}, + { "IPV4 cont?", 0xff00, 0x4500, OP_EQ, 3, S1_IPV4F, 0, S1_CLNP, + LD_SUM, 0x00a, 1, 0x0, 0x0000}, + { "IPV4 frag?", 0x3fff, 0x0000, OP_EQ, 1, S1_TCP44, 0, S1_CLNP, + LD_LEN, 0x03e, 1, 0x0, 0xffff}, + { "TCP44?", 0x00ff, 0x0006, OP_EQ, 7, S1_TCPSQ, 0, S1_CLNP, + LD_FID, 0x182, 3, 0x0, 0xffff}, /* FID IP4&TCP src+dst */ + { "IPV6?", 0xffff, 0x86dd, OP_EQ, 1, S1_IPV6L, 0, S1_CLNP, + LD_SUM, 0x015, 1, 0x0, 0x0000}, + { "IPV6 len", 0xf000, 0x6000, OP_EQ, 0, S1_IPV6c, 0, S1_CLNP, + IM_R1, 0x128, 1, 0x0, 0xffff}, + { "IPV6 cont?", 0x0000, 0x0000, OP_EQ, 3, S1_TCP64, 0, S1_CLNP, + LD_FID, 0x484, 1, 0x0, 0xffff}, /* FID IP6&TCP src+dst */ + { "TCP64?", 0xff00, 0x0600, OP_EQ, 18, S1_TCPSQ, 0, S1_CLNP, + LD_LEN, 0x03f, 1, 0x0, 0xffff}, + { "TCP seq", /* DADDR should point to dest port */ + 0x0000, 0x0000, OP_EQ, 0, S1_TCPFG, 4, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHL, 0, + S1_TCPHL, ST_FLG, 0x045, 3, 0x0, 0x002f}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, S1_TCPHc, + LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP2, + IM_SAP, 0x6AE, 3, 0x0, 0xffff} , + { "Cleanup 2", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { NULL }, +}; +#ifdef HP_WORKAROUND_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_workaroundtab +#endif +#endif + +#ifdef USE_HP_ENCRYPT +static cas_hp_inst_t cas_prog_encryptiontab[] = { + { "packet arrival?", 0xffff, 0x0000, OP_NP, 6, S1_VLAN, 0, + S1_PCKT, CL_REG, 0x3ff, 1, 0x0, 0x0000}, + { "VLAN?", 0xffff, 0x8100, OP_EQ, 1, S1_CFI, 0, S1_8023, + IM_CTL, 0x00a, 3, 0x0, 0xffff}, +#if 0 +//"CFI?", /* 02 FIND CFI and If FIND go to S1_DROP */ +//0x1000, 0x1000, OP_EQ, 0, S1_DROP, 1, S1_8023, CL_REG, 0x000, 0, 0x0, 0x00 + 00, +#endif + { "CFI?", /* FIND CFI and If FIND go to CleanUP1 (ignore and send to host) */ + 0x1000, 0x1000, OP_EQ, 0, S1_CLNP, 1, S1_8023, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "8023?", 0xffff, 0x0600, OP_LT, 1, S1_LLC, 0, S1_IPV4, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLC?", 0xffff, 0xaaaa, OP_EQ, 1, S1_LLCc, 0, S1_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLCc?", 0xff00, 0x0300, OP_EQ, 2, S1_IPV4, 0, S1_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "IPV4?", 0xffff, 0x0800, OP_EQ, 1, S1_IPV4c, 0, S1_IPV6, + LD_SAP, 0x100, 3, 0x0, 0xffff}, + { "IPV4 cont?", 0xff00, 0x4500, OP_EQ, 3, S1_IPV4F, 0, S1_CLNP, + LD_SUM, 0x00a, 1, 0x0, 0x0000}, + { "IPV4 frag?", 0x3fff, 0x0000, OP_EQ, 1, S1_TCP44, 0, S1_CLNP, + LD_LEN, 0x03e, 1, 0x0, 0xffff}, + { "TCP44?", 0x00ff, 0x0006, OP_EQ, 7, S1_TCPSQ, 0, S1_ESP4, + LD_FID, 0x182, 1, 0x0, 0xffff}, /* FID IP4&TCP src+dst */ + { "IPV6?", 0xffff, 0x86dd, OP_EQ, 1, S1_IPV6L, 0, S1_CLNP, + LD_SUM, 0x015, 1, 0x0, 0x0000}, + { "IPV6 len", 0xf000, 0x6000, OP_EQ, 0, S1_IPV6c, 0, S1_CLNP, + IM_R1, 0x128, 1, 0x0, 0xffff}, + { "IPV6 cont?", 0x0000, 0x0000, OP_EQ, 3, S1_TCP64, 0, S1_CLNP, + LD_FID, 0x484, 1, 0x0, 0xffff}, /* FID IP6&TCP src+dst */ + { "TCP64?", +#if 0 +//@@@0xff00, 0x0600, OP_EQ, 18, S1_TCPSQ, 0, S1_ESP6, LD_LEN, 0x03f, 1, 0x0, 0xffff, +#endif + 0xff00, 0x0600, OP_EQ, 12, S1_TCPSQ, 0, S1_ESP6, LD_LEN, + 0x03f, 1, 0x0, 0xffff}, + { "TCP seq", /* 14:DADDR should point to dest port */ + 0xFFFF, 0x0080, OP_EQ, 0, S2_HTTP, 0, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0xFFFF, 0x8080, OP_EQ, 0, S2_HTTP, 0, + S1_TCPHL, ST_FLG, 0x145, 2, 0x0, 0x002f}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, S1_TCPHc, + LD_R1, 0x205, 3, 0xB, 0xf000} , + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP2, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { "Cleanup 2", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + CL_REG, 0x002, 3, 0x0, 0x0000}, + { "Drop packet", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x080, 3, 0x0, 0xffff}, + { "No HTTP", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x044, 3, 0x0, 0xffff}, + { "IPV4 ESP encrypted?", /* S1_ESP4 */ + 0x00ff, 0x0032, OP_EQ, 0, S1_CLNP2, 0, S1_AH4, IM_CTL, + 0x021, 1, 0x0, 0xffff}, + { "IPV4 AH encrypted?", /* S1_AH4 */ + 0x00ff, 0x0033, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP, IM_CTL, + 0x021, 1, 0x0, 0xffff}, + { "IPV6 ESP encrypted?", /* S1_ESP6 */ +#if 0 +//@@@0x00ff, 0x0032, OP_EQ, 0, S1_CLNP2, 0, S1_AH6, IM_CTL, 0x021, 1, 0x0, 0xffff, +#endif + 0xff00, 0x3200, OP_EQ, 0, S1_CLNP2, 0, S1_AH6, IM_CTL, + 0x021, 1, 0x0, 0xffff}, + { "IPV6 AH encrypted?", /* S1_AH6 */ +#if 0 +//@@@0x00ff, 0x0033, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP, IM_CTL, 0x021, 1, 0x0, 0xffff, +#endif + 0xff00, 0x3300, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP, IM_CTL, + 0x021, 1, 0x0, 0xffff}, + { NULL }, +}; +#ifdef HP_ENCRYPT_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_encryptiontab +#endif +#endif + +static cas_hp_inst_t cas_prog_null[] = { {NULL} }; +#ifdef HP_NULL_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_null +#endif + +/* firmware patch for NS_DP83065 */ +typedef struct cas_saturn_patch { + u16 addr; + u16 val; +} cas_saturn_patch_t; + +#if 1 +cas_saturn_patch_t cas_saturn_patch[] = { +{0x8200, 0x007e}, {0x8201, 0x0082}, {0x8202, 0x0009}, +{0x8203, 0x0000}, {0x8204, 0x0000}, {0x8205, 0x0000}, +{0x8206, 0x0000}, {0x8207, 0x0000}, {0x8208, 0x0000}, +{0x8209, 0x008e}, {0x820a, 0x008e}, {0x820b, 0x00ff}, +{0x820c, 0x00ce}, {0x820d, 0x0082}, {0x820e, 0x0025}, +{0x820f, 0x00ff}, {0x8210, 0x0001}, {0x8211, 0x000f}, +{0x8212, 0x00ce}, {0x8213, 0x0084}, {0x8214, 0x0026}, +{0x8215, 0x00ff}, {0x8216, 0x0001}, {0x8217, 0x0011}, +{0x8218, 0x00ce}, {0x8219, 0x0085}, {0x821a, 0x003d}, +{0x821b, 0x00df}, {0x821c, 0x00e5}, {0x821d, 0x0086}, +{0x821e, 0x0039}, {0x821f, 0x00b7}, {0x8220, 0x008f}, +{0x8221, 0x00f8}, {0x8222, 0x007e}, {0x8223, 0x00c3}, +{0x8224, 0x00c2}, {0x8225, 0x0096}, {0x8226, 0x0047}, +{0x8227, 0x0084}, {0x8228, 0x00f3}, {0x8229, 0x008a}, +{0x822a, 0x0000}, {0x822b, 0x0097}, {0x822c, 0x0047}, +{0x822d, 0x00ce}, {0x822e, 0x0082}, {0x822f, 0x0033}, +{0x8230, 0x00ff}, {0x8231, 0x0001}, {0x8232, 0x000f}, +{0x8233, 0x0096}, {0x8234, 0x0046}, {0x8235, 0x0084}, +{0x8236, 0x000c}, {0x8237, 0x0081}, {0x8238, 0x0004}, +{0x8239, 0x0027}, {0x823a, 0x000b}, {0x823b, 0x0096}, +{0x823c, 0x0046}, {0x823d, 0x0084}, {0x823e, 0x000c}, +{0x823f, 0x0081}, {0x8240, 0x0008}, {0x8241, 0x0027}, +{0x8242, 0x0057}, {0x8243, 0x007e}, {0x8244, 0x0084}, +{0x8245, 0x0025}, {0x8246, 0x0096}, {0x8247, 0x0047}, +{0x8248, 0x0084}, {0x8249, 0x00f3}, {0x824a, 0x008a}, +{0x824b, 0x0004}, {0x824c, 0x0097}, {0x824d, 0x0047}, +{0x824e, 0x00ce}, {0x824f, 0x0082}, {0x8250, 0x0054}, +{0x8251, 0x00ff}, {0x8252, 0x0001}, {0x8253, 0x000f}, +{0x8254, 0x0096}, {0x8255, 0x0046}, {0x8256, 0x0084}, +{0x8257, 0x000c}, {0x8258, 0x0081}, {0x8259, 0x0004}, +{0x825a, 0x0026}, {0x825b, 0x0038}, {0x825c, 0x00b6}, +{0x825d, 0x0012}, {0x825e, 0x0020}, {0x825f, 0x0084}, +{0x8260, 0x0020}, {0x8261, 0x0026}, {0x8262, 0x0003}, +{0x8263, 0x007e}, {0x8264, 0x0084}, {0x8265, 0x0025}, +{0x8266, 0x0096}, {0x8267, 0x007b}, {0x8268, 0x00d6}, +{0x8269, 0x007c}, {0x826a, 0x00fe}, {0x826b, 0x008f}, +{0x826c, 0x0056}, {0x826d, 0x00bd}, {0x826e, 0x00f7}, +{0x826f, 0x00b6}, {0x8270, 0x00fe}, {0x8271, 0x008f}, +{0x8272, 0x004e}, {0x8273, 0x00bd}, {0x8274, 0x00ec}, +{0x8275, 0x008e}, {0x8276, 0x00bd}, {0x8277, 0x00fa}, +{0x8278, 0x00f7}, {0x8279, 0x00bd}, {0x827a, 0x00f7}, +{0x827b, 0x0028}, {0x827c, 0x00ce}, {0x827d, 0x0082}, +{0x827e, 0x0082}, {0x827f, 0x00ff}, {0x8280, 0x0001}, +{0x8281, 0x000f}, {0x8282, 0x0096}, {0x8283, 0x0046}, +{0x8284, 0x0084}, {0x8285, 0x000c}, {0x8286, 0x0081}, +{0x8287, 0x0004}, {0x8288, 0x0026}, {0x8289, 0x000a}, +{0x828a, 0x00b6}, {0x828b, 0x0012}, {0x828c, 0x0020}, +{0x828d, 0x0084}, {0x828e, 0x0020}, {0x828f, 0x0027}, +{0x8290, 0x00b5}, {0x8291, 0x007e}, {0x8292, 0x0084}, +{0x8293, 0x0025}, {0x8294, 0x00bd}, {0x8295, 0x00f7}, +{0x8296, 0x001f}, {0x8297, 0x007e}, {0x8298, 0x0084}, +{0x8299, 0x001f}, {0x829a, 0x0096}, {0x829b, 0x0047}, +{0x829c, 0x0084}, {0x829d, 0x00f3}, {0x829e, 0x008a}, +{0x829f, 0x0008}, {0x82a0, 0x0097}, {0x82a1, 0x0047}, +{0x82a2, 0x00de}, {0x82a3, 0x00e1}, {0x82a4, 0x00ad}, +{0x82a5, 0x0000}, {0x82a6, 0x00ce}, {0x82a7, 0x0082}, +{0x82a8, 0x00af}, {0x82a9, 0x00ff}, {0x82aa, 0x0001}, +{0x82ab, 0x000f}, {0x82ac, 0x007e}, {0x82ad, 0x0084}, +{0x82ae, 0x0025}, {0x82af, 0x0096}, {0x82b0, 0x0041}, +{0x82b1, 0x0085}, {0x82b2, 0x0010}, {0x82b3, 0x0026}, +{0x82b4, 0x0006}, {0x82b5, 0x0096}, {0x82b6, 0x0023}, +{0x82b7, 0x0085}, {0x82b8, 0x0040}, {0x82b9, 0x0027}, +{0x82ba, 0x0006}, {0x82bb, 0x00bd}, {0x82bc, 0x00ed}, +{0x82bd, 0x0000}, {0x82be, 0x007e}, {0x82bf, 0x0083}, +{0x82c0, 0x00a2}, {0x82c1, 0x00de}, {0x82c2, 0x0042}, +{0x82c3, 0x00bd}, {0x82c4, 0x00eb}, {0x82c5, 0x008e}, +{0x82c6, 0x0096}, {0x82c7, 0x0024}, {0x82c8, 0x0084}, +{0x82c9, 0x0008}, {0x82ca, 0x0027}, {0x82cb, 0x0003}, +{0x82cc, 0x007e}, {0x82cd, 0x0083}, {0x82ce, 0x00df}, +{0x82cf, 0x0096}, {0x82d0, 0x007b}, {0x82d1, 0x00d6}, +{0x82d2, 0x007c}, {0x82d3, 0x00fe}, {0x82d4, 0x008f}, +{0x82d5, 0x0056}, {0x82d6, 0x00bd}, {0x82d7, 0x00f7}, +{0x82d8, 0x00b6}, {0x82d9, 0x00fe}, {0x82da, 0x008f}, +{0x82db, 0x0050}, {0x82dc, 0x00bd}, {0x82dd, 0x00ec}, +{0x82de, 0x008e}, {0x82df, 0x00bd}, {0x82e0, 0x00fa}, +{0x82e1, 0x00f7}, {0x82e2, 0x0086}, {0x82e3, 0x0011}, +{0x82e4, 0x00c6}, {0x82e5, 0x0049}, {0x82e6, 0x00bd}, +{0x82e7, 0x00e4}, {0x82e8, 0x0012}, {0x82e9, 0x00ce}, +{0x82ea, 0x0082}, {0x82eb, 0x00ef}, {0x82ec, 0x00ff}, +{0x82ed, 0x0001}, {0x82ee, 0x000f}, {0x82ef, 0x0096}, +{0x82f0, 0x0046}, {0x82f1, 0x0084}, {0x82f2, 0x000c}, +{0x82f3, 0x0081}, {0x82f4, 0x0000}, {0x82f5, 0x0027}, +{0x82f6, 0x0017}, {0x82f7, 0x00c6}, {0x82f8, 0x0049}, +{0x82f9, 0x00bd}, {0x82fa, 0x00e4}, {0x82fb, 0x0091}, +{0x82fc, 0x0024}, {0x82fd, 0x000d}, {0x82fe, 0x00b6}, +{0x82ff, 0x0012}, {0x8300, 0x0020}, {0x8301, 0x0085}, +{0x8302, 0x0020}, {0x8303, 0x0026}, {0x8304, 0x000c}, +{0x8305, 0x00ce}, {0x8306, 0x0082}, {0x8307, 0x00c1}, +{0x8308, 0x00ff}, {0x8309, 0x0001}, {0x830a, 0x000f}, +{0x830b, 0x007e}, {0x830c, 0x0084}, {0x830d, 0x0025}, +{0x830e, 0x007e}, {0x830f, 0x0084}, {0x8310, 0x0016}, +{0x8311, 0x00fe}, {0x8312, 0x008f}, {0x8313, 0x0052}, +{0x8314, 0x00bd}, {0x8315, 0x00ec}, {0x8316, 0x008e}, +{0x8317, 0x00bd}, {0x8318, 0x00fa}, {0x8319, 0x00f7}, +{0x831a, 0x0086}, {0x831b, 0x006a}, {0x831c, 0x00c6}, +{0x831d, 0x0049}, {0x831e, 0x00bd}, {0x831f, 0x00e4}, +{0x8320, 0x0012}, {0x8321, 0x00ce}, {0x8322, 0x0083}, +{0x8323, 0x0027}, {0x8324, 0x00ff}, {0x8325, 0x0001}, +{0x8326, 0x000f}, {0x8327, 0x0096}, {0x8328, 0x0046}, +{0x8329, 0x0084}, {0x832a, 0x000c}, {0x832b, 0x0081}, +{0x832c, 0x0000}, {0x832d, 0x0027}, {0x832e, 0x000a}, +{0x832f, 0x00c6}, {0x8330, 0x0049}, {0x8331, 0x00bd}, +{0x8332, 0x00e4}, {0x8333, 0x0091}, {0x8334, 0x0025}, +{0x8335, 0x0006}, {0x8336, 0x007e}, {0x8337, 0x0084}, +{0x8338, 0x0025}, {0x8339, 0x007e}, {0x833a, 0x0084}, +{0x833b, 0x0016}, {0x833c, 0x00b6}, {0x833d, 0x0018}, +{0x833e, 0x0070}, {0x833f, 0x00bb}, {0x8340, 0x0019}, +{0x8341, 0x0070}, {0x8342, 0x002a}, {0x8343, 0x0004}, +{0x8344, 0x0081}, {0x8345, 0x00af}, {0x8346, 0x002e}, +{0x8347, 0x0019}, {0x8348, 0x0096}, {0x8349, 0x007b}, +{0x834a, 0x00f6}, {0x834b, 0x0020}, {0x834c, 0x0007}, +{0x834d, 0x00fa}, {0x834e, 0x0020}, {0x834f, 0x0027}, +{0x8350, 0x00c4}, {0x8351, 0x0038}, {0x8352, 0x0081}, +{0x8353, 0x0038}, {0x8354, 0x0027}, {0x8355, 0x000b}, +{0x8356, 0x00f6}, {0x8357, 0x0020}, {0x8358, 0x0007}, +{0x8359, 0x00fa}, {0x835a, 0x0020}, {0x835b, 0x0027}, +{0x835c, 0x00cb}, {0x835d, 0x0008}, {0x835e, 0x007e}, +{0x835f, 0x0082}, {0x8360, 0x00d3}, {0x8361, 0x00bd}, +{0x8362, 0x00f7}, {0x8363, 0x0066}, {0x8364, 0x0086}, +{0x8365, 0x0074}, {0x8366, 0x00c6}, {0x8367, 0x0049}, +{0x8368, 0x00bd}, {0x8369, 0x00e4}, {0x836a, 0x0012}, +{0x836b, 0x00ce}, {0x836c, 0x0083}, {0x836d, 0x0071}, +{0x836e, 0x00ff}, {0x836f, 0x0001}, {0x8370, 0x000f}, +{0x8371, 0x0096}, {0x8372, 0x0046}, {0x8373, 0x0084}, +{0x8374, 0x000c}, {0x8375, 0x0081}, {0x8376, 0x0008}, +{0x8377, 0x0026}, {0x8378, 0x000a}, {0x8379, 0x00c6}, +{0x837a, 0x0049}, {0x837b, 0x00bd}, {0x837c, 0x00e4}, +{0x837d, 0x0091}, {0x837e, 0x0025}, {0x837f, 0x0006}, +{0x8380, 0x007e}, {0x8381, 0x0084}, {0x8382, 0x0025}, +{0x8383, 0x007e}, {0x8384, 0x0084}, {0x8385, 0x0016}, +{0x8386, 0x00bd}, {0x8387, 0x00f7}, {0x8388, 0x003e}, +{0x8389, 0x0026}, {0x838a, 0x000e}, {0x838b, 0x00bd}, +{0x838c, 0x00e5}, {0x838d, 0x0009}, {0x838e, 0x0026}, +{0x838f, 0x0006}, {0x8390, 0x00ce}, {0x8391, 0x0082}, +{0x8392, 0x00c1}, {0x8393, 0x00ff}, {0x8394, 0x0001}, +{0x8395, 0x000f}, {0x8396, 0x007e}, {0x8397, 0x0084}, +{0x8398, 0x0025}, {0x8399, 0x00fe}, {0x839a, 0x008f}, +{0x839b, 0x0054}, {0x839c, 0x00bd}, {0x839d, 0x00ec}, +{0x839e, 0x008e}, {0x839f, 0x00bd}, {0x83a0, 0x00fa}, +{0x83a1, 0x00f7}, {0x83a2, 0x00bd}, {0x83a3, 0x00f7}, +{0x83a4, 0x0033}, {0x83a5, 0x0086}, {0x83a6, 0x000f}, +{0x83a7, 0x00c6}, {0x83a8, 0x0051}, {0x83a9, 0x00bd}, +{0x83aa, 0x00e4}, {0x83ab, 0x0012}, {0x83ac, 0x00ce}, +{0x83ad, 0x0083}, {0x83ae, 0x00b2}, {0x83af, 0x00ff}, +{0x83b0, 0x0001}, {0x83b1, 0x000f}, {0x83b2, 0x0096}, +{0x83b3, 0x0046}, {0x83b4, 0x0084}, {0x83b5, 0x000c}, +{0x83b6, 0x0081}, {0x83b7, 0x0008}, {0x83b8, 0x0026}, +{0x83b9, 0x005c}, {0x83ba, 0x00b6}, {0x83bb, 0x0012}, +{0x83bc, 0x0020}, {0x83bd, 0x0084}, {0x83be, 0x003f}, +{0x83bf, 0x0081}, {0x83c0, 0x003a}, {0x83c1, 0x0027}, +{0x83c2, 0x001c}, {0x83c3, 0x0096}, {0x83c4, 0x0023}, +{0x83c5, 0x0085}, {0x83c6, 0x0040}, {0x83c7, 0x0027}, +{0x83c8, 0x0003}, {0x83c9, 0x007e}, {0x83ca, 0x0084}, +{0x83cb, 0x0025}, {0x83cc, 0x00c6}, {0x83cd, 0x0051}, +{0x83ce, 0x00bd}, {0x83cf, 0x00e4}, {0x83d0, 0x0091}, +{0x83d1, 0x0025}, {0x83d2, 0x0003}, {0x83d3, 0x007e}, +{0x83d4, 0x0084}, {0x83d5, 0x0025}, {0x83d6, 0x00ce}, +{0x83d7, 0x0082}, {0x83d8, 0x00c1}, {0x83d9, 0x00ff}, +{0x83da, 0x0001}, {0x83db, 0x000f}, {0x83dc, 0x007e}, +{0x83dd, 0x0084}, {0x83de, 0x0025}, {0x83df, 0x00bd}, +{0x83e0, 0x00f8}, {0x83e1, 0x0037}, {0x83e2, 0x007c}, +{0x83e3, 0x0000}, {0x83e4, 0x007a}, {0x83e5, 0x00ce}, +{0x83e6, 0x0083}, {0x83e7, 0x00ee}, {0x83e8, 0x00ff}, +{0x83e9, 0x0001}, {0x83ea, 0x000f}, {0x83eb, 0x007e}, +{0x83ec, 0x0084}, {0x83ed, 0x0025}, {0x83ee, 0x0096}, +{0x83ef, 0x0046}, {0x83f0, 0x0084}, {0x83f1, 0x000c}, +{0x83f2, 0x0081}, {0x83f3, 0x0008}, {0x83f4, 0x0026}, +{0x83f5, 0x0020}, {0x83f6, 0x0096}, {0x83f7, 0x0024}, +{0x83f8, 0x0084}, {0x83f9, 0x0008}, {0x83fa, 0x0026}, +{0x83fb, 0x0029}, {0x83fc, 0x00b6}, {0x83fd, 0x0018}, +{0x83fe, 0x0082}, {0x83ff, 0x00bb}, {0x8400, 0x0019}, +{0x8401, 0x0082}, {0x8402, 0x00b1}, {0x8403, 0x0001}, +{0x8404, 0x003b}, {0x8405, 0x0022}, {0x8406, 0x0009}, +{0x8407, 0x00b6}, {0x8408, 0x0012}, {0x8409, 0x0020}, +{0x840a, 0x0084}, {0x840b, 0x0037}, {0x840c, 0x0081}, +{0x840d, 0x0032}, {0x840e, 0x0027}, {0x840f, 0x0015}, +{0x8410, 0x00bd}, {0x8411, 0x00f8}, {0x8412, 0x0044}, +{0x8413, 0x007e}, {0x8414, 0x0082}, {0x8415, 0x00c1}, +{0x8416, 0x00bd}, {0x8417, 0x00f7}, {0x8418, 0x001f}, +{0x8419, 0x00bd}, {0x841a, 0x00f8}, {0x841b, 0x0044}, +{0x841c, 0x00bd}, {0x841d, 0x00fc}, {0x841e, 0x0029}, +{0x841f, 0x00ce}, {0x8420, 0x0082}, {0x8421, 0x0025}, +{0x8422, 0x00ff}, {0x8423, 0x0001}, {0x8424, 0x000f}, +{0x8425, 0x0039}, {0x8426, 0x0096}, {0x8427, 0x0047}, +{0x8428, 0x0084}, {0x8429, 0x00fc}, {0x842a, 0x008a}, +{0x842b, 0x0000}, {0x842c, 0x0097}, {0x842d, 0x0047}, +{0x842e, 0x00ce}, {0x842f, 0x0084}, {0x8430, 0x0034}, +{0x8431, 0x00ff}, {0x8432, 0x0001}, {0x8433, 0x0011}, +{0x8434, 0x0096}, {0x8435, 0x0046}, {0x8436, 0x0084}, +{0x8437, 0x0003}, {0x8438, 0x0081}, {0x8439, 0x0002}, +{0x843a, 0x0027}, {0x843b, 0x0003}, {0x843c, 0x007e}, +{0x843d, 0x0085}, {0x843e, 0x001e}, {0x843f, 0x0096}, +{0x8440, 0x0047}, {0x8441, 0x0084}, {0x8442, 0x00fc}, +{0x8443, 0x008a}, {0x8444, 0x0002}, {0x8445, 0x0097}, +{0x8446, 0x0047}, {0x8447, 0x00de}, {0x8448, 0x00e1}, +{0x8449, 0x00ad}, {0x844a, 0x0000}, {0x844b, 0x0086}, +{0x844c, 0x0001}, {0x844d, 0x00b7}, {0x844e, 0x0012}, +{0x844f, 0x0051}, {0x8450, 0x00bd}, {0x8451, 0x00f7}, +{0x8452, 0x0014}, {0x8453, 0x00b6}, {0x8454, 0x0010}, +{0x8455, 0x0031}, {0x8456, 0x0084}, {0x8457, 0x00fd}, +{0x8458, 0x00b7}, {0x8459, 0x0010}, {0x845a, 0x0031}, +{0x845b, 0x00bd}, {0x845c, 0x00f8}, {0x845d, 0x001e}, +{0x845e, 0x0096}, {0x845f, 0x0081}, {0x8460, 0x00d6}, +{0x8461, 0x0082}, {0x8462, 0x00fe}, {0x8463, 0x008f}, +{0x8464, 0x005a}, {0x8465, 0x00bd}, {0x8466, 0x00f7}, +{0x8467, 0x00b6}, {0x8468, 0x00fe}, {0x8469, 0x008f}, +{0x846a, 0x005c}, {0x846b, 0x00bd}, {0x846c, 0x00ec}, +{0x846d, 0x008e}, {0x846e, 0x00bd}, {0x846f, 0x00fa}, +{0x8470, 0x00f7}, {0x8471, 0x0086}, {0x8472, 0x0008}, +{0x8473, 0x00d6}, {0x8474, 0x0000}, {0x8475, 0x00c5}, +{0x8476, 0x0010}, {0x8477, 0x0026}, {0x8478, 0x0002}, +{0x8479, 0x008b}, {0x847a, 0x0020}, {0x847b, 0x00c6}, +{0x847c, 0x0051}, {0x847d, 0x00bd}, {0x847e, 0x00e4}, +{0x847f, 0x0012}, {0x8480, 0x00ce}, {0x8481, 0x0084}, +{0x8482, 0x0086}, {0x8483, 0x00ff}, {0x8484, 0x0001}, +{0x8485, 0x0011}, {0x8486, 0x0096}, {0x8487, 0x0046}, +{0x8488, 0x0084}, {0x8489, 0x0003}, {0x848a, 0x0081}, +{0x848b, 0x0002}, {0x848c, 0x0027}, {0x848d, 0x0003}, +{0x848e, 0x007e}, {0x848f, 0x0085}, {0x8490, 0x000f}, +{0x8491, 0x00c6}, {0x8492, 0x0051}, {0x8493, 0x00bd}, +{0x8494, 0x00e4}, {0x8495, 0x0091}, {0x8496, 0x0025}, +{0x8497, 0x0003}, {0x8498, 0x007e}, {0x8499, 0x0085}, +{0x849a, 0x001e}, {0x849b, 0x0096}, {0x849c, 0x0044}, +{0x849d, 0x0085}, {0x849e, 0x0010}, {0x849f, 0x0026}, +{0x84a0, 0x000a}, {0x84a1, 0x00b6}, {0x84a2, 0x0012}, +{0x84a3, 0x0050}, {0x84a4, 0x00ba}, {0x84a5, 0x0001}, +{0x84a6, 0x003c}, {0x84a7, 0x0085}, {0x84a8, 0x0010}, +{0x84a9, 0x0027}, {0x84aa, 0x00a8}, {0x84ab, 0x00bd}, +{0x84ac, 0x00f7}, {0x84ad, 0x0066}, {0x84ae, 0x00ce}, +{0x84af, 0x0084}, {0x84b0, 0x00b7}, {0x84b1, 0x00ff}, +{0x84b2, 0x0001}, {0x84b3, 0x0011}, {0x84b4, 0x007e}, +{0x84b5, 0x0085}, {0x84b6, 0x001e}, {0x84b7, 0x0096}, +{0x84b8, 0x0046}, {0x84b9, 0x0084}, {0x84ba, 0x0003}, +{0x84bb, 0x0081}, {0x84bc, 0x0002}, {0x84bd, 0x0026}, +{0x84be, 0x0050}, {0x84bf, 0x00b6}, {0x84c0, 0x0012}, +{0x84c1, 0x0030}, {0x84c2, 0x0084}, {0x84c3, 0x0003}, +{0x84c4, 0x0081}, {0x84c5, 0x0001}, {0x84c6, 0x0027}, +{0x84c7, 0x0003}, {0x84c8, 0x007e}, {0x84c9, 0x0085}, +{0x84ca, 0x001e}, {0x84cb, 0x0096}, {0x84cc, 0x0044}, +{0x84cd, 0x0085}, {0x84ce, 0x0010}, {0x84cf, 0x0026}, +{0x84d0, 0x0013}, {0x84d1, 0x00b6}, {0x84d2, 0x0012}, +{0x84d3, 0x0050}, {0x84d4, 0x00ba}, {0x84d5, 0x0001}, +{0x84d6, 0x003c}, {0x84d7, 0x0085}, {0x84d8, 0x0010}, +{0x84d9, 0x0026}, {0x84da, 0x0009}, {0x84db, 0x00ce}, +{0x84dc, 0x0084}, {0x84dd, 0x0053}, {0x84de, 0x00ff}, +{0x84df, 0x0001}, {0x84e0, 0x0011}, {0x84e1, 0x007e}, +{0x84e2, 0x0085}, {0x84e3, 0x001e}, {0x84e4, 0x00b6}, +{0x84e5, 0x0010}, {0x84e6, 0x0031}, {0x84e7, 0x008a}, +{0x84e8, 0x0002}, {0x84e9, 0x00b7}, {0x84ea, 0x0010}, +{0x84eb, 0x0031}, {0x84ec, 0x00bd}, {0x84ed, 0x0085}, +{0x84ee, 0x001f}, {0x84ef, 0x00bd}, {0x84f0, 0x00f8}, +{0x84f1, 0x0037}, {0x84f2, 0x007c}, {0x84f3, 0x0000}, +{0x84f4, 0x0080}, {0x84f5, 0x00ce}, {0x84f6, 0x0084}, +{0x84f7, 0x00fe}, {0x84f8, 0x00ff}, {0x84f9, 0x0001}, +{0x84fa, 0x0011}, {0x84fb, 0x007e}, {0x84fc, 0x0085}, +{0x84fd, 0x001e}, {0x84fe, 0x0096}, {0x84ff, 0x0046}, +{0x8500, 0x0084}, {0x8501, 0x0003}, {0x8502, 0x0081}, +{0x8503, 0x0002}, {0x8504, 0x0026}, {0x8505, 0x0009}, +{0x8506, 0x00b6}, {0x8507, 0x0012}, {0x8508, 0x0030}, +{0x8509, 0x0084}, {0x850a, 0x0003}, {0x850b, 0x0081}, +{0x850c, 0x0001}, {0x850d, 0x0027}, {0x850e, 0x000f}, +{0x850f, 0x00bd}, {0x8510, 0x00f8}, {0x8511, 0x0044}, +{0x8512, 0x00bd}, {0x8513, 0x00f7}, {0x8514, 0x000b}, +{0x8515, 0x00bd}, {0x8516, 0x00fc}, {0x8517, 0x0029}, +{0x8518, 0x00ce}, {0x8519, 0x0084}, {0x851a, 0x0026}, +{0x851b, 0x00ff}, {0x851c, 0x0001}, {0x851d, 0x0011}, +{0x851e, 0x0039}, {0x851f, 0x00d6}, {0x8520, 0x0022}, +{0x8521, 0x00c4}, {0x8522, 0x000f}, {0x8523, 0x00b6}, +{0x8524, 0x0012}, {0x8525, 0x0030}, {0x8526, 0x00ba}, +{0x8527, 0x0012}, {0x8528, 0x0032}, {0x8529, 0x0084}, +{0x852a, 0x0004}, {0x852b, 0x0027}, {0x852c, 0x000d}, +{0x852d, 0x0096}, {0x852e, 0x0022}, {0x852f, 0x0085}, +{0x8530, 0x0004}, {0x8531, 0x0027}, {0x8532, 0x0005}, +{0x8533, 0x00ca}, {0x8534, 0x0010}, {0x8535, 0x007e}, +{0x8536, 0x0085}, {0x8537, 0x003a}, {0x8538, 0x00ca}, +{0x8539, 0x0020}, {0x853a, 0x00d7}, {0x853b, 0x0022}, +{0x853c, 0x0039}, {0x853d, 0x0086}, {0x853e, 0x0000}, +{0x853f, 0x0097}, {0x8540, 0x0083}, {0x8541, 0x0018}, +{0x8542, 0x00ce}, {0x8543, 0x001c}, {0x8544, 0x0000}, +{0x8545, 0x00bd}, {0x8546, 0x00eb}, {0x8547, 0x0046}, +{0x8548, 0x0096}, {0x8549, 0x0057}, {0x854a, 0x0085}, +{0x854b, 0x0001}, {0x854c, 0x0027}, {0x854d, 0x0002}, +{0x854e, 0x004f}, {0x854f, 0x0039}, {0x8550, 0x0085}, +{0x8551, 0x0002}, {0x8552, 0x0027}, {0x8553, 0x0001}, +{0x8554, 0x0039}, {0x8555, 0x007f}, {0x8556, 0x008f}, +{0x8557, 0x007d}, {0x8558, 0x0086}, {0x8559, 0x0004}, +{0x855a, 0x00b7}, {0x855b, 0x0012}, {0x855c, 0x0004}, +{0x855d, 0x0086}, {0x855e, 0x0008}, {0x855f, 0x00b7}, +{0x8560, 0x0012}, {0x8561, 0x0007}, {0x8562, 0x0086}, +{0x8563, 0x0010}, {0x8564, 0x00b7}, {0x8565, 0x0012}, +{0x8566, 0x000c}, {0x8567, 0x0086}, {0x8568, 0x0007}, +{0x8569, 0x00b7}, {0x856a, 0x0012}, {0x856b, 0x0006}, +{0x856c, 0x00b6}, {0x856d, 0x008f}, {0x856e, 0x007d}, +{0x856f, 0x00b7}, {0x8570, 0x0012}, {0x8571, 0x0070}, +{0x8572, 0x0086}, {0x8573, 0x0001}, {0x8574, 0x00ba}, +{0x8575, 0x0012}, {0x8576, 0x0004}, {0x8577, 0x00b7}, +{0x8578, 0x0012}, {0x8579, 0x0004}, {0x857a, 0x0001}, +{0x857b, 0x0001}, {0x857c, 0x0001}, {0x857d, 0x0001}, +{0x857e, 0x0001}, {0x857f, 0x0001}, {0x8580, 0x00b6}, +{0x8581, 0x0012}, {0x8582, 0x0004}, {0x8583, 0x0084}, +{0x8584, 0x00fe}, {0x8585, 0x008a}, {0x8586, 0x0002}, +{0x8587, 0x00b7}, {0x8588, 0x0012}, {0x8589, 0x0004}, +{0x858a, 0x0001}, {0x858b, 0x0001}, {0x858c, 0x0001}, +{0x858d, 0x0001}, {0x858e, 0x0001}, {0x858f, 0x0001}, +{0x8590, 0x0086}, {0x8591, 0x00fd}, {0x8592, 0x00b4}, +{0x8593, 0x0012}, {0x8594, 0x0004}, {0x8595, 0x00b7}, +{0x8596, 0x0012}, {0x8597, 0x0004}, {0x8598, 0x00b6}, +{0x8599, 0x0012}, {0x859a, 0x0000}, {0x859b, 0x0084}, +{0x859c, 0x0008}, {0x859d, 0x0081}, {0x859e, 0x0008}, +{0x859f, 0x0027}, {0x85a0, 0x0016}, {0x85a1, 0x00b6}, +{0x85a2, 0x008f}, {0x85a3, 0x007d}, {0x85a4, 0x0081}, +{0x85a5, 0x000c}, {0x85a6, 0x0027}, {0x85a7, 0x0008}, +{0x85a8, 0x008b}, {0x85a9, 0x0004}, {0x85aa, 0x00b7}, +{0x85ab, 0x008f}, {0x85ac, 0x007d}, {0x85ad, 0x007e}, +{0x85ae, 0x0085}, {0x85af, 0x006c}, {0x85b0, 0x0086}, +{0x85b1, 0x0003}, {0x85b2, 0x0097}, {0x85b3, 0x0040}, +{0x85b4, 0x007e}, {0x85b5, 0x0089}, {0x85b6, 0x006e}, +{0x85b7, 0x0086}, {0x85b8, 0x0007}, {0x85b9, 0x00b7}, +{0x85ba, 0x0012}, {0x85bb, 0x0006}, {0x85bc, 0x005f}, +{0x85bd, 0x00f7}, {0x85be, 0x008f}, {0x85bf, 0x0082}, +{0x85c0, 0x005f}, {0x85c1, 0x00f7}, {0x85c2, 0x008f}, +{0x85c3, 0x007f}, {0x85c4, 0x00f7}, {0x85c5, 0x008f}, +{0x85c6, 0x0070}, {0x85c7, 0x00f7}, {0x85c8, 0x008f}, +{0x85c9, 0x0071}, {0x85ca, 0x00f7}, {0x85cb, 0x008f}, +{0x85cc, 0x0072}, {0x85cd, 0x00f7}, {0x85ce, 0x008f}, +{0x85cf, 0x0073}, {0x85d0, 0x00f7}, {0x85d1, 0x008f}, +{0x85d2, 0x0074}, {0x85d3, 0x00f7}, {0x85d4, 0x008f}, +{0x85d5, 0x0075}, {0x85d6, 0x00f7}, {0x85d7, 0x008f}, +{0x85d8, 0x0076}, {0x85d9, 0x00f7}, {0x85da, 0x008f}, +{0x85db, 0x0077}, {0x85dc, 0x00f7}, {0x85dd, 0x008f}, +{0x85de, 0x0078}, {0x85df, 0x00f7}, {0x85e0, 0x008f}, +{0x85e1, 0x0079}, {0x85e2, 0x00f7}, {0x85e3, 0x008f}, +{0x85e4, 0x007a}, {0x85e5, 0x00f7}, {0x85e6, 0x008f}, +{0x85e7, 0x007b}, {0x85e8, 0x00b6}, {0x85e9, 0x0012}, +{0x85ea, 0x0004}, {0x85eb, 0x008a}, {0x85ec, 0x0010}, +{0x85ed, 0x00b7}, {0x85ee, 0x0012}, {0x85ef, 0x0004}, +{0x85f0, 0x0086}, {0x85f1, 0x00e4}, {0x85f2, 0x00b7}, +{0x85f3, 0x0012}, {0x85f4, 0x0070}, {0x85f5, 0x00b7}, +{0x85f6, 0x0012}, {0x85f7, 0x0007}, {0x85f8, 0x00f7}, +{0x85f9, 0x0012}, {0x85fa, 0x0005}, {0x85fb, 0x00f7}, +{0x85fc, 0x0012}, {0x85fd, 0x0009}, {0x85fe, 0x0086}, +{0x85ff, 0x0008}, {0x8600, 0x00ba}, {0x8601, 0x0012}, +{0x8602, 0x0004}, {0x8603, 0x00b7}, {0x8604, 0x0012}, +{0x8605, 0x0004}, {0x8606, 0x0086}, {0x8607, 0x00f7}, +{0x8608, 0x00b4}, {0x8609, 0x0012}, {0x860a, 0x0004}, +{0x860b, 0x00b7}, {0x860c, 0x0012}, {0x860d, 0x0004}, +{0x860e, 0x0001}, {0x860f, 0x0001}, {0x8610, 0x0001}, +{0x8611, 0x0001}, {0x8612, 0x0001}, {0x8613, 0x0001}, +{0x8614, 0x00b6}, {0x8615, 0x0012}, {0x8616, 0x0008}, +{0x8617, 0x0027}, {0x8618, 0x007f}, {0x8619, 0x0081}, +{0x861a, 0x0080}, {0x861b, 0x0026}, {0x861c, 0x000b}, +{0x861d, 0x0086}, {0x861e, 0x0008}, {0x861f, 0x00ce}, +{0x8620, 0x008f}, {0x8621, 0x0079}, {0x8622, 0x00bd}, +{0x8623, 0x0089}, {0x8624, 0x007b}, {0x8625, 0x007e}, +{0x8626, 0x0086}, {0x8627, 0x008e}, {0x8628, 0x0081}, +{0x8629, 0x0040}, {0x862a, 0x0026}, {0x862b, 0x000b}, +{0x862c, 0x0086}, {0x862d, 0x0004}, {0x862e, 0x00ce}, +{0x862f, 0x008f}, {0x8630, 0x0076}, {0x8631, 0x00bd}, +{0x8632, 0x0089}, {0x8633, 0x007b}, {0x8634, 0x007e}, +{0x8635, 0x0086}, {0x8636, 0x008e}, {0x8637, 0x0081}, +{0x8638, 0x0020}, {0x8639, 0x0026}, {0x863a, 0x000b}, +{0x863b, 0x0086}, {0x863c, 0x0002}, {0x863d, 0x00ce}, +{0x863e, 0x008f}, {0x863f, 0x0073}, {0x8640, 0x00bd}, +{0x8641, 0x0089}, {0x8642, 0x007b}, {0x8643, 0x007e}, +{0x8644, 0x0086}, {0x8645, 0x008e}, {0x8646, 0x0081}, +{0x8647, 0x0010}, {0x8648, 0x0026}, {0x8649, 0x000b}, +{0x864a, 0x0086}, {0x864b, 0x0001}, {0x864c, 0x00ce}, +{0x864d, 0x008f}, {0x864e, 0x0070}, {0x864f, 0x00bd}, +{0x8650, 0x0089}, {0x8651, 0x007b}, {0x8652, 0x007e}, +{0x8653, 0x0086}, {0x8654, 0x008e}, {0x8655, 0x0081}, +{0x8656, 0x0008}, {0x8657, 0x0026}, {0x8658, 0x000b}, +{0x8659, 0x0086}, {0x865a, 0x0008}, {0x865b, 0x00ce}, +{0x865c, 0x008f}, {0x865d, 0x0079}, {0x865e, 0x00bd}, +{0x865f, 0x0089}, {0x8660, 0x007f}, {0x8661, 0x007e}, +{0x8662, 0x0086}, {0x8663, 0x008e}, {0x8664, 0x0081}, +{0x8665, 0x0004}, {0x8666, 0x0026}, {0x8667, 0x000b}, +{0x8668, 0x0086}, {0x8669, 0x0004}, {0x866a, 0x00ce}, +{0x866b, 0x008f}, {0x866c, 0x0076}, {0x866d, 0x00bd}, +{0x866e, 0x0089}, {0x866f, 0x007f}, {0x8670, 0x007e}, +{0x8671, 0x0086}, {0x8672, 0x008e}, {0x8673, 0x0081}, +{0x8674, 0x0002}, {0x8675, 0x0026}, {0x8676, 0x000b}, +{0x8677, 0x008a}, {0x8678, 0x0002}, {0x8679, 0x00ce}, +{0x867a, 0x008f}, {0x867b, 0x0073}, {0x867c, 0x00bd}, +{0x867d, 0x0089}, {0x867e, 0x007f}, {0x867f, 0x007e}, +{0x8680, 0x0086}, {0x8681, 0x008e}, {0x8682, 0x0081}, +{0x8683, 0x0001}, {0x8684, 0x0026}, {0x8685, 0x0008}, +{0x8686, 0x0086}, {0x8687, 0x0001}, {0x8688, 0x00ce}, +{0x8689, 0x008f}, {0x868a, 0x0070}, {0x868b, 0x00bd}, +{0x868c, 0x0089}, {0x868d, 0x007f}, {0x868e, 0x00b6}, +{0x868f, 0x008f}, {0x8690, 0x007f}, {0x8691, 0x0081}, +{0x8692, 0x000f}, {0x8693, 0x0026}, {0x8694, 0x0003}, +{0x8695, 0x007e}, {0x8696, 0x0087}, {0x8697, 0x0047}, +{0x8698, 0x00b6}, {0x8699, 0x0012}, {0x869a, 0x0009}, +{0x869b, 0x0084}, {0x869c, 0x0003}, {0x869d, 0x0081}, +{0x869e, 0x0003}, {0x869f, 0x0027}, {0x86a0, 0x0006}, +{0x86a1, 0x007c}, {0x86a2, 0x0012}, {0x86a3, 0x0009}, +{0x86a4, 0x007e}, {0x86a5, 0x0085}, {0x86a6, 0x00fe}, +{0x86a7, 0x00b6}, {0x86a8, 0x0012}, {0x86a9, 0x0006}, +{0x86aa, 0x0084}, {0x86ab, 0x0007}, {0x86ac, 0x0081}, +{0x86ad, 0x0007}, {0x86ae, 0x0027}, {0x86af, 0x0008}, +{0x86b0, 0x008b}, {0x86b1, 0x0001}, {0x86b2, 0x00b7}, +{0x86b3, 0x0012}, {0x86b4, 0x0006}, {0x86b5, 0x007e}, +{0x86b6, 0x0086}, {0x86b7, 0x00d5}, {0x86b8, 0x00b6}, +{0x86b9, 0x008f}, {0x86ba, 0x0082}, {0x86bb, 0x0026}, +{0x86bc, 0x000a}, {0x86bd, 0x007c}, {0x86be, 0x008f}, +{0x86bf, 0x0082}, {0x86c0, 0x004f}, {0x86c1, 0x00b7}, +{0x86c2, 0x0012}, {0x86c3, 0x0006}, {0x86c4, 0x007e}, +{0x86c5, 0x0085}, {0x86c6, 0x00c0}, {0x86c7, 0x00b6}, +{0x86c8, 0x0012}, {0x86c9, 0x0006}, {0x86ca, 0x0084}, +{0x86cb, 0x003f}, {0x86cc, 0x0081}, {0x86cd, 0x003f}, +{0x86ce, 0x0027}, {0x86cf, 0x0010}, {0x86d0, 0x008b}, +{0x86d1, 0x0008}, {0x86d2, 0x00b7}, {0x86d3, 0x0012}, +{0x86d4, 0x0006}, {0x86d5, 0x00b6}, {0x86d6, 0x0012}, +{0x86d7, 0x0009}, {0x86d8, 0x0084}, {0x86d9, 0x00fc}, +{0x86da, 0x00b7}, {0x86db, 0x0012}, {0x86dc, 0x0009}, +{0x86dd, 0x007e}, {0x86de, 0x0085}, {0x86df, 0x00fe}, +{0x86e0, 0x00ce}, {0x86e1, 0x008f}, {0x86e2, 0x0070}, +{0x86e3, 0x0018}, {0x86e4, 0x00ce}, {0x86e5, 0x008f}, +{0x86e6, 0x0084}, {0x86e7, 0x00c6}, {0x86e8, 0x000c}, +{0x86e9, 0x00bd}, {0x86ea, 0x0089}, {0x86eb, 0x006f}, +{0x86ec, 0x00ce}, {0x86ed, 0x008f}, {0x86ee, 0x0084}, +{0x86ef, 0x0018}, {0x86f0, 0x00ce}, {0x86f1, 0x008f}, +{0x86f2, 0x0070}, {0x86f3, 0x00c6}, {0x86f4, 0x000c}, +{0x86f5, 0x00bd}, {0x86f6, 0x0089}, {0x86f7, 0x006f}, +{0x86f8, 0x00d6}, {0x86f9, 0x0083}, {0x86fa, 0x00c1}, +{0x86fb, 0x004f}, {0x86fc, 0x002d}, {0x86fd, 0x0003}, +{0x86fe, 0x007e}, {0x86ff, 0x0087}, {0x8700, 0x0040}, +{0x8701, 0x00b6}, {0x8702, 0x008f}, {0x8703, 0x007f}, +{0x8704, 0x0081}, {0x8705, 0x0007}, {0x8706, 0x0027}, +{0x8707, 0x000f}, {0x8708, 0x0081}, {0x8709, 0x000b}, +{0x870a, 0x0027}, {0x870b, 0x0015}, {0x870c, 0x0081}, +{0x870d, 0x000d}, {0x870e, 0x0027}, {0x870f, 0x001b}, +{0x8710, 0x0081}, {0x8711, 0x000e}, {0x8712, 0x0027}, +{0x8713, 0x0021}, {0x8714, 0x007e}, {0x8715, 0x0087}, +{0x8716, 0x0040}, {0x8717, 0x00f7}, {0x8718, 0x008f}, +{0x8719, 0x007b}, {0x871a, 0x0086}, {0x871b, 0x0002}, +{0x871c, 0x00b7}, {0x871d, 0x008f}, {0x871e, 0x007a}, +{0x871f, 0x0020}, {0x8720, 0x001c}, {0x8721, 0x00f7}, +{0x8722, 0x008f}, {0x8723, 0x0078}, {0x8724, 0x0086}, +{0x8725, 0x0002}, {0x8726, 0x00b7}, {0x8727, 0x008f}, +{0x8728, 0x0077}, {0x8729, 0x0020}, {0x872a, 0x0012}, +{0x872b, 0x00f7}, {0x872c, 0x008f}, {0x872d, 0x0075}, +{0x872e, 0x0086}, {0x872f, 0x0002}, {0x8730, 0x00b7}, +{0x8731, 0x008f}, {0x8732, 0x0074}, {0x8733, 0x0020}, +{0x8734, 0x0008}, {0x8735, 0x00f7}, {0x8736, 0x008f}, +{0x8737, 0x0072}, {0x8738, 0x0086}, {0x8739, 0x0002}, +{0x873a, 0x00b7}, {0x873b, 0x008f}, {0x873c, 0x0071}, +{0x873d, 0x007e}, {0x873e, 0x0087}, {0x873f, 0x0047}, +{0x8740, 0x0086}, {0x8741, 0x0004}, {0x8742, 0x0097}, +{0x8743, 0x0040}, {0x8744, 0x007e}, {0x8745, 0x0089}, +{0x8746, 0x006e}, {0x8747, 0x00ce}, {0x8748, 0x008f}, +{0x8749, 0x0072}, {0x874a, 0x00bd}, {0x874b, 0x0089}, +{0x874c, 0x00f7}, {0x874d, 0x00ce}, {0x874e, 0x008f}, +{0x874f, 0x0075}, {0x8750, 0x00bd}, {0x8751, 0x0089}, +{0x8752, 0x00f7}, {0x8753, 0x00ce}, {0x8754, 0x008f}, +{0x8755, 0x0078}, {0x8756, 0x00bd}, {0x8757, 0x0089}, +{0x8758, 0x00f7}, {0x8759, 0x00ce}, {0x875a, 0x008f}, +{0x875b, 0x007b}, {0x875c, 0x00bd}, {0x875d, 0x0089}, +{0x875e, 0x00f7}, {0x875f, 0x004f}, {0x8760, 0x00b7}, +{0x8761, 0x008f}, {0x8762, 0x007d}, {0x8763, 0x00b7}, +{0x8764, 0x008f}, {0x8765, 0x0081}, {0x8766, 0x00b6}, +{0x8767, 0x008f}, {0x8768, 0x0072}, {0x8769, 0x0027}, +{0x876a, 0x0047}, {0x876b, 0x007c}, {0x876c, 0x008f}, +{0x876d, 0x007d}, {0x876e, 0x00b6}, {0x876f, 0x008f}, +{0x8770, 0x0075}, {0x8771, 0x0027}, {0x8772, 0x003f}, +{0x8773, 0x007c}, {0x8774, 0x008f}, {0x8775, 0x007d}, +{0x8776, 0x00b6}, {0x8777, 0x008f}, {0x8778, 0x0078}, +{0x8779, 0x0027}, {0x877a, 0x0037}, {0x877b, 0x007c}, +{0x877c, 0x008f}, {0x877d, 0x007d}, {0x877e, 0x00b6}, +{0x877f, 0x008f}, {0x8780, 0x007b}, {0x8781, 0x0027}, +{0x8782, 0x002f}, {0x8783, 0x007f}, {0x8784, 0x008f}, +{0x8785, 0x007d}, {0x8786, 0x007c}, {0x8787, 0x008f}, +{0x8788, 0x0081}, {0x8789, 0x007a}, {0x878a, 0x008f}, +{0x878b, 0x0072}, {0x878c, 0x0027}, {0x878d, 0x001b}, +{0x878e, 0x007c}, {0x878f, 0x008f}, {0x8790, 0x007d}, +{0x8791, 0x007a}, {0x8792, 0x008f}, {0x8793, 0x0075}, +{0x8794, 0x0027}, {0x8795, 0x0016}, {0x8796, 0x007c}, +{0x8797, 0x008f}, {0x8798, 0x007d}, {0x8799, 0x007a}, +{0x879a, 0x008f}, {0x879b, 0x0078}, {0x879c, 0x0027}, +{0x879d, 0x0011}, {0x879e, 0x007c}, {0x879f, 0x008f}, +{0x87a0, 0x007d}, {0x87a1, 0x007a}, {0x87a2, 0x008f}, +{0x87a3, 0x007b}, {0x87a4, 0x0027}, {0x87a5, 0x000c}, +{0x87a6, 0x007e}, {0x87a7, 0x0087}, {0x87a8, 0x0083}, +{0x87a9, 0x007a}, {0x87aa, 0x008f}, {0x87ab, 0x0075}, +{0x87ac, 0x007a}, {0x87ad, 0x008f}, {0x87ae, 0x0078}, +{0x87af, 0x007a}, {0x87b0, 0x008f}, {0x87b1, 0x007b}, +{0x87b2, 0x00ce}, {0x87b3, 0x00c1}, {0x87b4, 0x00fc}, +{0x87b5, 0x00f6}, {0x87b6, 0x008f}, {0x87b7, 0x007d}, +{0x87b8, 0x003a}, {0x87b9, 0x00a6}, {0x87ba, 0x0000}, +{0x87bb, 0x00b7}, {0x87bc, 0x0012}, {0x87bd, 0x0070}, +{0x87be, 0x00b6}, {0x87bf, 0x008f}, {0x87c0, 0x0072}, +{0x87c1, 0x0026}, {0x87c2, 0x0003}, {0x87c3, 0x007e}, +{0x87c4, 0x0087}, {0x87c5, 0x00fa}, {0x87c6, 0x00b6}, +{0x87c7, 0x008f}, {0x87c8, 0x0075}, {0x87c9, 0x0026}, +{0x87ca, 0x000a}, {0x87cb, 0x0018}, {0x87cc, 0x00ce}, +{0x87cd, 0x008f}, {0x87ce, 0x0073}, {0x87cf, 0x00bd}, +{0x87d0, 0x0089}, {0x87d1, 0x00d5}, {0x87d2, 0x007e}, +{0x87d3, 0x0087}, {0x87d4, 0x00fa}, {0x87d5, 0x00b6}, +{0x87d6, 0x008f}, {0x87d7, 0x0078}, {0x87d8, 0x0026}, +{0x87d9, 0x000a}, {0x87da, 0x0018}, {0x87db, 0x00ce}, +{0x87dc, 0x008f}, {0x87dd, 0x0076}, {0x87de, 0x00bd}, +{0x87df, 0x0089}, {0x87e0, 0x00d5}, {0x87e1, 0x007e}, +{0x87e2, 0x0087}, {0x87e3, 0x00fa}, {0x87e4, 0x00b6}, +{0x87e5, 0x008f}, {0x87e6, 0x007b}, {0x87e7, 0x0026}, +{0x87e8, 0x000a}, {0x87e9, 0x0018}, {0x87ea, 0x00ce}, +{0x87eb, 0x008f}, {0x87ec, 0x0079}, {0x87ed, 0x00bd}, +{0x87ee, 0x0089}, {0x87ef, 0x00d5}, {0x87f0, 0x007e}, +{0x87f1, 0x0087}, {0x87f2, 0x00fa}, {0x87f3, 0x0086}, +{0x87f4, 0x0005}, {0x87f5, 0x0097}, {0x87f6, 0x0040}, +{0x87f7, 0x007e}, {0x87f8, 0x0089}, {0x87f9, 0x0000}, +{0x87fa, 0x00b6}, {0x87fb, 0x008f}, {0x87fc, 0x0075}, +{0x87fd, 0x0081}, {0x87fe, 0x0007}, {0x87ff, 0x002e}, +{0x8800, 0x00f2}, {0x8801, 0x00f6}, {0x8802, 0x0012}, +{0x8803, 0x0006}, {0x8804, 0x00c4}, {0x8805, 0x00f8}, +{0x8806, 0x001b}, {0x8807, 0x00b7}, {0x8808, 0x0012}, +{0x8809, 0x0006}, {0x880a, 0x00b6}, {0x880b, 0x008f}, +{0x880c, 0x0078}, {0x880d, 0x0081}, {0x880e, 0x0007}, +{0x880f, 0x002e}, {0x8810, 0x00e2}, {0x8811, 0x0048}, +{0x8812, 0x0048}, {0x8813, 0x0048}, {0x8814, 0x00f6}, +{0x8815, 0x0012}, {0x8816, 0x0006}, {0x8817, 0x00c4}, +{0x8818, 0x00c7}, {0x8819, 0x001b}, {0x881a, 0x00b7}, +{0x881b, 0x0012}, {0x881c, 0x0006}, {0x881d, 0x00b6}, +{0x881e, 0x008f}, {0x881f, 0x007b}, {0x8820, 0x0081}, +{0x8821, 0x0007}, {0x8822, 0x002e}, {0x8823, 0x00cf}, +{0x8824, 0x00f6}, {0x8825, 0x0012}, {0x8826, 0x0005}, +{0x8827, 0x00c4}, {0x8828, 0x00f8}, {0x8829, 0x001b}, +{0x882a, 0x00b7}, {0x882b, 0x0012}, {0x882c, 0x0005}, +{0x882d, 0x0086}, {0x882e, 0x0000}, {0x882f, 0x00f6}, +{0x8830, 0x008f}, {0x8831, 0x0071}, {0x8832, 0x00bd}, +{0x8833, 0x0089}, {0x8834, 0x0094}, {0x8835, 0x0086}, +{0x8836, 0x0001}, {0x8837, 0x00f6}, {0x8838, 0x008f}, +{0x8839, 0x0074}, {0x883a, 0x00bd}, {0x883b, 0x0089}, +{0x883c, 0x0094}, {0x883d, 0x0086}, {0x883e, 0x0002}, +{0x883f, 0x00f6}, {0x8840, 0x008f}, {0x8841, 0x0077}, +{0x8842, 0x00bd}, {0x8843, 0x0089}, {0x8844, 0x0094}, +{0x8845, 0x0086}, {0x8846, 0x0003}, {0x8847, 0x00f6}, +{0x8848, 0x008f}, {0x8849, 0x007a}, {0x884a, 0x00bd}, +{0x884b, 0x0089}, {0x884c, 0x0094}, {0x884d, 0x00ce}, +{0x884e, 0x008f}, {0x884f, 0x0070}, {0x8850, 0x00a6}, +{0x8851, 0x0001}, {0x8852, 0x0081}, {0x8853, 0x0001}, +{0x8854, 0x0027}, {0x8855, 0x0007}, {0x8856, 0x0081}, +{0x8857, 0x0003}, {0x8858, 0x0027}, {0x8859, 0x0003}, +{0x885a, 0x007e}, {0x885b, 0x0088}, {0x885c, 0x0066}, +{0x885d, 0x00a6}, {0x885e, 0x0000}, {0x885f, 0x00b8}, +{0x8860, 0x008f}, {0x8861, 0x0081}, {0x8862, 0x0084}, +{0x8863, 0x0001}, {0x8864, 0x0026}, {0x8865, 0x000b}, +{0x8866, 0x008c}, {0x8867, 0x008f}, {0x8868, 0x0079}, +{0x8869, 0x002c}, {0x886a, 0x000e}, {0x886b, 0x0008}, +{0x886c, 0x0008}, {0x886d, 0x0008}, {0x886e, 0x007e}, +{0x886f, 0x0088}, {0x8870, 0x0050}, {0x8871, 0x00b6}, +{0x8872, 0x0012}, {0x8873, 0x0004}, {0x8874, 0x008a}, +{0x8875, 0x0040}, {0x8876, 0x00b7}, {0x8877, 0x0012}, +{0x8878, 0x0004}, {0x8879, 0x00b6}, {0x887a, 0x0012}, +{0x887b, 0x0004}, {0x887c, 0x0084}, {0x887d, 0x00fb}, +{0x887e, 0x0084}, {0x887f, 0x00ef}, {0x8880, 0x00b7}, +{0x8881, 0x0012}, {0x8882, 0x0004}, {0x8883, 0x00b6}, +{0x8884, 0x0012}, {0x8885, 0x0007}, {0x8886, 0x0036}, +{0x8887, 0x00b6}, {0x8888, 0x008f}, {0x8889, 0x007c}, +{0x888a, 0x0048}, {0x888b, 0x0048}, {0x888c, 0x00b7}, +{0x888d, 0x0012}, {0x888e, 0x0007}, {0x888f, 0x0086}, +{0x8890, 0x0001}, {0x8891, 0x00ba}, {0x8892, 0x0012}, +{0x8893, 0x0004}, {0x8894, 0x00b7}, {0x8895, 0x0012}, +{0x8896, 0x0004}, {0x8897, 0x0001}, {0x8898, 0x0001}, +{0x8899, 0x0001}, {0x889a, 0x0001}, {0x889b, 0x0001}, +{0x889c, 0x0001}, {0x889d, 0x0086}, {0x889e, 0x00fe}, +{0x889f, 0x00b4}, {0x88a0, 0x0012}, {0x88a1, 0x0004}, +{0x88a2, 0x00b7}, {0x88a3, 0x0012}, {0x88a4, 0x0004}, +{0x88a5, 0x0086}, {0x88a6, 0x0002}, {0x88a7, 0x00ba}, +{0x88a8, 0x0012}, {0x88a9, 0x0004}, {0x88aa, 0x00b7}, +{0x88ab, 0x0012}, {0x88ac, 0x0004}, {0x88ad, 0x0086}, +{0x88ae, 0x00fd}, {0x88af, 0x00b4}, {0x88b0, 0x0012}, +{0x88b1, 0x0004}, {0x88b2, 0x00b7}, {0x88b3, 0x0012}, +{0x88b4, 0x0004}, {0x88b5, 0x0032}, {0x88b6, 0x00b7}, +{0x88b7, 0x0012}, {0x88b8, 0x0007}, {0x88b9, 0x00b6}, +{0x88ba, 0x0012}, {0x88bb, 0x0000}, {0x88bc, 0x0084}, +{0x88bd, 0x0008}, {0x88be, 0x0081}, {0x88bf, 0x0008}, +{0x88c0, 0x0027}, {0x88c1, 0x000f}, {0x88c2, 0x007c}, +{0x88c3, 0x0082}, {0x88c4, 0x0008}, {0x88c5, 0x0026}, +{0x88c6, 0x0007}, {0x88c7, 0x0086}, {0x88c8, 0x0076}, +{0x88c9, 0x0097}, {0x88ca, 0x0040}, {0x88cb, 0x007e}, +{0x88cc, 0x0089}, {0x88cd, 0x006e}, {0x88ce, 0x007e}, +{0x88cf, 0x0086}, {0x88d0, 0x00ec}, {0x88d1, 0x00b6}, +{0x88d2, 0x008f}, {0x88d3, 0x007f}, {0x88d4, 0x0081}, +{0x88d5, 0x000f}, {0x88d6, 0x0027}, {0x88d7, 0x003c}, +{0x88d8, 0x00bd}, {0x88d9, 0x00e6}, {0x88da, 0x00c7}, +{0x88db, 0x00b7}, {0x88dc, 0x0012}, {0x88dd, 0x000d}, +{0x88de, 0x00bd}, {0x88df, 0x00e6}, {0x88e0, 0x00cb}, +{0x88e1, 0x00b6}, {0x88e2, 0x0012}, {0x88e3, 0x0004}, +{0x88e4, 0x008a}, {0x88e5, 0x0020}, {0x88e6, 0x00b7}, +{0x88e7, 0x0012}, {0x88e8, 0x0004}, {0x88e9, 0x00ce}, +{0x88ea, 0x00ff}, {0x88eb, 0x00ff}, {0x88ec, 0x00b6}, +{0x88ed, 0x0012}, {0x88ee, 0x0000}, {0x88ef, 0x0081}, +{0x88f0, 0x000c}, {0x88f1, 0x0026}, {0x88f2, 0x0005}, +{0x88f3, 0x0009}, {0x88f4, 0x0026}, {0x88f5, 0x00f6}, +{0x88f6, 0x0027}, {0x88f7, 0x001c}, {0x88f8, 0x00b6}, +{0x88f9, 0x0012}, {0x88fa, 0x0004}, {0x88fb, 0x0084}, +{0x88fc, 0x00df}, {0x88fd, 0x00b7}, {0x88fe, 0x0012}, +{0x88ff, 0x0004}, {0x8900, 0x0096}, {0x8901, 0x0083}, +{0x8902, 0x0081}, {0x8903, 0x0007}, {0x8904, 0x002c}, +{0x8905, 0x0005}, {0x8906, 0x007c}, {0x8907, 0x0000}, +{0x8908, 0x0083}, {0x8909, 0x0020}, {0x890a, 0x0006}, +{0x890b, 0x0096}, {0x890c, 0x0083}, {0x890d, 0x008b}, +{0x890e, 0x0008}, {0x890f, 0x0097}, {0x8910, 0x0083}, +{0x8911, 0x007e}, {0x8912, 0x0085}, {0x8913, 0x0041}, +{0x8914, 0x007f}, {0x8915, 0x008f}, {0x8916, 0x007e}, +{0x8917, 0x0086}, {0x8918, 0x0080}, {0x8919, 0x00b7}, +{0x891a, 0x0012}, {0x891b, 0x000c}, {0x891c, 0x0086}, +{0x891d, 0x0001}, {0x891e, 0x00b7}, {0x891f, 0x008f}, +{0x8920, 0x007d}, {0x8921, 0x00b6}, {0x8922, 0x0012}, +{0x8923, 0x000c}, {0x8924, 0x0084}, {0x8925, 0x007f}, +{0x8926, 0x00b7}, {0x8927, 0x0012}, {0x8928, 0x000c}, +{0x8929, 0x008a}, {0x892a, 0x0080}, {0x892b, 0x00b7}, +{0x892c, 0x0012}, {0x892d, 0x000c}, {0x892e, 0x0086}, +{0x892f, 0x000a}, {0x8930, 0x00bd}, {0x8931, 0x008a}, +{0x8932, 0x0006}, {0x8933, 0x00b6}, {0x8934, 0x0012}, +{0x8935, 0x000a}, {0x8936, 0x002a}, {0x8937, 0x0009}, +{0x8938, 0x00b6}, {0x8939, 0x0012}, {0x893a, 0x000c}, +{0x893b, 0x00ba}, {0x893c, 0x008f}, {0x893d, 0x007d}, +{0x893e, 0x00b7}, {0x893f, 0x0012}, {0x8940, 0x000c}, +{0x8941, 0x00b6}, {0x8942, 0x008f}, {0x8943, 0x007e}, +{0x8944, 0x0081}, {0x8945, 0x0060}, {0x8946, 0x0027}, +{0x8947, 0x001a}, {0x8948, 0x008b}, {0x8949, 0x0020}, +{0x894a, 0x00b7}, {0x894b, 0x008f}, {0x894c, 0x007e}, +{0x894d, 0x00b6}, {0x894e, 0x0012}, {0x894f, 0x000c}, +{0x8950, 0x0084}, {0x8951, 0x009f}, {0x8952, 0x00ba}, +{0x8953, 0x008f}, {0x8954, 0x007e}, {0x8955, 0x00b7}, +{0x8956, 0x0012}, {0x8957, 0x000c}, {0x8958, 0x00b6}, +{0x8959, 0x008f}, {0x895a, 0x007d}, {0x895b, 0x0048}, +{0x895c, 0x00b7}, {0x895d, 0x008f}, {0x895e, 0x007d}, +{0x895f, 0x007e}, {0x8960, 0x0089}, {0x8961, 0x0021}, +{0x8962, 0x00b6}, {0x8963, 0x0012}, {0x8964, 0x0004}, +{0x8965, 0x008a}, {0x8966, 0x0020}, {0x8967, 0x00b7}, +{0x8968, 0x0012}, {0x8969, 0x0004}, {0x896a, 0x00bd}, +{0x896b, 0x008a}, {0x896c, 0x000a}, {0x896d, 0x004f}, +{0x896e, 0x0039}, {0x896f, 0x00a6}, {0x8970, 0x0000}, +{0x8971, 0x0018}, {0x8972, 0x00a7}, {0x8973, 0x0000}, +{0x8974, 0x0008}, {0x8975, 0x0018}, {0x8976, 0x0008}, +{0x8977, 0x005a}, {0x8978, 0x0026}, {0x8979, 0x00f5}, +{0x897a, 0x0039}, {0x897b, 0x0036}, {0x897c, 0x006c}, +{0x897d, 0x0000}, {0x897e, 0x0032}, {0x897f, 0x00ba}, +{0x8980, 0x008f}, {0x8981, 0x007f}, {0x8982, 0x00b7}, +{0x8983, 0x008f}, {0x8984, 0x007f}, {0x8985, 0x00b6}, +{0x8986, 0x0012}, {0x8987, 0x0009}, {0x8988, 0x0084}, +{0x8989, 0x0003}, {0x898a, 0x00a7}, {0x898b, 0x0001}, +{0x898c, 0x00b6}, {0x898d, 0x0012}, {0x898e, 0x0006}, +{0x898f, 0x0084}, {0x8990, 0x003f}, {0x8991, 0x00a7}, +{0x8992, 0x0002}, {0x8993, 0x0039}, {0x8994, 0x0036}, +{0x8995, 0x0086}, {0x8996, 0x0003}, {0x8997, 0x00b7}, +{0x8998, 0x008f}, {0x8999, 0x0080}, {0x899a, 0x0032}, +{0x899b, 0x00c1}, {0x899c, 0x0000}, {0x899d, 0x0026}, +{0x899e, 0x0006}, {0x899f, 0x00b7}, {0x89a0, 0x008f}, +{0x89a1, 0x007c}, {0x89a2, 0x007e}, {0x89a3, 0x0089}, +{0x89a4, 0x00c9}, {0x89a5, 0x00c1}, {0x89a6, 0x0001}, +{0x89a7, 0x0027}, {0x89a8, 0x0018}, {0x89a9, 0x00c1}, +{0x89aa, 0x0002}, {0x89ab, 0x0027}, {0x89ac, 0x000c}, +{0x89ad, 0x00c1}, {0x89ae, 0x0003}, {0x89af, 0x0027}, +{0x89b0, 0x0000}, {0x89b1, 0x00f6}, {0x89b2, 0x008f}, +{0x89b3, 0x0080}, {0x89b4, 0x0005}, {0x89b5, 0x0005}, +{0x89b6, 0x00f7}, {0x89b7, 0x008f}, {0x89b8, 0x0080}, +{0x89b9, 0x00f6}, {0x89ba, 0x008f}, {0x89bb, 0x0080}, +{0x89bc, 0x0005}, {0x89bd, 0x0005}, {0x89be, 0x00f7}, +{0x89bf, 0x008f}, {0x89c0, 0x0080}, {0x89c1, 0x00f6}, +{0x89c2, 0x008f}, {0x89c3, 0x0080}, {0x89c4, 0x0005}, +{0x89c5, 0x0005}, {0x89c6, 0x00f7}, {0x89c7, 0x008f}, +{0x89c8, 0x0080}, {0x89c9, 0x00f6}, {0x89ca, 0x008f}, +{0x89cb, 0x0080}, {0x89cc, 0x0053}, {0x89cd, 0x00f4}, +{0x89ce, 0x0012}, {0x89cf, 0x0007}, {0x89d0, 0x001b}, +{0x89d1, 0x00b7}, {0x89d2, 0x0012}, {0x89d3, 0x0007}, +{0x89d4, 0x0039}, {0x89d5, 0x00ce}, {0x89d6, 0x008f}, +{0x89d7, 0x0070}, {0x89d8, 0x00a6}, {0x89d9, 0x0000}, +{0x89da, 0x0018}, {0x89db, 0x00e6}, {0x89dc, 0x0000}, +{0x89dd, 0x0018}, {0x89de, 0x00a7}, {0x89df, 0x0000}, +{0x89e0, 0x00e7}, {0x89e1, 0x0000}, {0x89e2, 0x00a6}, +{0x89e3, 0x0001}, {0x89e4, 0x0018}, {0x89e5, 0x00e6}, +{0x89e6, 0x0001}, {0x89e7, 0x0018}, {0x89e8, 0x00a7}, +{0x89e9, 0x0001}, {0x89ea, 0x00e7}, {0x89eb, 0x0001}, +{0x89ec, 0x00a6}, {0x89ed, 0x0002}, {0x89ee, 0x0018}, +{0x89ef, 0x00e6}, {0x89f0, 0x0002}, {0x89f1, 0x0018}, +{0x89f2, 0x00a7}, {0x89f3, 0x0002}, {0x89f4, 0x00e7}, +{0x89f5, 0x0002}, {0x89f6, 0x0039}, {0x89f7, 0x00a6}, +{0x89f8, 0x0000}, {0x89f9, 0x0084}, {0x89fa, 0x0007}, +{0x89fb, 0x00e6}, {0x89fc, 0x0000}, {0x89fd, 0x00c4}, +{0x89fe, 0x0038}, {0x89ff, 0x0054}, {0x8a00, 0x0054}, +{0x8a01, 0x0054}, {0x8a02, 0x001b}, {0x8a03, 0x00a7}, +{0x8a04, 0x0000}, {0x8a05, 0x0039}, {0x8a06, 0x004a}, +{0x8a07, 0x0026}, {0x8a08, 0x00fd}, {0x8a09, 0x0039}, +{0x8a0a, 0x0096}, {0x8a0b, 0x0022}, {0x8a0c, 0x0084}, +{0x8a0d, 0x000f}, {0x8a0e, 0x0097}, {0x8a0f, 0x0022}, +{0x8a10, 0x0086}, {0x8a11, 0x0001}, {0x8a12, 0x00b7}, +{0x8a13, 0x008f}, {0x8a14, 0x0070}, {0x8a15, 0x00b6}, +{0x8a16, 0x0012}, {0x8a17, 0x0007}, {0x8a18, 0x00b7}, +{0x8a19, 0x008f}, {0x8a1a, 0x0071}, {0x8a1b, 0x00f6}, +{0x8a1c, 0x0012}, {0x8a1d, 0x000c}, {0x8a1e, 0x00c4}, +{0x8a1f, 0x000f}, {0x8a20, 0x00c8}, {0x8a21, 0x000f}, +{0x8a22, 0x00f7}, {0x8a23, 0x008f}, {0x8a24, 0x0072}, +{0x8a25, 0x00f6}, {0x8a26, 0x008f}, {0x8a27, 0x0072}, +{0x8a28, 0x00b6}, {0x8a29, 0x008f}, {0x8a2a, 0x0071}, +{0x8a2b, 0x0084}, {0x8a2c, 0x0003}, {0x8a2d, 0x0027}, +{0x8a2e, 0x0014}, {0x8a2f, 0x0081}, {0x8a30, 0x0001}, +{0x8a31, 0x0027}, {0x8a32, 0x001c}, {0x8a33, 0x0081}, +{0x8a34, 0x0002}, {0x8a35, 0x0027}, {0x8a36, 0x0024}, +{0x8a37, 0x00f4}, {0x8a38, 0x008f}, {0x8a39, 0x0070}, +{0x8a3a, 0x0027}, {0x8a3b, 0x002a}, {0x8a3c, 0x0096}, +{0x8a3d, 0x0022}, {0x8a3e, 0x008a}, {0x8a3f, 0x0080}, +{0x8a40, 0x007e}, {0x8a41, 0x008a}, {0x8a42, 0x0064}, +{0x8a43, 0x00f4}, {0x8a44, 0x008f}, {0x8a45, 0x0070}, +{0x8a46, 0x0027}, {0x8a47, 0x001e}, {0x8a48, 0x0096}, +{0x8a49, 0x0022}, {0x8a4a, 0x008a}, {0x8a4b, 0x0010}, +{0x8a4c, 0x007e}, {0x8a4d, 0x008a}, {0x8a4e, 0x0064}, +{0x8a4f, 0x00f4}, {0x8a50, 0x008f}, {0x8a51, 0x0070}, +{0x8a52, 0x0027}, {0x8a53, 0x0012}, {0x8a54, 0x0096}, +{0x8a55, 0x0022}, {0x8a56, 0x008a}, {0x8a57, 0x0020}, +{0x8a58, 0x007e}, {0x8a59, 0x008a}, {0x8a5a, 0x0064}, +{0x8a5b, 0x00f4}, {0x8a5c, 0x008f}, {0x8a5d, 0x0070}, +{0x8a5e, 0x0027}, {0x8a5f, 0x0006}, {0x8a60, 0x0096}, +{0x8a61, 0x0022}, {0x8a62, 0x008a}, {0x8a63, 0x0040}, +{0x8a64, 0x0097}, {0x8a65, 0x0022}, {0x8a66, 0x0074}, +{0x8a67, 0x008f}, {0x8a68, 0x0071}, {0x8a69, 0x0074}, +{0x8a6a, 0x008f}, {0x8a6b, 0x0071}, {0x8a6c, 0x0078}, +{0x8a6d, 0x008f}, {0x8a6e, 0x0070}, {0x8a6f, 0x00b6}, +{0x8a70, 0x008f}, {0x8a71, 0x0070}, {0x8a72, 0x0085}, +{0x8a73, 0x0010}, {0x8a74, 0x0027}, {0x8a75, 0x00af}, +{0x8a76, 0x00d6}, {0x8a77, 0x0022}, {0x8a78, 0x00c4}, +{0x8a79, 0x0010}, {0x8a7a, 0x0058}, {0x8a7b, 0x00b6}, +{0x8a7c, 0x0012}, {0x8a7d, 0x0070}, {0x8a7e, 0x0081}, +{0x8a7f, 0x00e4}, {0x8a80, 0x0027}, {0x8a81, 0x0036}, +{0x8a82, 0x0081}, {0x8a83, 0x00e1}, {0x8a84, 0x0026}, +{0x8a85, 0x000c}, {0x8a86, 0x0096}, {0x8a87, 0x0022}, +{0x8a88, 0x0084}, {0x8a89, 0x0020}, {0x8a8a, 0x0044}, +{0x8a8b, 0x001b}, {0x8a8c, 0x00d6}, {0x8a8d, 0x0022}, +{0x8a8e, 0x00c4}, {0x8a8f, 0x00cf}, {0x8a90, 0x0020}, +{0x8a91, 0x0023}, {0x8a92, 0x0058}, {0x8a93, 0x0081}, +{0x8a94, 0x00c6}, {0x8a95, 0x0026}, {0x8a96, 0x000d}, +{0x8a97, 0x0096}, {0x8a98, 0x0022}, {0x8a99, 0x0084}, +{0x8a9a, 0x0040}, {0x8a9b, 0x0044}, {0x8a9c, 0x0044}, +{0x8a9d, 0x001b}, {0x8a9e, 0x00d6}, {0x8a9f, 0x0022}, +{0x8aa0, 0x00c4}, {0x8aa1, 0x00af}, {0x8aa2, 0x0020}, +{0x8aa3, 0x0011}, {0x8aa4, 0x0058}, {0x8aa5, 0x0081}, +{0x8aa6, 0x0027}, {0x8aa7, 0x0026}, {0x8aa8, 0x000f}, +{0x8aa9, 0x0096}, {0x8aaa, 0x0022}, {0x8aab, 0x0084}, +{0x8aac, 0x0080}, {0x8aad, 0x0044}, {0x8aae, 0x0044}, +{0x8aaf, 0x0044}, {0x8ab0, 0x001b}, {0x8ab1, 0x00d6}, +{0x8ab2, 0x0022}, {0x8ab3, 0x00c4}, {0x8ab4, 0x006f}, +{0x8ab5, 0x001b}, {0x8ab6, 0x0097}, {0x8ab7, 0x0022}, +{0x8ab8, 0x0039}, {0x8ab9, 0x0027}, {0x8aba, 0x000c}, +{0x8abb, 0x007c}, {0x8abc, 0x0082}, {0x8abd, 0x0006}, +{0x8abe, 0x00bd}, {0x8abf, 0x00d9}, {0x8ac0, 0x00ed}, +{0x8ac1, 0x00b6}, {0x8ac2, 0x0082}, {0x8ac3, 0x0007}, +{0x8ac4, 0x007e}, {0x8ac5, 0x008a}, {0x8ac6, 0x00b9}, +{0x8ac7, 0x007f}, {0x8ac8, 0x0082}, {0x8ac9, 0x0006}, +{0x8aca, 0x0039}, { 0x0, 0x0 } +}; +#else +cas_saturn_patch_t cas_saturn_patch[] = { +{0x8200, 0x007e}, {0x8201, 0x0082}, {0x8202, 0x0009}, +{0x8203, 0x0000}, {0x8204, 0x0000}, {0x8205, 0x0000}, +{0x8206, 0x0000}, {0x8207, 0x0000}, {0x8208, 0x0000}, +{0x8209, 0x008e}, {0x820a, 0x008e}, {0x820b, 0x00ff}, +{0x820c, 0x00ce}, {0x820d, 0x0082}, {0x820e, 0x0025}, +{0x820f, 0x00ff}, {0x8210, 0x0001}, {0x8211, 0x000f}, +{0x8212, 0x00ce}, {0x8213, 0x0084}, {0x8214, 0x0026}, +{0x8215, 0x00ff}, {0x8216, 0x0001}, {0x8217, 0x0011}, +{0x8218, 0x00ce}, {0x8219, 0x0085}, {0x821a, 0x003d}, +{0x821b, 0x00df}, {0x821c, 0x00e5}, {0x821d, 0x0086}, +{0x821e, 0x0039}, {0x821f, 0x00b7}, {0x8220, 0x008f}, +{0x8221, 0x00f8}, {0x8222, 0x007e}, {0x8223, 0x00c3}, +{0x8224, 0x00c2}, {0x8225, 0x0096}, {0x8226, 0x0047}, +{0x8227, 0x0084}, {0x8228, 0x00f3}, {0x8229, 0x008a}, +{0x822a, 0x0000}, {0x822b, 0x0097}, {0x822c, 0x0047}, +{0x822d, 0x00ce}, {0x822e, 0x0082}, {0x822f, 0x0033}, +{0x8230, 0x00ff}, {0x8231, 0x0001}, {0x8232, 0x000f}, +{0x8233, 0x0096}, {0x8234, 0x0046}, {0x8235, 0x0084}, +{0x8236, 0x000c}, {0x8237, 0x0081}, {0x8238, 0x0004}, +{0x8239, 0x0027}, {0x823a, 0x000b}, {0x823b, 0x0096}, +{0x823c, 0x0046}, {0x823d, 0x0084}, {0x823e, 0x000c}, +{0x823f, 0x0081}, {0x8240, 0x0008}, {0x8241, 0x0027}, +{0x8242, 0x0057}, {0x8243, 0x007e}, {0x8244, 0x0084}, +{0x8245, 0x0025}, {0x8246, 0x0096}, {0x8247, 0x0047}, +{0x8248, 0x0084}, {0x8249, 0x00f3}, {0x824a, 0x008a}, +{0x824b, 0x0004}, {0x824c, 0x0097}, {0x824d, 0x0047}, +{0x824e, 0x00ce}, {0x824f, 0x0082}, {0x8250, 0x0054}, +{0x8251, 0x00ff}, {0x8252, 0x0001}, {0x8253, 0x000f}, +{0x8254, 0x0096}, {0x8255, 0x0046}, {0x8256, 0x0084}, +{0x8257, 0x000c}, {0x8258, 0x0081}, {0x8259, 0x0004}, +{0x825a, 0x0026}, {0x825b, 0x0038}, {0x825c, 0x00b6}, +{0x825d, 0x0012}, {0x825e, 0x0020}, {0x825f, 0x0084}, +{0x8260, 0x0020}, {0x8261, 0x0026}, {0x8262, 0x0003}, +{0x8263, 0x007e}, {0x8264, 0x0084}, {0x8265, 0x0025}, +{0x8266, 0x0096}, {0x8267, 0x007b}, {0x8268, 0x00d6}, +{0x8269, 0x007c}, {0x826a, 0x00fe}, {0x826b, 0x008f}, +{0x826c, 0x0056}, {0x826d, 0x00bd}, {0x826e, 0x00f7}, +{0x826f, 0x00b6}, {0x8270, 0x00fe}, {0x8271, 0x008f}, +{0x8272, 0x004e}, {0x8273, 0x00bd}, {0x8274, 0x00ec}, +{0x8275, 0x008e}, {0x8276, 0x00bd}, {0x8277, 0x00fa}, +{0x8278, 0x00f7}, {0x8279, 0x00bd}, {0x827a, 0x00f7}, +{0x827b, 0x0028}, {0x827c, 0x00ce}, {0x827d, 0x0082}, +{0x827e, 0x0082}, {0x827f, 0x00ff}, {0x8280, 0x0001}, +{0x8281, 0x000f}, {0x8282, 0x0096}, {0x8283, 0x0046}, +{0x8284, 0x0084}, {0x8285, 0x000c}, {0x8286, 0x0081}, +{0x8287, 0x0004}, {0x8288, 0x0026}, {0x8289, 0x000a}, +{0x828a, 0x00b6}, {0x828b, 0x0012}, {0x828c, 0x0020}, +{0x828d, 0x0084}, {0x828e, 0x0020}, {0x828f, 0x0027}, +{0x8290, 0x00b5}, {0x8291, 0x007e}, {0x8292, 0x0084}, +{0x8293, 0x0025}, {0x8294, 0x00bd}, {0x8295, 0x00f7}, +{0x8296, 0x001f}, {0x8297, 0x007e}, {0x8298, 0x0084}, +{0x8299, 0x001f}, {0x829a, 0x0096}, {0x829b, 0x0047}, +{0x829c, 0x0084}, {0x829d, 0x00f3}, {0x829e, 0x008a}, +{0x829f, 0x0008}, {0x82a0, 0x0097}, {0x82a1, 0x0047}, +{0x82a2, 0x00de}, {0x82a3, 0x00e1}, {0x82a4, 0x00ad}, +{0x82a5, 0x0000}, {0x82a6, 0x00ce}, {0x82a7, 0x0082}, +{0x82a8, 0x00af}, {0x82a9, 0x00ff}, {0x82aa, 0x0001}, +{0x82ab, 0x000f}, {0x82ac, 0x007e}, {0x82ad, 0x0084}, +{0x82ae, 0x0025}, {0x82af, 0x0096}, {0x82b0, 0x0041}, +{0x82b1, 0x0085}, {0x82b2, 0x0010}, {0x82b3, 0x0026}, +{0x82b4, 0x0006}, {0x82b5, 0x0096}, {0x82b6, 0x0023}, +{0x82b7, 0x0085}, {0x82b8, 0x0040}, {0x82b9, 0x0027}, +{0x82ba, 0x0006}, {0x82bb, 0x00bd}, {0x82bc, 0x00ed}, +{0x82bd, 0x0000}, {0x82be, 0x007e}, {0x82bf, 0x0083}, +{0x82c0, 0x00a2}, {0x82c1, 0x00de}, {0x82c2, 0x0042}, +{0x82c3, 0x00bd}, {0x82c4, 0x00eb}, {0x82c5, 0x008e}, +{0x82c6, 0x0096}, {0x82c7, 0x0024}, {0x82c8, 0x0084}, +{0x82c9, 0x0008}, {0x82ca, 0x0027}, {0x82cb, 0x0003}, +{0x82cc, 0x007e}, {0x82cd, 0x0083}, {0x82ce, 0x00df}, +{0x82cf, 0x0096}, {0x82d0, 0x007b}, {0x82d1, 0x00d6}, +{0x82d2, 0x007c}, {0x82d3, 0x00fe}, {0x82d4, 0x008f}, +{0x82d5, 0x0056}, {0x82d6, 0x00bd}, {0x82d7, 0x00f7}, +{0x82d8, 0x00b6}, {0x82d9, 0x00fe}, {0x82da, 0x008f}, +{0x82db, 0x0050}, {0x82dc, 0x00bd}, {0x82dd, 0x00ec}, +{0x82de, 0x008e}, {0x82df, 0x00bd}, {0x82e0, 0x00fa}, +{0x82e1, 0x00f7}, {0x82e2, 0x0086}, {0x82e3, 0x0011}, +{0x82e4, 0x00c6}, {0x82e5, 0x0049}, {0x82e6, 0x00bd}, +{0x82e7, 0x00e4}, {0x82e8, 0x0012}, {0x82e9, 0x00ce}, +{0x82ea, 0x0082}, {0x82eb, 0x00ef}, {0x82ec, 0x00ff}, +{0x82ed, 0x0001}, {0x82ee, 0x000f}, {0x82ef, 0x0096}, +{0x82f0, 0x0046}, {0x82f1, 0x0084}, {0x82f2, 0x000c}, +{0x82f3, 0x0081}, {0x82f4, 0x0000}, {0x82f5, 0x0027}, +{0x82f6, 0x0017}, {0x82f7, 0x00c6}, {0x82f8, 0x0049}, +{0x82f9, 0x00bd}, {0x82fa, 0x00e4}, {0x82fb, 0x0091}, +{0x82fc, 0x0024}, {0x82fd, 0x000d}, {0x82fe, 0x00b6}, +{0x82ff, 0x0012}, {0x8300, 0x0020}, {0x8301, 0x0085}, +{0x8302, 0x0020}, {0x8303, 0x0026}, {0x8304, 0x000c}, +{0x8305, 0x00ce}, {0x8306, 0x0082}, {0x8307, 0x00c1}, +{0x8308, 0x00ff}, {0x8309, 0x0001}, {0x830a, 0x000f}, +{0x830b, 0x007e}, {0x830c, 0x0084}, {0x830d, 0x0025}, +{0x830e, 0x007e}, {0x830f, 0x0084}, {0x8310, 0x0016}, +{0x8311, 0x00fe}, {0x8312, 0x008f}, {0x8313, 0x0052}, +{0x8314, 0x00bd}, {0x8315, 0x00ec}, {0x8316, 0x008e}, +{0x8317, 0x00bd}, {0x8318, 0x00fa}, {0x8319, 0x00f7}, +{0x831a, 0x0086}, {0x831b, 0x006a}, {0x831c, 0x00c6}, +{0x831d, 0x0049}, {0x831e, 0x00bd}, {0x831f, 0x00e4}, +{0x8320, 0x0012}, {0x8321, 0x00ce}, {0x8322, 0x0083}, +{0x8323, 0x0027}, {0x8324, 0x00ff}, {0x8325, 0x0001}, +{0x8326, 0x000f}, {0x8327, 0x0096}, {0x8328, 0x0046}, +{0x8329, 0x0084}, {0x832a, 0x000c}, {0x832b, 0x0081}, +{0x832c, 0x0000}, {0x832d, 0x0027}, {0x832e, 0x000a}, +{0x832f, 0x00c6}, {0x8330, 0x0049}, {0x8331, 0x00bd}, +{0x8332, 0x00e4}, {0x8333, 0x0091}, {0x8334, 0x0025}, +{0x8335, 0x0006}, {0x8336, 0x007e}, {0x8337, 0x0084}, +{0x8338, 0x0025}, {0x8339, 0x007e}, {0x833a, 0x0084}, +{0x833b, 0x0016}, {0x833c, 0x00b6}, {0x833d, 0x0018}, +{0x833e, 0x0070}, {0x833f, 0x00bb}, {0x8340, 0x0019}, +{0x8341, 0x0070}, {0x8342, 0x002a}, {0x8343, 0x0004}, +{0x8344, 0x0081}, {0x8345, 0x00af}, {0x8346, 0x002e}, +{0x8347, 0x0019}, {0x8348, 0x0096}, {0x8349, 0x007b}, +{0x834a, 0x00f6}, {0x834b, 0x0020}, {0x834c, 0x0007}, +{0x834d, 0x00fa}, {0x834e, 0x0020}, {0x834f, 0x0027}, +{0x8350, 0x00c4}, {0x8351, 0x0038}, {0x8352, 0x0081}, +{0x8353, 0x0038}, {0x8354, 0x0027}, {0x8355, 0x000b}, +{0x8356, 0x00f6}, {0x8357, 0x0020}, {0x8358, 0x0007}, +{0x8359, 0x00fa}, {0x835a, 0x0020}, {0x835b, 0x0027}, +{0x835c, 0x00cb}, {0x835d, 0x0008}, {0x835e, 0x007e}, +{0x835f, 0x0082}, {0x8360, 0x00d3}, {0x8361, 0x00bd}, +{0x8362, 0x00f7}, {0x8363, 0x0066}, {0x8364, 0x0086}, +{0x8365, 0x0074}, {0x8366, 0x00c6}, {0x8367, 0x0049}, +{0x8368, 0x00bd}, {0x8369, 0x00e4}, {0x836a, 0x0012}, +{0x836b, 0x00ce}, {0x836c, 0x0083}, {0x836d, 0x0071}, +{0x836e, 0x00ff}, {0x836f, 0x0001}, {0x8370, 0x000f}, +{0x8371, 0x0096}, {0x8372, 0x0046}, {0x8373, 0x0084}, +{0x8374, 0x000c}, {0x8375, 0x0081}, {0x8376, 0x0008}, +{0x8377, 0x0026}, {0x8378, 0x000a}, {0x8379, 0x00c6}, +{0x837a, 0x0049}, {0x837b, 0x00bd}, {0x837c, 0x00e4}, +{0x837d, 0x0091}, {0x837e, 0x0025}, {0x837f, 0x0006}, +{0x8380, 0x007e}, {0x8381, 0x0084}, {0x8382, 0x0025}, +{0x8383, 0x007e}, {0x8384, 0x0084}, {0x8385, 0x0016}, +{0x8386, 0x00bd}, {0x8387, 0x00f7}, {0x8388, 0x003e}, +{0x8389, 0x0026}, {0x838a, 0x000e}, {0x838b, 0x00bd}, +{0x838c, 0x00e5}, {0x838d, 0x0009}, {0x838e, 0x0026}, +{0x838f, 0x0006}, {0x8390, 0x00ce}, {0x8391, 0x0082}, +{0x8392, 0x00c1}, {0x8393, 0x00ff}, {0x8394, 0x0001}, +{0x8395, 0x000f}, {0x8396, 0x007e}, {0x8397, 0x0084}, +{0x8398, 0x0025}, {0x8399, 0x00fe}, {0x839a, 0x008f}, +{0x839b, 0x0054}, {0x839c, 0x00bd}, {0x839d, 0x00ec}, +{0x839e, 0x008e}, {0x839f, 0x00bd}, {0x83a0, 0x00fa}, +{0x83a1, 0x00f7}, {0x83a2, 0x00bd}, {0x83a3, 0x00f7}, +{0x83a4, 0x0033}, {0x83a5, 0x0086}, {0x83a6, 0x000f}, +{0x83a7, 0x00c6}, {0x83a8, 0x0051}, {0x83a9, 0x00bd}, +{0x83aa, 0x00e4}, {0x83ab, 0x0012}, {0x83ac, 0x00ce}, +{0x83ad, 0x0083}, {0x83ae, 0x00b2}, {0x83af, 0x00ff}, +{0x83b0, 0x0001}, {0x83b1, 0x000f}, {0x83b2, 0x0096}, +{0x83b3, 0x0046}, {0x83b4, 0x0084}, {0x83b5, 0x000c}, +{0x83b6, 0x0081}, {0x83b7, 0x0008}, {0x83b8, 0x0026}, +{0x83b9, 0x005c}, {0x83ba, 0x00b6}, {0x83bb, 0x0012}, +{0x83bc, 0x0020}, {0x83bd, 0x0084}, {0x83be, 0x003f}, +{0x83bf, 0x0081}, {0x83c0, 0x003a}, {0x83c1, 0x0027}, +{0x83c2, 0x001c}, {0x83c3, 0x0096}, {0x83c4, 0x0023}, +{0x83c5, 0x0085}, {0x83c6, 0x0040}, {0x83c7, 0x0027}, +{0x83c8, 0x0003}, {0x83c9, 0x007e}, {0x83ca, 0x0084}, +{0x83cb, 0x0025}, {0x83cc, 0x00c6}, {0x83cd, 0x0051}, +{0x83ce, 0x00bd}, {0x83cf, 0x00e4}, {0x83d0, 0x0091}, +{0x83d1, 0x0025}, {0x83d2, 0x0003}, {0x83d3, 0x007e}, +{0x83d4, 0x0084}, {0x83d5, 0x0025}, {0x83d6, 0x00ce}, +{0x83d7, 0x0082}, {0x83d8, 0x00c1}, {0x83d9, 0x00ff}, +{0x83da, 0x0001}, {0x83db, 0x000f}, {0x83dc, 0x007e}, +{0x83dd, 0x0084}, {0x83de, 0x0025}, {0x83df, 0x00bd}, +{0x83e0, 0x00f8}, {0x83e1, 0x0037}, {0x83e2, 0x007c}, +{0x83e3, 0x0000}, {0x83e4, 0x007a}, {0x83e5, 0x00ce}, +{0x83e6, 0x0083}, {0x83e7, 0x00ee}, {0x83e8, 0x00ff}, +{0x83e9, 0x0001}, {0x83ea, 0x000f}, {0x83eb, 0x007e}, +{0x83ec, 0x0084}, {0x83ed, 0x0025}, {0x83ee, 0x0096}, +{0x83ef, 0x0046}, {0x83f0, 0x0084}, {0x83f1, 0x000c}, +{0x83f2, 0x0081}, {0x83f3, 0x0008}, {0x83f4, 0x0026}, +{0x83f5, 0x0020}, {0x83f6, 0x0096}, {0x83f7, 0x0024}, +{0x83f8, 0x0084}, {0x83f9, 0x0008}, {0x83fa, 0x0026}, +{0x83fb, 0x0029}, {0x83fc, 0x00b6}, {0x83fd, 0x0018}, +{0x83fe, 0x0082}, {0x83ff, 0x00bb}, {0x8400, 0x0019}, +{0x8401, 0x0082}, {0x8402, 0x00b1}, {0x8403, 0x0001}, +{0x8404, 0x003b}, {0x8405, 0x0022}, {0x8406, 0x0009}, +{0x8407, 0x00b6}, {0x8408, 0x0012}, {0x8409, 0x0020}, +{0x840a, 0x0084}, {0x840b, 0x0037}, {0x840c, 0x0081}, +{0x840d, 0x0032}, {0x840e, 0x0027}, {0x840f, 0x0015}, +{0x8410, 0x00bd}, {0x8411, 0x00f8}, {0x8412, 0x0044}, +{0x8413, 0x007e}, {0x8414, 0x0082}, {0x8415, 0x00c1}, +{0x8416, 0x00bd}, {0x8417, 0x00f7}, {0x8418, 0x001f}, +{0x8419, 0x00bd}, {0x841a, 0x00f8}, {0x841b, 0x0044}, +{0x841c, 0x00bd}, {0x841d, 0x00fc}, {0x841e, 0x0029}, +{0x841f, 0x00ce}, {0x8420, 0x0082}, {0x8421, 0x0025}, +{0x8422, 0x00ff}, {0x8423, 0x0001}, {0x8424, 0x000f}, +{0x8425, 0x0039}, {0x8426, 0x0096}, {0x8427, 0x0047}, +{0x8428, 0x0084}, {0x8429, 0x00fc}, {0x842a, 0x008a}, +{0x842b, 0x0000}, {0x842c, 0x0097}, {0x842d, 0x0047}, +{0x842e, 0x00ce}, {0x842f, 0x0084}, {0x8430, 0x0034}, +{0x8431, 0x00ff}, {0x8432, 0x0001}, {0x8433, 0x0011}, +{0x8434, 0x0096}, {0x8435, 0x0046}, {0x8436, 0x0084}, +{0x8437, 0x0003}, {0x8438, 0x0081}, {0x8439, 0x0002}, +{0x843a, 0x0027}, {0x843b, 0x0003}, {0x843c, 0x007e}, +{0x843d, 0x0085}, {0x843e, 0x001e}, {0x843f, 0x0096}, +{0x8440, 0x0047}, {0x8441, 0x0084}, {0x8442, 0x00fc}, +{0x8443, 0x008a}, {0x8444, 0x0002}, {0x8445, 0x0097}, +{0x8446, 0x0047}, {0x8447, 0x00de}, {0x8448, 0x00e1}, +{0x8449, 0x00ad}, {0x844a, 0x0000}, {0x844b, 0x0086}, +{0x844c, 0x0001}, {0x844d, 0x00b7}, {0x844e, 0x0012}, +{0x844f, 0x0051}, {0x8450, 0x00bd}, {0x8451, 0x00f7}, +{0x8452, 0x0014}, {0x8453, 0x00b6}, {0x8454, 0x0010}, +{0x8455, 0x0031}, {0x8456, 0x0084}, {0x8457, 0x00fd}, +{0x8458, 0x00b7}, {0x8459, 0x0010}, {0x845a, 0x0031}, +{0x845b, 0x00bd}, {0x845c, 0x00f8}, {0x845d, 0x001e}, +{0x845e, 0x0096}, {0x845f, 0x0081}, {0x8460, 0x00d6}, +{0x8461, 0x0082}, {0x8462, 0x00fe}, {0x8463, 0x008f}, +{0x8464, 0x005a}, {0x8465, 0x00bd}, {0x8466, 0x00f7}, +{0x8467, 0x00b6}, {0x8468, 0x00fe}, {0x8469, 0x008f}, +{0x846a, 0x005c}, {0x846b, 0x00bd}, {0x846c, 0x00ec}, +{0x846d, 0x008e}, {0x846e, 0x00bd}, {0x846f, 0x00fa}, +{0x8470, 0x00f7}, {0x8471, 0x0086}, {0x8472, 0x0008}, +{0x8473, 0x00d6}, {0x8474, 0x0000}, {0x8475, 0x00c5}, +{0x8476, 0x0010}, {0x8477, 0x0026}, {0x8478, 0x0002}, +{0x8479, 0x008b}, {0x847a, 0x0020}, {0x847b, 0x00c6}, +{0x847c, 0x0051}, {0x847d, 0x00bd}, {0x847e, 0x00e4}, +{0x847f, 0x0012}, {0x8480, 0x00ce}, {0x8481, 0x0084}, +{0x8482, 0x0086}, {0x8483, 0x00ff}, {0x8484, 0x0001}, +{0x8485, 0x0011}, {0x8486, 0x0096}, {0x8487, 0x0046}, +{0x8488, 0x0084}, {0x8489, 0x0003}, {0x848a, 0x0081}, +{0x848b, 0x0002}, {0x848c, 0x0027}, {0x848d, 0x0003}, +{0x848e, 0x007e}, {0x848f, 0x0085}, {0x8490, 0x000f}, +{0x8491, 0x00c6}, {0x8492, 0x0051}, {0x8493, 0x00bd}, +{0x8494, 0x00e4}, {0x8495, 0x0091}, {0x8496, 0x0025}, +{0x8497, 0x0003}, {0x8498, 0x007e}, {0x8499, 0x0085}, +{0x849a, 0x001e}, {0x849b, 0x0096}, {0x849c, 0x0044}, +{0x849d, 0x0085}, {0x849e, 0x0010}, {0x849f, 0x0026}, +{0x84a0, 0x000a}, {0x84a1, 0x00b6}, {0x84a2, 0x0012}, +{0x84a3, 0x0050}, {0x84a4, 0x00ba}, {0x84a5, 0x0001}, +{0x84a6, 0x003c}, {0x84a7, 0x0085}, {0x84a8, 0x0010}, +{0x84a9, 0x0027}, {0x84aa, 0x00a8}, {0x84ab, 0x00bd}, +{0x84ac, 0x00f7}, {0x84ad, 0x0066}, {0x84ae, 0x00ce}, +{0x84af, 0x0084}, {0x84b0, 0x00b7}, {0x84b1, 0x00ff}, +{0x84b2, 0x0001}, {0x84b3, 0x0011}, {0x84b4, 0x007e}, +{0x84b5, 0x0085}, {0x84b6, 0x001e}, {0x84b7, 0x0096}, +{0x84b8, 0x0046}, {0x84b9, 0x0084}, {0x84ba, 0x0003}, +{0x84bb, 0x0081}, {0x84bc, 0x0002}, {0x84bd, 0x0026}, +{0x84be, 0x0050}, {0x84bf, 0x00b6}, {0x84c0, 0x0012}, +{0x84c1, 0x0030}, {0x84c2, 0x0084}, {0x84c3, 0x0003}, +{0x84c4, 0x0081}, {0x84c5, 0x0001}, {0x84c6, 0x0027}, +{0x84c7, 0x0003}, {0x84c8, 0x007e}, {0x84c9, 0x0085}, +{0x84ca, 0x001e}, {0x84cb, 0x0096}, {0x84cc, 0x0044}, +{0x84cd, 0x0085}, {0x84ce, 0x0010}, {0x84cf, 0x0026}, +{0x84d0, 0x0013}, {0x84d1, 0x00b6}, {0x84d2, 0x0012}, +{0x84d3, 0x0050}, {0x84d4, 0x00ba}, {0x84d5, 0x0001}, +{0x84d6, 0x003c}, {0x84d7, 0x0085}, {0x84d8, 0x0010}, +{0x84d9, 0x0026}, {0x84da, 0x0009}, {0x84db, 0x00ce}, +{0x84dc, 0x0084}, {0x84dd, 0x0053}, {0x84de, 0x00ff}, +{0x84df, 0x0001}, {0x84e0, 0x0011}, {0x84e1, 0x007e}, +{0x84e2, 0x0085}, {0x84e3, 0x001e}, {0x84e4, 0x00b6}, +{0x84e5, 0x0010}, {0x84e6, 0x0031}, {0x84e7, 0x008a}, +{0x84e8, 0x0002}, {0x84e9, 0x00b7}, {0x84ea, 0x0010}, +{0x84eb, 0x0031}, {0x84ec, 0x00bd}, {0x84ed, 0x0085}, +{0x84ee, 0x001f}, {0x84ef, 0x00bd}, {0x84f0, 0x00f8}, +{0x84f1, 0x0037}, {0x84f2, 0x007c}, {0x84f3, 0x0000}, +{0x84f4, 0x0080}, {0x84f5, 0x00ce}, {0x84f6, 0x0084}, +{0x84f7, 0x00fe}, {0x84f8, 0x00ff}, {0x84f9, 0x0001}, +{0x84fa, 0x0011}, {0x84fb, 0x007e}, {0x84fc, 0x0085}, +{0x84fd, 0x001e}, {0x84fe, 0x0096}, {0x84ff, 0x0046}, +{0x8500, 0x0084}, {0x8501, 0x0003}, {0x8502, 0x0081}, +{0x8503, 0x0002}, {0x8504, 0x0026}, {0x8505, 0x0009}, +{0x8506, 0x00b6}, {0x8507, 0x0012}, {0x8508, 0x0030}, +{0x8509, 0x0084}, {0x850a, 0x0003}, {0x850b, 0x0081}, +{0x850c, 0x0001}, {0x850d, 0x0027}, {0x850e, 0x000f}, +{0x850f, 0x00bd}, {0x8510, 0x00f8}, {0x8511, 0x0044}, +{0x8512, 0x00bd}, {0x8513, 0x00f7}, {0x8514, 0x000b}, +{0x8515, 0x00bd}, {0x8516, 0x00fc}, {0x8517, 0x0029}, +{0x8518, 0x00ce}, {0x8519, 0x0084}, {0x851a, 0x0026}, +{0x851b, 0x00ff}, {0x851c, 0x0001}, {0x851d, 0x0011}, +{0x851e, 0x0039}, {0x851f, 0x00d6}, {0x8520, 0x0022}, +{0x8521, 0x00c4}, {0x8522, 0x000f}, {0x8523, 0x00b6}, +{0x8524, 0x0012}, {0x8525, 0x0030}, {0x8526, 0x00ba}, +{0x8527, 0x0012}, {0x8528, 0x0032}, {0x8529, 0x0084}, +{0x852a, 0x0004}, {0x852b, 0x0027}, {0x852c, 0x000d}, +{0x852d, 0x0096}, {0x852e, 0x0022}, {0x852f, 0x0085}, +{0x8530, 0x0004}, {0x8531, 0x0027}, {0x8532, 0x0005}, +{0x8533, 0x00ca}, {0x8534, 0x0010}, {0x8535, 0x007e}, +{0x8536, 0x0085}, {0x8537, 0x003a}, {0x8538, 0x00ca}, +{0x8539, 0x0020}, {0x853a, 0x00d7}, {0x853b, 0x0022}, +{0x853c, 0x0039}, {0x853d, 0x0086}, {0x853e, 0x0000}, +{0x853f, 0x0097}, {0x8540, 0x0083}, {0x8541, 0x0018}, +{0x8542, 0x00ce}, {0x8543, 0x001c}, {0x8544, 0x0000}, +{0x8545, 0x00bd}, {0x8546, 0x00eb}, {0x8547, 0x0046}, +{0x8548, 0x0096}, {0x8549, 0x0057}, {0x854a, 0x0085}, +{0x854b, 0x0001}, {0x854c, 0x0027}, {0x854d, 0x0002}, +{0x854e, 0x004f}, {0x854f, 0x0039}, {0x8550, 0x0085}, +{0x8551, 0x0002}, {0x8552, 0x0027}, {0x8553, 0x0001}, +{0x8554, 0x0039}, {0x8555, 0x007f}, {0x8556, 0x008f}, +{0x8557, 0x007d}, {0x8558, 0x0086}, {0x8559, 0x0004}, +{0x855a, 0x00b7}, {0x855b, 0x0012}, {0x855c, 0x0004}, +{0x855d, 0x0086}, {0x855e, 0x0008}, {0x855f, 0x00b7}, +{0x8560, 0x0012}, {0x8561, 0x0007}, {0x8562, 0x0086}, +{0x8563, 0x0010}, {0x8564, 0x00b7}, {0x8565, 0x0012}, +{0x8566, 0x000c}, {0x8567, 0x0086}, {0x8568, 0x0007}, +{0x8569, 0x00b7}, {0x856a, 0x0012}, {0x856b, 0x0006}, +{0x856c, 0x00b6}, {0x856d, 0x008f}, {0x856e, 0x007d}, +{0x856f, 0x00b7}, {0x8570, 0x0012}, {0x8571, 0x0070}, +{0x8572, 0x0086}, {0x8573, 0x0001}, {0x8574, 0x00ba}, +{0x8575, 0x0012}, {0x8576, 0x0004}, {0x8577, 0x00b7}, +{0x8578, 0x0012}, {0x8579, 0x0004}, {0x857a, 0x0001}, +{0x857b, 0x0001}, {0x857c, 0x0001}, {0x857d, 0x0001}, +{0x857e, 0x0001}, {0x857f, 0x0001}, {0x8580, 0x00b6}, +{0x8581, 0x0012}, {0x8582, 0x0004}, {0x8583, 0x0084}, +{0x8584, 0x00fe}, {0x8585, 0x008a}, {0x8586, 0x0002}, +{0x8587, 0x00b7}, {0x8588, 0x0012}, {0x8589, 0x0004}, +{0x858a, 0x0001}, {0x858b, 0x0001}, {0x858c, 0x0001}, +{0x858d, 0x0001}, {0x858e, 0x0001}, {0x858f, 0x0001}, +{0x8590, 0x0086}, {0x8591, 0x00fd}, {0x8592, 0x00b4}, +{0x8593, 0x0012}, {0x8594, 0x0004}, {0x8595, 0x00b7}, +{0x8596, 0x0012}, {0x8597, 0x0004}, {0x8598, 0x00b6}, +{0x8599, 0x0012}, {0x859a, 0x0000}, {0x859b, 0x0084}, +{0x859c, 0x0008}, {0x859d, 0x0081}, {0x859e, 0x0008}, +{0x859f, 0x0027}, {0x85a0, 0x0016}, {0x85a1, 0x00b6}, +{0x85a2, 0x008f}, {0x85a3, 0x007d}, {0x85a4, 0x0081}, +{0x85a5, 0x000c}, {0x85a6, 0x0027}, {0x85a7, 0x0008}, +{0x85a8, 0x008b}, {0x85a9, 0x0004}, {0x85aa, 0x00b7}, +{0x85ab, 0x008f}, {0x85ac, 0x007d}, {0x85ad, 0x007e}, +{0x85ae, 0x0085}, {0x85af, 0x006c}, {0x85b0, 0x0086}, +{0x85b1, 0x0003}, {0x85b2, 0x0097}, {0x85b3, 0x0040}, +{0x85b4, 0x007e}, {0x85b5, 0x0089}, {0x85b6, 0x006e}, +{0x85b7, 0x0086}, {0x85b8, 0x0007}, {0x85b9, 0x00b7}, +{0x85ba, 0x0012}, {0x85bb, 0x0006}, {0x85bc, 0x005f}, +{0x85bd, 0x00f7}, {0x85be, 0x008f}, {0x85bf, 0x0082}, +{0x85c0, 0x005f}, {0x85c1, 0x00f7}, {0x85c2, 0x008f}, +{0x85c3, 0x007f}, {0x85c4, 0x00f7}, {0x85c5, 0x008f}, +{0x85c6, 0x0070}, {0x85c7, 0x00f7}, {0x85c8, 0x008f}, +{0x85c9, 0x0071}, {0x85ca, 0x00f7}, {0x85cb, 0x008f}, +{0x85cc, 0x0072}, {0x85cd, 0x00f7}, {0x85ce, 0x008f}, +{0x85cf, 0x0073}, {0x85d0, 0x00f7}, {0x85d1, 0x008f}, +{0x85d2, 0x0074}, {0x85d3, 0x00f7}, {0x85d4, 0x008f}, +{0x85d5, 0x0075}, {0x85d6, 0x00f7}, {0x85d7, 0x008f}, +{0x85d8, 0x0076}, {0x85d9, 0x00f7}, {0x85da, 0x008f}, +{0x85db, 0x0077}, {0x85dc, 0x00f7}, {0x85dd, 0x008f}, +{0x85de, 0x0078}, {0x85df, 0x00f7}, {0x85e0, 0x008f}, +{0x85e1, 0x0079}, {0x85e2, 0x00f7}, {0x85e3, 0x008f}, +{0x85e4, 0x007a}, {0x85e5, 0x00f7}, {0x85e6, 0x008f}, +{0x85e7, 0x007b}, {0x85e8, 0x00b6}, {0x85e9, 0x0012}, +{0x85ea, 0x0004}, {0x85eb, 0x008a}, {0x85ec, 0x0010}, +{0x85ed, 0x00b7}, {0x85ee, 0x0012}, {0x85ef, 0x0004}, +{0x85f0, 0x0086}, {0x85f1, 0x00e4}, {0x85f2, 0x00b7}, +{0x85f3, 0x0012}, {0x85f4, 0x0070}, {0x85f5, 0x00b7}, +{0x85f6, 0x0012}, {0x85f7, 0x0007}, {0x85f8, 0x00f7}, +{0x85f9, 0x0012}, {0x85fa, 0x0005}, {0x85fb, 0x00f7}, +{0x85fc, 0x0012}, {0x85fd, 0x0009}, {0x85fe, 0x0086}, +{0x85ff, 0x0008}, {0x8600, 0x00ba}, {0x8601, 0x0012}, +{0x8602, 0x0004}, {0x8603, 0x00b7}, {0x8604, 0x0012}, +{0x8605, 0x0004}, {0x8606, 0x0086}, {0x8607, 0x00f7}, +{0x8608, 0x00b4}, {0x8609, 0x0012}, {0x860a, 0x0004}, +{0x860b, 0x00b7}, {0x860c, 0x0012}, {0x860d, 0x0004}, +{0x860e, 0x0001}, {0x860f, 0x0001}, {0x8610, 0x0001}, +{0x8611, 0x0001}, {0x8612, 0x0001}, {0x8613, 0x0001}, +{0x8614, 0x00b6}, {0x8615, 0x0012}, {0x8616, 0x0008}, +{0x8617, 0x0027}, {0x8618, 0x007f}, {0x8619, 0x0081}, +{0x861a, 0x0080}, {0x861b, 0x0026}, {0x861c, 0x000b}, +{0x861d, 0x0086}, {0x861e, 0x0008}, {0x861f, 0x00ce}, +{0x8620, 0x008f}, {0x8621, 0x0079}, {0x8622, 0x00bd}, +{0x8623, 0x0089}, {0x8624, 0x007b}, {0x8625, 0x007e}, +{0x8626, 0x0086}, {0x8627, 0x008e}, {0x8628, 0x0081}, +{0x8629, 0x0040}, {0x862a, 0x0026}, {0x862b, 0x000b}, +{0x862c, 0x0086}, {0x862d, 0x0004}, {0x862e, 0x00ce}, +{0x862f, 0x008f}, {0x8630, 0x0076}, {0x8631, 0x00bd}, +{0x8632, 0x0089}, {0x8633, 0x007b}, {0x8634, 0x007e}, +{0x8635, 0x0086}, {0x8636, 0x008e}, {0x8637, 0x0081}, +{0x8638, 0x0020}, {0x8639, 0x0026}, {0x863a, 0x000b}, +{0x863b, 0x0086}, {0x863c, 0x0002}, {0x863d, 0x00ce}, +{0x863e, 0x008f}, {0x863f, 0x0073}, {0x8640, 0x00bd}, +{0x8641, 0x0089}, {0x8642, 0x007b}, {0x8643, 0x007e}, +{0x8644, 0x0086}, {0x8645, 0x008e}, {0x8646, 0x0081}, +{0x8647, 0x0010}, {0x8648, 0x0026}, {0x8649, 0x000b}, +{0x864a, 0x0086}, {0x864b, 0x0001}, {0x864c, 0x00ce}, +{0x864d, 0x008f}, {0x864e, 0x0070}, {0x864f, 0x00bd}, +{0x8650, 0x0089}, {0x8651, 0x007b}, {0x8652, 0x007e}, +{0x8653, 0x0086}, {0x8654, 0x008e}, {0x8655, 0x0081}, +{0x8656, 0x0008}, {0x8657, 0x0026}, {0x8658, 0x000b}, +{0x8659, 0x0086}, {0x865a, 0x0008}, {0x865b, 0x00ce}, +{0x865c, 0x008f}, {0x865d, 0x0079}, {0x865e, 0x00bd}, +{0x865f, 0x0089}, {0x8660, 0x007f}, {0x8661, 0x007e}, +{0x8662, 0x0086}, {0x8663, 0x008e}, {0x8664, 0x0081}, +{0x8665, 0x0004}, {0x8666, 0x0026}, {0x8667, 0x000b}, +{0x8668, 0x0086}, {0x8669, 0x0004}, {0x866a, 0x00ce}, +{0x866b, 0x008f}, {0x866c, 0x0076}, {0x866d, 0x00bd}, +{0x866e, 0x0089}, {0x866f, 0x007f}, {0x8670, 0x007e}, +{0x8671, 0x0086}, {0x8672, 0x008e}, {0x8673, 0x0081}, +{0x8674, 0x0002}, {0x8675, 0x0026}, {0x8676, 0x000b}, +{0x8677, 0x008a}, {0x8678, 0x0002}, {0x8679, 0x00ce}, +{0x867a, 0x008f}, {0x867b, 0x0073}, {0x867c, 0x00bd}, +{0x867d, 0x0089}, {0x867e, 0x007f}, {0x867f, 0x007e}, +{0x8680, 0x0086}, {0x8681, 0x008e}, {0x8682, 0x0081}, +{0x8683, 0x0001}, {0x8684, 0x0026}, {0x8685, 0x0008}, +{0x8686, 0x0086}, {0x8687, 0x0001}, {0x8688, 0x00ce}, +{0x8689, 0x008f}, {0x868a, 0x0070}, {0x868b, 0x00bd}, +{0x868c, 0x0089}, {0x868d, 0x007f}, {0x868e, 0x00b6}, +{0x868f, 0x008f}, {0x8690, 0x007f}, {0x8691, 0x0081}, +{0x8692, 0x000f}, {0x8693, 0x0026}, {0x8694, 0x0003}, +{0x8695, 0x007e}, {0x8696, 0x0087}, {0x8697, 0x0047}, +{0x8698, 0x00b6}, {0x8699, 0x0012}, {0x869a, 0x0009}, +{0x869b, 0x0084}, {0x869c, 0x0003}, {0x869d, 0x0081}, +{0x869e, 0x0003}, {0x869f, 0x0027}, {0x86a0, 0x0006}, +{0x86a1, 0x007c}, {0x86a2, 0x0012}, {0x86a3, 0x0009}, +{0x86a4, 0x007e}, {0x86a5, 0x0085}, {0x86a6, 0x00fe}, +{0x86a7, 0x00b6}, {0x86a8, 0x0012}, {0x86a9, 0x0006}, +{0x86aa, 0x0084}, {0x86ab, 0x0007}, {0x86ac, 0x0081}, +{0x86ad, 0x0007}, {0x86ae, 0x0027}, {0x86af, 0x0008}, +{0x86b0, 0x008b}, {0x86b1, 0x0001}, {0x86b2, 0x00b7}, +{0x86b3, 0x0012}, {0x86b4, 0x0006}, {0x86b5, 0x007e}, +{0x86b6, 0x0086}, {0x86b7, 0x00d5}, {0x86b8, 0x00b6}, +{0x86b9, 0x008f}, {0x86ba, 0x0082}, {0x86bb, 0x0026}, +{0x86bc, 0x000a}, {0x86bd, 0x007c}, {0x86be, 0x008f}, +{0x86bf, 0x0082}, {0x86c0, 0x004f}, {0x86c1, 0x00b7}, +{0x86c2, 0x0012}, {0x86c3, 0x0006}, {0x86c4, 0x007e}, +{0x86c5, 0x0085}, {0x86c6, 0x00c0}, {0x86c7, 0x00b6}, +{0x86c8, 0x0012}, {0x86c9, 0x0006}, {0x86ca, 0x0084}, +{0x86cb, 0x003f}, {0x86cc, 0x0081}, {0x86cd, 0x003f}, +{0x86ce, 0x0027}, {0x86cf, 0x0010}, {0x86d0, 0x008b}, +{0x86d1, 0x0008}, {0x86d2, 0x00b7}, {0x86d3, 0x0012}, +{0x86d4, 0x0006}, {0x86d5, 0x00b6}, {0x86d6, 0x0012}, +{0x86d7, 0x0009}, {0x86d8, 0x0084}, {0x86d9, 0x00fc}, +{0x86da, 0x00b7}, {0x86db, 0x0012}, {0x86dc, 0x0009}, +{0x86dd, 0x007e}, {0x86de, 0x0085}, {0x86df, 0x00fe}, +{0x86e0, 0x00ce}, {0x86e1, 0x008f}, {0x86e2, 0x0070}, +{0x86e3, 0x0018}, {0x86e4, 0x00ce}, {0x86e5, 0x008f}, +{0x86e6, 0x0084}, {0x86e7, 0x00c6}, {0x86e8, 0x000c}, +{0x86e9, 0x00bd}, {0x86ea, 0x0089}, {0x86eb, 0x006f}, +{0x86ec, 0x00ce}, {0x86ed, 0x008f}, {0x86ee, 0x0084}, +{0x86ef, 0x0018}, {0x86f0, 0x00ce}, {0x86f1, 0x008f}, +{0x86f2, 0x0070}, {0x86f3, 0x00c6}, {0x86f4, 0x000c}, +{0x86f5, 0x00bd}, {0x86f6, 0x0089}, {0x86f7, 0x006f}, +{0x86f8, 0x00d6}, {0x86f9, 0x0083}, {0x86fa, 0x00c1}, +{0x86fb, 0x004f}, {0x86fc, 0x002d}, {0x86fd, 0x0003}, +{0x86fe, 0x007e}, {0x86ff, 0x0087}, {0x8700, 0x0040}, +{0x8701, 0x00b6}, {0x8702, 0x008f}, {0x8703, 0x007f}, +{0x8704, 0x0081}, {0x8705, 0x0007}, {0x8706, 0x0027}, +{0x8707, 0x000f}, {0x8708, 0x0081}, {0x8709, 0x000b}, +{0x870a, 0x0027}, {0x870b, 0x0015}, {0x870c, 0x0081}, +{0x870d, 0x000d}, {0x870e, 0x0027}, {0x870f, 0x001b}, +{0x8710, 0x0081}, {0x8711, 0x000e}, {0x8712, 0x0027}, +{0x8713, 0x0021}, {0x8714, 0x007e}, {0x8715, 0x0087}, +{0x8716, 0x0040}, {0x8717, 0x00f7}, {0x8718, 0x008f}, +{0x8719, 0x007b}, {0x871a, 0x0086}, {0x871b, 0x0002}, +{0x871c, 0x00b7}, {0x871d, 0x008f}, {0x871e, 0x007a}, +{0x871f, 0x0020}, {0x8720, 0x001c}, {0x8721, 0x00f7}, +{0x8722, 0x008f}, {0x8723, 0x0078}, {0x8724, 0x0086}, +{0x8725, 0x0002}, {0x8726, 0x00b7}, {0x8727, 0x008f}, +{0x8728, 0x0077}, {0x8729, 0x0020}, {0x872a, 0x0012}, +{0x872b, 0x00f7}, {0x872c, 0x008f}, {0x872d, 0x0075}, +{0x872e, 0x0086}, {0x872f, 0x0002}, {0x8730, 0x00b7}, +{0x8731, 0x008f}, {0x8732, 0x0074}, {0x8733, 0x0020}, +{0x8734, 0x0008}, {0x8735, 0x00f7}, {0x8736, 0x008f}, +{0x8737, 0x0072}, {0x8738, 0x0086}, {0x8739, 0x0002}, +{0x873a, 0x00b7}, {0x873b, 0x008f}, {0x873c, 0x0071}, +{0x873d, 0x007e}, {0x873e, 0x0087}, {0x873f, 0x0047}, +{0x8740, 0x0086}, {0x8741, 0x0004}, {0x8742, 0x0097}, +{0x8743, 0x0040}, {0x8744, 0x007e}, {0x8745, 0x0089}, +{0x8746, 0x006e}, {0x8747, 0x00ce}, {0x8748, 0x008f}, +{0x8749, 0x0072}, {0x874a, 0x00bd}, {0x874b, 0x0089}, +{0x874c, 0x00f7}, {0x874d, 0x00ce}, {0x874e, 0x008f}, +{0x874f, 0x0075}, {0x8750, 0x00bd}, {0x8751, 0x0089}, +{0x8752, 0x00f7}, {0x8753, 0x00ce}, {0x8754, 0x008f}, +{0x8755, 0x0078}, {0x8756, 0x00bd}, {0x8757, 0x0089}, +{0x8758, 0x00f7}, {0x8759, 0x00ce}, {0x875a, 0x008f}, +{0x875b, 0x007b}, {0x875c, 0x00bd}, {0x875d, 0x0089}, +{0x875e, 0x00f7}, {0x875f, 0x004f}, {0x8760, 0x00b7}, +{0x8761, 0x008f}, {0x8762, 0x007d}, {0x8763, 0x00b7}, +{0x8764, 0x008f}, {0x8765, 0x0081}, {0x8766, 0x00b6}, +{0x8767, 0x008f}, {0x8768, 0x0072}, {0x8769, 0x0027}, +{0x876a, 0x0047}, {0x876b, 0x007c}, {0x876c, 0x008f}, +{0x876d, 0x007d}, {0x876e, 0x00b6}, {0x876f, 0x008f}, +{0x8770, 0x0075}, {0x8771, 0x0027}, {0x8772, 0x003f}, +{0x8773, 0x007c}, {0x8774, 0x008f}, {0x8775, 0x007d}, +{0x8776, 0x00b6}, {0x8777, 0x008f}, {0x8778, 0x0078}, +{0x8779, 0x0027}, {0x877a, 0x0037}, {0x877b, 0x007c}, +{0x877c, 0x008f}, {0x877d, 0x007d}, {0x877e, 0x00b6}, +{0x877f, 0x008f}, {0x8780, 0x007b}, {0x8781, 0x0027}, +{0x8782, 0x002f}, {0x8783, 0x007f}, {0x8784, 0x008f}, +{0x8785, 0x007d}, {0x8786, 0x007c}, {0x8787, 0x008f}, +{0x8788, 0x0081}, {0x8789, 0x007a}, {0x878a, 0x008f}, +{0x878b, 0x0072}, {0x878c, 0x0027}, {0x878d, 0x001b}, +{0x878e, 0x007c}, {0x878f, 0x008f}, {0x8790, 0x007d}, +{0x8791, 0x007a}, {0x8792, 0x008f}, {0x8793, 0x0075}, +{0x8794, 0x0027}, {0x8795, 0x0016}, {0x8796, 0x007c}, +{0x8797, 0x008f}, {0x8798, 0x007d}, {0x8799, 0x007a}, +{0x879a, 0x008f}, {0x879b, 0x0078}, {0x879c, 0x0027}, +{0x879d, 0x0011}, {0x879e, 0x007c}, {0x879f, 0x008f}, +{0x87a0, 0x007d}, {0x87a1, 0x007a}, {0x87a2, 0x008f}, +{0x87a3, 0x007b}, {0x87a4, 0x0027}, {0x87a5, 0x000c}, +{0x87a6, 0x007e}, {0x87a7, 0x0087}, {0x87a8, 0x0083}, +{0x87a9, 0x007a}, {0x87aa, 0x008f}, {0x87ab, 0x0075}, +{0x87ac, 0x007a}, {0x87ad, 0x008f}, {0x87ae, 0x0078}, +{0x87af, 0x007a}, {0x87b0, 0x008f}, {0x87b1, 0x007b}, +{0x87b2, 0x00ce}, {0x87b3, 0x00c1}, {0x87b4, 0x00fc}, +{0x87b5, 0x00f6}, {0x87b6, 0x008f}, {0x87b7, 0x007d}, +{0x87b8, 0x003a}, {0x87b9, 0x00a6}, {0x87ba, 0x0000}, +{0x87bb, 0x00b7}, {0x87bc, 0x0012}, {0x87bd, 0x0070}, +{0x87be, 0x00b6}, {0x87bf, 0x008f}, {0x87c0, 0x0072}, +{0x87c1, 0x0026}, {0x87c2, 0x0003}, {0x87c3, 0x007e}, +{0x87c4, 0x0087}, {0x87c5, 0x00fa}, {0x87c6, 0x00b6}, +{0x87c7, 0x008f}, {0x87c8, 0x0075}, {0x87c9, 0x0026}, +{0x87ca, 0x000a}, {0x87cb, 0x0018}, {0x87cc, 0x00ce}, +{0x87cd, 0x008f}, {0x87ce, 0x0073}, {0x87cf, 0x00bd}, +{0x87d0, 0x0089}, {0x87d1, 0x00d5}, {0x87d2, 0x007e}, +{0x87d3, 0x0087}, {0x87d4, 0x00fa}, {0x87d5, 0x00b6}, +{0x87d6, 0x008f}, {0x87d7, 0x0078}, {0x87d8, 0x0026}, +{0x87d9, 0x000a}, {0x87da, 0x0018}, {0x87db, 0x00ce}, +{0x87dc, 0x008f}, {0x87dd, 0x0076}, {0x87de, 0x00bd}, +{0x87df, 0x0089}, {0x87e0, 0x00d5}, {0x87e1, 0x007e}, +{0x87e2, 0x0087}, {0x87e3, 0x00fa}, {0x87e4, 0x00b6}, +{0x87e5, 0x008f}, {0x87e6, 0x007b}, {0x87e7, 0x0026}, +{0x87e8, 0x000a}, {0x87e9, 0x0018}, {0x87ea, 0x00ce}, +{0x87eb, 0x008f}, {0x87ec, 0x0079}, {0x87ed, 0x00bd}, +{0x87ee, 0x0089}, {0x87ef, 0x00d5}, {0x87f0, 0x007e}, +{0x87f1, 0x0087}, {0x87f2, 0x00fa}, {0x87f3, 0x0086}, +{0x87f4, 0x0005}, {0x87f5, 0x0097}, {0x87f6, 0x0040}, +{0x87f7, 0x007e}, {0x87f8, 0x0089}, {0x87f9, 0x006e}, +{0x87fa, 0x00b6}, {0x87fb, 0x008f}, {0x87fc, 0x0075}, +{0x87fd, 0x0081}, {0x87fe, 0x0007}, {0x87ff, 0x002e}, +{0x8800, 0x00f2}, {0x8801, 0x00f6}, {0x8802, 0x0012}, +{0x8803, 0x0006}, {0x8804, 0x00c4}, {0x8805, 0x00f8}, +{0x8806, 0x001b}, {0x8807, 0x00b7}, {0x8808, 0x0012}, +{0x8809, 0x0006}, {0x880a, 0x00b6}, {0x880b, 0x008f}, +{0x880c, 0x0078}, {0x880d, 0x0081}, {0x880e, 0x0007}, +{0x880f, 0x002e}, {0x8810, 0x00e2}, {0x8811, 0x0048}, +{0x8812, 0x0048}, {0x8813, 0x0048}, {0x8814, 0x00f6}, +{0x8815, 0x0012}, {0x8816, 0x0006}, {0x8817, 0x00c4}, +{0x8818, 0x00c7}, {0x8819, 0x001b}, {0x881a, 0x00b7}, +{0x881b, 0x0012}, {0x881c, 0x0006}, {0x881d, 0x00b6}, +{0x881e, 0x008f}, {0x881f, 0x007b}, {0x8820, 0x0081}, +{0x8821, 0x0007}, {0x8822, 0x002e}, {0x8823, 0x00cf}, +{0x8824, 0x00f6}, {0x8825, 0x0012}, {0x8826, 0x0005}, +{0x8827, 0x00c4}, {0x8828, 0x00f8}, {0x8829, 0x001b}, +{0x882a, 0x00b7}, {0x882b, 0x0012}, {0x882c, 0x0005}, +{0x882d, 0x0086}, {0x882e, 0x0000}, {0x882f, 0x00f6}, +{0x8830, 0x008f}, {0x8831, 0x0071}, {0x8832, 0x00bd}, +{0x8833, 0x0089}, {0x8834, 0x0094}, {0x8835, 0x0086}, +{0x8836, 0x0001}, {0x8837, 0x00f6}, {0x8838, 0x008f}, +{0x8839, 0x0074}, {0x883a, 0x00bd}, {0x883b, 0x0089}, +{0x883c, 0x0094}, {0x883d, 0x0086}, {0x883e, 0x0002}, +{0x883f, 0x00f6}, {0x8840, 0x008f}, {0x8841, 0x0077}, +{0x8842, 0x00bd}, {0x8843, 0x0089}, {0x8844, 0x0094}, +{0x8845, 0x0086}, {0x8846, 0x0003}, {0x8847, 0x00f6}, +{0x8848, 0x008f}, {0x8849, 0x007a}, {0x884a, 0x00bd}, +{0x884b, 0x0089}, {0x884c, 0x0094}, {0x884d, 0x00ce}, +{0x884e, 0x008f}, {0x884f, 0x0070}, {0x8850, 0x00a6}, +{0x8851, 0x0001}, {0x8852, 0x0081}, {0x8853, 0x0001}, +{0x8854, 0x0027}, {0x8855, 0x0007}, {0x8856, 0x0081}, +{0x8857, 0x0003}, {0x8858, 0x0027}, {0x8859, 0x0003}, +{0x885a, 0x007e}, {0x885b, 0x0088}, {0x885c, 0x0066}, +{0x885d, 0x00a6}, {0x885e, 0x0000}, {0x885f, 0x00b8}, +{0x8860, 0x008f}, {0x8861, 0x0081}, {0x8862, 0x0084}, +{0x8863, 0x0001}, {0x8864, 0x0026}, {0x8865, 0x000b}, +{0x8866, 0x008c}, {0x8867, 0x008f}, {0x8868, 0x0079}, +{0x8869, 0x002c}, {0x886a, 0x000e}, {0x886b, 0x0008}, +{0x886c, 0x0008}, {0x886d, 0x0008}, {0x886e, 0x007e}, +{0x886f, 0x0088}, {0x8870, 0x0050}, {0x8871, 0x00b6}, +{0x8872, 0x0012}, {0x8873, 0x0004}, {0x8874, 0x008a}, +{0x8875, 0x0040}, {0x8876, 0x00b7}, {0x8877, 0x0012}, +{0x8878, 0x0004}, {0x8879, 0x00b6}, {0x887a, 0x0012}, +{0x887b, 0x0004}, {0x887c, 0x0084}, {0x887d, 0x00fb}, +{0x887e, 0x0084}, {0x887f, 0x00ef}, {0x8880, 0x00b7}, +{0x8881, 0x0012}, {0x8882, 0x0004}, {0x8883, 0x00b6}, +{0x8884, 0x0012}, {0x8885, 0x0007}, {0x8886, 0x0036}, +{0x8887, 0x00b6}, {0x8888, 0x008f}, {0x8889, 0x007c}, +{0x888a, 0x0048}, {0x888b, 0x0048}, {0x888c, 0x00b7}, +{0x888d, 0x0012}, {0x888e, 0x0007}, {0x888f, 0x0086}, +{0x8890, 0x0001}, {0x8891, 0x00ba}, {0x8892, 0x0012}, +{0x8893, 0x0004}, {0x8894, 0x00b7}, {0x8895, 0x0012}, +{0x8896, 0x0004}, {0x8897, 0x0001}, {0x8898, 0x0001}, +{0x8899, 0x0001}, {0x889a, 0x0001}, {0x889b, 0x0001}, +{0x889c, 0x0001}, {0x889d, 0x0086}, {0x889e, 0x00fe}, +{0x889f, 0x00b4}, {0x88a0, 0x0012}, {0x88a1, 0x0004}, +{0x88a2, 0x00b7}, {0x88a3, 0x0012}, {0x88a4, 0x0004}, +{0x88a5, 0x0086}, {0x88a6, 0x0002}, {0x88a7, 0x00ba}, +{0x88a8, 0x0012}, {0x88a9, 0x0004}, {0x88aa, 0x00b7}, +{0x88ab, 0x0012}, {0x88ac, 0x0004}, {0x88ad, 0x0086}, +{0x88ae, 0x00fd}, {0x88af, 0x00b4}, {0x88b0, 0x0012}, +{0x88b1, 0x0004}, {0x88b2, 0x00b7}, {0x88b3, 0x0012}, +{0x88b4, 0x0004}, {0x88b5, 0x0032}, {0x88b6, 0x00b7}, +{0x88b7, 0x0012}, {0x88b8, 0x0007}, {0x88b9, 0x00b6}, +{0x88ba, 0x0012}, {0x88bb, 0x0000}, {0x88bc, 0x0084}, +{0x88bd, 0x0008}, {0x88be, 0x0081}, {0x88bf, 0x0008}, +{0x88c0, 0x0027}, {0x88c1, 0x000f}, {0x88c2, 0x007c}, +{0x88c3, 0x0082}, {0x88c4, 0x0008}, {0x88c5, 0x0026}, +{0x88c6, 0x0007}, {0x88c7, 0x0086}, {0x88c8, 0x0076}, +{0x88c9, 0x0097}, {0x88ca, 0x0040}, {0x88cb, 0x007e}, +{0x88cc, 0x0089}, {0x88cd, 0x006e}, {0x88ce, 0x007e}, +{0x88cf, 0x0086}, {0x88d0, 0x00ec}, {0x88d1, 0x00b6}, +{0x88d2, 0x008f}, {0x88d3, 0x007f}, {0x88d4, 0x0081}, +{0x88d5, 0x000f}, {0x88d6, 0x0027}, {0x88d7, 0x003c}, +{0x88d8, 0x00bd}, {0x88d9, 0x00e6}, {0x88da, 0x00c7}, +{0x88db, 0x00b7}, {0x88dc, 0x0012}, {0x88dd, 0x000d}, +{0x88de, 0x00bd}, {0x88df, 0x00e6}, {0x88e0, 0x00cb}, +{0x88e1, 0x00b6}, {0x88e2, 0x0012}, {0x88e3, 0x0004}, +{0x88e4, 0x008a}, {0x88e5, 0x0020}, {0x88e6, 0x00b7}, +{0x88e7, 0x0012}, {0x88e8, 0x0004}, {0x88e9, 0x00ce}, +{0x88ea, 0x00ff}, {0x88eb, 0x00ff}, {0x88ec, 0x00b6}, +{0x88ed, 0x0012}, {0x88ee, 0x0000}, {0x88ef, 0x0081}, +{0x88f0, 0x000c}, {0x88f1, 0x0026}, {0x88f2, 0x0005}, +{0x88f3, 0x0009}, {0x88f4, 0x0026}, {0x88f5, 0x00f6}, +{0x88f6, 0x0027}, {0x88f7, 0x001c}, {0x88f8, 0x00b6}, +{0x88f9, 0x0012}, {0x88fa, 0x0004}, {0x88fb, 0x0084}, +{0x88fc, 0x00df}, {0x88fd, 0x00b7}, {0x88fe, 0x0012}, +{0x88ff, 0x0004}, {0x8900, 0x0096}, {0x8901, 0x0083}, +{0x8902, 0x0081}, {0x8903, 0x0007}, {0x8904, 0x002c}, +{0x8905, 0x0005}, {0x8906, 0x007c}, {0x8907, 0x0000}, +{0x8908, 0x0083}, {0x8909, 0x0020}, {0x890a, 0x0006}, +{0x890b, 0x0096}, {0x890c, 0x0083}, {0x890d, 0x008b}, +{0x890e, 0x0008}, {0x890f, 0x0097}, {0x8910, 0x0083}, +{0x8911, 0x007e}, {0x8912, 0x0085}, {0x8913, 0x0041}, +{0x8914, 0x007f}, {0x8915, 0x008f}, {0x8916, 0x007e}, +{0x8917, 0x0086}, {0x8918, 0x0080}, {0x8919, 0x00b7}, +{0x891a, 0x0012}, {0x891b, 0x000c}, {0x891c, 0x0086}, +{0x891d, 0x0001}, {0x891e, 0x00b7}, {0x891f, 0x008f}, +{0x8920, 0x007d}, {0x8921, 0x00b6}, {0x8922, 0x0012}, +{0x8923, 0x000c}, {0x8924, 0x0084}, {0x8925, 0x007f}, +{0x8926, 0x00b7}, {0x8927, 0x0012}, {0x8928, 0x000c}, +{0x8929, 0x008a}, {0x892a, 0x0080}, {0x892b, 0x00b7}, +{0x892c, 0x0012}, {0x892d, 0x000c}, {0x892e, 0x0086}, +{0x892f, 0x000a}, {0x8930, 0x00bd}, {0x8931, 0x008a}, +{0x8932, 0x0006}, {0x8933, 0x00b6}, {0x8934, 0x0012}, +{0x8935, 0x000a}, {0x8936, 0x002a}, {0x8937, 0x0009}, +{0x8938, 0x00b6}, {0x8939, 0x0012}, {0x893a, 0x000c}, +{0x893b, 0x00ba}, {0x893c, 0x008f}, {0x893d, 0x007d}, +{0x893e, 0x00b7}, {0x893f, 0x0012}, {0x8940, 0x000c}, +{0x8941, 0x00b6}, {0x8942, 0x008f}, {0x8943, 0x007e}, +{0x8944, 0x0081}, {0x8945, 0x0060}, {0x8946, 0x0027}, +{0x8947, 0x001a}, {0x8948, 0x008b}, {0x8949, 0x0020}, +{0x894a, 0x00b7}, {0x894b, 0x008f}, {0x894c, 0x007e}, +{0x894d, 0x00b6}, {0x894e, 0x0012}, {0x894f, 0x000c}, +{0x8950, 0x0084}, {0x8951, 0x009f}, {0x8952, 0x00ba}, +{0x8953, 0x008f}, {0x8954, 0x007e}, {0x8955, 0x00b7}, +{0x8956, 0x0012}, {0x8957, 0x000c}, {0x8958, 0x00b6}, +{0x8959, 0x008f}, {0x895a, 0x007d}, {0x895b, 0x0048}, +{0x895c, 0x00b7}, {0x895d, 0x008f}, {0x895e, 0x007d}, +{0x895f, 0x007e}, {0x8960, 0x0089}, {0x8961, 0x0021}, +{0x8962, 0x00b6}, {0x8963, 0x0012}, {0x8964, 0x0004}, +{0x8965, 0x008a}, {0x8966, 0x0020}, {0x8967, 0x00b7}, +{0x8968, 0x0012}, {0x8969, 0x0004}, {0x896a, 0x00bd}, +{0x896b, 0x008a}, {0x896c, 0x000a}, {0x896d, 0x004f}, +{0x896e, 0x0039}, {0x896f, 0x00a6}, {0x8970, 0x0000}, +{0x8971, 0x0018}, {0x8972, 0x00a7}, {0x8973, 0x0000}, +{0x8974, 0x0008}, {0x8975, 0x0018}, {0x8976, 0x0008}, +{0x8977, 0x005a}, {0x8978, 0x0026}, {0x8979, 0x00f5}, +{0x897a, 0x0039}, {0x897b, 0x0036}, {0x897c, 0x006c}, +{0x897d, 0x0000}, {0x897e, 0x0032}, {0x897f, 0x00ba}, +{0x8980, 0x008f}, {0x8981, 0x007f}, {0x8982, 0x00b7}, +{0x8983, 0x008f}, {0x8984, 0x007f}, {0x8985, 0x00b6}, +{0x8986, 0x0012}, {0x8987, 0x0009}, {0x8988, 0x0084}, +{0x8989, 0x0003}, {0x898a, 0x00a7}, {0x898b, 0x0001}, +{0x898c, 0x00b6}, {0x898d, 0x0012}, {0x898e, 0x0006}, +{0x898f, 0x0084}, {0x8990, 0x003f}, {0x8991, 0x00a7}, +{0x8992, 0x0002}, {0x8993, 0x0039}, {0x8994, 0x0036}, +{0x8995, 0x0086}, {0x8996, 0x0003}, {0x8997, 0x00b7}, +{0x8998, 0x008f}, {0x8999, 0x0080}, {0x899a, 0x0032}, +{0x899b, 0x00c1}, {0x899c, 0x0000}, {0x899d, 0x0026}, +{0x899e, 0x0006}, {0x899f, 0x00b7}, {0x89a0, 0x008f}, +{0x89a1, 0x007c}, {0x89a2, 0x007e}, {0x89a3, 0x0089}, +{0x89a4, 0x00c9}, {0x89a5, 0x00c1}, {0x89a6, 0x0001}, +{0x89a7, 0x0027}, {0x89a8, 0x0018}, {0x89a9, 0x00c1}, +{0x89aa, 0x0002}, {0x89ab, 0x0027}, {0x89ac, 0x000c}, +{0x89ad, 0x00c1}, {0x89ae, 0x0003}, {0x89af, 0x0027}, +{0x89b0, 0x0000}, {0x89b1, 0x00f6}, {0x89b2, 0x008f}, +{0x89b3, 0x0080}, {0x89b4, 0x0005}, {0x89b5, 0x0005}, +{0x89b6, 0x00f7}, {0x89b7, 0x008f}, {0x89b8, 0x0080}, +{0x89b9, 0x00f6}, {0x89ba, 0x008f}, {0x89bb, 0x0080}, +{0x89bc, 0x0005}, {0x89bd, 0x0005}, {0x89be, 0x00f7}, +{0x89bf, 0x008f}, {0x89c0, 0x0080}, {0x89c1, 0x00f6}, +{0x89c2, 0x008f}, {0x89c3, 0x0080}, {0x89c4, 0x0005}, +{0x89c5, 0x0005}, {0x89c6, 0x00f7}, {0x89c7, 0x008f}, +{0x89c8, 0x0080}, {0x89c9, 0x00f6}, {0x89ca, 0x008f}, +{0x89cb, 0x0080}, {0x89cc, 0x0053}, {0x89cd, 0x00f4}, +{0x89ce, 0x0012}, {0x89cf, 0x0007}, {0x89d0, 0x001b}, +{0x89d1, 0x00b7}, {0x89d2, 0x0012}, {0x89d3, 0x0007}, +{0x89d4, 0x0039}, {0x89d5, 0x00ce}, {0x89d6, 0x008f}, +{0x89d7, 0x0070}, {0x89d8, 0x00a6}, {0x89d9, 0x0000}, +{0x89da, 0x0018}, {0x89db, 0x00e6}, {0x89dc, 0x0000}, +{0x89dd, 0x0018}, {0x89de, 0x00a7}, {0x89df, 0x0000}, +{0x89e0, 0x00e7}, {0x89e1, 0x0000}, {0x89e2, 0x00a6}, +{0x89e3, 0x0001}, {0x89e4, 0x0018}, {0x89e5, 0x00e6}, +{0x89e6, 0x0001}, {0x89e7, 0x0018}, {0x89e8, 0x00a7}, +{0x89e9, 0x0001}, {0x89ea, 0x00e7}, {0x89eb, 0x0001}, +{0x89ec, 0x00a6}, {0x89ed, 0x0002}, {0x89ee, 0x0018}, +{0x89ef, 0x00e6}, {0x89f0, 0x0002}, {0x89f1, 0x0018}, +{0x89f2, 0x00a7}, {0x89f3, 0x0002}, {0x89f4, 0x00e7}, +{0x89f5, 0x0002}, {0x89f6, 0x0039}, {0x89f7, 0x00a6}, +{0x89f8, 0x0000}, {0x89f9, 0x0084}, {0x89fa, 0x0007}, +{0x89fb, 0x00e6}, {0x89fc, 0x0000}, {0x89fd, 0x00c4}, +{0x89fe, 0x0038}, {0x89ff, 0x0054}, {0x8a00, 0x0054}, +{0x8a01, 0x0054}, {0x8a02, 0x001b}, {0x8a03, 0x00a7}, +{0x8a04, 0x0000}, {0x8a05, 0x0039}, {0x8a06, 0x004a}, +{0x8a07, 0x0026}, {0x8a08, 0x00fd}, {0x8a09, 0x0039}, +{0x8a0a, 0x0096}, {0x8a0b, 0x0022}, {0x8a0c, 0x0084}, +{0x8a0d, 0x000f}, {0x8a0e, 0x0097}, {0x8a0f, 0x0022}, +{0x8a10, 0x0086}, {0x8a11, 0x0001}, {0x8a12, 0x00b7}, +{0x8a13, 0x008f}, {0x8a14, 0x0070}, {0x8a15, 0x00b6}, +{0x8a16, 0x0012}, {0x8a17, 0x0007}, {0x8a18, 0x00b7}, +{0x8a19, 0x008f}, {0x8a1a, 0x0071}, {0x8a1b, 0x00f6}, +{0x8a1c, 0x0012}, {0x8a1d, 0x000c}, {0x8a1e, 0x00c4}, +{0x8a1f, 0x000f}, {0x8a20, 0x00c8}, {0x8a21, 0x000f}, +{0x8a22, 0x00f7}, {0x8a23, 0x008f}, {0x8a24, 0x0072}, +{0x8a25, 0x00f6}, {0x8a26, 0x008f}, {0x8a27, 0x0072}, +{0x8a28, 0x00b6}, {0x8a29, 0x008f}, {0x8a2a, 0x0071}, +{0x8a2b, 0x0084}, {0x8a2c, 0x0003}, {0x8a2d, 0x0027}, +{0x8a2e, 0x0014}, {0x8a2f, 0x0081}, {0x8a30, 0x0001}, +{0x8a31, 0x0027}, {0x8a32, 0x001c}, {0x8a33, 0x0081}, +{0x8a34, 0x0002}, {0x8a35, 0x0027}, {0x8a36, 0x0024}, +{0x8a37, 0x00f4}, {0x8a38, 0x008f}, {0x8a39, 0x0070}, +{0x8a3a, 0x0027}, {0x8a3b, 0x002a}, {0x8a3c, 0x0096}, +{0x8a3d, 0x0022}, {0x8a3e, 0x008a}, {0x8a3f, 0x0080}, +{0x8a40, 0x007e}, {0x8a41, 0x008a}, {0x8a42, 0x0064}, +{0x8a43, 0x00f4}, {0x8a44, 0x008f}, {0x8a45, 0x0070}, +{0x8a46, 0x0027}, {0x8a47, 0x001e}, {0x8a48, 0x0096}, +{0x8a49, 0x0022}, {0x8a4a, 0x008a}, {0x8a4b, 0x0010}, +{0x8a4c, 0x007e}, {0x8a4d, 0x008a}, {0x8a4e, 0x0064}, +{0x8a4f, 0x00f4}, {0x8a50, 0x008f}, {0x8a51, 0x0070}, +{0x8a52, 0x0027}, {0x8a53, 0x0012}, {0x8a54, 0x0096}, +{0x8a55, 0x0022}, {0x8a56, 0x008a}, {0x8a57, 0x0020}, +{0x8a58, 0x007e}, {0x8a59, 0x008a}, {0x8a5a, 0x0064}, +{0x8a5b, 0x00f4}, {0x8a5c, 0x008f}, {0x8a5d, 0x0070}, +{0x8a5e, 0x0027}, {0x8a5f, 0x0006}, {0x8a60, 0x0096}, +{0x8a61, 0x0022}, {0x8a62, 0x008a}, {0x8a63, 0x0040}, +{0x8a64, 0x0097}, {0x8a65, 0x0022}, {0x8a66, 0x0074}, +{0x8a67, 0x008f}, {0x8a68, 0x0071}, {0x8a69, 0x0074}, +{0x8a6a, 0x008f}, {0x8a6b, 0x0071}, {0x8a6c, 0x0078}, +{0x8a6d, 0x008f}, {0x8a6e, 0x0070}, {0x8a6f, 0x00b6}, +{0x8a70, 0x008f}, {0x8a71, 0x0070}, {0x8a72, 0x0085}, +{0x8a73, 0x0010}, {0x8a74, 0x0027}, {0x8a75, 0x00af}, +{0x8a76, 0x00d6}, {0x8a77, 0x0022}, {0x8a78, 0x00c4}, +{0x8a79, 0x0010}, {0x8a7a, 0x0058}, {0x8a7b, 0x00b6}, +{0x8a7c, 0x0012}, {0x8a7d, 0x0070}, {0x8a7e, 0x0081}, +{0x8a7f, 0x00e4}, {0x8a80, 0x0027}, {0x8a81, 0x0036}, +{0x8a82, 0x0081}, {0x8a83, 0x00e1}, {0x8a84, 0x0026}, +{0x8a85, 0x000c}, {0x8a86, 0x0096}, {0x8a87, 0x0022}, +{0x8a88, 0x0084}, {0x8a89, 0x0020}, {0x8a8a, 0x0044}, +{0x8a8b, 0x001b}, {0x8a8c, 0x00d6}, {0x8a8d, 0x0022}, +{0x8a8e, 0x00c4}, {0x8a8f, 0x00cf}, {0x8a90, 0x0020}, +{0x8a91, 0x0023}, {0x8a92, 0x0058}, {0x8a93, 0x0081}, +{0x8a94, 0x00c6}, {0x8a95, 0x0026}, {0x8a96, 0x000d}, +{0x8a97, 0x0096}, {0x8a98, 0x0022}, {0x8a99, 0x0084}, +{0x8a9a, 0x0040}, {0x8a9b, 0x0044}, {0x8a9c, 0x0044}, +{0x8a9d, 0x001b}, {0x8a9e, 0x00d6}, {0x8a9f, 0x0022}, +{0x8aa0, 0x00c4}, {0x8aa1, 0x00af}, {0x8aa2, 0x0020}, +{0x8aa3, 0x0011}, {0x8aa4, 0x0058}, {0x8aa5, 0x0081}, +{0x8aa6, 0x0027}, {0x8aa7, 0x0026}, {0x8aa8, 0x000f}, +{0x8aa9, 0x0096}, {0x8aaa, 0x0022}, {0x8aab, 0x0084}, +{0x8aac, 0x0080}, {0x8aad, 0x0044}, {0x8aae, 0x0044}, +{0x8aaf, 0x0044}, {0x8ab0, 0x001b}, {0x8ab1, 0x00d6}, +{0x8ab2, 0x0022}, {0x8ab3, 0x00c4}, {0x8ab4, 0x006f}, +{0x8ab5, 0x001b}, {0x8ab6, 0x0097}, {0x8ab7, 0x0022}, +{0x8ab8, 0x0039}, {0x8ab9, 0x0027}, {0x8aba, 0x000c}, +{0x8abb, 0x007c}, {0x8abc, 0x0082}, {0x8abd, 0x0006}, +{0x8abe, 0x00bd}, {0x8abf, 0x00d9}, {0x8ac0, 0x00ed}, +{0x8ac1, 0x00b6}, {0x8ac2, 0x0082}, {0x8ac3, 0x0007}, +{0x8ac4, 0x007e}, {0x8ac5, 0x008a}, {0x8ac6, 0x00b9}, +{0x8ac7, 0x007f}, {0x8ac8, 0x0082}, {0x8ac9, 0x0006}, +{0x8aca, 0x0039}, { 0x0, 0x0 } +}; +#endif + + +/* phy types */ +#define CAS_PHY_UNKNOWN 0x00 +#define CAS_PHY_SERDES 0x01 +#define CAS_PHY_MII_MDIO0 0x02 +#define CAS_PHY_MII_MDIO1 0x04 +#define CAS_PHY_MII(x) ((x) & (CAS_PHY_MII_MDIO0 | CAS_PHY_MII_MDIO1)) + +/* _RING_INDEX is the index for the ring sizes to be used. _RING_SIZE + * is the actual size. the default index for the various rings is + * 8. NOTE: there a bunch of alignment constraints for the rings. to + * deal with that, i just allocate rings to create the desired + * alignment. here are the constraints: + * RX DESC and COMP rings must be 8KB aligned + * TX DESC must be 2KB aligned. + * if you change the numbers, be cognizant of how the alignment will change + * in INIT_BLOCK as well. + */ + +#define DESC_RING_I_TO_S(x) (32*(1 << (x))) +#define COMP_RING_I_TO_S(x) (128*(1 << (x))) +#define TX_DESC_RING_INDEX 4 /* 512 = 8k */ +#define RX_DESC_RING_INDEX 4 /* 512 = 8k */ +#define RX_COMP_RING_INDEX 4 /* 2048 = 64k: should be 4x rx ring size */ + +#if (TX_DESC_RING_INDEX > 8) || (TX_DESC_RING_INDEX < 0) +#error TX_DESC_RING_INDEX must be between 0 and 8 +#endif + +#if (RX_DESC_RING_INDEX > 8) || (RX_DESC_RING_INDEX < 0) +#error RX_DESC_RING_INDEX must be between 0 and 8 +#endif + +#if (RX_COMP_RING_INDEX > 8) || (RX_COMP_RING_INDEX < 0) +#error RX_COMP_RING_INDEX must be between 0 and 8 +#endif + +#define N_TX_RINGS MAX_TX_RINGS /* for QoS */ +#define N_TX_RINGS_MASK MAX_TX_RINGS_MASK +#define N_RX_DESC_RINGS MAX_RX_DESC_RINGS /* 1 for ipsec */ +#define N_RX_COMP_RINGS 0x1 /* for mult. PCI interrupts */ + +/* number of flows that can go through re-assembly */ +#define N_RX_FLOWS 64 + +#define TX_DESC_RING_SIZE DESC_RING_I_TO_S(TX_DESC_RING_INDEX) +#define RX_DESC_RING_SIZE DESC_RING_I_TO_S(RX_DESC_RING_INDEX) +#define RX_COMP_RING_SIZE COMP_RING_I_TO_S(RX_COMP_RING_INDEX) +#define TX_DESC_RINGN_INDEX(x) TX_DESC_RING_INDEX +#define RX_DESC_RINGN_INDEX(x) RX_DESC_RING_INDEX +#define RX_COMP_RINGN_INDEX(x) RX_COMP_RING_INDEX +#define TX_DESC_RINGN_SIZE(x) TX_DESC_RING_SIZE +#define RX_DESC_RINGN_SIZE(x) RX_DESC_RING_SIZE +#define RX_COMP_RINGN_SIZE(x) RX_COMP_RING_SIZE + +/* convert values */ +#define CAS_BASE(x, y) (((y) << (x ## _SHIFT)) & (x ## _MASK)) +#define CAS_VAL(x, y) (((y) & (x ## _MASK)) >> (x ## _SHIFT)) +#define CAS_TX_RINGN_BASE(y) ((TX_DESC_RINGN_INDEX(y) << \ + TX_CFG_DESC_RINGN_SHIFT(y)) & \ + TX_CFG_DESC_RINGN_MASK(y)) + +/* min is 2k, but we can't do jumbo frames unless it's at least 8k */ +#define CAS_MIN_PAGE_SHIFT 11 /* 2048 */ +#define CAS_JUMBO_PAGE_SHIFT 13 /* 8192 */ +#define CAS_MAX_PAGE_SHIFT 14 /* 16384 */ + +#define TX_DESC_BUFLEN_MASK 0x0000000000003FFFULL /* buffer length in + bytes. 0 - 9256 */ +#define TX_DESC_BUFLEN_SHIFT 0 +#define TX_DESC_CSUM_START_MASK 0x00000000001F8000ULL /* checksum start. # + of bytes to be + skipped before + csum calc begins. + value must be + even */ +#define TX_DESC_CSUM_START_SHIFT 15 +#define TX_DESC_CSUM_STUFF_MASK 0x000000001FE00000ULL /* checksum stuff. + byte offset w/in + the pkt for the + 1st csum byte. + must be > 8 */ +#define TX_DESC_CSUM_STUFF_SHIFT 21 +#define TX_DESC_CSUM_EN 0x0000000020000000ULL /* enable checksum */ +#define TX_DESC_EOF 0x0000000040000000ULL /* end of frame */ +#define TX_DESC_SOF 0x0000000080000000ULL /* start of frame */ +#define TX_DESC_INTME 0x0000000100000000ULL /* interrupt me */ +#define TX_DESC_NO_CRC 0x0000000200000000ULL /* debugging only. + CRC will not be + inserted into + outgoing frame. */ +struct cas_tx_desc { + u64 control; + u64 buffer; +}; + +/* descriptor ring for free buffers contains page-sized buffers. the index + * value is not used by the hw in any way. it's just stored and returned in + * the completion ring. + */ +struct cas_rx_desc { + u64 index; + u64 buffer; +}; + +/* received packets are put on the completion ring. */ +/* word 1 */ +#define RX_COMP1_DATA_SIZE_MASK 0x0000000007FFE000ULL +#define RX_COMP1_DATA_SIZE_SHIFT 13 +#define RX_COMP1_DATA_OFF_MASK 0x000001FFF8000000ULL +#define RX_COMP1_DATA_OFF_SHIFT 27 +#define RX_COMP1_DATA_INDEX_MASK 0x007FFE0000000000ULL +#define RX_COMP1_DATA_INDEX_SHIFT 41 +#define RX_COMP1_SKIP_MASK 0x0180000000000000ULL +#define RX_COMP1_SKIP_SHIFT 55 +#define RX_COMP1_RELEASE_NEXT 0x0200000000000000ULL +#define RX_COMP1_SPLIT_PKT 0x0400000000000000ULL +#define RX_COMP1_RELEASE_FLOW 0x0800000000000000ULL +#define RX_COMP1_RELEASE_DATA 0x1000000000000000ULL +#define RX_COMP1_RELEASE_HDR 0x2000000000000000ULL +#define RX_COMP1_TYPE_MASK 0xC000000000000000ULL +#define RX_COMP1_TYPE_SHIFT 62 + +/* word 2 */ +#define RX_COMP2_NEXT_INDEX_MASK 0x00000007FFE00000ULL +#define RX_COMP2_NEXT_INDEX_SHIFT 21 +#define RX_COMP2_HDR_SIZE_MASK 0x00000FF800000000ULL +#define RX_COMP2_HDR_SIZE_SHIFT 35 +#define RX_COMP2_HDR_OFF_MASK 0x0003F00000000000ULL +#define RX_COMP2_HDR_OFF_SHIFT 44 +#define RX_COMP2_HDR_INDEX_MASK 0xFFFC000000000000ULL +#define RX_COMP2_HDR_INDEX_SHIFT 50 + +/* word 3 */ +#define RX_COMP3_SMALL_PKT 0x0000000000000001ULL +#define RX_COMP3_JUMBO_PKT 0x0000000000000002ULL +#define RX_COMP3_JUMBO_HDR_SPLIT_EN 0x0000000000000004ULL +#define RX_COMP3_CSUM_START_MASK 0x000000000007F000ULL +#define RX_COMP3_CSUM_START_SHIFT 12 +#define RX_COMP3_FLOWID_MASK 0x0000000001F80000ULL +#define RX_COMP3_FLOWID_SHIFT 19 +#define RX_COMP3_OPCODE_MASK 0x000000000E000000ULL +#define RX_COMP3_OPCODE_SHIFT 25 +#define RX_COMP3_FORCE_FLAG 0x0000000010000000ULL +#define RX_COMP3_NO_ASSIST 0x0000000020000000ULL +#define RX_COMP3_LOAD_BAL_MASK 0x000001F800000000ULL +#define RX_COMP3_LOAD_BAL_SHIFT 35 +#define RX_PLUS_COMP3_ENC_PKT 0x0000020000000000ULL /* cas+ */ +#define RX_COMP3_L3_HEAD_OFF_MASK 0x0000FE0000000000ULL /* cas */ +#define RX_COMP3_L3_HEAD_OFF_SHIFT 41 +#define RX_PLUS_COMP_L3_HEAD_OFF_MASK 0x0000FC0000000000ULL /* cas+ */ +#define RX_PLUS_COMP_L3_HEAD_OFF_SHIFT 42 +#define RX_COMP3_SAP_MASK 0xFFFF000000000000ULL +#define RX_COMP3_SAP_SHIFT 48 + +/* word 4 */ +#define RX_COMP4_TCP_CSUM_MASK 0x000000000000FFFFULL +#define RX_COMP4_TCP_CSUM_SHIFT 0 +#define RX_COMP4_PKT_LEN_MASK 0x000000003FFF0000ULL +#define RX_COMP4_PKT_LEN_SHIFT 16 +#define RX_COMP4_PERFECT_MATCH_MASK 0x00000003C0000000ULL +#define RX_COMP4_PERFECT_MATCH_SHIFT 30 +#define RX_COMP4_ZERO 0x0000080000000000ULL +#define RX_COMP4_HASH_VAL_MASK 0x0FFFF00000000000ULL +#define RX_COMP4_HASH_VAL_SHIFT 44 +#define RX_COMP4_HASH_PASS 0x1000000000000000ULL +#define RX_COMP4_BAD 0x4000000000000000ULL +#define RX_COMP4_LEN_MISMATCH 0x8000000000000000ULL + +/* we encode the following: ring/index/release. only 14 bits + * are usable. + * NOTE: the encoding is dependent upon RX_DESC_RING_SIZE and + * MAX_RX_DESC_RINGS. */ +#define RX_INDEX_NUM_MASK 0x0000000000000FFFULL +#define RX_INDEX_NUM_SHIFT 0 +#define RX_INDEX_RING_MASK 0x0000000000001000ULL +#define RX_INDEX_RING_SHIFT 12 +#define RX_INDEX_RELEASE 0x0000000000002000ULL + +struct cas_rx_comp { + u64 word1; + u64 word2; + u64 word3; + u64 word4; +}; + +enum link_state { + link_down = 0, /* No link, will retry */ + link_aneg, /* Autoneg in progress */ + link_force_try, /* Try Forced link speed */ + link_force_ret, /* Forced mode worked, retrying autoneg */ + link_force_ok, /* Stay in forced mode */ + link_up /* Link is up */ +}; + +typedef struct cas_page { + struct list_head list; + struct page *buffer; + dma_addr_t dma_addr; + int used; +} cas_page_t; + + +/* some alignment constraints: + * TX DESC, RX DESC, and RX COMP must each be 8K aligned. + * TX COMPWB must be 8-byte aligned. + * to accomplish this, here's what we do: + * + * INIT_BLOCK_RX_COMP = 64k (already aligned) + * INIT_BLOCK_RX_DESC = 8k + * INIT_BLOCK_TX = 8k + * INIT_BLOCK_RX1_DESC = 8k + * TX COMPWB + */ +#define INIT_BLOCK_TX (TX_DESC_RING_SIZE) +#define INIT_BLOCK_RX_DESC (RX_DESC_RING_SIZE) +#define INIT_BLOCK_RX_COMP (RX_COMP_RING_SIZE) + +struct cas_init_block { + struct cas_rx_comp rxcs[N_RX_COMP_RINGS][INIT_BLOCK_RX_COMP]; + struct cas_rx_desc rxds[N_RX_DESC_RINGS][INIT_BLOCK_RX_DESC]; + struct cas_tx_desc txds[N_TX_RINGS][INIT_BLOCK_TX]; + u64 tx_compwb; +}; + +/* tiny buffers to deal with target abort issue. we allocate a bit + * over so that we don't have target abort issues with these buffers + * as well. + */ +#define TX_TINY_BUF_LEN 0x100 +#define TX_TINY_BUF_BLOCK ((INIT_BLOCK_TX + 1)*TX_TINY_BUF_LEN) + +struct cas_tiny_count { + int nbufs; + int used; +}; + +struct cas { + spinlock_t lock; /* for most bits */ + spinlock_t tx_lock[N_TX_RINGS]; /* tx bits */ + spinlock_t stat_lock[N_TX_RINGS + 1]; /* for stat gathering */ + spinlock_t rx_inuse_lock; /* rx inuse list */ + spinlock_t rx_spare_lock; /* rx spare list */ + + void __iomem *regs; + int tx_new[N_TX_RINGS], tx_old[N_TX_RINGS]; + int rx_old[N_RX_DESC_RINGS]; + int rx_cur[N_RX_COMP_RINGS], rx_new[N_RX_COMP_RINGS]; + int rx_last[N_RX_DESC_RINGS]; + + /* Set when chip is actually in operational state + * (ie. not power managed) */ + int hw_running; + int opened; + struct semaphore pm_sem; /* open/close/suspend/resume */ + + struct cas_init_block *init_block; + struct cas_tx_desc *init_txds[MAX_TX_RINGS]; + struct cas_rx_desc *init_rxds[MAX_RX_DESC_RINGS]; + struct cas_rx_comp *init_rxcs[MAX_RX_COMP_RINGS]; + + /* we use sk_buffs for tx and pages for rx. the rx skbuffs + * are there for flow re-assembly. */ + struct sk_buff *tx_skbs[N_TX_RINGS][TX_DESC_RING_SIZE]; + struct sk_buff_head rx_flows[N_RX_FLOWS]; + cas_page_t *rx_pages[N_RX_DESC_RINGS][RX_DESC_RING_SIZE]; + struct list_head rx_spare_list, rx_inuse_list; + int rx_spares_needed; + + /* for small packets when copying would be quicker than + mapping */ + struct cas_tiny_count tx_tiny_use[N_TX_RINGS][TX_DESC_RING_SIZE]; + u8 *tx_tiny_bufs[N_TX_RINGS]; + + u32 msg_enable; + + /* N_TX_RINGS must be >= N_RX_DESC_RINGS */ + struct net_device_stats net_stats[N_TX_RINGS + 1]; + + u32 pci_cfg[64 >> 2]; + u8 pci_revision; + + int phy_type; + int phy_addr; + u32 phy_id; +#define CAS_FLAG_1000MB_CAP 0x00000001 +#define CAS_FLAG_REG_PLUS 0x00000002 +#define CAS_FLAG_TARGET_ABORT 0x00000004 +#define CAS_FLAG_SATURN 0x00000008 +#define CAS_FLAG_RXD_POST_MASK 0x000000F0 +#define CAS_FLAG_RXD_POST_SHIFT 4 +#define CAS_FLAG_RXD_POST(x) ((1 << (CAS_FLAG_RXD_POST_SHIFT + (x))) & \ + CAS_FLAG_RXD_POST_MASK) +#define CAS_FLAG_ENTROPY_DEV 0x00000100 +#define CAS_FLAG_NO_HW_CSUM 0x00000200 + u32 cas_flags; + int packet_min; /* minimum packet size */ + int tx_fifo_size; + int rx_fifo_size; + int rx_pause_off; + int rx_pause_on; + int crc_size; /* 4 if half-duplex */ + + int pci_irq_INTC; + int min_frame_size; /* for tx fifo workaround */ + + /* page size allocation */ + int page_size; + int page_order; + int mtu_stride; + + u32 mac_rx_cfg; + + /* Autoneg & PHY control */ + int link_cntl; + int link_fcntl; + enum link_state lstate; + struct timer_list link_timer; + int timer_ticks; + struct work_struct reset_task; +#if 0 + atomic_t reset_task_pending; +#else + atomic_t reset_task_pending; + atomic_t reset_task_pending_mtu; + atomic_t reset_task_pending_spare; + atomic_t reset_task_pending_all; +#endif + +#ifdef CONFIG_CASSINI_QGE_DEBUG + atomic_t interrupt_seen; /* 1 if any interrupts are getting through */ +#endif + + /* Link-down problem workaround */ +#define LINK_TRANSITION_UNKNOWN 0 +#define LINK_TRANSITION_ON_FAILURE 1 +#define LINK_TRANSITION_STILL_FAILED 2 +#define LINK_TRANSITION_LINK_UP 3 +#define LINK_TRANSITION_LINK_CONFIG 4 +#define LINK_TRANSITION_LINK_DOWN 5 +#define LINK_TRANSITION_REQUESTED_RESET 6 + int link_transition; + int link_transition_jiffies_valid; + unsigned long link_transition_jiffies; + + /* Tuning */ + u8 orig_cacheline_size; /* value when loaded */ +#define CAS_PREF_CACHELINE_SIZE 0x20 /* Minimum desired */ + + /* Diagnostic counters and state. */ + int casreg_len; /* reg-space size for dumping */ + u64 pause_entered; + u16 pause_last_time_recvd; + + dma_addr_t block_dvma, tx_tiny_dvma[N_TX_RINGS]; + struct pci_dev *pdev; + struct net_device *dev; +}; + +#define TX_DESC_NEXT(r, x) (((x) + 1) & (TX_DESC_RINGN_SIZE(r) - 1)) +#define RX_DESC_ENTRY(r, x) ((x) & (RX_DESC_RINGN_SIZE(r) - 1)) +#define RX_COMP_ENTRY(r, x) ((x) & (RX_COMP_RINGN_SIZE(r) - 1)) + +#define TX_BUFF_COUNT(r, x, y) ((x) <= (y) ? ((y) - (x)) : \ + (TX_DESC_RINGN_SIZE(r) - (x) + (y))) + +#define TX_BUFFS_AVAIL(cp, i) ((cp)->tx_old[(i)] <= (cp)->tx_new[(i)] ? \ + (cp)->tx_old[(i)] + (TX_DESC_RINGN_SIZE(i) - 1) - (cp)->tx_new[(i)] : \ + (cp)->tx_old[(i)] - (cp)->tx_new[(i)] - 1) + +#define CAS_ALIGN(addr, align) \ + (((unsigned long) (addr) + ((align) - 1UL)) & ~((align) - 1)) + +#define RX_FIFO_SIZE 16384 +#define EXPANSION_ROM_SIZE 65536 + +#define CAS_MC_EXACT_MATCH_SIZE 15 +#define CAS_MC_HASH_SIZE 256 +#define CAS_MC_HASH_MAX (CAS_MC_EXACT_MATCH_SIZE + \ + CAS_MC_HASH_SIZE) + +#define TX_TARGET_ABORT_LEN 0x20 +#define RX_SWIVEL_OFF_VAL 0x2 +#define RX_AE_FREEN_VAL(x) (RX_DESC_RINGN_SIZE(x) >> 1) +#define RX_AE_COMP_VAL (RX_COMP_RING_SIZE >> 1) +#define RX_BLANK_INTR_PKT_VAL 0x05 +#define RX_BLANK_INTR_TIME_VAL 0x0F +#define HP_TCP_THRESH_VAL 1530 /* reduce to enable reassembly */ + +#define RX_SPARE_COUNT (RX_DESC_RING_SIZE >> 1) +#define RX_SPARE_RECOVER_VAL (RX_SPARE_COUNT >> 2) + +#endif /* _CASSINI_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 68f11ac1a314..eb36fd293b41 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -392,6 +392,7 @@ #define PCI_DEVICE_ID_NS_87560_USB 0x0012 #define PCI_DEVICE_ID_NS_83815 0x0020 #define PCI_DEVICE_ID_NS_83820 0x0022 +#define PCI_DEVICE_ID_NS_SATURN 0x0035 #define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500 #define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501 #define PCI_DEVICE_ID_NS_SCx200_IDE 0x0502 @@ -983,6 +984,7 @@ #define PCI_DEVICE_ID_SUN_SABRE 0xa000 #define PCI_DEVICE_ID_SUN_HUMMINGBIRD 0xa001 #define PCI_DEVICE_ID_SUN_TOMATILLO 0xa801 +#define PCI_DEVICE_ID_SUN_CASSINI 0xabba #define PCI_VENDOR_ID_CMD 0x1095 #define PCI_DEVICE_ID_CMD_640 0x0640 -- cgit v1.2.3 From 2fab35d78f32fc107e1af4b1ec23f557fa20d911 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 27 Sep 2005 15:59:43 -0700 Subject: [NET]: Fix GCC4 compile error: sysctl in linux/if_ether.h The following is generated when compiling a recent (2.6.14-rc2-git5) kernel configured for ARM, with GCC4. CC init/main.o In file included from include/linux/netdevice.h:29, from include/net/sock.h:48, from init/main.c:50: include/linux/if_ether.h:114: error: array type has incomplete element type It seems that if CONFIG_SYSCTL is not set, then the compiler will throw an error due to the definition of the ether_table[] array Attached is a solution to the problem Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index fc2d4c8225aa..d21c305c6c64 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -111,7 +111,9 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb->mac.raw; } +#ifdef CONFIG_SYSCTL extern struct ctl_table ether_table[]; #endif +#endif #endif /* _LINUX_IF_ETHER_H */ -- cgit v1.2.3 From 14be71f4c5c5ad1e222c5202ee6d234e9c8828b7 Mon Sep 17 00:00:00 2001 From: Albert Lee Date: Tue, 27 Sep 2005 17:36:35 +0800 Subject: [PATCH] libata: rename host states Changes: s/PIO_ST_/HSM_ST_/ and s/pio_task_state/hsm_task_state/. Signed-off-by: Albert Lee Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 78 +++++++++++++++++++++++----------------------- include/linux/libata.h | 20 ++++++------ 2 files changed, 49 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index cc68f5706acf..c4fcdc30f18c 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -2425,20 +2425,20 @@ void ata_poll_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat) static unsigned long ata_pio_poll(struct ata_port *ap) { u8 status; - unsigned int poll_state = PIO_ST_UNKNOWN; - unsigned int reg_state = PIO_ST_UNKNOWN; - const unsigned int tmout_state = PIO_ST_TMOUT; - - switch (ap->pio_task_state) { - case PIO_ST: - case PIO_ST_POLL: - poll_state = PIO_ST_POLL; - reg_state = PIO_ST; + unsigned int poll_state = HSM_ST_UNKNOWN; + unsigned int reg_state = HSM_ST_UNKNOWN; + const unsigned int tmout_state = HSM_ST_TMOUT; + + switch (ap->hsm_task_state) { + case HSM_ST: + case HSM_ST_POLL: + poll_state = HSM_ST_POLL; + reg_state = HSM_ST; break; - case PIO_ST_LAST: - case PIO_ST_LAST_POLL: - poll_state = PIO_ST_LAST_POLL; - reg_state = PIO_ST_LAST; + case HSM_ST_LAST: + case HSM_ST_LAST_POLL: + poll_state = HSM_ST_LAST_POLL; + reg_state = HSM_ST_LAST; break; default: BUG(); @@ -2448,14 +2448,14 @@ static unsigned long ata_pio_poll(struct ata_port *ap) status = ata_chk_status(ap); if (status & ATA_BUSY) { if (time_after(jiffies, ap->pio_task_timeout)) { - ap->pio_task_state = tmout_state; + ap->hsm_task_state = tmout_state; return 0; } - ap->pio_task_state = poll_state; + ap->hsm_task_state = poll_state; return ATA_SHORT_PAUSE; } - ap->pio_task_state = reg_state; + ap->hsm_task_state = reg_state; return 0; } @@ -2480,14 +2480,14 @@ static int ata_pio_complete (struct ata_port *ap) * we enter, BSY will be cleared in a chk-status or two. If not, * the drive is probably seeking or something. Snooze for a couple * msecs, then chk-status again. If still busy, fall back to - * PIO_ST_POLL state. + * HSM_ST_POLL state. */ drv_stat = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 10); if (drv_stat & (ATA_BUSY | ATA_DRQ)) { msleep(2); drv_stat = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 10); if (drv_stat & (ATA_BUSY | ATA_DRQ)) { - ap->pio_task_state = PIO_ST_LAST_POLL; + ap->hsm_task_state = HSM_ST_LAST_POLL; ap->pio_task_timeout = jiffies + ATA_TMOUT_PIO; return 0; } @@ -2495,14 +2495,14 @@ static int ata_pio_complete (struct ata_port *ap) drv_stat = ata_wait_idle(ap); if (!ata_ok(drv_stat)) { - ap->pio_task_state = PIO_ST_ERR; + ap->hsm_task_state = HSM_ST_ERR; return 0; } qc = ata_qc_from_tag(ap, ap->active_tag); assert(qc != NULL); - ap->pio_task_state = PIO_ST_IDLE; + ap->hsm_task_state = HSM_ST_IDLE; ata_poll_qc_complete(qc, drv_stat); @@ -2662,7 +2662,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) unsigned char *buf; if (qc->cursect == (qc->nsect - 1)) - ap->pio_task_state = PIO_ST_LAST; + ap->hsm_task_state = HSM_ST_LAST; page = sg[qc->cursg].page; offset = sg[qc->cursg].offset + qc->cursg_ofs * ATA_SECT_SIZE; @@ -2712,7 +2712,7 @@ static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) unsigned int offset, count; if (qc->curbytes + bytes >= qc->nbytes) - ap->pio_task_state = PIO_ST_LAST; + ap->hsm_task_state = HSM_ST_LAST; next_sg: if (unlikely(qc->cursg >= qc->n_elem)) { @@ -2734,7 +2734,7 @@ next_sg: for (i = 0; i < words; i++) ata_data_xfer(ap, (unsigned char*)pad_buf, 2, do_write); - ap->pio_task_state = PIO_ST_LAST; + ap->hsm_task_state = HSM_ST_LAST; return; } @@ -2815,7 +2815,7 @@ static void atapi_pio_bytes(struct ata_queued_cmd *qc) err_out: printk(KERN_INFO "ata%u: dev %u: ATAPI check failed\n", ap->id, dev->devno); - ap->pio_task_state = PIO_ST_ERR; + ap->hsm_task_state = HSM_ST_ERR; } /** @@ -2837,14 +2837,14 @@ static void ata_pio_block(struct ata_port *ap) * a chk-status or two. If not, the drive is probably seeking * or something. Snooze for a couple msecs, then * chk-status again. If still busy, fall back to - * PIO_ST_POLL state. + * HSM_ST_POLL state. */ status = ata_busy_wait(ap, ATA_BUSY, 5); if (status & ATA_BUSY) { msleep(2); status = ata_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { - ap->pio_task_state = PIO_ST_POLL; + ap->hsm_task_state = HSM_ST_POLL; ap->pio_task_timeout = jiffies + ATA_TMOUT_PIO; return; } @@ -2856,7 +2856,7 @@ static void ata_pio_block(struct ata_port *ap) if (is_atapi_taskfile(&qc->tf)) { /* no more data to transfer or unsupported ATAPI command */ if ((status & ATA_DRQ) == 0) { - ap->pio_task_state = PIO_ST_LAST; + ap->hsm_task_state = HSM_ST_LAST; return; } @@ -2864,7 +2864,7 @@ static void ata_pio_block(struct ata_port *ap) } else { /* handle BSY=0, DRQ=0 as error */ if ((status & ATA_DRQ) == 0) { - ap->pio_task_state = PIO_ST_ERR; + ap->hsm_task_state = HSM_ST_ERR; return; } @@ -2884,7 +2884,7 @@ static void ata_pio_error(struct ata_port *ap) printk(KERN_WARNING "ata%u: PIO error, drv_stat 0x%x\n", ap->id, drv_stat); - ap->pio_task_state = PIO_ST_IDLE; + ap->hsm_task_state = HSM_ST_IDLE; ata_poll_qc_complete(qc, drv_stat | ATA_ERR); } @@ -2899,25 +2899,25 @@ fsm_start: timeout = 0; qc_completed = 0; - switch (ap->pio_task_state) { - case PIO_ST_IDLE: + switch (ap->hsm_task_state) { + case HSM_ST_IDLE: return; - case PIO_ST: + case HSM_ST: ata_pio_block(ap); break; - case PIO_ST_LAST: + case HSM_ST_LAST: qc_completed = ata_pio_complete(ap); break; - case PIO_ST_POLL: - case PIO_ST_LAST_POLL: + case HSM_ST_POLL: + case HSM_ST_LAST_POLL: timeout = ata_pio_poll(ap); break; - case PIO_ST_TMOUT: - case PIO_ST_ERR: + case HSM_ST_TMOUT: + case HSM_ST_ERR: ata_pio_error(ap); return; } @@ -3360,7 +3360,7 @@ int ata_qc_issue_prot(struct ata_queued_cmd *qc) case ATA_PROT_PIO: /* load tf registers, initiate polling pio */ ata_qc_set_polling(qc); ata_tf_to_host_nolock(ap, &qc->tf); - ap->pio_task_state = PIO_ST; + ap->hsm_task_state = HSM_ST; queue_work(ata_wq, &ap->pio_task); break; @@ -3806,7 +3806,7 @@ static void atapi_packet_task(void *_data) ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1); /* PIO commands are handled by polling */ - ap->pio_task_state = PIO_ST; + ap->hsm_task_state = HSM_ST; queue_work(ata_wq, &ap->pio_task); } diff --git a/include/linux/libata.h b/include/linux/libata.h index ceee1fc42c60..bb2d916bce44 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -156,15 +156,15 @@ enum { ATA_SHIFT_PIO = 11, }; -enum pio_task_states { - PIO_ST_UNKNOWN, - PIO_ST_IDLE, - PIO_ST_POLL, - PIO_ST_TMOUT, - PIO_ST, - PIO_ST_LAST, - PIO_ST_LAST_POLL, - PIO_ST_ERR, +enum hsm_task_states { + HSM_ST_UNKNOWN, + HSM_ST_IDLE, + HSM_ST_POLL, + HSM_ST_TMOUT, + HSM_ST, + HSM_ST_LAST, + HSM_ST_LAST_POLL, + HSM_ST_ERR, }; /* forward declarations */ @@ -319,7 +319,7 @@ struct ata_port { struct work_struct packet_task; struct work_struct pio_task; - unsigned int pio_task_state; + unsigned int hsm_task_state; unsigned long pio_task_timeout; void *private_data; -- cgit v1.2.3 From 664cceb0093b755739e56572b836a99104ee8a75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Sep 2005 17:03:15 +0100 Subject: [PATCH] Keys: Add possessor permissions to keys [try #3] The attached patch adds extra permission grants to keys for the possessor of a key in addition to the owner, group and other permissions bits. This makes SUID binaries easier to support without going as far as labelling keys and key targets using the LSM facilities. This patch adds a second "pointer type" to key structures (struct key_ref *) that can have the bottom bit of the address set to indicate the possession of a key. This is propagated through searches from the keyring to the discovered key. It has been made a separate type so that the compiler can spot attempts to dereference a potentially incorrect pointer. The "possession" attribute can't be attached to a key structure directly as it's not an intrinsic property of a key. Pointers to keys have been replaced with struct key_ref *'s wherever possession information needs to be passed through. This does assume that the bottom bit of the pointer will always be zero on return from kmem_cache_alloc(). The key reference type has been made into a typedef so that at least it can be located in the sources, even though it's basically a pointer to an undefined type. I've also renamed the accessor functions to be more useful, and all reference variables should now end in "_ref". Signed-Off-By: David Howells Signed-off-by: Linus Torvalds --- Documentation/keys.txt | 74 +++++++--- include/linux/key-ui.h | 28 ++-- include/linux/key.h | 78 ++++++++-- security/keys/internal.h | 26 ++-- security/keys/key.c | 81 ++++++----- security/keys/keyctl.c | 301 ++++++++++++++++++++------------------- security/keys/keyring.c | 86 ++++++----- security/keys/proc.c | 2 +- security/keys/process_keys.c | 164 +++++++++++++-------- security/keys/request_key.c | 36 ++++- security/keys/request_key_auth.c | 2 +- 11 files changed, 528 insertions(+), 350 deletions(-) (limited to 'include/linux') diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 0321ded4b9ae..b22e7c8d059a 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -195,8 +195,8 @@ KEY ACCESS PERMISSIONS ====================== Keys have an owner user ID, a group access ID, and a permissions mask. The mask -has up to eight bits each for user, group and other access. Only five of each -set of eight bits are defined. These permissions granted are: +has up to eight bits each for possessor, user, group and other access. Only +five of each set of eight bits are defined. These permissions granted are: (*) View @@ -241,16 +241,16 @@ about the status of the key service: type, description and permissions. The payload of the key is not available this way: - SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY - 00000001 I----- 39 perm 1f0000 0 0 keyring _uid_ses.0: 1/4 - 00000002 I----- 2 perm 1f0000 0 0 keyring _uid.0: empty - 00000007 I----- 1 perm 1f0000 0 0 keyring _pid.1: empty - 0000018d I----- 1 perm 1f0000 0 0 keyring _pid.412: empty - 000004d2 I--Q-- 1 perm 1f0000 32 -1 keyring _uid.32: 1/4 - 000004d3 I--Q-- 3 perm 1f0000 32 -1 keyring _uid_ses.32: empty - 00000892 I--QU- 1 perm 1f0000 0 0 user metal:copper: 0 - 00000893 I--Q-N 1 35s 1f0000 0 0 user metal:silver: 0 - 00000894 I--Q-- 1 10h 1f0000 0 0 user metal:gold: 0 + SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY + 00000001 I----- 39 perm 1f1f0000 0 0 keyring _uid_ses.0: 1/4 + 00000002 I----- 2 perm 1f1f0000 0 0 keyring _uid.0: empty + 00000007 I----- 1 perm 1f1f0000 0 0 keyring _pid.1: empty + 0000018d I----- 1 perm 1f1f0000 0 0 keyring _pid.412: empty + 000004d2 I--Q-- 1 perm 1f1f0000 32 -1 keyring _uid.32: 1/4 + 000004d3 I--Q-- 3 perm 1f1f0000 32 -1 keyring _uid_ses.32: empty + 00000892 I--QU- 1 perm 1f000000 0 0 user metal:copper: 0 + 00000893 I--Q-N 1 35s 1f1f0000 0 0 user metal:silver: 0 + 00000894 I--Q-- 1 10h 001f0000 0 0 user metal:gold: 0 The flags are: @@ -637,6 +637,34 @@ call, and the key released upon close. How to deal with conflicting keys due to two different users opening the same file is left to the filesystem author to solve. +Note that there are two different types of pointers to keys that may be +encountered: + + (*) struct key * + + This simply points to the key structure itself. Key structures will be at + least four-byte aligned. + + (*) key_ref_t + + This is equivalent to a struct key *, but the least significant bit is set + if the caller "possesses" the key. By "possession" it is meant that the + calling processes has a searchable link to the key from one of its + keyrings. There are three functions for dealing with these: + + key_ref_t make_key_ref(const struct key *key, + unsigned long possession); + + struct key *key_ref_to_ptr(const key_ref_t key_ref); + + unsigned long is_key_possessed(const key_ref_t key_ref); + + The first function constructs a key reference from a key pointer and + possession information (which must be 0 or 1 and not any other value). + + The second function retrieves the key pointer from a reference and the + third retrieves the possession flag. + When accessing a key's payload contents, certain precautions must be taken to prevent access vs modification races. See the section "Notes on accessing payload contents" for more information. @@ -665,7 +693,11 @@ payload contents" for more information. void key_put(struct key *key); - This can be called from interrupt context. If CONFIG_KEYS is not set then + Or: + + void key_ref_put(key_ref_t key_ref); + + These can be called from interrupt context. If CONFIG_KEYS is not set then the argument will not be parsed. @@ -689,13 +721,17 @@ payload contents" for more information. (*) If a keyring was found in the search, this can be further searched by: - struct key *keyring_search(struct key *keyring, - const struct key_type *type, - const char *description) + key_ref_t keyring_search(key_ref_t keyring_ref, + const struct key_type *type, + const char *description) This searches the keyring tree specified for a matching key. Error ENOKEY - is returned upon failure. If successful, the returned key will need to be - released. + is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful, + the returned key will need to be released. + + The possession attribute from the keyring reference is used to control + access through the permissions mask and is propagated to the returned key + reference pointer if successful. (*) To check the validity of a key, this function can be called: @@ -732,7 +768,7 @@ More complex payload contents must be allocated and a pointer to them set in key->payload.data. One of the following ways must be selected to access the data: - (1) Unmodifyable key type. + (1) Unmodifiable key type. If the key type does not have a modify method, then the key's payload can be accessed without any form of locking, provided that it's known to be diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h index cc326174a808..918c34a8347e 100644 --- a/include/linux/key-ui.h +++ b/include/linux/key-ui.h @@ -42,11 +42,14 @@ struct keyring_list { /* * check to see whether permission is granted to use a key in the desired way */ -static inline int key_permission(const struct key *key, key_perm_t perm) +static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == current->fsuid) + if (is_key_possessed(key_ref)) + kperm = key->perm >> 24; + else if (key->uid == current->fsuid) kperm = key->perm >> 16; else if (key->gid != -1 && key->perm & KEY_GRP_ALL && @@ -65,11 +68,14 @@ static inline int key_permission(const struct key *key, key_perm_t perm) * check to see whether permission is granted to use a key in at least one of * the desired ways */ -static inline int key_any_permission(const struct key *key, key_perm_t perm) +static inline int key_any_permission(const key_ref_t key_ref, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == current->fsuid) + if (is_key_possessed(key_ref)) + kperm = key->perm >> 24; + else if (key->uid == current->fsuid) kperm = key->perm >> 16; else if (key->gid != -1 && key->perm & KEY_GRP_ALL && @@ -94,13 +100,17 @@ static inline int key_task_groups_search(struct task_struct *tsk, gid_t gid) return ret; } -static inline int key_task_permission(const struct key *key, +static inline int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == context->fsuid) { + if (is_key_possessed(key_ref)) { + kperm = key->perm >> 24; + } + else if (key->uid == context->fsuid) { kperm = key->perm >> 16; } else if (key->gid != -1 && @@ -121,9 +131,9 @@ static inline int key_task_permission(const struct key *key, } -extern struct key *lookup_user_key(struct task_struct *context, - key_serial_t id, int create, int partial, - key_perm_t perm); +extern key_ref_t lookup_user_key(struct task_struct *context, + key_serial_t id, int create, int partial, + key_perm_t perm); extern long join_session_keyring(const char *name); diff --git a/include/linux/key.h b/include/linux/key.h index 970bbd916cf4..f1efa016dbf3 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -35,11 +35,18 @@ struct key; #undef KEY_DEBUGGING -#define KEY_USR_VIEW 0x00010000 /* user can view a key's attributes */ -#define KEY_USR_READ 0x00020000 /* user can read key payload / view keyring */ -#define KEY_USR_WRITE 0x00040000 /* user can update key payload / add link to keyring */ -#define KEY_USR_SEARCH 0x00080000 /* user can find a key in search / search a keyring */ -#define KEY_USR_LINK 0x00100000 /* user can create a link to a key/keyring */ +#define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */ +#define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */ +#define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */ +#define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */ +#define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */ +#define KEY_POS_ALL 0x1f000000 + +#define KEY_USR_VIEW 0x00010000 /* user permissions... */ +#define KEY_USR_READ 0x00020000 +#define KEY_USR_WRITE 0x00040000 +#define KEY_USR_SEARCH 0x00080000 +#define KEY_USR_LINK 0x00100000 #define KEY_USR_ALL 0x001f0000 #define KEY_GRP_VIEW 0x00000100 /* group permissions... */ @@ -65,6 +72,38 @@ struct key_owner; struct keyring_list; struct keyring_name; +/*****************************************************************************/ +/* + * key reference with possession attribute handling + * + * NOTE! key_ref_t is a typedef'd pointer to a type that is not actually + * defined. This is because we abuse the bottom bit of the reference to carry a + * flag to indicate whether the calling process possesses that key in one of + * its keyrings. + * + * the key_ref_t has been made a separate type so that the compiler can reject + * attempts to dereference it without proper conversion. + * + * the three functions are used to assemble and disassemble references + */ +typedef struct __key_reference_with_attributes *key_ref_t; + +static inline key_ref_t make_key_ref(const struct key *key, + unsigned long possession) +{ + return (key_ref_t) ((unsigned long) key | possession); +} + +static inline struct key *key_ref_to_ptr(const key_ref_t key_ref) +{ + return (struct key *) ((unsigned long) key_ref & ~1UL); +} + +static inline unsigned long is_key_possessed(const key_ref_t key_ref) +{ + return (unsigned long) key_ref & 1UL; +} + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -215,20 +254,25 @@ static inline struct key *key_get(struct key *key) return key; } +static inline void key_ref_put(key_ref_t key_ref) +{ + key_put(key_ref_to_ptr(key_ref)); +} + extern struct key *request_key(struct key_type *type, const char *description, const char *callout_info); extern int key_validate(struct key *key); -extern struct key *key_create_or_update(struct key *keyring, - const char *type, - const char *description, - const void *payload, - size_t plen, - int not_in_quota); +extern key_ref_t key_create_or_update(key_ref_t keyring, + const char *type, + const char *description, + const void *payload, + size_t plen, + int not_in_quota); -extern int key_update(struct key *key, +extern int key_update(key_ref_t key, const void *payload, size_t plen); @@ -243,9 +287,9 @@ extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, extern int keyring_clear(struct key *keyring); -extern struct key *keyring_search(struct key *keyring, - struct key_type *type, - const char *description); +extern key_ref_t keyring_search(key_ref_t keyring, + struct key_type *type, + const char *description); extern int keyring_add_key(struct key *keyring, struct key *key); @@ -285,6 +329,10 @@ extern void key_init(void); #define key_serial(k) 0 #define key_get(k) ({ NULL; }) #define key_put(k) do { } while(0) +#define key_ref_put(k) do { } while(0) +#define make_key_ref(k) ({ NULL; }) +#define key_ref_to_ptr(k) ({ NULL; }) +#define is_key_possessed(k) 0 #define alloc_uid_keyring(u) 0 #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(t, k) ({ NULL; }) diff --git a/security/keys/internal.h b/security/keys/internal.h index 46c8602661c9..db99ed434f3a 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -71,26 +71,26 @@ extern void keyring_publish_name(struct key *keyring); extern int __key_link(struct key *keyring, struct key *key); -extern struct key *__keyring_search_one(struct key *keyring, - const struct key_type *type, - const char *description, - key_perm_t perm); +extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, + const struct key_type *type, + const char *description, + key_perm_t perm); extern struct key *keyring_search_instkey(struct key *keyring, key_serial_t target_id); typedef int (*key_match_func_t)(const struct key *, const void *); -extern struct key *keyring_search_aux(struct key *keyring, - struct task_struct *tsk, - struct key_type *type, - const void *description, - key_match_func_t match); +extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, + struct task_struct *tsk, + struct key_type *type, + const void *description, + key_match_func_t match); -extern struct key *search_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - struct task_struct *tsk); +extern key_ref_t search_process_keyrings(struct key_type *type, + const void *description, + key_match_func_t match, + struct task_struct *tsk); extern struct key *find_keyring_by_name(const char *name, key_serial_t bound); diff --git a/security/keys/key.c b/security/keys/key.c index fb89f9844465..2182be9e9309 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -693,14 +693,15 @@ void key_type_put(struct key_type *ktype) * - the key has an incremented refcount * - we need to put the key if we get an error */ -static inline struct key *__key_update(struct key *key, const void *payload, - size_t plen) +static inline key_ref_t __key_update(key_ref_t key_ref, + const void *payload, size_t plen) { + struct key *key = key_ref_to_ptr(key_ref); int ret; /* need write permission on the key to update it */ ret = -EACCES; - if (!key_permission(key, KEY_WRITE)) + if (!key_permission(key_ref, KEY_WRITE)) goto error; ret = -EEXIST; @@ -719,12 +720,12 @@ static inline struct key *__key_update(struct key *key, const void *payload, if (ret < 0) goto error; - out: - return key; +out: + return key_ref; - error: +error: key_put(key); - key = ERR_PTR(ret); + key_ref = ERR_PTR(ret); goto out; } /* end __key_update() */ @@ -734,52 +735,56 @@ static inline struct key *__key_update(struct key *key, const void *payload, * search the specified keyring for a key of the same description; if one is * found, update it, otherwise add a new one */ -struct key *key_create_or_update(struct key *keyring, - const char *type, - const char *description, - const void *payload, - size_t plen, - int not_in_quota) +key_ref_t key_create_or_update(key_ref_t keyring_ref, + const char *type, + const char *description, + const void *payload, + size_t plen, + int not_in_quota) { struct key_type *ktype; - struct key *key = NULL; + struct key *keyring, *key = NULL; key_perm_t perm; + key_ref_t key_ref; int ret; - key_check(keyring); - /* look up the key type to see if it's one of the registered kernel * types */ ktype = key_type_lookup(type); if (IS_ERR(ktype)) { - key = ERR_PTR(-ENODEV); + key_ref = ERR_PTR(-ENODEV); goto error; } - ret = -EINVAL; + key_ref = ERR_PTR(-EINVAL); if (!ktype->match || !ktype->instantiate) goto error_2; + keyring = key_ref_to_ptr(keyring_ref); + + key_check(keyring); + + down_write(&keyring->sem); + + /* if we're going to allocate a new key, we're going to have + * to modify the keyring */ + key_ref = ERR_PTR(-EACCES); + if (!key_permission(keyring_ref, KEY_WRITE)) + goto error_3; + /* search for an existing key of the same type and description in the * destination keyring */ - down_write(&keyring->sem); - - key = __keyring_search_one(keyring, ktype, description, 0); - if (!IS_ERR(key)) + key_ref = __keyring_search_one(keyring_ref, ktype, description, 0); + if (!IS_ERR(key_ref)) goto found_matching_key; - /* if we're going to allocate a new key, we're going to have to modify - * the keyring */ - ret = -EACCES; - if (!key_permission(keyring, KEY_WRITE)) - goto error_3; - /* decide on the permissions we want */ - perm = KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK; + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK; + perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK; if (ktype->read) - perm |= KEY_USR_READ; + perm |= KEY_POS_READ | KEY_USR_READ; if (ktype == &key_type_keyring || ktype->update) perm |= KEY_USR_WRITE; @@ -788,7 +793,7 @@ struct key *key_create_or_update(struct key *keyring, key = key_alloc(ktype, description, current->fsuid, current->fsgid, perm, not_in_quota); if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = ERR_PTR(PTR_ERR(key)); goto error_3; } @@ -796,15 +801,18 @@ struct key *key_create_or_update(struct key *keyring, ret = __key_instantiate_and_link(key, payload, plen, keyring, NULL); if (ret < 0) { key_put(key); - key = ERR_PTR(ret); + key_ref = ERR_PTR(ret); + goto error_3; } + key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); + error_3: up_write(&keyring->sem); error_2: key_type_put(ktype); error: - return key; + return key_ref; found_matching_key: /* we found a matching key, so we're going to try to update it @@ -813,7 +821,7 @@ struct key *key_create_or_update(struct key *keyring, up_write(&keyring->sem); key_type_put(ktype); - key = __key_update(key, payload, plen); + key_ref = __key_update(key_ref, payload, plen); goto error; } /* end key_create_or_update() */ @@ -824,15 +832,16 @@ EXPORT_SYMBOL(key_create_or_update); /* * update a key */ -int key_update(struct key *key, const void *payload, size_t plen) +int key_update(key_ref_t key_ref, const void *payload, size_t plen) { + struct key *key = key_ref_to_ptr(key_ref); int ret; key_check(key); /* the key must be writable */ ret = -EACCES; - if (!key_permission(key, KEY_WRITE)) + if (!key_permission(key_ref, KEY_WRITE)) goto error; /* attempt to update it if supported */ diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index a6516a64b297..4c670ee6acf9 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -34,7 +34,7 @@ asmlinkage long sys_add_key(const char __user *_type, size_t plen, key_serial_t ringid) { - struct key *keyring, *key; + key_ref_t keyring_ref, key_ref; char type[32], *description; void *payload; long dlen, ret; @@ -86,25 +86,25 @@ asmlinkage long sys_add_key(const char __user *_type, } /* find the target keyring (which must be writable) */ - keyring = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error3; } /* create or update the requested key and add it to the target * keyring */ - key = key_create_or_update(keyring, type, description, - payload, plen, 0); - if (!IS_ERR(key)) { - ret = key->serial; - key_put(key); + key_ref = key_create_or_update(keyring_ref, type, description, + payload, plen, 0); + if (!IS_ERR(key_ref)) { + ret = key_ref_to_ptr(key_ref)->serial; + key_ref_put(key_ref); } else { - ret = PTR_ERR(key); + ret = PTR_ERR(key_ref); } - key_put(keyring); + key_ref_put(keyring_ref); error3: kfree(payload); error2: @@ -131,7 +131,8 @@ asmlinkage long sys_request_key(const char __user *_type, key_serial_t destringid) { struct key_type *ktype; - struct key *key, *dest; + struct key *key; + key_ref_t dest_ref; char type[32], *description, *callout_info; long dlen, ret; @@ -187,11 +188,11 @@ asmlinkage long sys_request_key(const char __user *_type, } /* get the destination keyring if specified */ - dest = NULL; + dest_ref = NULL; if (destringid) { - dest = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); - if (IS_ERR(dest)) { - ret = PTR_ERR(dest); + dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); + if (IS_ERR(dest_ref)) { + ret = PTR_ERR(dest_ref); goto error3; } } @@ -204,7 +205,8 @@ asmlinkage long sys_request_key(const char __user *_type, } /* do the search */ - key = request_key_and_link(ktype, description, callout_info, dest); + key = request_key_and_link(ktype, description, callout_info, + key_ref_to_ptr(dest_ref)); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error5; @@ -216,7 +218,7 @@ asmlinkage long sys_request_key(const char __user *_type, error5: key_type_put(ktype); error4: - key_put(dest); + key_ref_put(dest_ref); error3: kfree(callout_info); error2: @@ -234,17 +236,17 @@ asmlinkage long sys_request_key(const char __user *_type, */ long keyctl_get_keyring_ID(key_serial_t id, int create) { - struct key *key; + key_ref_t key_ref; long ret; - key = lookup_user_key(NULL, id, create, 0, KEY_SEARCH); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, create, 0, KEY_SEARCH); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error; } - ret = key->serial; - key_put(key); + ret = key_ref_to_ptr(key_ref)->serial; + key_ref_put(key_ref); error: return ret; @@ -302,7 +304,7 @@ long keyctl_update_key(key_serial_t id, const void __user *_payload, size_t plen) { - struct key *key; + key_ref_t key_ref; void *payload; long ret; @@ -324,16 +326,16 @@ long keyctl_update_key(key_serial_t id, } /* find the target key (which must be writable) */ - key = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error2; } /* update the key */ - ret = key_update(key, payload, plen); + ret = key_update(key_ref, payload, plen); - key_put(key); + key_ref_put(key_ref); error2: kfree(payload); error: @@ -349,19 +351,19 @@ long keyctl_update_key(key_serial_t id, */ long keyctl_revoke_key(key_serial_t id) { - struct key *key; + key_ref_t key_ref; long ret; - key = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error; } - key_revoke(key); + key_revoke(key_ref_to_ptr(key_ref)); ret = 0; - key_put(key); + key_ref_put(key_ref); error: return ret; @@ -375,18 +377,18 @@ long keyctl_revoke_key(key_serial_t id) */ long keyctl_keyring_clear(key_serial_t ringid) { - struct key *keyring; + key_ref_t keyring_ref; long ret; - keyring = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error; } - ret = keyring_clear(keyring); + ret = keyring_clear(key_ref_to_ptr(keyring_ref)); - key_put(keyring); + key_ref_put(keyring_ref); error: return ret; @@ -401,26 +403,26 @@ long keyctl_keyring_clear(key_serial_t ringid) */ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid) { - struct key *keyring, *key; + key_ref_t keyring_ref, key_ref; long ret; - keyring = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error; } - key = lookup_user_key(NULL, id, 1, 0, KEY_LINK); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 1, 0, KEY_LINK); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error2; } - ret = key_link(keyring, key); + ret = key_link(key_ref_to_ptr(keyring_ref), key_ref_to_ptr(key_ref)); - key_put(key); + key_ref_put(key_ref); error2: - key_put(keyring); + key_ref_put(keyring_ref); error: return ret; @@ -435,26 +437,26 @@ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid) */ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid) { - struct key *keyring, *key; + key_ref_t keyring_ref, key_ref; long ret; - keyring = lookup_user_key(NULL, ringid, 0, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error; } - key = lookup_user_key(NULL, id, 0, 0, 0); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 0, 0, 0); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error2; } - ret = key_unlink(keyring, key); + ret = key_unlink(key_ref_to_ptr(keyring_ref), key_ref_to_ptr(key_ref)); - key_put(key); + key_ref_put(key_ref); error2: - key_put(keyring); + key_ref_put(keyring_ref); error: return ret; @@ -476,24 +478,26 @@ long keyctl_describe_key(key_serial_t keyid, size_t buflen) { struct key *key, *instkey; + key_ref_t key_ref; char *tmpbuf; long ret; - key = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); - if (IS_ERR(key)) { + key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); + if (IS_ERR(key_ref)) { /* viewing a key under construction is permitted if we have the * authorisation token handy */ - if (PTR_ERR(key) == -EACCES) { + if (PTR_ERR(key_ref) == -EACCES) { instkey = key_get_instantiation_authkey(keyid); if (!IS_ERR(instkey)) { key_put(instkey); - key = lookup_user_key(NULL, keyid, 0, 1, 0); - if (!IS_ERR(key)) + key_ref = lookup_user_key(NULL, keyid, + 0, 1, 0); + if (!IS_ERR(key_ref)) goto okay; } } - ret = PTR_ERR(key); + ret = PTR_ERR(key_ref); goto error; } @@ -504,13 +508,16 @@ okay: if (!tmpbuf) goto error2; + key = key_ref_to_ptr(key_ref); + ret = snprintf(tmpbuf, PAGE_SIZE - 1, - "%s;%d;%d;%06x;%s", - key->type->name, - key->uid, - key->gid, - key->perm, - key->description ? key->description :"" + "%s;%d;%d;%08x;%s", + key_ref_to_ptr(key_ref)->type->name, + key_ref_to_ptr(key_ref)->uid, + key_ref_to_ptr(key_ref)->gid, + key_ref_to_ptr(key_ref)->perm, + key_ref_to_ptr(key_ref)->description ? + key_ref_to_ptr(key_ref)->description : "" ); /* include a NUL char at the end of the data */ @@ -530,7 +537,7 @@ okay: kfree(tmpbuf); error2: - key_put(key); + key_ref_put(key_ref); error: return ret; @@ -552,7 +559,7 @@ long keyctl_keyring_search(key_serial_t ringid, key_serial_t destringid) { struct key_type *ktype; - struct key *keyring, *key, *dest; + key_ref_t keyring_ref, key_ref, dest_ref; char type[32], *description; long dlen, ret; @@ -581,18 +588,18 @@ long keyctl_keyring_search(key_serial_t ringid, goto error2; /* get the keyring at which to begin the search */ - keyring = lookup_user_key(NULL, ringid, 0, 0, KEY_SEARCH); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_SEARCH); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error2; } /* get the destination keyring if specified */ - dest = NULL; + dest_ref = NULL; if (destringid) { - dest = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); - if (IS_ERR(dest)) { - ret = PTR_ERR(dest); + dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); + if (IS_ERR(dest_ref)) { + ret = PTR_ERR(dest_ref); goto error3; } } @@ -605,9 +612,9 @@ long keyctl_keyring_search(key_serial_t ringid, } /* do the search */ - key = keyring_search(keyring, ktype, description); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = keyring_search(keyring_ref, ktype, description); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); /* treat lack or presence of a negative key the same */ if (ret == -EAGAIN) @@ -616,26 +623,26 @@ long keyctl_keyring_search(key_serial_t ringid, } /* link the resulting key to the destination keyring if we can */ - if (dest) { + if (dest_ref) { ret = -EACCES; - if (!key_permission(key, KEY_LINK)) + if (!key_permission(key_ref, KEY_LINK)) goto error6; - ret = key_link(dest, key); + ret = key_link(key_ref_to_ptr(dest_ref), key_ref_to_ptr(key_ref)); if (ret < 0) goto error6; } - ret = key->serial; + ret = key_ref_to_ptr(key_ref)->serial; error6: - key_put(key); + key_ref_put(key_ref); error5: key_type_put(ktype); error4: - key_put(dest); + key_ref_put(dest_ref); error3: - key_put(keyring); + key_ref_put(keyring_ref); error2: kfree(description); error: @@ -643,16 +650,6 @@ long keyctl_keyring_search(key_serial_t ringid, } /* end keyctl_keyring_search() */ -/*****************************************************************************/ -/* - * see if the key we're looking at is the target key - */ -static int keyctl_read_key_same(const struct key *key, const void *target) -{ - return key == target; - -} /* end keyctl_read_key_same() */ - /*****************************************************************************/ /* * read a user key's payload @@ -665,38 +662,33 @@ static int keyctl_read_key_same(const struct key *key, const void *target) */ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) { - struct key *key, *skey; + struct key *key; + key_ref_t key_ref; long ret; /* find the key first */ - key = lookup_user_key(NULL, keyid, 0, 0, 0); - if (!IS_ERR(key)) { - /* see if we can read it directly */ - if (key_permission(key, KEY_READ)) - goto can_read_key; - - /* we can't; see if it's searchable from this process's - * keyrings - * - we automatically take account of the fact that it may be - * dangling off an instantiation key - */ - skey = search_process_keyrings(key->type, key, - keyctl_read_key_same, current); - if (!IS_ERR(skey)) - goto can_read_key2; - - ret = PTR_ERR(skey); - if (ret == -EAGAIN) - ret = -EACCES; - goto error2; + key_ref = lookup_user_key(NULL, keyid, 0, 0, 0); + if (IS_ERR(key_ref)) { + ret = -ENOKEY; + goto error; } - ret = -ENOKEY; - goto error; + key = key_ref_to_ptr(key_ref); + + /* see if we can read it directly */ + if (key_permission(key_ref, KEY_READ)) + goto can_read_key; + + /* we can't; see if it's searchable from this process's keyrings + * - we automatically take account of the fact that it may be + * dangling off an instantiation key + */ + if (!is_key_possessed(key_ref)) { + ret = -EACCES; + goto error2; + } /* the key is probably readable - now try to read it */ - can_read_key2: - key_put(skey); can_read_key: ret = key_validate(key); if (ret == 0) { @@ -727,18 +719,21 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) { struct key *key; + key_ref_t key_ref; long ret; ret = 0; if (uid == (uid_t) -1 && gid == (gid_t) -1) goto error; - key = lookup_user_key(NULL, id, 1, 1, 0); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 1, 1, 0); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error; } + key = key_ref_to_ptr(key_ref); + /* make the changes with the locks held to prevent chown/chown races */ ret = -EACCES; down_write(&key->sem); @@ -784,18 +779,21 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) long keyctl_setperm_key(key_serial_t id, key_perm_t perm) { struct key *key; + key_ref_t key_ref; long ret; ret = -EINVAL; - if (perm & ~(KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) + if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) goto error; - key = lookup_user_key(NULL, id, 1, 1, 0); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 1, 1, 0); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error; } + key = key_ref_to_ptr(key_ref); + /* make the changes with the locks held to prevent chown/chmod races */ ret = -EACCES; down_write(&key->sem); @@ -824,7 +822,8 @@ long keyctl_instantiate_key(key_serial_t id, key_serial_t ringid) { struct request_key_auth *rka; - struct key *instkey, *keyring; + struct key *instkey; + key_ref_t keyring_ref; void *payload; long ret; @@ -857,21 +856,21 @@ long keyctl_instantiate_key(key_serial_t id, /* find the destination keyring amongst those belonging to the * requesting task */ - keyring = NULL; + keyring_ref = NULL; if (ringid) { - keyring = lookup_user_key(rka->context, ringid, 1, 0, - KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(rka->context, ringid, 1, 0, + KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error3; } } /* instantiate the key and link it into a keyring */ ret = key_instantiate_and_link(rka->target_key, payload, plen, - keyring, instkey); + key_ref_to_ptr(keyring_ref), instkey); - key_put(keyring); + key_ref_put(keyring_ref); error3: key_put(instkey); error2: @@ -889,7 +888,8 @@ long keyctl_instantiate_key(key_serial_t id, long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { struct request_key_auth *rka; - struct key *instkey, *keyring; + struct key *instkey; + key_ref_t keyring_ref; long ret; /* find the instantiation authorisation key */ @@ -903,19 +903,20 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* find the destination keyring if present (which must also be * writable) */ - keyring = NULL; + keyring_ref = NULL; if (ringid) { - keyring = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error2; } } /* instantiate the key and link it into a keyring */ - ret = key_negate_and_link(rka->target_key, timeout, keyring, instkey); + ret = key_negate_and_link(rka->target_key, timeout, + key_ref_to_ptr(keyring_ref), instkey); - key_put(keyring); + key_ref_put(keyring_ref); error2: key_put(instkey); error: diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 9c208c756df8..0639396dd441 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -309,7 +309,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, KEY_USR_ALL, not_in_quota); + uid, gid, KEY_POS_ALL | KEY_USR_ALL, not_in_quota); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); @@ -333,12 +333,13 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, * - we rely on RCU to prevent the keyring lists from disappearing on us * - we return -EAGAIN if we didn't find any matching key * - we return -ENOKEY if we only found negative matching keys + * - we propagate the possession attribute from the keyring ref to the key ref */ -struct key *keyring_search_aux(struct key *keyring, - struct task_struct *context, - struct key_type *type, - const void *description, - key_match_func_t match) +key_ref_t keyring_search_aux(key_ref_t keyring_ref, + struct task_struct *context, + struct key_type *type, + const void *description, + key_match_func_t match) { struct { struct keyring_list *keylist; @@ -347,29 +348,33 @@ struct key *keyring_search_aux(struct key *keyring, struct keyring_list *keylist; struct timespec now; - struct key *key; + unsigned long possessed; + struct key *keyring, *key; + key_ref_t key_ref; long err; int sp, kix; + keyring = key_ref_to_ptr(keyring_ref); + possessed = is_key_possessed(keyring_ref); key_check(keyring); - rcu_read_lock(); - /* top keyring must have search permission to begin the search */ - key = ERR_PTR(-EACCES); - if (!key_task_permission(keyring, context, KEY_SEARCH)) + key_ref = ERR_PTR(-EACCES); + if (!key_task_permission(keyring_ref, context, KEY_SEARCH)) goto error; - key = ERR_PTR(-ENOTDIR); + key_ref = ERR_PTR(-ENOTDIR); if (keyring->type != &key_type_keyring) goto error; + rcu_read_lock(); + now = current_kernel_time(); err = -EAGAIN; sp = 0; /* start processing a new keyring */ - descend: +descend: if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) goto not_this_keyring; @@ -397,7 +402,8 @@ struct key *keyring_search_aux(struct key *keyring, continue; /* key must have search permissions */ - if (!key_task_permission(key, context, KEY_SEARCH)) + if (!key_task_permission(make_key_ref(key, possessed), + context, KEY_SEARCH)) continue; /* we set a different error code if we find a negative key */ @@ -411,7 +417,7 @@ struct key *keyring_search_aux(struct key *keyring, /* search through the keyrings nested in this one */ kix = 0; - ascend: +ascend: for (; kix < keylist->nkeys; kix++) { key = keylist->keys[kix]; if (key->type != &key_type_keyring) @@ -423,7 +429,8 @@ struct key *keyring_search_aux(struct key *keyring, if (sp >= KEYRING_SEARCH_MAX_DEPTH) continue; - if (!key_task_permission(key, context, KEY_SEARCH)) + if (!key_task_permission(make_key_ref(key, possessed), + context, KEY_SEARCH)) continue; /* stack the current position */ @@ -438,7 +445,7 @@ struct key *keyring_search_aux(struct key *keyring, /* the keyring we're looking at was disqualified or didn't contain a * matching key */ - not_this_keyring: +not_this_keyring: if (sp > 0) { /* resume the processing of a keyring higher up in the tree */ sp--; @@ -447,16 +454,18 @@ struct key *keyring_search_aux(struct key *keyring, goto ascend; } - key = ERR_PTR(err); - goto error; + key_ref = ERR_PTR(err); + goto error_2; /* we found a viable match */ - found: +found: atomic_inc(&key->usage); key_check(key); - error: + key_ref = make_key_ref(key, possessed); +error_2: rcu_read_unlock(); - return key; +error: + return key_ref; } /* end keyring_search_aux() */ @@ -469,9 +478,9 @@ struct key *keyring_search_aux(struct key *keyring, * - we return -EAGAIN if we didn't find any matching key * - we return -ENOKEY if we only found negative matching keys */ -struct key *keyring_search(struct key *keyring, - struct key_type *type, - const char *description) +key_ref_t keyring_search(key_ref_t keyring, + struct key_type *type, + const char *description) { if (!type->match) return ERR_PTR(-ENOKEY); @@ -488,15 +497,19 @@ EXPORT_SYMBOL(keyring_search); * search the given keyring only (no recursion) * - keyring must be locked by caller */ -struct key *__keyring_search_one(struct key *keyring, - const struct key_type *ktype, - const char *description, - key_perm_t perm) +key_ref_t __keyring_search_one(key_ref_t keyring_ref, + const struct key_type *ktype, + const char *description, + key_perm_t perm) { struct keyring_list *klist; - struct key *key; + unsigned long possessed; + struct key *keyring, *key; int loop; + keyring = key_ref_to_ptr(keyring_ref); + possessed = is_key_possessed(keyring_ref); + rcu_read_lock(); klist = rcu_dereference(keyring->payload.subscriptions); @@ -507,21 +520,21 @@ struct key *__keyring_search_one(struct key *keyring, if (key->type == ktype && (!key->type->match || key->type->match(key, description)) && - key_permission(key, perm) && + key_permission(make_key_ref(key, possessed), + perm) && !test_bit(KEY_FLAG_REVOKED, &key->flags) ) goto found; } } - key = ERR_PTR(-ENOKEY); - goto error; + rcu_read_unlock(); + return ERR_PTR(-ENOKEY); found: atomic_inc(&key->usage); - error: rcu_read_unlock(); - return key; + return make_key_ref(key, possessed); } /* end __keyring_search_one() */ @@ -603,7 +616,8 @@ struct key *find_keyring_by_name(const char *name, key_serial_t bound) if (strcmp(keyring->description, name) != 0) continue; - if (!key_permission(keyring, KEY_SEARCH)) + if (!key_permission(make_key_ref(keyring, 0), + KEY_SEARCH)) continue; /* found a potential candidate, but we still need to diff --git a/security/keys/proc.c b/security/keys/proc.c index c55cf1fd0826..12b750e51fbf 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -167,7 +167,7 @@ static int proc_keys_show(struct seq_file *m, void *v) #define showflag(KEY, LETTER, FLAG) \ (test_bit(FLAG, &(KEY)->flags) ? LETTER : '-') - seq_printf(m, "%08x %c%c%c%c%c%c %5d %4s %06x %5d %5d %-9.9s ", + seq_printf(m, "%08x %c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, showflag(key, 'I', KEY_FLAG_INSTANTIATED), showflag(key, 'R', KEY_FLAG_REVOKED), diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index c089f78fb94e..d42d2158ce13 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -39,7 +39,7 @@ struct key root_user_keyring = { .type = &key_type_keyring, .user = &root_key_user, .sem = __RWSEM_INITIALIZER(root_user_keyring.sem), - .perm = KEY_USR_ALL, + .perm = KEY_POS_ALL | KEY_USR_ALL, .flags = 1 << KEY_FLAG_INSTANTIATED, .description = "_uid.0", #ifdef KEY_DEBUGGING @@ -54,7 +54,7 @@ struct key root_session_keyring = { .type = &key_type_keyring, .user = &root_key_user, .sem = __RWSEM_INITIALIZER(root_session_keyring.sem), - .perm = KEY_USR_ALL, + .perm = KEY_POS_ALL | KEY_USR_ALL, .flags = 1 << KEY_FLAG_INSTANTIATED, .description = "_uid_ses.0", #ifdef KEY_DEBUGGING @@ -98,7 +98,7 @@ int alloc_uid_keyring(struct user_struct *user) user->session_keyring = session_keyring; ret = 0; - error: +error: return ret; } /* end alloc_uid_keyring() */ @@ -156,7 +156,7 @@ int install_thread_keyring(struct task_struct *tsk) ret = 0; key_put(old); - error: +error: return ret; } /* end install_thread_keyring() */ @@ -193,7 +193,7 @@ int install_process_keyring(struct task_struct *tsk) } ret = 0; - error: +error: return ret; } /* end install_process_keyring() */ @@ -236,7 +236,7 @@ static int install_session_keyring(struct task_struct *tsk, /* we're using RCU on the pointer */ synchronize_rcu(); key_put(old); - error: +error: return ret; } /* end install_session_keyring() */ @@ -376,13 +376,13 @@ void key_fsgid_changed(struct task_struct *tsk) * - we return -EAGAIN if we didn't find any matching key * - we return -ENOKEY if we found only negative matching keys */ -struct key *search_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - struct task_struct *context) +key_ref_t search_process_keyrings(struct key_type *type, + const void *description, + key_match_func_t match, + struct task_struct *context) { struct request_key_auth *rka; - struct key *key, *ret, *err, *instkey; + key_ref_t key_ref, ret, err, instkey_ref; /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; @@ -391,46 +391,48 @@ struct key *search_process_keyrings(struct key_type *type, * * in terms of priority: success > -ENOKEY > -EAGAIN > other error */ - key = NULL; + key_ref = NULL; ret = NULL; err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ if (context->thread_keyring) { - key = keyring_search_aux(context->thread_keyring, - context, type, description, match); - if (!IS_ERR(key)) + key_ref = keyring_search_aux( + make_key_ref(context->thread_keyring, 1), + context, type, description, match); + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } } /* search the process keyring second */ if (context->signal->process_keyring) { - key = keyring_search_aux(context->signal->process_keyring, - context, type, description, match); - if (!IS_ERR(key)) + key_ref = keyring_search_aux( + make_key_ref(context->signal->process_keyring, 1), + context, type, description, match); + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } } @@ -438,23 +440,25 @@ struct key *search_process_keyrings(struct key_type *type, /* search the session keyring */ if (context->signal->session_keyring) { rcu_read_lock(); - key = keyring_search_aux( - rcu_dereference(context->signal->session_keyring), + key_ref = keyring_search_aux( + make_key_ref(rcu_dereference( + context->signal->session_keyring), + 1), context, type, description, match); rcu_read_unlock(); - if (!IS_ERR(key)) + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } @@ -465,51 +469,54 @@ struct key *search_process_keyrings(struct key_type *type, goto no_key; rcu_read_lock(); - instkey = __keyring_search_one( - rcu_dereference(context->signal->session_keyring), + instkey_ref = __keyring_search_one( + make_key_ref(rcu_dereference( + context->signal->session_keyring), + 1), &key_type_request_key_auth, NULL, 0); rcu_read_unlock(); - if (IS_ERR(instkey)) + if (IS_ERR(instkey_ref)) goto no_key; - rka = instkey->payload.data; + rka = key_ref_to_ptr(instkey_ref)->payload.data; - key = search_process_keyrings(type, description, match, - rka->context); - key_put(instkey); + key_ref = search_process_keyrings(type, description, match, + rka->context); + key_ref_put(instkey_ref); - if (!IS_ERR(key)) + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } } /* or search the user-session keyring */ else { - key = keyring_search_aux(context->user->session_keyring, - context, type, description, match); - if (!IS_ERR(key)) + key_ref = keyring_search_aux( + make_key_ref(context->user->session_keyring, 1), + context, type, description, match); + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } } @@ -517,29 +524,40 @@ struct key *search_process_keyrings(struct key_type *type, no_key: /* no key - decide on the error we're going to go for */ - key = ret ? ret : err; + key_ref = ret ? ret : err; found: - return key; + return key_ref; } /* end search_process_keyrings() */ +/*****************************************************************************/ +/* + * see if the key we're looking at is the target key + */ +static int lookup_user_key_possessed(const struct key *key, const void *target) +{ + return key == target; + +} /* end lookup_user_key_possessed() */ + /*****************************************************************************/ /* * lookup a key given a key ID from userspace with a given permissions mask * - don't create special keyrings unless so requested * - partially constructed keys aren't found unless requested */ -struct key *lookup_user_key(struct task_struct *context, key_serial_t id, - int create, int partial, key_perm_t perm) +key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, + int create, int partial, key_perm_t perm) { + key_ref_t key_ref, skey_ref; struct key *key; int ret; if (!context) context = current; - key = ERR_PTR(-ENOKEY); + key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: @@ -556,6 +574,7 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, key = context->thread_keyring; atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_PROCESS_KEYRING: @@ -572,6 +591,7 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, key = context->signal->process_keyring; atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: @@ -579,7 +599,7 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_session_keyring( - context, context->user->session_keyring); + context, context->user->session_keyring); if (ret < 0) goto error; } @@ -588,16 +608,19 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, key = rcu_dereference(context->signal->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_KEYRING: key = context->user->uid_keyring; atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: key = context->user->session_keyring; atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_GROUP_KEYRING: @@ -606,13 +629,28 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, goto error; default: - key = ERR_PTR(-EINVAL); + key_ref = ERR_PTR(-EINVAL); if (id < 1) goto error; key = key_lookup(id); - if (IS_ERR(key)) + if (IS_ERR(key)) { + key_ref = ERR_PTR(PTR_ERR(key)); goto error; + } + + key_ref = make_key_ref(key, 0); + + /* check to see if we possess the key */ + skey_ref = search_process_keyrings(key->type, key, + lookup_user_key_possessed, + current); + + if (!IS_ERR(skey_ref)) { + key_put(key); + key_ref = skey_ref; + } + break; } @@ -630,15 +668,15 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, /* check the permissions */ ret = -EACCES; - if (!key_task_permission(key, context, perm)) + if (!key_task_permission(key_ref, context, perm)) goto invalid_key; - error: - return key; +error: + return key_ref; - invalid_key: - key_put(key); - key = ERR_PTR(ret); +invalid_key: + key_ref_put(key_ref); + key_ref = ERR_PTR(ret); goto error; } /* end lookup_user_key() */ @@ -694,9 +732,9 @@ long join_session_keyring(const char *name) ret = keyring->serial; key_put(keyring); - error2: +error2: up(&key_session_sem); - error: +error: return ret; } /* end join_session_keyring() */ diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 90c1506d007c..e6dd366d43a3 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -129,7 +129,7 @@ static struct key *__request_key_construction(struct key_type *type, /* create a key and add it to the queue */ key = key_alloc(type, description, - current->fsuid, current->fsgid, KEY_USR_ALL, 0); + current->fsuid, current->fsgid, KEY_POS_ALL, 0); if (IS_ERR(key)) goto alloc_failed; @@ -365,14 +365,24 @@ struct key *request_key_and_link(struct key_type *type, { struct key_user *user; struct key *key; + key_ref_t key_ref; kenter("%s,%s,%s,%p", type->name, description, callout_info, dest_keyring); /* search all the process keyrings for a key */ - key = search_process_keyrings(type, description, type->match, current); + key_ref = search_process_keyrings(type, description, type->match, + current); - if (PTR_ERR(key) == -EAGAIN) { + kdebug("search 1: %p", key_ref); + + if (!IS_ERR(key_ref)) { + key = key_ref_to_ptr(key_ref); + } + else if (PTR_ERR(key_ref) != -EAGAIN) { + key = ERR_PTR(PTR_ERR(key_ref)); + } + else { /* the search failed, but the keyrings were searchable, so we * should consult userspace if we can */ key = ERR_PTR(-ENOKEY); @@ -384,7 +394,7 @@ struct key *request_key_and_link(struct key_type *type, if (!user) goto nomem; - do { + for (;;) { if (signal_pending(current)) goto interrupted; @@ -397,10 +407,22 @@ struct key *request_key_and_link(struct key_type *type, /* someone else made the key we want, so we need to * search again as it might now be available to us */ - key = search_process_keyrings(type, description, - type->match, current); + key_ref = search_process_keyrings(type, description, + type->match, + current); + + kdebug("search 2: %p", key_ref); - } while (PTR_ERR(key) == -EAGAIN); + if (!IS_ERR(key_ref)) { + key = key_ref_to_ptr(key_ref); + break; + } + + if (PTR_ERR(key_ref) != -EAGAIN) { + key = ERR_PTR(PTR_ERR(key_ref)); + break; + } + } key_user_put(user); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index f22264632229..1ecd3d3fa9f8 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -126,7 +126,7 @@ struct key *request_key_auth_new(struct key *target, struct key **_rkakey) rkakey = key_alloc(&key_type_request_key_auth, desc, current->fsuid, current->fsgid, - KEY_USR_VIEW, 1); + KEY_POS_VIEW | KEY_USR_VIEW, 1); if (IS_ERR(rkakey)) { key_put(keyring); kleave("= %ld", PTR_ERR(rkakey)); -- cgit v1.2.3 From b4b52db71529bbe46da914eda772fb574914c94d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 26 Sep 2005 12:48:41 +0100 Subject: [PATCH] ata: re-order speeds sensibly. Signed-off-by: Jeff Garzik --- include/linux/ata.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 85169ea9eb01..ecb7346d0c16 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -147,14 +147,14 @@ enum { XFER_MW_DMA_2 = 0x22, XFER_MW_DMA_1 = 0x21, XFER_MW_DMA_0 = 0x20, + XFER_SW_DMA_2 = 0x12, + XFER_SW_DMA_1 = 0x11, + XFER_SW_DMA_0 = 0x10, XFER_PIO_4 = 0x0C, XFER_PIO_3 = 0x0B, XFER_PIO_2 = 0x0A, XFER_PIO_1 = 0x09, XFER_PIO_0 = 0x08, - XFER_SW_DMA_2 = 0x12, - XFER_SW_DMA_1 = 0x11, - XFER_SW_DMA_0 = 0x10, XFER_PIO_SLOW = 0x00, /* ATAPI stuff */ -- cgit v1.2.3 From aa55a08687059aa169d10a313c41f238c2070488 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 29 Sep 2005 19:58:53 +0400 Subject: [PATCH] fix TASK_STOPPED vs TASK_NONINTERACTIVE interaction do_signal_stop: for_each_thread(t) { if (t->state < TASK_STOPPED) ++sig->group_stop_count; } However, TASK_NONINTERACTIVE > TASK_STOPPED, so this loop will not count TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE threads. See also wait_task_stopped(), which checks ->state > TASK_STOPPED. Signed-off-by: Oleg Nesterov [ We really probably should always use the appropriate bitmasks to test task states, not do it like this. Using something like #define TASK_RUNNABLE (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | TASK_NONINTERACTIVE) and then doing "if (task->state & TASK_RUNNABLE)" or similar. But the ordering of the task states is historical, and keeping the ordering does make sense regardless. ] Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 49e617fa0f66..afe6c61f13e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -110,11 +110,11 @@ extern unsigned long nr_iowait(void); #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_STOPPED 4 -#define TASK_TRACED 8 -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 -#define TASK_NONINTERACTIVE 64 +#define TASK_NONINTERACTIVE 4 +#define TASK_STOPPED 8 +#define TASK_TRACED 16 +#define EXIT_ZOMBIE 32 +#define EXIT_DEAD 64 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) -- cgit v1.2.3 From 4a8342d233a39ee582e9f7260e12d2f5fd194a05 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Sep 2005 15:18:21 -0700 Subject: Revert task flag re-ordering, add comments Roland points out that the flags end up having non-obvious dependencies elsewhere, so revert aa55a08687059aa169d10a313c41f238c2070488 and add some comments about why things are as they are. We'll just have to fix up the broken comparisons. Roland has a patch. Signed-off-by: Linus Torvalds --- include/linux/sched.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index afe6c61f13e5..c3ba31f210a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -107,14 +107,26 @@ extern unsigned long nr_iowait(void); #include +/* + * Task state bitmask. NOTE! These bits are also + * encoded in fs/proc/array.c: get_task_state(). + * + * We have two separate sets of flags: task->state + * is about runnability, while task->exit_state are + * about the task exiting. Confusing, but this way + * modifying one set can't modify the other one by + * mistake. + */ #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_NONINTERACTIVE 4 -#define TASK_STOPPED 8 -#define TASK_TRACED 16 -#define EXIT_ZOMBIE 32 -#define EXIT_DEAD 64 +#define TASK_STOPPED 4 +#define TASK_TRACED 8 +/* in tsk->exit_state */ +#define EXIT_ZOMBIE 16 +#define EXIT_DEAD 32 +/* in tsk->state again */ +#define TASK_NONINTERACTIVE 64 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) -- cgit v1.2.3 From 897f15fb587fd2772b9e7ff6ec0265057f3c3975 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:55 -0700 Subject: [PATCH] aio: remove unlocked task_list test and resulting race Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it than one of them can end up referencing a freed iocb. The kick path could delete the task_list item from the wait queue before getting the ctx_lock and putting the iocb on the run list. The run path was testing the task_list item outside the lock so that it could catch ki_retry methods that return -EIOCBRETRY *without* putting the iocb on a wait queue and promising to call kick_iocb. This unlocked check could then race with the kick path to cause both to try and put the iocb on the run list. The patch stops the run path from testing task_list by requring that any ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY users, are updated. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 79 ++++++++++++++++++++++------------------------------- include/linux/aio.h | 34 +++++++++++++++++++++++ 2 files changed, 67 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index b8f296999c04..9edc0e4a1219 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -741,19 +741,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) ret = retry(iocb); current->io_wait = NULL; - if (-EIOCBRETRY != ret) { - if (-EIOCBQUEUED != ret) { - BUG_ON(!list_empty(&iocb->ki_wait.task_list)); - aio_complete(iocb, ret, 0); - /* must not access the iocb after this */ - } - } else { - /* - * Issue an additional retry to avoid waiting forever if - * no waits were queued (e.g. in case of a short read). - */ - if (list_empty(&iocb->ki_wait.task_list)) - kiocbSetKicked(iocb); + if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { + BUG_ON(!list_empty(&iocb->ki_wait.task_list)); + aio_complete(iocb, ret, 0); } out: spin_lock_irq(&ctx->ctx_lock); @@ -1327,8 +1317,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx) } /* - * Default retry method for aio_read (also used for first time submit) - * Responsible for updating iocb state as retries progress + * aio_p{read,write} are the default ki_retry methods for + * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially + * multiple calls to f_op->aio_read(). They loop around partial progress + * instead of returning -EIOCBRETRY because they don't have the means to call + * kick_iocb(). */ static ssize_t aio_pread(struct kiocb *iocb) { @@ -1337,25 +1330,25 @@ static ssize_t aio_pread(struct kiocb *iocb) struct inode *inode = mapping->host; ssize_t ret = 0; - ret = file->f_op->aio_read(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + do { + ret = file->f_op->aio_read(iocb, iocb->ki_buf, + iocb->ki_left, iocb->ki_pos); + /* + * Can't just depend on iocb->ki_left to determine + * whether we are done. This may have been a short read. + */ + if (ret > 0) { + iocb->ki_buf += ret; + iocb->ki_left -= ret; + } - /* - * Can't just depend on iocb->ki_left to determine - * whether we are done. This may have been a short read. - */ - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; /* - * For pipes and sockets we return once we have - * some data; for regular files we retry till we - * complete the entire read or find that we can't - * read any more data (e.g short reads). + * For pipes and sockets we return once we have some data; for + * regular files we retry till we complete the entire read or + * find that we can't read any more data (e.g short reads). */ - if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)) - ret = -EIOCBRETRY; - } + } while (ret > 0 && + !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); /* This means we must have transferred all that we could */ /* No need to retry anymore */ @@ -1365,27 +1358,21 @@ static ssize_t aio_pread(struct kiocb *iocb) return ret; } -/* - * Default retry method for aio_write (also used for first time submit) - * Responsible for updating iocb state as retries progress - */ +/* see aio_pread() */ static ssize_t aio_pwrite(struct kiocb *iocb) { struct file *file = iocb->ki_filp; ssize_t ret = 0; - ret = file->f_op->aio_write(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); - - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; - - ret = -EIOCBRETRY; - } + do { + ret = file->f_op->aio_write(iocb, iocb->ki_buf, + iocb->ki_left, iocb->ki_pos); + if (ret > 0) { + iocb->ki_buf += ret; + iocb->ki_left -= ret; + } + } while (ret > 0); - /* This means we must have transferred all that we could */ - /* No need to retry anymore */ if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; diff --git a/include/linux/aio.h b/include/linux/aio.h index a4d5af907f90..60def658b246 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -43,6 +43,40 @@ struct kioctx; #define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags) #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags) +/* is there a better place to document function pointer methods? */ +/** + * ki_retry - iocb forward progress callback + * @kiocb: The kiocb struct to advance by performing an operation. + * + * This callback is called when the AIO core wants a given AIO operation + * to make forward progress. The kiocb argument describes the operation + * that is to be performed. As the operation proceeds, perhaps partially, + * ki_retry is expected to update the kiocb with progress made. Typically + * ki_retry is set in the AIO core and it itself calls file_operations + * helpers. + * + * ki_retry's return value determines when the AIO operation is completed + * and an event is generated in the AIO event ring. Except the special + * return values described below, the value that is returned from ki_retry + * is transferred directly into the completion ring as the operation's + * resulting status. Once this has happened ki_retry *MUST NOT* reference + * the kiocb pointer again. + * + * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete() + * will be called on the kiocb pointer in the future. The AIO core will + * not ask the method again -- ki_retry must ensure forward progress. + * aio_complete() must be called once and only once in the future, multiple + * calls may result in undefined behaviour. + * + * If ki_retry returns -EIOCBRETRY it has made a promise that kick_iocb() + * will be called on the kiocb pointer in the future. This may happen + * through generic helpers that associate kiocb->ki_wait with a wait + * queue head that ki_retry uses via current->io_wait. It can also happen + * with custom tracking and manual calls to kick_iocb(), though that is + * discouraged. In either case, kick_iocb() must be called once and only + * once. ki_retry must ensure forward progress, the AIO core will wait + * indefinitely for kick_iocb() to be called. + */ struct kiocb { struct list_head ki_run_list; long ki_flags; -- cgit v1.2.3 From fd2e54b35bd70d11c160ded4834e2378e915356e Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Sat, 1 Oct 2005 17:00:48 +0200 Subject: [PATCH] trivial #if -> #ifdef Use '#ifdef' consistently on __KERNEL__. This was reported as bug #5340 (isn't easier to send a fix than report the bug?!) Signed-off-by: Diego Calleja Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4ed2107bc020..2f0299a448f6 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -183,7 +183,7 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; -#if __KERNEL__ +#ifdef __KERNEL__ void *data; #else kernel_ulong_t data; -- cgit v1.2.3 From 325ed8239309cb29f10ea58c5a668058ead11479 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 13:57:23 -0700 Subject: [NET]: Fix packet timestamping. I've found the problem in general. It affects any 64-bit architecture. The problem occurs when you change the system time. Suppose that when you boot your system clock is forward by a day. This gets recorded down in skb_tv_base. You then wind the clock back by a day. From that point onwards the offset will be negative which essentially overflows the 32-bit variables they're stored in. In fact, why don't we just store the real time stamp in those 32-bit variables? After all, we're not going to overflow for quite a while yet. When we do overflow, we'll need a better solution of course. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 +++--------- net/core/skbuff.c | 5 ----- net/ipv4/netfilter/ip_queue.c | 4 ++-- net/ipv4/netfilter/ipt_ULOG.c | 4 ++-- net/ipv6/netfilter/ip6_queue.c | 4 ++-- net/netfilter/nfnetlink_log.c | 4 ++-- net/netfilter/nfnetlink_queue.c | 4 ++-- net/packet/af_packet.c | 4 ++-- 8 files changed, 15 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2741c0c55e83..466c879f82b8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,8 +155,6 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) -extern struct timeval skb_tv_base; - struct skb_timeval { u32 off_sec; u32 off_usec; @@ -175,7 +173,7 @@ enum { * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @tstamp: Time we arrived stored as offset to skb_tv_base + * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -1255,10 +1253,6 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * { stamp->tv_sec = skb->tstamp.off_sec; stamp->tv_usec = skb->tstamp.off_usec; - if (skb->tstamp.off_sec) { - stamp->tv_sec += skb_tv_base.tv_sec; - stamp->tv_usec += skb_tv_base.tv_usec; - } } /** @@ -1272,8 +1266,8 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * */ static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) { - skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; - skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; + skb->tstamp.off_sec = stamp->tv_sec; + skb->tstamp.off_usec = stamp->tv_usec; } extern void __net_timestamp(struct sk_buff *skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f80a28785610..0e9431b59fb2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -71,8 +71,6 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; -struct timeval __read_mostly skb_tv_base; - /* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always @@ -1708,8 +1706,6 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_fclone_cache) panic("cannot create skbuff cache"); - - do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); @@ -1743,4 +1739,3 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); -EXPORT_SYMBOL(skb_tv_base); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d54f14d926f6..36339eb39e17 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index e2c14f3cb2fc..2883ccd8a91d 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -225,8 +225,8 @@ static void ipt_ulog_packet(unsigned int hooknum, /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; - pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; + pm->timestamp_sec = skb->tstamp.off_sec; + pm->timestamp_usec = skb->tstamp.off_usec; pm->mark = skb->nfmark; pm->hook = hooknum; if (prefix != NULL) diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index aa11cf366efa..5027bbe6415e 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -238,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index ff5601ceedcb..efcd10f996ba 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -494,8 +494,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->tstamp.off_sec) { struct nfulnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec); + ts.sec = cpu_to_be64(skb->tstamp.off_sec); + ts.usec = cpu_to_be64(skb->tstamp.off_usec); NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f81fe8c52e99..eaa44c49567b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -492,8 +492,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entry->skb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); + ts.sec = cpu_to_be64(entry->skb->tstamp.off_sec); + ts.usec = cpu_to_be64(entry->skb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6a67a87384cc..499ae3df4a44 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -654,8 +654,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe __net_timestamp(skb); sock_enable_timestamp(sk); } - h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; - h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; + h->tp_sec = skb->tstamp.off_sec; + h->tp_usec = skb->tstamp.off_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); sll->sll_halen = 0; -- cgit v1.2.3 From 81c3d5470ecc70564eb9209946730fe2be93ad06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Oct 2005 14:13:38 -0700 Subject: [INET]: speedup inet (tcp/dccp) lookups Arnaldo and I agreed it could be applied now, because I have other pending patches depending on this one (Thank you Arnaldo) (The other important patch moves skc_refcnt in a separate cache line, so that the SMP/NUMA performance doesnt suffer from cache line ping pongs) 1) First some performance data : -------------------------------- tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established() The most time critical code is : sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } The sk_for_each() does use prefetch() hints but only the begining of "struct sock" is prefetched. As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far away from the begining of "struct sock", it has to bring into CPU cache cold cache line. Each iteration has to use at least 2 cache lines. This can be problematic if some chains are very long. 2) The goal ----------- The idea I had is to change things so that INET_MATCH() may return FALSE in 99% of cases only using the data already in the CPU cache, using one cache line per iteration. 3) Description of the patch --------------------------- Adds a new 'unsigned int skc_hash' field in 'struct sock_common', filling a 32 bits hole on 64 bits platform. struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; Store in this 32 bits field the full hash, not masked by (ehash_size - 1) Using this full hash as the first comparison done in INET_MATCH permits us immediatly skip the element without touching a second cache line in case of a miss. Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to sk_hash and tw_hash) already contains the slot number if we mask with (ehash_size - 1) File include/net/inet_hashtables.h 64 bits platforms : #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) ((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 32bits platforms: #define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) - Adds a prefetch(head->chain.first) in __inet_lookup_established()/__tcp_v4_check_established() and __inet6_lookup_established()/__tcp_v6_check_established() and __dccp_v4_check_established() to bring into cache the first element of the list, before the {read|write}_lock(&head->lock); Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +-- include/linux/tc_ematch/tc_em_meta.h | 2 +- include/net/inet6_hashtables.h | 21 ++++++------ include/net/inet_hashtables.h | 64 +++++++++++++++++++++--------------- include/net/inet_timewait_sock.h | 2 +- include/net/sock.h | 5 +-- net/atm/common.c | 2 +- net/dccp/ipv4.c | 12 +++---- net/ipv4/inet_timewait_sock.c | 6 ++-- net/ipv4/tcp_ipv4.c | 11 ++++--- net/ipv6/tcp_ipv6.c | 18 +++++----- net/sched/em_meta.c | 6 ++-- 12 files changed, 85 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bb6f88e14061..e0b922785d98 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -372,8 +372,9 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ +#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 081b1ee8516e..e21937cf91d0 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -71,7 +71,7 @@ enum TCF_META_ID_SK_SNDBUF, TCF_META_ID_SK_ALLOCS, TCF_META_ID_SK_ROUTE_CAPS, - TCF_META_ID_SK_HASHENT, + TCF_META_ID_SK_HASH, TCF_META_ID_SK_LINGERTIME, TCF_META_ID_SK_ACK_BACKLOG, TCF_META_ID_SK_MAX_ACK_BACKLOG, diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 03df3b157960..5a2beed5a770 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -26,19 +26,18 @@ struct inet_hashinfo; /* I have no idea if this is a good hash for v6 or not. -DaveM */ -static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, - const struct in6_addr *faddr, const u16 fport, - const int ehash_size) +static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport) { - int hashent = (lport ^ fport); + unsigned int hashent = (lport ^ fport); hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); hashent ^= hashent >> 16; hashent ^= hashent >> 8; - return (hashent & (ehash_size - 1)); + return hashent; } -static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) +static inline int inet6_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ipv6_pinfo *np = inet6_sk(sk); @@ -46,7 +45,7 @@ static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) const struct in6_addr *faddr = &np->daddr; const __u16 lport = inet->num; const __u16 fport = inet->dport; - return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); + return inet6_ehashfn(laddr, lport, faddr, fport); } /* @@ -69,14 +68,14 @@ static inline struct sock * /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, - hashinfo->ehash_size); - struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 646b6ea7fe26..35f49e65e295 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -108,7 +108,7 @@ struct inet_hashinfo { struct inet_bind_hashbucket *bhash; int bhash_size; - int ehash_size; + unsigned int ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. Hash function here @@ -130,17 +130,16 @@ struct inet_hashinfo { int port_rover; }; -static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, - const __u32 faddr, const __u16 fport, - const int ehash_size) +static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport) { - int h = (laddr ^ lport) ^ (faddr ^ fport); + unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); h ^= h >> 16; h ^= h >> 8; - return h & (ehash_size - 1); + return h; } -static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) +static inline int inet_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const __u32 laddr = inet->rcv_saddr; @@ -148,7 +147,14 @@ static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) const __u32 faddr = inet->daddr; const __u16 fport = inet->dport; - return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); + return inet_ehashfn(laddr, lport, faddr, fport); +} + +static inline struct inet_ehash_bucket *inet_ehash_bucket( + struct inet_hashinfo *hashinfo, + unsigned int hash) +{ + return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } extern struct inet_bind_bucket * @@ -235,9 +241,11 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, lock = &hashinfo->lhash_lock; inet_listen_wlock(hashinfo); } else { - sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); - list = &hashinfo->ehash[sk->sk_hashent].chain; - lock = &hashinfo->ehash[sk->sk_hashent].lock; + struct inet_ehash_bucket *head; + sk->sk_hash = inet_sk_ehashfn(sk); + head = inet_ehash_bucket(hashinfo, sk->sk_hash); + list = &head->chain; + lock = &head->lock; write_lock(lock); } __sk_add_node(sk, list); @@ -268,9 +276,8 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; } else { - struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent]; - lock = &head->lock; - write_lock_bh(&head->lock); + lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock; + write_lock_bh(lock); } if (__sk_del_node_init(sk)) @@ -337,23 +344,27 @@ sherry_cache: #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ +#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) -#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_sk(__sk)->daddr == (__saddr)) && \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ +#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) @@ -378,18 +389,19 @@ static inline struct sock * /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size); - struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { - if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) goto hit; } sk = NULL; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 3b070352e869..4ade56ef3a4d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -112,6 +112,7 @@ struct inet_timewait_sock { #define tw_node __tw_common.skc_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt +#define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot volatile unsigned char tw_substate; /* 3 bits hole, try to pack */ @@ -126,7 +127,6 @@ struct inet_timewait_sock { /* And these are ours. */ __u8 tw_ipv6only:1; /* 31 bits hole, try to pack */ - int tw_hashent; int tw_timeout; unsigned long tw_ttd; struct inet_bind_bucket *tw_tb; diff --git a/include/net/sock.h b/include/net/sock.h index 8c48fbecb7cf..b6440805c420 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -99,6 +99,7 @@ struct proto; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header @@ -112,6 +113,7 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; @@ -139,7 +141,6 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_lingertime: %SO_LINGER l_linger setting - * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used @@ -186,6 +187,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, @@ -208,7 +210,6 @@ struct sock { unsigned int sk_allocation; int sk_sndbuf; int sk_route_caps; - int sk_hashent; unsigned long sk_flags; unsigned long sk_lingertime; /* diff --git a/net/atm/common.c b/net/atm/common.c index 801a5813ec60..63feea49fb13 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -46,7 +46,7 @@ static void __vcc_insert_socket(struct sock *sk) struct atm_vcc *vcc = atm_sk(sk); struct hlist_head *head = &vcc_hash[vcc->vci & (VCC_HTABLE_SIZE - 1)]; - sk->sk_hashent = vcc->vci & (VCC_HTABLE_SIZE - 1); + sk->sk_hash = vcc->vci & (VCC_HTABLE_SIZE - 1); sk_add_node(sk, head); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 40fe6afacde6..ae088d1347af 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -62,27 +62,27 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, const int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, - dccp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); const struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } tw = NULL; /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -90,7 +90,7 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - sk->sk_hashent = hash; + sk->sk_hash = hash; BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4d1502a49852..f9076ef3a1a8 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,7 +20,7 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); write_lock(&ehead->lock); if (hlist_unhashed(&tw->tw_node)) { @@ -60,7 +60,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -106,7 +106,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_dport = inet->dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; - tw->tw_hashent = sk->sk_hashent; + tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_prot = sk->sk_prot_creator; atomic_set(&tw->tw_refcnt, 1); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13dfb391cdf1..c85819d8474b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -130,19 +130,20 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); @@ -179,7 +180,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -188,7 +189,7 @@ unique: * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - sk->sk_hashent = hash; + sk->sk_hash = hash; BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 80643e6b346b..d693cb988b78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -209,9 +209,11 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) lock = &tcp_hashinfo.lhash_lock; inet_listen_wlock(&tcp_hashinfo); } else { - sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size); - list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; - lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; + unsigned int hash; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); + hash &= (tcp_hashinfo.ehash_size - 1); + list = &tcp_hashinfo.ehash[hash].chain; + lock = &tcp_hashinfo.ehash[hash].lock; write_lock(lock); } @@ -322,13 +324,13 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport, - tcp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ @@ -365,14 +367,14 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) goto not_unique; } unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); - sk->sk_hashent = hash; + sk->sk_hash = hash; sock_prot_inc_use(sk->sk_prot); write_unlock(&head->lock); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 00eae5f9a01a..cf68a59fdc5a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -393,10 +393,10 @@ META_COLLECTOR(int_sk_route_caps) dst->value = skb->sk->sk_route_caps; } -META_COLLECTOR(int_sk_hashent) +META_COLLECTOR(int_sk_hash) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_hashent; + dst->value = skb->sk->sk_hash; } META_COLLECTOR(int_sk_lingertime) @@ -515,7 +515,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps), - [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent), + [META_ID(SK_HASH)] = META_FUNC(int_sk_hash), [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl), -- cgit v1.2.3 From e5ed639913eea3e4783a550291775ab78dd84966 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 14:35:55 -0700 Subject: [IPV4]: Replace __in_dev_get with __in_dev_get_rcu/rtnl The following patch renames __in_dev_get() to __in_dev_get_rtnl() and introduces __in_dev_get_rcu() to cover the second case. 1) RCU with refcnt should use in_dev_get(). 2) RCU without refcnt should use __in_dev_get_rcu(). 3) All others must hold RTNL and use __in_dev_get_rtnl(). There is one exception in net/ipv4/route.c which is in fact a pre-existing race condition. I've marked it as such so that we remember to fix it. This patch is based on suggestions and prior work by Suzanne Wood and Paul McKenney. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/wan/sdlamain.c | 23 ++++++++++++++--------- drivers/net/wan/syncppp.c | 2 +- drivers/net/wireless/strip.c | 4 ++-- drivers/parisc/led.c | 5 ++++- drivers/s390/net/qeth_main.c | 4 ++-- include/linux/inetdevice.h | 12 ++++++++++-- net/atm/clip.c | 2 +- net/core/netpoll.c | 2 +- net/core/pktgen.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 10 +++++----- net/ipv4/devinet.c | 22 +++++++++++----------- net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/igmp.c | 2 +- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ipmr.c | 6 +++--- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 2 +- net/ipv4/netfilter/ipt_REDIRECT.c | 2 +- net/ipv4/route.c | 6 ++++-- net/ipv6/addrconf.c | 2 +- net/irda/irlan/irlan_eth.c | 2 +- net/sctp/protocol.c | 2 +- 24 files changed, 73 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6d00c3de1a83..bf81cd45e4d4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2776,7 +2776,7 @@ static u32 bond_glean_dev_ip(struct net_device *dev) return 0; rcu_read_lock(); - idev = __in_dev_get(dev); + idev = __in_dev_get_rcu(dev); if (!idev) goto out; diff --git a/drivers/net/wan/sdlamain.c b/drivers/net/wan/sdlamain.c index 74e151acef3e..7a8b22a7ea31 100644 --- a/drivers/net/wan/sdlamain.c +++ b/drivers/net/wan/sdlamain.c @@ -57,6 +57,7 @@ #include /* request_region(), release_region() */ #include /* WAN router definitions */ #include /* WANPIPE common user API definitions */ +#include #include #include /* phys_to_virt() */ @@ -1268,37 +1269,41 @@ unsigned long get_ip_address(struct net_device *dev, int option) struct in_ifaddr *ifaddr; struct in_device *in_dev; + unsigned long addr = 0; - if ((in_dev = __in_dev_get(dev)) == NULL){ - return 0; + rcu_read_lock(); + if ((in_dev = __in_dev_get_rcu(dev)) == NULL){ + goto out; } if ((ifaddr = in_dev->ifa_list)== NULL ){ - return 0; + goto out; } switch (option){ case WAN_LOCAL_IP: - return ifaddr->ifa_local; + addr = ifaddr->ifa_local; break; case WAN_POINTOPOINT_IP: - return ifaddr->ifa_address; + addr = ifaddr->ifa_address; break; case WAN_NETMASK_IP: - return ifaddr->ifa_mask; + addr = ifaddr->ifa_mask; break; case WAN_BROADCAST_IP: - return ifaddr->ifa_broadcast; + addr = ifaddr->ifa_broadcast; break; default: - return 0; + break; } - return 0; +out: + rcu_read_unlock(); + return addr; } void add_gateway(sdla_t *card, struct net_device *dev) diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index b56a7b516d24..a6d3b55013a5 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -769,7 +769,7 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb) u32 addr = 0, mask = ~0; /* FIXME: is the mask correct? */ #ifdef CONFIG_INET rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) != NULL) + if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { for (ifa=in_dev->ifa_list; ifa != NULL; ifa=ifa->ifa_next) { diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 4b0acae22b0d..7bc7fc823128 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -1352,7 +1352,7 @@ static unsigned char *strip_make_packet(unsigned char *buffer, struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(strip_info->dev); + in_dev = __in_dev_get_rcu(strip_info->dev); if (in_dev == NULL) { rcu_read_unlock(); return NULL; @@ -1508,7 +1508,7 @@ static void strip_send(struct strip *strip_info, struct sk_buff *skb) brd = addr = 0; rcu_read_lock(); - in_dev = __in_dev_get(strip_info->dev); + in_dev = __in_dev_get_rcu(strip_info->dev); if (in_dev) { if (in_dev->ifa_list) { brd = in_dev->ifa_list->ifa_broadcast; diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index e90fb72a6962..286902298e33 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -358,9 +359,10 @@ static __inline__ int led_get_net_activity(void) /* we are running as tasklet, so locking dev_base * for reading should be OK */ read_lock(&dev_base_lock); + rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { struct net_device_stats *stats; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) continue; if (LOOPBACK(in_dev->ifa_list->ifa_local)) @@ -371,6 +373,7 @@ static __inline__ int led_get_net_activity(void) rx_total += stats->rx_packets; tx_total += stats->tx_packets; } + rcu_read_unlock(); read_unlock(&dev_base_lock); retval = 0; diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 86582cf1e19e..71de834ece1a 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -5200,7 +5200,7 @@ qeth_free_vlan_addresses4(struct qeth_card *card, unsigned short vid) if (!card->vlangrp) return; rcu_read_lock(); - in_dev = __in_dev_get(card->vlangrp->vlan_devices[vid]); + in_dev = __in_dev_get_rcu(card->vlangrp->vlan_devices[vid]); if (!in_dev) goto out; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { @@ -7725,7 +7725,7 @@ qeth_arp_constructor(struct neighbour *neigh) goto out; rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 7e1e15f934f3..fd7af86151b1 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -142,13 +142,21 @@ static __inline__ int bad_mask(u32 mask, u32 addr) #define endfor_ifa(in_dev) } +static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) +{ + struct in_device *in_dev = dev->ip_ptr; + if (in_dev) + in_dev = rcu_dereference(in_dev); + return in_dev; +} + static __inline__ struct in_device * in_dev_get(const struct net_device *dev) { struct in_device *in_dev; rcu_read_lock(); - in_dev = dev->ip_ptr; + in_dev = __in_dev_get_rcu(dev); if (in_dev) atomic_inc(&in_dev->refcnt); rcu_read_unlock(); @@ -156,7 +164,7 @@ in_dev_get(const struct net_device *dev) } static __inline__ struct in_device * -__in_dev_get(const struct net_device *dev) +__in_dev_get_rtnl(const struct net_device *dev) { return (struct in_device*)dev->ip_ptr; } diff --git a/net/atm/clip.c b/net/atm/clip.c index 28dab55a4387..4f54c9a5e84a 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -310,7 +310,7 @@ static int clip_constructor(struct neighbour *neigh) if (neigh->type != RTN_UNICAST) return -EINVAL; rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) { rcu_read_unlock(); return -EINVAL; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5265dfd69928..802fe11efad0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -703,7 +703,7 @@ int netpoll_setup(struct netpoll *np) if (!np->local_ip) { rcu_read_lock(); - in_dev = __in_dev_get(ndev); + in_dev = __in_dev_get_rcu(ndev); if (!in_dev || !in_dev->ifa_list) { rcu_read_unlock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b7f2d65a614f..44de070b6045 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1667,7 +1667,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(pkt_dev->odev); + in_dev = __in_dev_get_rcu(pkt_dev->odev); if (in_dev) { if (in_dev->ifa_list) { pkt_dev->saddr_min = in_dev->ifa_list->ifa_address; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 4a62093eb343..34fdac51df96 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -406,7 +406,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned long network = 0; rcu_read_lock(); - idev = __in_dev_get(dev); + idev = __in_dev_get_rcu(dev); if (idev) { if (idev->ifa_list) network = ntohl(idev->ifa_list->ifa_address) & diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ec0e36893b01..b425748f02d7 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -241,7 +241,7 @@ static int arp_constructor(struct neighbour *neigh) neigh->type = inet_addr_type(addr); rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; @@ -989,8 +989,8 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 1; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 1; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1; return 0; } return -ENXIO; @@ -1095,8 +1095,8 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 0; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 0; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0; return 0; } return -ENXIO; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ba2895ae8151..74f2207e131a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -351,7 +351,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -449,7 +449,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg goto out; rc = -ENOBUFS; - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { in_dev = inetdev_init(dev); if (!in_dev) goto out; @@ -584,7 +584,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) if (colon) *colon = ':'; - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ @@ -748,7 +748,7 @@ rarok: static int inet_gifconf(struct net_device *dev, char __user *buf, int len) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; @@ -791,7 +791,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; @@ -818,7 +818,7 @@ no_in_dev: read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; for_primary_ifa(in_dev) { @@ -887,7 +887,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop if (dev) { rcu_read_lock(); - if ((in_dev = __in_dev_get(dev))) + if ((in_dev = __in_dev_get_rcu(dev))) addr = confirm_addr_indev(in_dev, dst, local, scope); rcu_read_unlock(); @@ -897,7 +897,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev))) { + if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; @@ -957,7 +957,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1078,7 +1078,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) s_ip_idx = 0; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); continue; } @@ -1149,7 +1149,7 @@ void inet_forward_change(void) for (dev = dev_base; dev; dev = dev->next) { struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) in_dev->cnf.forwarding = on; rcu_read_unlock(); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e1379f71269..e61bc7177eb1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -173,7 +173,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, no_addr = rpf = 0; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); @@ -607,7 +607,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d41219e8037c..186f20c4a45e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1087,7 +1087,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, rta->rta_oif = &dev->ifindex; if (colon) { struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return -ENODEV; *colon = ':'; @@ -1268,7 +1268,7 @@ int fib_sync_up(struct net_device *dev) } if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) continue; - if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) + if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) continue; alive++; spin_lock_bh(&fib_multipath_lock); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 70c44e4c3ceb..8b6d3939e1e6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1323,7 +1323,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) } if (dev) { imr->imr_ifindex = dev->ifindex; - idev = __in_dev_get(dev); + idev = __in_dev_get_rtnl(dev); } return idev; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f0d5740d7e22..896ce3f8f53a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1104,10 +1104,10 @@ static int ipgre_open(struct net_device *dev) return -EADDRNOTAVAIL; dev = rt->u.dst.dev; ip_rt_put(rt); - if (__in_dev_get(dev) == NULL) + if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; - ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr); + ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); } return 0; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9dbf5909f3a6..302b7eb507c9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rtnl(dev); if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) goto failure; in_dev->cnf.rp_filter = 0; @@ -278,7 +278,7 @@ static int vif_delete(int vifi) dev_set_allmulti(dev, -1); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { in_dev->cnf.mc_forwarding--; ip_rt_multicast_event(in_dev); } @@ -421,7 +421,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return -EINVAL; } - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; in_dev->cnf.mc_forwarding++; dev_set_allmulti(dev, +1); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 577bac22dcc6..186646eb249f 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -58,7 +58,7 @@ static int help(struct sk_buff **pskb, goto out; rcu_read_lock(); - in_dev = __in_dev_get(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->u.dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 715cb613405c..5245bfd33d52 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -93,7 +93,7 @@ redirect_target(struct sk_buff **pskb, newdst = 0; rcu_read_lock(); - indev = __in_dev_get((*pskb)->dev); + indev = __in_dev_get_rcu((*pskb)->dev); if (indev && (ifa = indev->ifa_list)) newdst = ifa->ifa_local; rcu_read_unlock(); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8549f26e2495..381dd6a6aebb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2128,7 +2128,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, struct in_device *in_dev; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { int our = ip_check_mc(in_dev, daddr, saddr, skb->nh.iph->protocol); if (our @@ -2443,7 +2443,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) err = -ENODEV; if (dev_out == NULL) goto out; - if (__in_dev_get(dev_out) == NULL) { + + /* RACE: Check return value of inet_select_addr instead. */ + if (__in_dev_get_rtnl(dev_out) == NULL) { dev_put(dev_out); goto out; /* Wrong error code */ } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4e509e52fbc1..a970b4727ce8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1806,7 +1806,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } for (dev = dev_base; dev != NULL; dev = dev->next) { - struct in_device * in_dev = __in_dev_get(dev); + struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 071cd2cefd8a..953e255d2bc8 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -310,7 +310,7 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) #ifdef CONFIG_INET IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n"); rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) goto out; if (in_dev->ifa_list) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e7025be77691..f01d1c9002a1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -147,7 +147,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, struct sctp_sockaddr_entry *addr; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); return; } -- cgit v1.2.3 From 47a8659380d40d5c0786ddb62a89b3f7f1392430 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 4 Oct 2005 08:09:19 -0400 Subject: libata: bitmask based pci init functions for one or two ports This redoes the n_ports logic I proposed before as a bitmask. ata_pci_init_native_mode is now used with a mask allowing for mixed mode stuff later on. ata_pci_init_legacy_port is called with port number and does one port now not two. Instead it is called twice by the ata init logic which cleans both of them up. There are stil limits in the original code left over - IRQ/port mapping for legacy mode should be arch specific values - You can have one legacy mode IDE adapter per PCI root bridge on some systems - Doesn't handle mixed mode devices yet (but is now a lot closer to it) --- drivers/scsi/libata-core.c | 133 ++++++++++++++++++++++++++------------------- drivers/scsi/sata_nv.c | 2 +- drivers/scsi/sata_sis.c | 2 +- drivers/scsi/sata_uli.c | 2 +- drivers/scsi/sata_via.c | 2 +- include/linux/libata.h | 6 +- 6 files changed, 85 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 03d422e99e58..82ec7f30bf3f 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -4363,85 +4363,87 @@ void ata_pci_host_stop (struct ata_host_set *host_set) * ata_pci_init_native_mode - Initialize native-mode driver * @pdev: pci device to be initialized * @port: array[2] of pointers to port info structures. + * @ports: bitmap of ports present * * Utility function which allocates and initializes an * ata_probe_ent structure for a standard dual-port * PIO-based IDE controller. The returned ata_probe_ent * structure can be passed to ata_device_add(). The returned * ata_probe_ent structure should then be freed with kfree(). + * + * The caller need only pass the address of the primary port, the + * secondary will be deduced automatically. If the device has non + * standard secondary port mappings this function can be called twice, + * once for each interface. */ struct ata_probe_ent * -ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port) +ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int ports) { struct ata_probe_ent *probe_ent = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[0]); + int p = 0; + if (!probe_ent) return NULL; - probe_ent->n_ports = 2; probe_ent->irq = pdev->irq; probe_ent->irq_flags = SA_SHIRQ; - probe_ent->port[0].cmd_addr = pci_resource_start(pdev, 0); - probe_ent->port[0].altstatus_addr = - probe_ent->port[0].ctl_addr = - pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS; - probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4); - - probe_ent->port[1].cmd_addr = pci_resource_start(pdev, 2); - probe_ent->port[1].altstatus_addr = - probe_ent->port[1].ctl_addr = - pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS; - probe_ent->port[1].bmdma_addr = pci_resource_start(pdev, 4) + 8; + if (ports & ATA_PORT_PRIMARY) { + probe_ent->port[p].cmd_addr = pci_resource_start(pdev, 0); + probe_ent->port[p].altstatus_addr = + probe_ent->port[p].ctl_addr = + pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS; + probe_ent->port[p].bmdma_addr = pci_resource_start(pdev, 4); + ata_std_ports(&probe_ent->port[p]); + p++; + } - ata_std_ports(&probe_ent->port[0]); - ata_std_ports(&probe_ent->port[1]); + if (ports & ATA_PORT_SECONDARY) { + probe_ent->port[p].cmd_addr = pci_resource_start(pdev, 2); + probe_ent->port[p].altstatus_addr = + probe_ent->port[p].ctl_addr = + pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS; + probe_ent->port[p].bmdma_addr = pci_resource_start(pdev, 4) + 8; + ata_std_ports(&probe_ent->port[p]); + p++; + } + probe_ent->n_ports = p; return probe_ent; } -static struct ata_probe_ent * -ata_pci_init_legacy_mode(struct pci_dev *pdev, struct ata_port_info **port, - struct ata_probe_ent **ppe2) +static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, struct ata_port_info **port, int port_num) { - struct ata_probe_ent *probe_ent, *probe_ent2; + struct ata_probe_ent *probe_ent; probe_ent = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[0]); if (!probe_ent) return NULL; - probe_ent2 = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[1]); - if (!probe_ent2) { - kfree(probe_ent); - return NULL; - } - - probe_ent->n_ports = 1; - probe_ent->irq = 14; - probe_ent->hard_port_no = 0; + probe_ent->legacy_mode = 1; - - probe_ent2->n_ports = 1; - probe_ent2->irq = 15; - - probe_ent2->hard_port_no = 1; - probe_ent2->legacy_mode = 1; - - probe_ent->port[0].cmd_addr = 0x1f0; - probe_ent->port[0].altstatus_addr = - probe_ent->port[0].ctl_addr = 0x3f6; - probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4); - - probe_ent2->port[0].cmd_addr = 0x170; - probe_ent2->port[0].altstatus_addr = - probe_ent2->port[0].ctl_addr = 0x376; - probe_ent2->port[0].bmdma_addr = pci_resource_start(pdev, 4)+8; - + probe_ent->n_ports = 1; + probe_ent->hard_port_no = port_num; + + switch(port_num) + { + case 0: + probe_ent->irq = 14; + probe_ent->port[0].cmd_addr = 0x1f0; + probe_ent->port[0].altstatus_addr = + probe_ent->port[0].ctl_addr = 0x3f6; + break; + case 1: + probe_ent->irq = 15; + probe_ent->port[0].cmd_addr = 0x170; + probe_ent->port[0].altstatus_addr = + probe_ent->port[0].ctl_addr = 0x376; + break; + } + probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4) + 8 * port_num; ata_std_ports(&probe_ent->port[0]); - ata_std_ports(&probe_ent2->port[0]); - - *ppe2 = probe_ent2; return probe_ent; } @@ -4470,7 +4472,7 @@ ata_pci_init_legacy_mode(struct pci_dev *pdev, struct ata_port_info **port, int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, unsigned int n_ports) { - struct ata_probe_ent *probe_ent, *probe_ent2 = NULL; + struct ata_probe_ent *probe_ent = NULL, *probe_ent2 = NULL; struct ata_port_info *port[2]; u8 tmp8, mask; unsigned int legacy_mode = 0; @@ -4487,7 +4489,7 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, if ((port[0]->host_flags & ATA_FLAG_NO_LEGACY) == 0 && (pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) { - /* TODO: support transitioning to native mode? */ + /* TODO: What if one channel is in native mode ... */ pci_read_config_byte(pdev, PCI_CLASS_PROG, &tmp8); mask = (1 << 2) | (1 << 0); if ((tmp8 & mask) != mask) @@ -4495,11 +4497,20 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, } /* FIXME... */ - if ((!legacy_mode) && (n_ports > 1)) { - printk(KERN_ERR "ata: BUG: native mode, n_ports > 1\n"); - return -EINVAL; + if ((!legacy_mode) && (n_ports > 2)) { + printk(KERN_ERR "ata: BUG: native mode, n_ports > 2\n"); + n_ports = 2; + /* For now */ } + /* FIXME: Really for ATA it isn't safe because the device may be + multi-purpose and we want to leave it alone if it was already + enabled. Secondly for shared use as Arjan says we want refcounting + + Checking dev->is_enabled is insufficient as this is not set at + boot for the primary video which is BIOS enabled + */ + rc = pci_enable_device(pdev); if (rc) return rc; @@ -4510,6 +4521,7 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, goto err_out; } + /* FIXME: Should use platform specific mappers for legacy port ranges */ if (legacy_mode) { if (!request_region(0x1f0, 8, "libata")) { struct resource *conflict, res; @@ -4554,10 +4566,17 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, goto err_out_regions; if (legacy_mode) { - probe_ent = ata_pci_init_legacy_mode(pdev, port, &probe_ent2); - } else - probe_ent = ata_pci_init_native_mode(pdev, port); - if (!probe_ent) { + if (legacy_mode & (1 << 0)) + probe_ent = ata_pci_init_legacy_port(pdev, port, 0); + if (legacy_mode & (1 << 1)) + probe_ent2 = ata_pci_init_legacy_port(pdev, port, 1); + } else { + if (n_ports == 2) + probe_ent = ata_pci_init_native_mode(pdev, port, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + else + probe_ent = ata_pci_init_native_mode(pdev, port, ATA_PORT_PRIMARY); + } + if (!probe_ent && !probe_ent2) { rc = -ENOMEM; goto err_out_regions; } diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index c05653c7779d..749ff92d8c63 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -405,7 +405,7 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) rc = -ENOMEM; ppi = &nv_port_info; - probe_ent = ata_pci_init_native_mode(pdev, &ppi); + probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) goto err_out_regions; diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index b227e51d12f4..0761a3234fcf 100644 --- a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c @@ -263,7 +263,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_regions; ppi = &sis_port_info; - probe_ent = ata_pci_init_native_mode(pdev, &ppi); + probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) { rc = -ENOMEM; goto err_out_regions; diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index 4c9fb8b71be1..9c06f2abe7f7 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c @@ -202,7 +202,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_regions; ppi = &uli_port_info; - probe_ent = ata_pci_init_native_mode(pdev, &ppi); + probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) { rc = -ENOMEM; goto err_out_regions; diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c index 128b996b07b7..565872479b9a 100644 --- a/drivers/scsi/sata_via.c +++ b/drivers/scsi/sata_via.c @@ -212,7 +212,7 @@ static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev) struct ata_probe_ent *probe_ent; struct ata_port_info *ppi = &svia_port_info; - probe_ent = ata_pci_init_native_mode(pdev, &ppi); + probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) return NULL; diff --git a/include/linux/libata.h b/include/linux/libata.h index 4739a75b983d..4d45179872cc 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -155,6 +155,10 @@ enum { ATA_SHIFT_UDMA = 0, ATA_SHIFT_MWDMA = 8, ATA_SHIFT_PIO = 11, + + /* Masks for port functions */ + ATA_PORT_PRIMARY = (1 << 0), + ATA_PORT_SECONDARY = (1 << 1), }; enum hsm_task_states { @@ -458,7 +462,7 @@ struct pci_bits { extern void ata_pci_host_stop (struct ata_host_set *host_set); extern struct ata_probe_ent * -ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port); +ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int portmask); extern int pci_test_config_bits(struct pci_dev *pdev, struct pci_bits *bits); #endif /* CONFIG_PCI */ -- cgit v1.2.3 From ce0fe7e70a0ad11097a3773e9f3f0de3d859edf0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Oct 2005 17:43:06 +0100 Subject: [PATCH] bfs endianness annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/bfs/dir.c | 2 +- fs/bfs/inode.c | 2 +- include/linux/bfs_fs.h | 42 +++++++++++++++++++++--------------------- 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index e240c335eb23..5af928fa0449 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -108,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, inode->i_mapping->a_ops = &bfs_aops; inode->i_mode = mode; inode->i_ino = ino; - BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino); + BFS_I(inode)->i_dsk_ino = ino; BFS_I(inode)->i_sblock = 0; BFS_I(inode)->i_eblock = 0; insert_inode_hash(inode); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index c7b39aa279d7..868af0f224fb 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -357,7 +357,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) } info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ - info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS; + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start))>>BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; info->si_lf_sblk = 0; diff --git a/include/linux/bfs_fs.h b/include/linux/bfs_fs.h index c1237aa92e38..8ed6dfdcd783 100644 --- a/include/linux/bfs_fs.h +++ b/include/linux/bfs_fs.h @@ -20,19 +20,19 @@ /* BFS inode layout on disk */ struct bfs_inode { - __u16 i_ino; + __le16 i_ino; __u16 i_unused; - __u32 i_sblock; - __u32 i_eblock; - __u32 i_eoffset; - __u32 i_vtype; - __u32 i_mode; - __s32 i_uid; - __s32 i_gid; - __u32 i_nlink; - __u32 i_atime; - __u32 i_mtime; - __u32 i_ctime; + __le32 i_sblock; + __le32 i_eblock; + __le32 i_eoffset; + __le32 i_vtype; + __le32 i_mode; + __le32 i_uid; + __le32 i_gid; + __le32 i_nlink; + __le32 i_atime; + __le32 i_mtime; + __le32 i_ctime; __u32 i_padding[4]; }; @@ -41,17 +41,17 @@ struct bfs_inode { #define BFS_DIRS_PER_BLOCK 32 struct bfs_dirent { - __u16 ino; + __le16 ino; char name[BFS_NAMELEN]; }; /* BFS superblock layout on disk */ struct bfs_super_block { - __u32 s_magic; - __u32 s_start; - __u32 s_end; - __s32 s_from; - __s32 s_to; + __le32 s_magic; + __le32 s_start; + __le32 s_end; + __le32 s_from; + __le32 s_to; __s32 s_bfrom; __s32 s_bto; char s_fsname[6]; @@ -66,15 +66,15 @@ struct bfs_super_block { #define BFS_INO2OFF(ino) \ ((__u32)(((ino) - BFS_ROOT_INO) * sizeof(struct bfs_inode)) + BFS_BSIZE) #define BFS_NZFILESIZE(ip) \ - ((cpu_to_le32((ip)->i_eoffset) + 1) - cpu_to_le32((ip)->i_sblock) * BFS_BSIZE) + ((le32_to_cpu((ip)->i_eoffset) + 1) - le32_to_cpu((ip)->i_sblock) * BFS_BSIZE) #define BFS_FILESIZE(ip) \ ((ip)->i_sblock == 0 ? 0 : BFS_NZFILESIZE(ip)) #define BFS_FILEBLOCKS(ip) \ - ((ip)->i_sblock == 0 ? 0 : (cpu_to_le32((ip)->i_eblock) + 1) - cpu_to_le32((ip)->i_sblock)) + ((ip)->i_sblock == 0 ? 0 : (le32_to_cpu((ip)->i_eblock) + 1) - le32_to_cpu((ip)->i_sblock)) #define BFS_UNCLEAN(bfs_sb, sb) \ - ((cpu_to_le32(bfs_sb->s_from) != -1) && (cpu_to_le32(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) + ((le32_to_cpu(bfs_sb->s_from) != -1) && (le32_to_cpu(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) #endif /* _LINUX_BFS_FS_H */ -- cgit v1.2.3 From 7b5b3f3d826ea87c224c66de9c95c09e7f110ecd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:38:44 -0700 Subject: [ATM]: fix sparse gfp nocast warnings Fix implicit nocast warnings in atm code: net/atm/atm_misc.c:35:44: warning: implicit cast to nocast type drivers/atm/fore200e.c:183:33: warning: implicit cast to nocast type Also use kzalloc() instead of kmalloc(). Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/atm/fore200e.c | 10 ++++------ include/linux/atmdev.h | 2 +- net/atm/atm_misc.c | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 2bf723a7b6e6..6f1a83c9d9e0 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -178,14 +178,12 @@ fore200e_irq_itoa(int irq) static void* -fore200e_kmalloc(int size, int flags) +fore200e_kmalloc(int size, unsigned int __nocast flags) { - void* chunk = kmalloc(size, flags); + void *chunk = kzalloc(size, flags); - if (chunk) - memset(chunk, 0x00, size); - else - printk(FORE200E "kmalloc() failed, requested size = %d, flags = 0x%x\n", size, flags); + if (!chunk) + printk(FORE200E "kmalloc() failed, requested size = %d, flags = 0x%x\n", size, flags); return chunk; } diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9f374cfa1b05..f1fd849e5535 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -457,7 +457,7 @@ static inline void atm_dev_put(struct atm_dev *dev) int atm_charge(struct atm_vcc *vcc,int truesize); struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - int gfp_flags); + unsigned int __nocast gfp_flags); int atm_pcr_goal(struct atm_trafprm *tp); void vcc_release_async(struct atm_vcc *vcc, int reply); diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index b2113c3454ae..71abc99ec815 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -25,7 +25,7 @@ int atm_charge(struct atm_vcc *vcc,int truesize) struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - int gfp_flags) + unsigned int __nocast gfp_flags) { struct sock *sk = sk_atm(vcc); int guess = atm_guess_pdu2truesize(pdu_size); -- cgit v1.2.3 From 17b698856328a42d5874ac87640e2cd84a824eef Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:41:16 -0700 Subject: [CONNECTOR]: fix sparse gfp nocast warnings Fix implicit nocast warnings in connector code: drivers/connector/connector.c:102:24: warning: implicit cast to nocast type drivers/connector/connector.c:114:45: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/connector/connector.c | 3 ++- include/linux/connector.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index bb0b3a8de14b..1422285d537c 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -69,7 +69,8 @@ int cn_already_initialized = 0; * a new message. * */ -int cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask) +int cn_netlink_send(struct cn_msg *msg, u32 __group, + unsigned int __nocast gfp_mask) { struct cn_callback_entry *__cbq; unsigned int size; diff --git a/include/linux/connector.h b/include/linux/connector.h index 86d4b0a81713..96582c9911ac 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -149,7 +149,7 @@ struct cn_dev { int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, int); +int cn_netlink_send(struct cn_msg *, u32, unsigned int __nocast); int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); -- cgit v1.2.3 From 3d2aef668920e8d93b77f145f8f647f62abe75db Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:45:14 -0700 Subject: [TEXTSEARCH]: fix sparse gfp nocast warnings Fix nocast sparse warnings: include/linux/textsearch.h:165:57: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/textsearch.h | 3 ++- lib/ts_bm.c | 2 +- lib/ts_fsm.c | 2 +- lib/ts_kmp.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 941f45ac117a..1a4990e448e9 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -158,7 +158,8 @@ extern unsigned int textsearch_find_continuous(struct ts_config *, #define TS_PRIV_ALIGNTO 8 #define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1)) -static inline struct ts_config *alloc_ts_config(size_t payload, int gfp_mask) +static inline struct ts_config *alloc_ts_config(size_t payload, + unsigned int __nocast gfp_mask) { struct ts_config *conf; diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 2cc79112ecc3..1b61fceef777 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -127,7 +127,7 @@ static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, } static struct ts_config *bm_init(const void *pattern, unsigned int len, - int gfp_mask) + unsigned int __nocast gfp_mask) { struct ts_config *conf; struct ts_bm *bm; diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index d27c0a072940..ef9779e00506 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -258,7 +258,7 @@ found_match: } static struct ts_config *fsm_init(const void *pattern, unsigned int len, - int gfp_mask) + unsigned int __nocast gfp_mask) { int i, err = -EINVAL; struct ts_config *conf; diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index 73266b975585..e45f0f0c2379 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -87,7 +87,7 @@ static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, } static struct ts_config *kmp_init(const void *pattern, unsigned int len, - int gfp_mask) + unsigned int __nocast gfp_mask) { struct ts_config *conf; struct ts_kmp *kmp; -- cgit v1.2.3 From 67846b30171cc4d706125f630193a76a26bb334a Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 5 Oct 2005 02:58:32 -0400 Subject: libata: add ata_ratelimit(), use it in AHCI driver irq handler --- drivers/scsi/ahci.c | 31 +++++++++++++++++++++++++------ drivers/scsi/libata-core.c | 23 +++++++++++++++++++++++ include/linux/libata.h | 2 ++ 3 files changed, 50 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index c2c8fa828e24..5ec866b00479 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -672,17 +672,36 @@ static irqreturn_t ahci_interrupt (int irq, void *dev_instance, struct pt_regs * for (i = 0; i < host_set->n_ports; i++) { struct ata_port *ap; - u32 tmp; - VPRINTK("port %u\n", i); + if (!(irq_stat & (1 << i))) + continue; + ap = host_set->ports[i]; - tmp = irq_stat & (1 << i); - if (tmp && ap) { + if (ap) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); - if (ahci_host_intr(ap, qc)) - irq_ack |= (1 << i); + if (!ahci_host_intr(ap, qc)) + if (ata_ratelimit()) { + struct pci_dev *pdev = + to_pci_dev(ap->host_set->dev); + printk(KERN_WARNING + "ahci(%s): unhandled interrupt on port %u\n", + pci_name(pdev), i); + } + + VPRINTK("port %u\n", i); + } else { + VPRINTK("port %u (no irq)\n", i); + if (ata_ratelimit()) { + struct pci_dev *pdev = + to_pci_dev(ap->host_set->dev); + printk(KERN_WARNING + "ahci(%s): interrupt on disabled port %u\n", + pci_name(pdev), i); + } } + + irq_ack |= (1 << i); } if (irq_ack) { diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index f0894bfa908b..ceffaef37d17 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "scsi.h" #include "scsi_priv.h" @@ -4688,6 +4689,27 @@ static void __exit ata_exit(void) module_init(ata_init); module_exit(ata_exit); +static unsigned long ratelimit_time; +static spinlock_t ata_ratelimit_lock = SPIN_LOCK_UNLOCKED; + +int ata_ratelimit(void) +{ + int rc; + unsigned long flags; + + spin_lock_irqsave(&ata_ratelimit_lock, flags); + + if (time_after(jiffies, ratelimit_time)) { + rc = 1; + ratelimit_time = jiffies + (HZ/5); + } else + rc = 0; + + spin_unlock_irqrestore(&ata_ratelimit_lock, flags); + + return rc; +} + /* * libata is essentially a library of internal helper functions for * low-level ATA host controller drivers. As such, the API/ABI is @@ -4729,6 +4751,7 @@ EXPORT_SYMBOL_GPL(sata_phy_reset); EXPORT_SYMBOL_GPL(__sata_phy_reset); EXPORT_SYMBOL_GPL(ata_bus_reset); EXPORT_SYMBOL_GPL(ata_port_disable); +EXPORT_SYMBOL_GPL(ata_ratelimit); EXPORT_SYMBOL_GPL(ata_scsi_ioctl); EXPORT_SYMBOL_GPL(ata_scsi_queuecmd); EXPORT_SYMBOL_GPL(ata_scsi_error); diff --git a/include/linux/libata.h b/include/linux/libata.h index 4d45179872cc..7929cfc9318d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -410,6 +410,8 @@ extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn extern int ata_scsi_error(struct Scsi_Host *host); extern int ata_scsi_release(struct Scsi_Host *host); extern unsigned int ata_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc); +extern int ata_ratelimit(void); + /* * Default driver ops implementations */ -- cgit v1.2.3 From 0f21ba7cc3320d33459ecb3f538f1a42040c29cd Mon Sep 17 00:00:00 2001 From: Eric Kinzie Date: Thu, 6 Oct 2005 22:19:28 -0700 Subject: [ATM]: add support for LECS addresses learned from network From: Eric Kinzie Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- include/linux/atmdev.h | 10 ++++++++++ net/atm/addr.c | 51 +++++++++++++++++++++++++++++++++++++------------- net/atm/addr.h | 12 +++++++----- net/atm/resources.c | 20 +++++++++++++++----- 4 files changed, 70 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index f1fd849e5535..aca9b344bd35 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -76,6 +76,13 @@ struct atm_dev_stats { /* set interface ESI */ #define ATM_SETESIF _IOW('a',ATMIOC_ITF+13,struct atmif_sioc) /* force interface ESI */ +#define ATM_ADDLECSADDR _IOW('a', ATMIOC_ITF+14, struct atmif_sioc) + /* register a LECS address */ +#define ATM_DELLECSADDR _IOW('a', ATMIOC_ITF+15, struct atmif_sioc) + /* unregister a LECS address */ +#define ATM_GETLECSADDR _IOW('a', ATMIOC_ITF+16, struct atmif_sioc) + /* retrieve LECS address(es) */ + #define ATM_GETSTAT _IOW('a',ATMIOC_SARCOM+0,struct atmif_sioc) /* get AAL layer statistics */ #define ATM_GETSTATZ _IOW('a',ATMIOC_SARCOM+1,struct atmif_sioc) @@ -328,6 +335,8 @@ struct atm_dev_addr { struct list_head entry; /* next address */ }; +enum atm_addr_type_t { ATM_ADDR_LOCAL, ATM_ADDR_LECS }; + struct atm_dev { const struct atmdev_ops *ops; /* device operations; NULL if unused */ const struct atmphy_ops *phy; /* PHY operations, may be undefined */ @@ -338,6 +347,7 @@ struct atm_dev { void *phy_data; /* private PHY date */ unsigned long flags; /* device flags (ATM_DF_*) */ struct list_head local; /* local ATM addresses */ + struct list_head lecs; /* LECS ATM addresses learned via ILMI */ unsigned char esi[ESI_LEN]; /* ESI ("MAC" addr) */ struct atm_cirange ci_range; /* VPI/VCI range */ struct k_atm_dev_stats stats; /* statistics */ diff --git a/net/atm/addr.c b/net/atm/addr.c index a30d0bf48063..3060fd0ba4b9 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -44,31 +44,43 @@ static void notify_sigd(struct atm_dev *dev) sigd_enq(NULL, as_itf_notify, NULL, &pvc, NULL); } -void atm_reset_addr(struct atm_dev *dev) +void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this, *p; + struct list_head *head; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry_safe(this, p, &dev->local, entry) { + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry_safe(this, p, head, entry) { list_del(&this->entry); kfree(this); } spin_unlock_irqrestore(&dev->lock, flags); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); } -int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) +int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int error; error = check_addr(addr); if (error) return error; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) { + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) { if (identical(&this->addr, addr)) { spin_unlock_irqrestore(&dev->lock, flags); return -EEXIST; @@ -80,28 +92,36 @@ int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) return -ENOMEM; } this->addr = *addr; - list_add(&this->entry, &dev->local); + list_add(&this->entry, head); spin_unlock_irqrestore(&dev->lock, flags); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); return 0; } -int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) +int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int error; error = check_addr(addr); if (error) return error; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) { + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) { if (identical(&this->addr, addr)) { list_del(&this->entry); spin_unlock_irqrestore(&dev->lock, flags); kfree(this); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); return 0; } } @@ -110,22 +130,27 @@ int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) } int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user * buf, - size_t size) + size_t size, enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int total = 0, error; struct sockaddr_atmsvc *tmp_buf, *tmp_bufp; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) total += sizeof(struct sockaddr_atmsvc); tmp_buf = tmp_bufp = kmalloc(total, GFP_ATOMIC); if (!tmp_buf) { spin_unlock_irqrestore(&dev->lock, flags); return -ENOMEM; } - list_for_each_entry(this, &dev->local, entry) + list_for_each_entry(this, head, entry) memcpy(tmp_bufp++, &this->addr, sizeof(struct sockaddr_atmsvc)); spin_unlock_irqrestore(&dev->lock, flags); error = total > size ? -E2BIG : total; diff --git a/net/atm/addr.h b/net/atm/addr.h index 3099d21feeaa..f39433ad45da 100644 --- a/net/atm/addr.h +++ b/net/atm/addr.h @@ -9,10 +9,12 @@ #include #include - -void atm_reset_addr(struct atm_dev *dev); -int atm_add_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr); -int atm_del_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr); -int atm_get_addr(struct atm_dev *dev,struct sockaddr_atmsvc __user *buf,size_t size); +void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t type); +int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t type); +int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t type); +int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user *buf, + size_t size, enum atm_addr_type_t type); #endif diff --git a/net/atm/resources.c b/net/atm/resources.c index a57a9268bd24..415d2615d475 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -40,6 +40,7 @@ static struct atm_dev *__alloc_atm_dev(const char *type) dev->link_rate = ATM_OC3_PCR; spin_lock_init(&dev->lock); INIT_LIST_HEAD(&dev->local); + INIT_LIST_HEAD(&dev->lecs); return dev; } @@ -320,10 +321,12 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) error = -EPERM; goto done; } - atm_reset_addr(dev); + atm_reset_addr(dev, ATM_ADDR_LOCAL); break; case ATM_ADDADDR: case ATM_DELADDR: + case ATM_ADDLECSADDR: + case ATM_DELLECSADDR: if (!capable(CAP_NET_ADMIN)) { error = -EPERM; goto done; @@ -335,14 +338,21 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) error = -EFAULT; goto done; } - if (cmd == ATM_ADDADDR) - error = atm_add_addr(dev, &addr); + if (cmd == ATM_ADDADDR || cmd == ATM_ADDLECSADDR) + error = atm_add_addr(dev, &addr, + (cmd == ATM_ADDADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); else - error = atm_del_addr(dev, &addr); + error = atm_del_addr(dev, &addr, + (cmd == ATM_DELADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); goto done; } case ATM_GETADDR: - error = atm_get_addr(dev, buf, len); + case ATM_GETLECSADDR: + error = atm_get_addr(dev, buf, len, + (cmd == ATM_GETADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); if (error < 0) goto done; size = error; -- cgit v1.2.3 From 468ed2b0c85ec4310b429e60358213b6d077289e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Oct 2005 15:07:38 +0100 Subject: [PATCH] Keys: Split key permissions checking into a .c file The attached patch splits key permissions checking out of key-ui.h and moves it into a .c file. It's quite large and called quite a lot, and it's about to get bigger with the addition of LSM support for keys... key_any_permission() is also discarded as it's no longer used. Signed-Off-By: David Howells Signed-off-by: Linus Torvalds --- include/linux/key-ui.h | 91 +++------------------------------------------- security/keys/Makefile | 1 + security/keys/permission.c | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 86 deletions(-) create mode 100644 security/keys/permission.c (limited to 'include/linux') diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h index 918c34a8347e..7a2e332067c3 100644 --- a/include/linux/key-ui.h +++ b/include/linux/key-ui.h @@ -38,97 +38,16 @@ struct keyring_list { struct key *keys[0]; }; - /* * check to see whether permission is granted to use a key in the desired way */ -static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) -{ - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) - kperm = key->perm >> 24; - else if (key->uid == current->fsuid) - kperm = key->perm >> 16; - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && - in_group_p(key->gid) - ) - kperm = key->perm >> 8; - else - kperm = key->perm; - - kperm = kperm & perm & KEY_ALL; - - return kperm == perm; -} - -/* - * check to see whether permission is granted to use a key in at least one of - * the desired ways - */ -static inline int key_any_permission(const key_ref_t key_ref, key_perm_t perm) -{ - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) - kperm = key->perm >> 24; - else if (key->uid == current->fsuid) - kperm = key->perm >> 16; - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && - in_group_p(key->gid) - ) - kperm = key->perm >> 8; - else - kperm = key->perm; +extern int key_task_permission(const key_ref_t key_ref, + struct task_struct *context, + key_perm_t perm); - kperm = kperm & perm & KEY_ALL; - - return kperm != 0; -} - -static inline int key_task_groups_search(struct task_struct *tsk, gid_t gid) -{ - int ret; - - task_lock(tsk); - ret = groups_search(tsk->group_info, gid); - task_unlock(tsk); - return ret; -} - -static inline int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, - key_perm_t perm) +static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) { - kperm = key->perm >> 24; - } - else if (key->uid == context->fsuid) { - kperm = key->perm >> 16; - } - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && ( - key->gid == context->fsgid || - key_task_groups_search(context, key->gid) - ) - ) { - kperm = key->perm >> 8; - } - else { - kperm = key->perm; - } - - kperm = kperm & perm & KEY_ALL; - - return kperm == perm; - + return key_task_permission(key_ref, current, perm); } extern key_ref_t lookup_user_key(struct task_struct *context, diff --git a/security/keys/Makefile b/security/keys/Makefile index c392d750b208..5145adfb6a05 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -6,6 +6,7 @@ obj-y := \ key.o \ keyring.o \ keyctl.o \ + permission.o \ process_keys.o \ request_key.o \ request_key_auth.o \ diff --git a/security/keys/permission.c b/security/keys/permission.c new file mode 100644 index 000000000000..1c3651670ce9 --- /dev/null +++ b/security/keys/permission.c @@ -0,0 +1,70 @@ +/* permission.c: key permission determination + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include "internal.h" + +/*****************************************************************************/ +/* + * check to see whether permission is granted to use a key in the desired way, + * but permit the security modules to override + */ +int key_task_permission(const key_ref_t key_ref, + struct task_struct *context, + key_perm_t perm) +{ + struct key *key; + key_perm_t kperm; + int ret; + + key = key_ref_to_ptr(key_ref); + + /* use the top 8-bits of permissions for keys the caller possesses */ + if (is_key_possessed(key_ref)) { + kperm = key->perm >> 24; + goto use_these_perms; + } + + /* use the second 8-bits of permissions for keys the caller owns */ + if (key->uid == context->fsuid) { + kperm = key->perm >> 16; + goto use_these_perms; + } + + /* use the third 8-bits of permissions for keys the caller has a group + * membership in common with */ + if (key->gid != -1 && key->perm & KEY_GRP_ALL) { + if (key->gid == context->fsgid) { + kperm = key->perm >> 8; + goto use_these_perms; + } + + task_lock(context); + ret = groups_search(context->group_info, key->gid); + task_unlock(context); + + if (ret) { + kperm = key->perm >> 8; + goto use_these_perms; + } + } + + /* otherwise use the least-significant 8-bits */ + kperm = key->perm; + +use_these_perms: + kperm = kperm & perm & KEY_ALL; + + return kperm == perm; + +} /* end key_task_permission() */ + +EXPORT_SYMBOL(key_task_permission); -- cgit v1.2.3 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents what's going on far better. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/drivers/pci/dma.c | 2 +- arch/i386/kernel/pci-dma.c | 2 +- arch/ppc64/kernel/bpa_iommu.c | 2 +- arch/ppc64/kernel/dma.c | 2 +- arch/ppc64/kernel/iommu.c | 2 +- arch/ppc64/kernel/pci_direct_iommu.c | 2 +- arch/ppc64/kernel/pci_iommu.c | 2 +- arch/ppc64/kernel/vio.c | 2 +- drivers/atm/ambassador.c | 2 +- drivers/atm/firestream.c | 5 ++--- drivers/atm/fore200e.c | 2 +- drivers/base/dmapool.c | 5 ++--- drivers/block/pktcdvd.c | 4 ++-- drivers/bluetooth/bpa10x.c | 2 +- drivers/bluetooth/hci_usb.c | 2 +- drivers/connector/connector.c | 3 +-- drivers/ieee1394/raw1394.c | 2 +- drivers/infiniband/core/mad.c | 2 +- drivers/infiniband/core/sa_query.c | 6 +++--- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-io.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/multipath.c | 2 +- drivers/md/raid1.c | 4 ++-- drivers/md/raid10.c | 4 ++-- drivers/net/bonding/bond_main.c | 2 +- drivers/net/ns83820.c | 2 +- drivers/net/sungem.h | 2 +- drivers/s390/scsi/zfcp_aux.c | 2 +- fs/bio.c | 10 +++++----- fs/buffer.c | 2 +- fs/mpage.c | 2 +- fs/ntfs/malloc.h | 2 +- fs/posix_acl.c | 6 +++--- fs/xfs/linux-2.6/kmem.c | 10 +++++----- fs/xfs/linux-2.6/kmem.h | 13 ++++++------- include/asm-generic/dma-mapping.h | 4 ++-- include/asm-i386/dma-mapping.h | 2 +- include/asm-ppc/dma-mapping.h | 2 +- include/asm-ppc64/dma-mapping.h | 4 ++-- include/asm-ppc64/iommu.h | 2 +- include/linux/atmdev.h | 2 +- include/linux/bio.h | 6 +++--- include/linux/buffer_head.h | 2 +- include/linux/connector.h | 2 +- include/linux/cpuset.h | 5 ++--- include/linux/dmapool.h | 2 +- include/linux/gfp.h | 14 +++++++------- include/linux/jbd.h | 2 +- include/linux/kfifo.h | 4 ++-- include/linux/mempool.h | 9 ++++----- include/linux/netlink.h | 2 +- include/linux/pagemap.h | 2 +- include/linux/posix_acl.h | 6 +++--- include/linux/radix-tree.h | 2 +- include/linux/security.h | 6 ++---- include/linux/skbuff.h | 28 ++++++++++++++-------------- include/linux/slab.h | 19 +++++++++---------- include/linux/string.h | 2 +- include/linux/swap.h | 2 +- include/linux/textsearch.h | 2 +- include/linux/types.h | 4 ++++ include/linux/vmalloc.h | 4 ++-- include/net/bluetooth/bluetooth.h | 2 +- include/net/bluetooth/rfcomm.h | 2 +- include/net/dn_nsp.h | 8 ++++---- include/net/dn_route.h | 2 +- include/net/inet_connection_sock.h | 2 +- include/net/ip_vs.h | 2 +- include/net/llc_conn.h | 2 +- include/net/sctp/sctp.h | 2 +- include/net/sctp/sm.h | 10 +++++----- include/net/sctp/structs.h | 24 ++++++++++++------------ include/net/sctp/ulpevent.h | 16 ++++++++-------- include/net/sctp/ulpqueue.h | 11 ++++------- include/net/sock.h | 16 ++++++++-------- include/net/tcp.h | 3 +-- include/net/xfrm.h | 2 +- include/rdma/ib_mad.h | 2 +- include/rdma/ib_sa.h | 10 +++++----- include/rxrpc/call.h | 2 +- include/rxrpc/message.h | 2 +- include/sound/core.h | 8 ++++---- include/sound/driver.h | 2 +- kernel/audit.c | 2 +- kernel/cpuset.c | 2 +- kernel/kfifo.c | 4 ++-- kernel/signal.c | 2 +- lib/radix-tree.c | 2 +- lib/ts_bm.c | 2 +- lib/ts_fsm.c | 2 +- lib/ts_kmp.c | 2 +- mm/highmem.c | 2 +- mm/mempolicy.c | 8 ++++---- mm/mempool.c | 6 +++--- mm/nommu.c | 3 +-- mm/oom_kill.c | 2 +- mm/page_alloc.c | 12 ++++++------ mm/page_io.c | 2 +- mm/shmem.c | 3 +-- mm/slab.c | 34 ++++++++++++++++------------------ mm/swap_state.c | 2 +- mm/vmalloc.c | 4 ++-- net/atm/atm_misc.c | 2 +- net/bluetooth/l2cap.c | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/rfcomm/tty.c | 2 +- net/bluetooth/sco.c | 2 +- net/core/dev.c | 2 +- net/core/skbuff.c | 14 +++++++------- net/core/sock.c | 10 +++++----- net/dccp/ackvec.c | 2 +- net/dccp/ackvec.h | 4 ++-- net/dccp/ccids/lib/loss_interval.h | 2 +- net/dccp/ccids/lib/packet_history.h | 4 ++-- net/decnet/af_decnet.c | 6 ++---- net/decnet/dn_nsp_out.c | 20 +++++++++----------- net/ieee80211/ieee80211_tx.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ipvs/ip_vs_app.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/key/af_key.c | 6 +++--- net/llc/llc_conn.c | 3 +-- net/netfilter/nfnetlink.c | 3 +-- net/netlink/af_netlink.c | 4 ++-- net/rxrpc/call.c | 2 +- net/rxrpc/connection.c | 2 +- net/sctp/associola.c | 10 +++++----- net/sctp/bind_addr.c | 12 ++++++------ net/sctp/chunk.c | 2 +- net/sctp/endpointola.c | 5 ++--- net/sctp/protocol.c | 2 +- net/sctp/sm_make_chunk.c | 14 +++++++------- net/sctp/sm_sideeffect.c | 12 ++++++------ net/sctp/ssnmap.c | 2 +- net/sctp/transport.c | 4 ++-- net/sctp/ulpevent.c | 18 +++++++++--------- net/sctp/ulpqueue.c | 8 ++++---- net/sunrpc/sched.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- sound/core/memalloc.c | 2 +- sound/core/memory.c | 10 +++++----- sound/core/seq/instr/ainstr_iw.c | 2 +- sound/core/wrappers.c | 2 +- 145 files changed, 340 insertions(+), 360 deletions(-) (limited to 'include/linux') diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index 10329306d23c..426b09878a05 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -24,7 +24,7 @@ struct dma_coherent_mem { }; void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast gfp) + dma_addr_t *dma_handle, gfp_t gfp) { void *ret; struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 1e51427cc9eb..25fe66853934 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -23,7 +23,7 @@ struct dma_coherent_mem { }; void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast gfp) + dma_addr_t *dma_handle, gfp_t gfp) { void *ret; struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; diff --git a/arch/ppc64/kernel/bpa_iommu.c b/arch/ppc64/kernel/bpa_iommu.c index 507eb9d0223f..5f2460090e03 100644 --- a/arch/ppc64/kernel/bpa_iommu.c +++ b/arch/ppc64/kernel/bpa_iommu.c @@ -310,7 +310,7 @@ static void bpa_map_iommu(void) static void *bpa_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { void *ret; diff --git a/arch/ppc64/kernel/dma.c b/arch/ppc64/kernel/dma.c index 4da8e31b2b61..7c3419656ccc 100644 --- a/arch/ppc64/kernel/dma.c +++ b/arch/ppc64/kernel/dma.c @@ -53,7 +53,7 @@ int dma_set_mask(struct device *dev, u64 dma_mask) EXPORT_SYMBOL(dma_set_mask); void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); diff --git a/arch/ppc64/kernel/iommu.c b/arch/ppc64/kernel/iommu.c index 9032b6bfe036..4d9b4388918b 100644 --- a/arch/ppc64/kernel/iommu.c +++ b/arch/ppc64/kernel/iommu.c @@ -519,7 +519,7 @@ void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, * to the dma address (mapping) of the first page. */ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { void *ret = NULL; dma_addr_t mapping; diff --git a/arch/ppc64/kernel/pci_direct_iommu.c b/arch/ppc64/kernel/pci_direct_iommu.c index b8f7f58824f4..54055c81017a 100644 --- a/arch/ppc64/kernel/pci_direct_iommu.c +++ b/arch/ppc64/kernel/pci_direct_iommu.c @@ -31,7 +31,7 @@ #include "pci.h" static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { void *ret; diff --git a/arch/ppc64/kernel/pci_iommu.c b/arch/ppc64/kernel/pci_iommu.c index 14647e09c9cd..d9e33b7d4203 100644 --- a/arch/ppc64/kernel/pci_iommu.c +++ b/arch/ppc64/kernel/pci_iommu.c @@ -76,7 +76,7 @@ static inline struct iommu_table *devnode_table(struct device *dev) * to the dma address (mapping) of the first page. */ static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle, flag); diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index c90e1dd875ce..0e555b7a6587 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -218,7 +218,7 @@ static void vio_unmap_sg(struct device *dev, struct scatterlist *sglist, } static void *vio_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { return iommu_alloc_coherent(to_vio_dev(dev)->iommu_table, size, dma_handle, flag); diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index d74a7c5e75dd..4b6bf19c39c0 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -795,7 +795,7 @@ static void drain_rx_pools (amb_dev * dev) { } static inline void fill_rx_pool (amb_dev * dev, unsigned char pool, - unsigned int __nocast priority) + gfp_t priority) { rx_in rx; amb_rxq * rxq; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 58219744f5db..7f7ec288824d 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1374,8 +1374,7 @@ static void reset_chip (struct fs_dev *dev) } } -static void __devinit *aligned_kmalloc (int size, unsigned int __nocast flags, - int alignment) +static void __devinit *aligned_kmalloc (int size, gfp_t flags, int alignment) { void *t; @@ -1466,7 +1465,7 @@ static inline int nr_buffers_in_freepool (struct fs_dev *dev, struct freepool *f working again after that... -- REW */ static void top_off_fp (struct fs_dev *dev, struct freepool *fp, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct FS_BPENTRY *qe, *ne; struct sk_buff *skb; diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 6f1a83c9d9e0..14f6a6201da3 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -178,7 +178,7 @@ fore200e_irq_itoa(int irq) static void* -fore200e_kmalloc(int size, unsigned int __nocast flags) +fore200e_kmalloc(int size, gfp_t flags) { void *chunk = kzalloc(size, flags); diff --git a/drivers/base/dmapool.c b/drivers/base/dmapool.c index 60a7ef6a201b..e2f64f91ed05 100644 --- a/drivers/base/dmapool.c +++ b/drivers/base/dmapool.c @@ -156,7 +156,7 @@ dma_pool_create (const char *name, struct device *dev, static struct dma_page * -pool_alloc_page (struct dma_pool *pool, unsigned int __nocast mem_flags) +pool_alloc_page (struct dma_pool *pool, gfp_t mem_flags) { struct dma_page *page; int mapsize; @@ -262,8 +262,7 @@ dma_pool_destroy (struct dma_pool *pool) * If such a memory block can't be allocated, null is returned. */ void * -dma_pool_alloc (struct dma_pool *pool, unsigned int __nocast mem_flags, - dma_addr_t *handle) +dma_pool_alloc (struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { unsigned long flags; struct dma_page *page; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7e22a58926b8..a280e679b1ca 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -229,7 +229,7 @@ static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets) return 1; } -static void *pkt_rb_alloc(unsigned int __nocast gfp_mask, void *data) +static void *pkt_rb_alloc(gfp_t gfp_mask, void *data) { return kmalloc(sizeof(struct pkt_rb_node), gfp_mask); } @@ -2082,7 +2082,7 @@ static int pkt_close(struct inode *inode, struct file *file) } -static void *psd_pool_alloc(unsigned int __nocast gfp_mask, void *data) +static void *psd_pool_alloc(gfp_t gfp_mask, void *data) { return kmalloc(sizeof(struct packet_stacked_data), gfp_mask); } diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index a1bf8f066c88..4fa85234d8b5 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -308,7 +308,7 @@ unlock: } static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe, - size_t size, unsigned int __nocast flags, void *data) + size_t size, gfp_t flags, void *data) { struct urb *urb; struct usb_ctrlrequest *cr; diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c index 57c48bbf6fe6..6756cb20b753 100644 --- a/drivers/bluetooth/hci_usb.c +++ b/drivers/bluetooth/hci_usb.c @@ -132,7 +132,7 @@ static struct usb_device_id blacklist_ids[] = { { } /* Terminating entry */ }; -static struct _urb *_urb_alloc(int isoc, unsigned int __nocast gfp) +static struct _urb *_urb_alloc(int isoc, gfp_t gfp) { struct _urb *_urb = kmalloc(sizeof(struct _urb) + sizeof(struct usb_iso_packet_descriptor) * isoc, gfp); diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 1422285d537c..505677fb3157 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -69,8 +69,7 @@ int cn_already_initialized = 0; * a new message. * */ -int cn_netlink_send(struct cn_msg *msg, u32 __group, - unsigned int __nocast gfp_mask) +int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask) { struct cn_callback_entry *__cbq; unsigned int size; diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 5fe4f2ba0979..315f5ca8bedb 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -98,7 +98,7 @@ static struct hpsb_address_ops arm_ops = { static void queue_complete_cb(struct pending_request *req); -static struct pending_request *__alloc_pending_request(unsigned int __nocast flags) +static struct pending_request *__alloc_pending_request(gfp_t flags) { struct pending_request *req; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a4a4d9c1eef3..a14ca87fda18 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -783,7 +783,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, struct ib_ah *ah, int rmpp_active, int hdr_len, int data_len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_buf *send_buf; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 78de2dd1a4f2..262618210c1c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -574,7 +574,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), @@ -676,7 +676,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), @@ -759,7 +759,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b82bc3150476..b6148f6f7836 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -96,7 +96,7 @@ static kmem_cache_t *_crypt_io_pool; /* * Mempool alloc and free functions for the page */ -static void *mempool_alloc_page(unsigned int __nocast gfp_mask, void *data) +static void *mempool_alloc_page(gfp_t gfp_mask, void *data) { return alloc_page(gfp_mask); } diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 9de000131a8a..4809b209fbb1 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -32,7 +32,7 @@ struct io { static unsigned _num_ios; static mempool_t *_io_pool; -static void *alloc_io(unsigned int __nocast gfp_mask, void *pool_data) +static void *alloc_io(gfp_t gfp_mask, void *pool_data) { return kmalloc(sizeof(struct io), gfp_mask); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 863282513753..2375709a392c 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -122,7 +122,7 @@ static inline sector_t region_to_sector(struct region_hash *rh, region_t region) /* FIXME move this */ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); -static void *region_alloc(unsigned int __nocast gfp_mask, void *pool_data) +static void *region_alloc(gfp_t gfp_mask, void *pool_data) { return kmalloc(sizeof(struct region), gfp_mask); } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 286342375fb7..1151c3ed3006 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -38,7 +38,7 @@ static mdk_personality_t multipath_personality; -static void *mp_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void *mp_pool_alloc(gfp_t gfp_flags, void *data) { struct multipath_bh *mpb; mpb = kmalloc(sizeof(*mpb), gfp_flags); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a93ca478142a..0e1f148dd41d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -52,7 +52,7 @@ static mdk_personality_t raid1_personality; static void unplug_slaves(mddev_t *mddev); -static void * r1bio_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) { struct pool_info *pi = data; r1bio_t *r1_bio; @@ -79,7 +79,7 @@ static void r1bio_pool_free(void *r1_bio, void *data) #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) #define RESYNC_WINDOW (2048*1024) -static void * r1buf_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) { struct pool_info *pi = data; struct page *page; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5bd1e9ec899d..28dd028415e4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -47,7 +47,7 @@ static void unplug_slaves(mddev_t *mddev); -static void * r10bio_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) { conf_t *conf = data; r10bio_t *r10_bio; @@ -81,7 +81,7 @@ static void r10bio_pool_free(void *r10_bio, void *data) * one for write (we recover only one drive per r10buf) * */ -static void * r10buf_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) { conf_t *conf = data; struct page *page; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f0a5b772a386..f264ff162979 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1290,7 +1290,7 @@ static void bond_mc_list_destroy(struct bonding *bond) * Copy all the Multicast addresses from src to the bonding device dst */ static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, - unsigned int __nocast gfp_flag) + gfp_t gfp_flag) { struct dev_mc_list *dmi, *new_dmi; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 83334db2921c..e4811b42a6b7 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -584,7 +584,7 @@ static inline int ns83820_add_rx_skb(struct ns83820 *dev, struct sk_buff *skb) return 0; } -static inline int rx_refill(struct net_device *ndev, unsigned int __nocast gfp) +static inline int rx_refill(struct net_device *ndev, gfp_t gfp) { struct ns83820 *dev = PRIV(ndev); unsigned i; diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h index 16edbb1a4a7a..13006d759ad8 100644 --- a/drivers/net/sungem.h +++ b/drivers/net/sungem.h @@ -1036,7 +1036,7 @@ struct gem { #define ALIGNED_RX_SKB_ADDR(addr) \ ((((unsigned long)(addr) + (64UL - 1UL)) & ~(64UL - 1UL)) - (unsigned long)(addr)) static __inline__ struct sk_buff *gem_alloc_skb(int size, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct sk_buff *skb = alloc_skb(size + 64, gfp_flags); diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 0b5087f7cabc..cab098556b44 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -833,7 +833,7 @@ zfcp_unit_dequeue(struct zfcp_unit *unit) } static void * -zfcp_mempool_alloc(unsigned int __nocast gfp_mask, void *size) +zfcp_mempool_alloc(gfp_t gfp_mask, void *size) { return kmalloc((size_t) size, gfp_mask); } diff --git a/fs/bio.c b/fs/bio.c index 83a349574567..7d81a93afd48 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -75,7 +75,7 @@ struct bio_set { */ static struct bio_set *fs_bio_set; -static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) +static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) { struct bio_vec *bvl; struct biovec_slab *bp; @@ -155,7 +155,7 @@ inline void bio_init(struct bio *bio) * allocate bio and iovecs from the memory pools specified by the * bio_set structure. **/ -struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, struct bio_set *bs) +struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); @@ -181,7 +181,7 @@ out: return bio; } -struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) +struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) { struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); @@ -277,7 +277,7 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src) * * Like __bio_clone, only also allocates the returned bio */ -struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask) +struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) { struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); @@ -1078,7 +1078,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) return bp; } -static void *bio_pair_alloc(unsigned int __nocast gfp_flags, void *data) +static void *bio_pair_alloc(gfp_t gfp_flags, void *data) { return kmalloc(sizeof(struct bio_pair), gfp_flags); } diff --git a/fs/buffer.c b/fs/buffer.c index 6cbfceabd95d..1216c0d3c8ce 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3045,7 +3045,7 @@ static void recalc_bh_state(void) buffer_heads_over_limit = (tot > max_buffer_heads); } -struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags) +struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); if (ret) { diff --git a/fs/mpage.c b/fs/mpage.c index bb9aebe93862..c5adcdddf3cc 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -102,7 +102,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio) static struct bio * mpage_alloc(struct block_device *bdev, sector_t first_sector, int nr_vecs, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct bio *bio; diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 006946efca8c..590887b943f5 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -40,7 +40,7 @@ * Depending on @gfp_mask the allocation may be guaranteed to succeed. */ static inline void *__ntfs_malloc(unsigned long size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { if (likely(size <= PAGE_SIZE)) { BUG_ON(!size); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 296480e96dd5..6c8dcf7613fd 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL(posix_acl_permission); * Allocate a new ACL with the specified number of entries. */ struct posix_acl * -posix_acl_alloc(int count, unsigned int __nocast flags) +posix_acl_alloc(int count, gfp_t flags) { const size_t size = sizeof(struct posix_acl) + count * sizeof(struct posix_acl_entry); @@ -51,7 +51,7 @@ posix_acl_alloc(int count, unsigned int __nocast flags) * Clone an ACL. */ struct posix_acl * -posix_acl_clone(const struct posix_acl *acl, unsigned int __nocast flags) +posix_acl_clone(const struct posix_acl *acl, gfp_t flags) { struct posix_acl *clone = NULL; @@ -185,7 +185,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) * Create an ACL representing the file mode permission bits of an inode. */ struct posix_acl * -posix_acl_from_mode(mode_t mode, unsigned int __nocast flags) +posix_acl_from_mode(mode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 4b184559f231..d2653b589b1c 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -45,7 +45,7 @@ void * -kmem_alloc(size_t size, unsigned int __nocast flags) +kmem_alloc(size_t size, gfp_t flags) { int retries = 0; unsigned int lflags = kmem_flags_convert(flags); @@ -67,7 +67,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) } void * -kmem_zalloc(size_t size, unsigned int __nocast flags) +kmem_zalloc(size_t size, gfp_t flags) { void *ptr; @@ -90,7 +90,7 @@ kmem_free(void *ptr, size_t size) void * kmem_realloc(void *ptr, size_t newsize, size_t oldsize, - unsigned int __nocast flags) + gfp_t flags) { void *new; @@ -105,7 +105,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, } void * -kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags) { int retries = 0; unsigned int lflags = kmem_flags_convert(flags); @@ -124,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_zalloc(kmem_zone_t *zone, gfp_t flags) { void *ptr; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 109fcf27e256..ee7010f085bc 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -81,7 +81,7 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags) +static __inline unsigned int kmem_flags_convert(gfp_t flags) { unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ @@ -125,13 +125,12 @@ kmem_zone_destroy(kmem_zone_t *zone) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_zalloc(kmem_zone_t *, gfp_t); +extern void *kmem_zone_alloc(kmem_zone_t *, gfp_t); -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(void *, size_t, size_t, - unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void *kmem_alloc(size_t, gfp_t); +extern void *kmem_realloc(void *, size_t, size_t, gfp_t); +extern void *kmem_zalloc(size_t, gfp_t); extern void kmem_free(void *, size_t); typedef struct shrinker *kmem_shaker_t; diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 8cef663c5cd9..747d790295f3 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -35,7 +35,7 @@ dma_set_mask(struct device *dev, u64 dma_mask) static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned int __nocast flag) + gfp_t flag) { BUG_ON(dev->bus != &pci_bus_type); @@ -168,7 +168,7 @@ dma_set_mask(struct device *dev, u64 dma_mask) static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned int __nocast flag) + gfp_t flag) { BUG(); return NULL; diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h index 563964b2995b..e56c335f8ef9 100644 --- a/include/asm-i386/dma-mapping.h +++ b/include/asm-i386/dma-mapping.h @@ -11,7 +11,7 @@ #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag); + dma_addr_t *dma_handle, gfp_t flag); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); diff --git a/include/asm-ppc/dma-mapping.h b/include/asm-ppc/dma-mapping.h index 92b8ee78dcc2..061bfcac1bf1 100644 --- a/include/asm-ppc/dma-mapping.h +++ b/include/asm-ppc/dma-mapping.h @@ -61,7 +61,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t * dma_handle, - unsigned int __nocast gfp) + gfp_t gfp) { #ifdef CONFIG_NOT_COHERENT_CACHE return __dma_alloc_coherent(size, dma_handle, gfp); diff --git a/include/asm-ppc64/dma-mapping.h b/include/asm-ppc64/dma-mapping.h index 9ad8adee0067..fb68fa23bea8 100644 --- a/include/asm-ppc64/dma-mapping.h +++ b/include/asm-ppc64/dma-mapping.h @@ -19,7 +19,7 @@ extern int dma_supported(struct device *dev, u64 mask); extern int dma_set_mask(struct device *dev, u64 dma_mask); extern void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag); + dma_addr_t *dma_handle, gfp_t flag); extern void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle); extern dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, @@ -118,7 +118,7 @@ dma_cache_sync(void *vaddr, size_t size, */ struct dma_mapping_ops { void * (*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag); + dma_addr_t *dma_handle, gfp_t flag); void (*free_coherent)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); dma_addr_t (*map_single)(struct device *dev, void *ptr, diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h index 72dcf8116b04..c2f3b6e8a42f 100644 --- a/include/asm-ppc64/iommu.h +++ b/include/asm-ppc64/iommu.h @@ -122,7 +122,7 @@ extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems, enum dma_data_direction direction); extern void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag); + dma_addr_t *dma_handle, gfp_t flag); extern void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle); extern dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index aca9b344bd35..e7d0593bb576 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -467,7 +467,7 @@ static inline void atm_dev_put(struct atm_dev *dev) int atm_charge(struct atm_vcc *vcc,int truesize); struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - unsigned int __nocast gfp_flags); + gfp_t gfp_flags); int atm_pcr_goal(struct atm_trafprm *tp); void vcc_release_async(struct atm_vcc *vcc, int reply); diff --git a/include/linux/bio.h b/include/linux/bio.h index 6e1c79c8b6bf..3344b4e8e43a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -276,8 +276,8 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int, int); extern void bioset_free(struct bio_set *); -extern struct bio *bio_alloc(unsigned int __nocast, int); -extern struct bio *bio_alloc_bioset(unsigned int __nocast, int, struct bio_set *); +extern struct bio *bio_alloc(gfp_t, int); +extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); @@ -287,7 +287,7 @@ extern int bio_phys_segments(struct request_queue *, struct bio *); extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); -extern struct bio *bio_clone(struct bio *, unsigned int __nocast); +extern struct bio *bio_clone(struct bio *, gfp_t); extern void bio_init(struct bio *); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 90828493791f..6a1d154c0825 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -172,7 +172,7 @@ void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, int size); struct buffer_head *__bread(struct block_device *, sector_t block, int size); -struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags); +struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void FASTCALL(unlock_buffer(struct buffer_head *bh)); void FASTCALL(__lock_buffer(struct buffer_head *bh)); diff --git a/include/linux/connector.h b/include/linux/connector.h index 96582c9911ac..95952cc1f525 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -149,7 +149,7 @@ struct cn_dev { int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, unsigned int __nocast); +int cn_netlink_send(struct cn_msg *, u32, gfp_t); int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 24062a1dbf61..6e2deef96b34 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -23,7 +23,7 @@ void cpuset_init_current_mems_allowed(void); void cpuset_update_current_mems_allowed(void); void cpuset_restrict_to_mems_allowed(unsigned long *nodes); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); -extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); +extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ -49,8 +49,7 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) return 1; } -static inline int cpuset_zone_allowed(struct zone *z, - unsigned int __nocast gfp_mask) +static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return 1; } diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 4932ee5c77f0..76f12f46db7f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -19,7 +19,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev, void dma_pool_destroy(struct dma_pool *pool); -void *dma_pool_alloc(struct dma_pool *pool, unsigned int __nocast mem_flags, +void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle); void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4dc990f3b5cc..3010e172394d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,9 +85,9 @@ static inline void arch_free_page(struct page *page, int order) { } #endif extern struct page * -FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); +FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *)); -static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask, +static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) @@ -98,17 +98,17 @@ static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_m } #ifdef CONFIG_NUMA -extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order); +extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order); static inline struct page * -alloc_pages(unsigned int __nocast gfp_mask, unsigned int order) +alloc_pages(gfp_t gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) return NULL; return alloc_pages_current(gfp_mask, order); } -extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, +extern struct page *alloc_page_vma(gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr); #else #define alloc_pages(gfp_mask, order) \ @@ -117,8 +117,8 @@ extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask)); +extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order)); +extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask)); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask),0) diff --git a/include/linux/jbd.h b/include/linux/jbd.h index de097269bd7f..ff853b3173c6 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -935,7 +935,7 @@ void journal_put_journal_head(struct journal_head *jh); */ extern kmem_cache_t *jbd_handle_cache; -static inline handle_t *jbd_alloc_handle(unsigned int __nocast gfp_flags) +static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { return kmem_cache_alloc(jbd_handle_cache, gfp_flags); } diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c27cd428d269..48eccd865bd8 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -35,8 +35,8 @@ struct kfifo { }; extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - unsigned int __nocast gfp_mask, spinlock_t *lock); -extern struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, + gfp_t gfp_mask, spinlock_t *lock); +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock); extern void kfifo_free(struct kfifo *fifo); extern unsigned int __kfifo_put(struct kfifo *fifo, diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 796220ce47cc..f2427d7394b0 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -6,7 +6,7 @@ #include -typedef void * (mempool_alloc_t)(unsigned int __nocast gfp_mask, void *pool_data); +typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); typedef struct mempool_s { @@ -26,17 +26,16 @@ extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, int nid); -extern int mempool_resize(mempool_t *pool, int new_min_nr, - unsigned int __nocast gfp_mask); +extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); extern void mempool_destroy(mempool_t *pool); -extern void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask); +extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); extern void mempool_free(void *element, mempool_t *pool); /* * A mempool_alloc_t and mempool_free_t that get the memory from * a slab that is passed in through pool_data. */ -void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data); +void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); #endif /* _LINUX_MEMPOOL_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index bdebdc564506..ba25ca874c20 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,7 +131,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (* extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, - __u32 group, unsigned int __nocast allocation); + __u32 group, gfp_t allocation); extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9a25647a295..acbf31c154f8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -19,7 +19,7 @@ #define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ #define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ -static inline unsigned int __nocast mapping_gfp_mask(struct address_space * mapping) +static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->flags & __GFP_BITS_MASK; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 4caedddaa033..4bc241290c24 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -71,11 +71,11 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ -extern struct posix_acl *posix_acl_alloc(int, unsigned int __nocast); -extern struct posix_acl *posix_acl_clone(const struct posix_acl *, unsigned int __nocast); +extern struct posix_acl *posix_acl_alloc(int, gfp_t); +extern struct posix_acl *posix_acl_clone(const struct posix_acl *, gfp_t); extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); -extern struct posix_acl *posix_acl_from_mode(mode_t, unsigned int __nocast); +extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); extern int posix_acl_create_masq(struct posix_acl *, mode_t *); extern int posix_acl_chmod_masq(struct posix_acl *, mode_t); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 9c51917b1cce..045d4761febc 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -50,7 +50,7 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -int radix_tree_preload(unsigned int __nocast gfp_mask); +int radix_tree_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, int tag); diff --git a/include/linux/security.h b/include/linux/security.h index 0e43460d374e..627382e74057 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2634,8 +2634,7 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return security_ops->socket_getpeersec(sock, optval, optlen, len); } -static inline int security_sk_alloc(struct sock *sk, int family, - unsigned int __nocast priority) +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return security_ops->sk_alloc_security(sk, family, priority); } @@ -2752,8 +2751,7 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, - unsigned int __nocast priority) +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 466c879f82b8..8f5d9e7f8734 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -302,37 +302,37 @@ struct sk_buff { extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, - unsigned int __nocast priority, int fclone); + gfp_t priority, int fclone); static inline struct sk_buff *alloc_skb(unsigned int size, - unsigned int __nocast priority) + gfp_t priority) { return __alloc_skb(size, priority, 0); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, - unsigned int __nocast priority) + gfp_t priority) { return __alloc_skb(size, priority, 1); } extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - unsigned int __nocast priority); + gfp_t priority); extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff *skb_clone(struct sk_buff *skb, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *pskb_copy(struct sk_buff *skb, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, @@ -484,7 +484,7 @@ static inline int skb_shared(const struct sk_buff *skb) * NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, - unsigned int __nocast pri) + gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_shared(skb)) { @@ -516,7 +516,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, * %NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, - unsigned int __nocast pri) + gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_cloned(skb)) { @@ -1017,7 +1017,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) * %NULL is returned in there is no free memory. */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); if (likely(skb)) @@ -1130,8 +1130,8 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ -extern int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp); -static inline int skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp) +extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); +static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) { return __skb_linearize(skb, gfp); } diff --git a/include/linux/slab.h b/include/linux/slab.h index 1f356f3bbc64..5fc04a16ecb0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -61,11 +61,11 @@ extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned lo void (*)(void *, kmem_cache_t *, unsigned long)); extern int kmem_cache_destroy(kmem_cache_t *); extern int kmem_cache_shrink(kmem_cache_t *); -extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); +extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags); +extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -74,9 +74,9 @@ struct cache_sizes { kmem_cache_t *cs_dmacachep; }; extern struct cache_sizes malloc_sizes[]; -extern void *__kmalloc(size_t, unsigned int __nocast); +extern void *__kmalloc(size_t, gfp_t); -static inline void *kmalloc(size_t size, unsigned int __nocast flags) +static inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { int i = 0; @@ -99,7 +99,7 @@ found: return __kmalloc(size, flags); } -extern void *kzalloc(size_t, unsigned int __nocast); +extern void *kzalloc(size_t, gfp_t); /** * kcalloc - allocate memory for an array. The memory is set to zero. @@ -107,7 +107,7 @@ extern void *kzalloc(size_t, unsigned int __nocast); * @size: element size. * @flags: the type of memory to allocate. */ -static inline void *kcalloc(size_t n, size_t size, unsigned int __nocast flags) +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) { if (n != 0 && size > INT_MAX / n) return NULL; @@ -118,15 +118,14 @@ extern void kfree(const void *); extern unsigned int ksize(const void *); #ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node(kmem_cache_t *, - unsigned int __nocast flags, int node); -extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node); +extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); +extern void *kmalloc_node(size_t size, gfp_t flags, int node); #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) { return kmem_cache_alloc(cachep, flags); } -static inline void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return kmalloc(size, flags); } diff --git a/include/linux/string.h b/include/linux/string.h index dab2652acbd8..369be3264a55 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -88,7 +88,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif -extern char *kstrdup(const char *s, unsigned int __nocast gfp); +extern char *kstrdup(const char *s, gfp_t gfp); #ifdef __cplusplus } diff --git a/include/linux/swap.h b/include/linux/swap.h index 3c9ff0048153..a7bf1a3b1496 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -147,7 +147,7 @@ struct swap_list_t { #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) /* linux/mm/oom_kill.c */ -extern void out_of_memory(unsigned int __nocast gfp_mask, int order); +extern void out_of_memory(gfp_t gfp_mask, int order); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 1a4990e448e9..515046d1b2f4 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -159,7 +159,7 @@ extern unsigned int textsearch_find_continuous(struct ts_config *, #define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1)) static inline struct ts_config *alloc_ts_config(size_t payload, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ts_config *conf; diff --git a/include/linux/types.h b/include/linux/types.h index 2b678c22ca4a..0aee34f9da9f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -165,6 +165,10 @@ typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; #endif +#ifdef __KERNEL__ +typedef unsigned __nocast gfp_t; +#endif + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b244f69ef682..3701a0673d2c 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -34,8 +34,8 @@ struct vm_struct { extern void *vmalloc(unsigned long size); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); -extern void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot); -extern void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot); +extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); +extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); extern void vfree(void *addr); extern void *vmap(struct page **pages, unsigned int count, diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 6dfa4a61ffd0..210458624840 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -136,7 +136,7 @@ struct bt_skb_cb { }; #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) -static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how) +static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { struct sk_buff *skb; diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index ffea9d54071f..fbe557f7ea1d 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -230,7 +230,7 @@ int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, u8 xon_char, u8 xoff_char, u16 param_mask); /* ---- RFCOMM DLCs (channels) ---- */ -struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio); +struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio); void rfcomm_dlc_free(struct rfcomm_dlc *d); int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 8a0891e2e888..1ba03be0af3a 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -19,9 +19,9 @@ extern void dn_nsp_send_data_ack(struct sock *sk); extern void dn_nsp_send_oth_ack(struct sock *sk); extern void dn_nsp_delayed_ack(struct sock *sk); extern void dn_send_conn_ack(struct sock *sk); -extern void dn_send_conn_conf(struct sock *sk, unsigned int __nocast gfp); +extern void dn_send_conn_conf(struct sock *sk, gfp_t gfp); extern void dn_nsp_send_disc(struct sock *sk, unsigned char type, - unsigned short reason, unsigned int __nocast gfp); + unsigned short reason, gfp_t gfp); extern void dn_nsp_return_disc(struct sk_buff *skb, unsigned char type, unsigned short reason); extern void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval); @@ -29,14 +29,14 @@ extern void dn_nsp_send_conninit(struct sock *sk, unsigned char flags); extern void dn_nsp_output(struct sock *sk); extern int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum); -extern void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, unsigned int __nocast gfp, int oob); +extern void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, gfp_t gfp, int oob); extern unsigned long dn_nsp_persist(struct sock *sk); extern int dn_nsp_xmit_timeout(struct sock *sk); extern int dn_nsp_rx(struct sk_buff *); extern int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb); -extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, unsigned int __nocast pri); +extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); extern struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, long timeo, int *err); #define NSP_REASON_OK 0 /* No error */ diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 11fe973cf383..5122da3f2eb3 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -15,7 +15,7 @@ GNU General Public License for more details. *******************************************************************************/ -extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, unsigned int __nocast pri); +extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); extern int dn_route_output_sock(struct dst_entry **pprt, struct flowi *, struct sock *sk, int flags); extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); extern int dn_cache_getroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 651f824c1008..b0c99060b78d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -94,7 +94,7 @@ static inline void *inet_csk_ca(const struct sock *sk) extern struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const unsigned int __nocast priority); + const gfp_t priority); enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ecb2b061f597..3b5559a023a4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -832,7 +832,7 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff **pskb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff **pskb); -extern int ip_vs_skb_replace(struct sk_buff *skb, unsigned int __nocast pri, +extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, char *o_buf, int o_len, char *n_buf, int n_len); extern int ip_vs_app_init(void); extern void ip_vs_app_cleanup(void); diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 54852ff6033b..00730d21b522 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -93,7 +93,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) return skb->cb[sizeof(skb->cb) - 1]; } -extern struct sock *llc_sk_alloc(int family, unsigned int __nocast priority, +extern struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot); extern void llc_sk_free(struct sock *sk); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e1d5ec1c23c0..8f241216f46b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -125,7 +125,7 @@ */ extern struct sock *sctp_get_ctl_sock(void); extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, - sctp_scope_t, unsigned int __nocast gfp, + sctp_scope_t, gfp_t gfp, int flags); extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family); extern int sctp_register_pf(struct sctp_pf *, sa_family_t); diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 58462164d960..1eac3d0eb7a9 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -181,17 +181,17 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t, int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, - unsigned int __nocast gfp); + gfp_t gfp); __u32 sctp_generate_verification_tag(void); void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag); /* Prototypes for chunk-building functions. */ struct sctp_chunk *sctp_make_init(const struct sctp_association *, const struct sctp_bind_addr *, - unsigned int __nocast gfp, int vparam_len); + gfp_t gfp, int vparam_len); struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *, const struct sctp_chunk *, - const unsigned int __nocast gfp, + const gfp_t gfp, const int unkparam_len); struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *, const struct sctp_chunk *); @@ -265,7 +265,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, struct sctp_endpoint *, struct sctp_association *asoc, void *event_arg, - unsigned int __nocast gfp); + gfp_t gfp); /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); @@ -276,7 +276,7 @@ void sctp_ootb_pkt_free(struct sctp_packet *); struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *, const struct sctp_association *, struct sctp_chunk *, - unsigned int __nocast gfp, int *err, + gfp_t gfp, int *err, struct sctp_chunk **err_chk_p); int sctp_addip_addr_config(struct sctp_association *, sctp_param_t, struct sockaddr_storage*, int); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 994009bbe3b4..9c385b6417c7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -446,7 +446,7 @@ struct sctp_ssnmap { }; struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, - unsigned int __nocast gfp); + gfp_t gfp); void sctp_ssnmap_free(struct sctp_ssnmap *map); void sctp_ssnmap_clear(struct sctp_ssnmap *map); @@ -947,7 +947,7 @@ struct sctp_transport { }; struct sctp_transport *sctp_transport_new(const union sctp_addr *, - unsigned int __nocast); + gfp_t); void sctp_transport_set_owner(struct sctp_transport *, struct sctp_association *); void sctp_transport_route(struct sctp_transport *, union sctp_addr *, @@ -1095,10 +1095,10 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); void sctp_bind_addr_free(struct sctp_bind_addr *); int sctp_bind_addr_copy(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - unsigned int __nocast gfp); + gfp_t gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); @@ -1108,9 +1108,9 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, struct sctp_sock *opt); union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int *addrs_len, - unsigned int __nocast gfp); + gfp_t gfp); int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, - __u16 port, unsigned int __nocast gfp); + __u16 port, gfp_t gfp); sctp_scope_t sctp_scope(const union sctp_addr *); int sctp_in_scope(const union sctp_addr *addr, const sctp_scope_t scope); @@ -1239,7 +1239,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) } /* These are function signatures for manipulating endpoints. */ -struct sctp_endpoint *sctp_endpoint_new(struct sock *, unsigned int __nocast); +struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); void sctp_endpoint_hold(struct sctp_endpoint *); @@ -1260,7 +1260,7 @@ int sctp_verify_init(const struct sctp_association *asoc, sctp_cid_t, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, sctp_cid_t cid, const union sctp_addr *peer, - sctp_init_chunk_t *init, unsigned int __nocast gfp); + sctp_init_chunk_t *init, gfp_t gfp); __u32 sctp_generate_tag(const struct sctp_endpoint *); __u32 sctp_generate_tsn(const struct sctp_endpoint *); @@ -1723,7 +1723,7 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base) struct sctp_association * sctp_association_new(const struct sctp_endpoint *, const struct sock *, - sctp_scope_t scope, unsigned int __nocast gfp); + sctp_scope_t scope, gfp_t gfp); void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); @@ -1739,7 +1739,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, const union sctp_addr *laddr); struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *, const union sctp_addr *address, - const unsigned int __nocast gfp, + const gfp_t gfp, const int peer_state); void sctp_assoc_del_peer(struct sctp_association *asoc, const union sctp_addr *addr); @@ -1764,10 +1764,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned); void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *, - unsigned int __nocast); + gfp_t); int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *, struct sctp_cookie*, - unsigned int __nocast gfp); + gfp_t gfp); int sctp_cmp_addr_exact(const union sctp_addr *ss1, const union sctp_addr *ss2); diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 90fe4bf6754f..6c40cfc4832d 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -88,7 +88,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( __u16 error, __u16 outbound, __u16 inbound, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( const struct sctp_association *asoc, @@ -96,35 +96,35 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( int flags, int state, int error, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, struct sctp_chunk *chunk, __u16 flags, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_send_failed( const struct sctp_association *asoc, struct sctp_chunk *chunk, __u16 flags, __u32 error, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, __u16 flags, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, - __u32 indication, unsigned int __nocast gfp); + __u32 indication, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( - const struct sctp_association *asoc, unsigned int __nocast gfp); + const struct sctp_association *asoc, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, struct sctp_chunk *chunk, - unsigned int __nocast gfp); + gfp_t gfp); void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index 1a60c6d943c1..a43c8788b650 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -62,22 +62,19 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *, void sctp_ulpq_free(struct sctp_ulpq *); /* Add a new DATA chunk for processing. */ -int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, - unsigned int __nocast); +int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Add a new event for propagation to the ULP. */ int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev); /* Renege previously received chunks. */ -void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, - unsigned int __nocast); +void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Perform partial delivery. */ -void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, - unsigned int __nocast); +void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Abort the partial delivery. */ -void sctp_ulpq_abort_pd(struct sctp_ulpq *, unsigned int __nocast); +void sctp_ulpq_abort_pd(struct sctp_ulpq *, gfp_t); /* Clear the partial data delivery condition on this socket. */ int sctp_clear_pd(struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index b6440805c420..ecb75526cba0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -739,18 +739,18 @@ extern void FASTCALL(release_sock(struct sock *sk)); #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) extern struct sock *sk_alloc(int family, - unsigned int __nocast priority, + gfp_t priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); extern struct sock *sk_clone(const struct sock *sk, - const unsigned int __nocast priority); + const gfp_t priority); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority); + gfp_t priority); extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); @@ -766,7 +766,7 @@ extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, int noblock, int *errcode); extern void *sock_kmalloc(struct sock *sk, int size, - unsigned int __nocast priority); + gfp_t priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); extern void sk_send_sigurg(struct sock *sk); @@ -1201,7 +1201,7 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, int size, int mem, - unsigned int __nocast gfp) + gfp_t gfp) { struct sk_buff *skb; int hdr_len; @@ -1224,7 +1224,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, - unsigned int __nocast gfp) + gfp_t gfp) { return sk_stream_alloc_pskb(sk, size, 0, gfp); } @@ -1255,7 +1255,7 @@ static inline int sock_writeable(const struct sock *sk) return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2); } -static inline unsigned int __nocast gfp_any(void) +static inline gfp_t gfp_any(void) { return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 97af77c4d096..c24339c4e310 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -460,8 +460,7 @@ extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); -extern void tcp_send_active_reset(struct sock *sk, - unsigned int __nocast priority); +extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); extern int tcp_send_synack(struct sock *); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b6e72f890c6c..5beae1ccd574 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -875,7 +875,7 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig } #endif -struct xfrm_policy *xfrm_policy_alloc(unsigned int __nocast gfp); +struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 0e293fe733b0..4172e6841e3d 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -596,7 +596,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, struct ib_ah *ah, int rmpp_active, int hdr_len, int data_len, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); /** * ib_free_send_mad - Returns data buffers used to send a MAD. diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index a7555c800ecf..f404fe21cc21 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -285,7 +285,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query); int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), @@ -296,7 +296,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), @@ -307,7 +307,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), @@ -342,7 +342,7 @@ static inline int ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), @@ -384,7 +384,7 @@ static inline int ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), diff --git a/include/rxrpc/call.h b/include/rxrpc/call.h index 8118731e7d96..b86f83743510 100644 --- a/include/rxrpc/call.h +++ b/include/rxrpc/call.h @@ -203,7 +203,7 @@ extern int rxrpc_call_write_data(struct rxrpc_call *call, size_t sioc, struct kvec *siov, uint8_t rxhdr_flags, - unsigned int __nocast alloc_flags, + gfp_t alloc_flags, int dup_data, size_t *size_sent); diff --git a/include/rxrpc/message.h b/include/rxrpc/message.h index 983d9f9eee1a..b318f273d4f2 100644 --- a/include/rxrpc/message.h +++ b/include/rxrpc/message.h @@ -63,7 +63,7 @@ extern int rxrpc_conn_newmsg(struct rxrpc_connection *conn, uint8_t type, int count, struct kvec *diov, - unsigned int __nocast alloc_flags, + gfp_t alloc_flags, struct rxrpc_message **_msg); extern int rxrpc_conn_sendmsg(struct rxrpc_connection *conn, struct rxrpc_message *msg); diff --git a/include/sound/core.h b/include/sound/core.h index 26160adcdffc..6d971a4c4ca0 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -290,13 +290,13 @@ void snd_memory_init(void); void snd_memory_done(void); int snd_memory_info_init(void); int snd_memory_info_done(void); -void *snd_hidden_kmalloc(size_t size, unsigned int __nocast flags); -void *snd_hidden_kzalloc(size_t size, unsigned int __nocast flags); -void *snd_hidden_kcalloc(size_t n, size_t size, unsigned int __nocast flags); +void *snd_hidden_kmalloc(size_t size, gfp_t flags); +void *snd_hidden_kzalloc(size_t size, gfp_t flags); +void *snd_hidden_kcalloc(size_t n, size_t size, gfp_t flags); void snd_hidden_kfree(const void *obj); void *snd_hidden_vmalloc(unsigned long size); void snd_hidden_vfree(void *obj); -char *snd_hidden_kstrdup(const char *s, unsigned int __nocast flags); +char *snd_hidden_kstrdup(const char *s, gfp_t flags); #define kmalloc(size, flags) snd_hidden_kmalloc(size, flags) #define kzalloc(size, flags) snd_hidden_kzalloc(size, flags) #define kcalloc(n, size, flags) snd_hidden_kcalloc(n, size, flags) diff --git a/include/sound/driver.h b/include/sound/driver.h index 0d12456ec3ae..1ec2fae050a6 100644 --- a/include/sound/driver.h +++ b/include/sound/driver.h @@ -51,7 +51,7 @@ #ifdef CONFIG_SND_DEBUG_MEMORY #include #include -void *snd_wrapper_kmalloc(size_t, unsigned int __nocast); +void *snd_wrapper_kmalloc(size_t, gfp_t); #undef kmalloc void snd_wrapper_kfree(const void *); #undef kfree diff --git a/kernel/audit.c b/kernel/audit.c index 83096b67510a..aefa73a8a586 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -560,7 +560,7 @@ static void audit_buffer_free(struct audit_buffer *ab) } static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx, - unsigned int __nocast gfp_mask, int type) + gfp_t gfp_mask, int type) { unsigned long flags; struct audit_buffer *ab = NULL; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 45a5719a0104..28176d083f7b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1670,7 +1670,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * GFP_USER - only nodes in current tasks mems allowed ok. **/ -int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) +int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 179baafcdd96..64ab045c3d9d 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -36,7 +36,7 @@ * struct kfifo with kfree(). */ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - unsigned int __nocast gfp_mask, spinlock_t *lock) + gfp_t gfp_mask, spinlock_t *lock) { struct kfifo *fifo; @@ -64,7 +64,7 @@ EXPORT_SYMBOL(kfifo_init); * * The size will be rounded-up to a power of 2. */ -struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, spinlock_t *lock) +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) { unsigned char *buffer; struct kfifo *ret; diff --git a/kernel/signal.c b/kernel/signal.c index c135f5aa2c2d..cba193ceda0d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -262,7 +262,7 @@ next_signal(struct sigpending *pending, sigset_t *mask) return sig; } -static struct sigqueue *__sigqueue_alloc(struct task_struct *t, unsigned int __nocast flags, +static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) { struct sigqueue *q = NULL; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6a8bc6e06431..d1c057e71b68 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -110,7 +110,7 @@ radix_tree_node_free(struct radix_tree_node *node) * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. */ -int radix_tree_preload(unsigned int __nocast gfp_mask) +int radix_tree_preload(gfp_t gfp_mask) { struct radix_tree_preload *rtp; struct radix_tree_node *node; diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 1b61fceef777..8a8b3a16133e 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -127,7 +127,7 @@ static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, } static struct ts_config *bm_init(const void *pattern, unsigned int len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ts_config *conf; struct ts_bm *bm; diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index ef9779e00506..ca3211206eef 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -258,7 +258,7 @@ found_match: } static struct ts_config *fsm_init(const void *pattern, unsigned int len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { int i, err = -EINVAL; struct ts_config *conf; diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index e45f0f0c2379..7fd45451b44a 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -87,7 +87,7 @@ static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, } static struct ts_config *kmp_init(const void *pattern, unsigned int len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ts_config *conf; struct ts_kmp *kmp; diff --git a/mm/highmem.c b/mm/highmem.c index 400911599468..90e1861e2da0 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -30,7 +30,7 @@ static mempool_t *page_pool, *isa_page_pool; -static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data) +static void *page_pool_alloc(gfp_t gfp_mask, void *data) { unsigned int gfp = gfp_mask | (unsigned int) (long) data; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9033f0859aa8..37af443eb094 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -687,7 +687,7 @@ get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned lo } /* Return a zonelist representing a mempolicy */ -static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempolicy *policy) +static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy) { int nd; @@ -751,7 +751,7 @@ static unsigned offset_il_node(struct mempolicy *pol, /* Allocate a page in interleaved policy. Own path because it needs to do special accounting. */ -static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned order, unsigned nid) +static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, unsigned nid) { struct zonelist *zl; struct page *page; @@ -789,7 +789,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or * Should be called with the mm_sem of the vma hold. */ struct page * -alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr) +alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = get_vma_policy(current, vma, addr); @@ -832,7 +832,7 @@ alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned l * 1) it's ok to take cpuset_sem (can WAIT), and * 2) allocating for current task (not interrupt). */ -struct page *alloc_pages_current(unsigned int __nocast gfp, unsigned order) +struct page *alloc_pages_current(gfp_t gfp, unsigned order) { struct mempolicy *pol = current->mempolicy; diff --git a/mm/mempool.c b/mm/mempool.c index 65f2957b8d51..9e377ea700b2 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -112,7 +112,7 @@ EXPORT_SYMBOL(mempool_create_node); * while this function is running. mempool_alloc() & mempool_free() * might be called (eg. from IRQ contexts) while this function executes. */ -int mempool_resize(mempool_t *pool, int new_min_nr, unsigned int __nocast gfp_mask) +int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) { void *element; void **new_elements; @@ -200,7 +200,7 @@ EXPORT_SYMBOL(mempool_destroy); * *never* fails when called from process contexts. (it might * fail if called from an IRQ context.) */ -void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask) +void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask) { void *element; unsigned long flags; @@ -276,7 +276,7 @@ EXPORT_SYMBOL(mempool_free); /* * A commonly used alloc and free fn. */ -void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data) +void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { kmem_cache_t *mem = (kmem_cache_t *) pool_data; return kmem_cache_alloc(mem, gfp_mask); diff --git a/mm/nommu.c b/mm/nommu.c index 064d70442895..0ef241ae3763 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -157,8 +157,7 @@ void vfree(void *addr) kfree(addr); } -void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, - pgprot_t prot) +void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { /* * kmalloc doesn't like __GFP_HIGHMEM for some reason diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ac3bf33e5370..d348b9035955 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -263,7 +263,7 @@ static struct mm_struct *oom_kill_process(struct task_struct *p) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -void out_of_memory(unsigned int __nocast gfp_mask, int order) +void out_of_memory(gfp_t gfp_mask, int order) { struct mm_struct *mm = NULL; task_t * p; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ae2903339e71..cc1fe2672a31 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -671,7 +671,7 @@ void fastcall free_cold_page(struct page *page) free_hot_cold_page(page, 1); } -static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags) +static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; @@ -686,7 +686,7 @@ static inline void prep_zero_page(struct page *page, int order, unsigned int __n * or two. */ static struct page * -buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags) +buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) { unsigned long flags; struct page *page = NULL; @@ -761,7 +761,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, } static inline int -should_reclaim_zone(struct zone *z, unsigned int gfp_mask) +should_reclaim_zone(struct zone *z, gfp_t gfp_mask) { if (!z->reclaim_pages) return 0; @@ -774,7 +774,7 @@ should_reclaim_zone(struct zone *z, unsigned int gfp_mask) * This is the 'heart' of the zoned buddy allocator. */ struct page * fastcall -__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order, +__alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist) { const int wait = gfp_mask & __GFP_WAIT; @@ -977,7 +977,7 @@ EXPORT_SYMBOL(__alloc_pages); /* * Common helper functions. */ -fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order) +fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) { struct page * page; page = alloc_pages(gfp_mask, order); @@ -988,7 +988,7 @@ fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned EXPORT_SYMBOL(__get_free_pages); -fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask) +fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) { struct page * page; diff --git a/mm/page_io.c b/mm/page_io.c index 2e605a19ce57..330e00d6db00 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -19,7 +19,7 @@ #include #include -static struct bio *get_swap_bio(unsigned int __nocast gfp_flags, pgoff_t index, +static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index, struct page *page, bio_end_io_t end_io) { struct bio *bio; diff --git a/mm/shmem.c b/mm/shmem.c index 1f7aeb210c7b..ea064d89cda9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -921,8 +921,7 @@ shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) } static inline struct page * -shmem_alloc_page(unsigned int __nocast gfp,struct shmem_inode_info *info, - unsigned long idx) +shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) { return alloc_page(gfp | __GFP_ZERO); } diff --git a/mm/slab.c b/mm/slab.c index 5cbbdfa6dd0e..d05c678bceb3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -650,8 +650,7 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep) return cachep->array[smp_processor_id()]; } -static inline kmem_cache_t *__find_general_cachep(size_t size, - unsigned int __nocast gfpflags) +static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags) { struct cache_sizes *csizep = malloc_sizes; @@ -675,8 +674,7 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, return csizep->cs_cachep; } -kmem_cache_t *kmem_find_general_cachep(size_t size, - unsigned int __nocast gfpflags) +kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags) { return __find_general_cachep(size, gfpflags); } @@ -1185,7 +1183,7 @@ __initcall(cpucache_init); * did not request dmaable memory, we might get it, but that * would be relatively rare and ignorable. */ -static void *kmem_getpages(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid) { struct page *page; void *addr; @@ -2048,7 +2046,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); /* Get the memory for a slab management obj. */ static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp, - int colour_off, unsigned int __nocast local_flags) + int colour_off, gfp_t local_flags) { struct slab *slabp; @@ -2149,7 +2147,7 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) * Grow (by 1) the number of slabs within a cache. This is called by * kmem_cache_alloc() when there are no active objs left in a cache. */ -static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) { struct slab *slabp; void *objp; @@ -2356,7 +2354,7 @@ bad: #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(kmem_cache_t *cachep, unsigned int __nocast flags) +static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) { int batchcount; struct kmem_list3 *l3; @@ -2456,7 +2454,7 @@ alloc_done: } static inline void -cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags) +cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags) { might_sleep_if(flags & __GFP_WAIT); #if DEBUG @@ -2467,7 +2465,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags) #if DEBUG static void * cache_alloc_debugcheck_after(kmem_cache_t *cachep, - unsigned int __nocast flags, void *objp, void *caller) + gfp_t flags, void *objp, void *caller) { if (!objp) return objp; @@ -2510,7 +2508,7 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) #endif -static inline void *____cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags) +static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) { void* objp; struct array_cache *ac; @@ -2528,7 +2526,7 @@ static inline void *____cache_alloc(kmem_cache_t *cachep, unsigned int __nocast return objp; } -static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags) +static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) { unsigned long save_flags; void* objp; @@ -2787,7 +2785,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) * Allocate an object from this cache. The flags are only relevant * if the cache has no available objects. */ -void *kmem_cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags) +void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags) { return __cache_alloc(cachep, flags); } @@ -2848,7 +2846,7 @@ out: * New and improved: it will now make sure that the object gets * put on the correct node list so that there is no false sharing. */ -void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) { unsigned long save_flags; void *ptr; @@ -2875,7 +2873,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i } EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) +void *kmalloc_node(size_t size, gfp_t flags, int node) { kmem_cache_t *cachep; @@ -2908,7 +2906,7 @@ EXPORT_SYMBOL(kmalloc_node); * platforms. For example, on i386, it means that the memory must come * from the first 16MB. */ -void *__kmalloc(size_t size, unsigned int __nocast flags) +void *__kmalloc(size_t size, gfp_t flags) { kmem_cache_t *cachep; @@ -2997,7 +2995,7 @@ EXPORT_SYMBOL(kmem_cache_free); * @size: how many bytes of memory are required. * @flags: the type of memory to allocate. */ -void *kzalloc(size_t size, unsigned int __nocast flags) +void *kzalloc(size_t size, gfp_t flags) { void *ret = kmalloc(size, flags); if (ret) @@ -3603,7 +3601,7 @@ unsigned int ksize(const void *objp) * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory */ -char *kstrdup(const char *s, unsigned int __nocast gfp) +char *kstrdup(const char *s, gfp_t gfp) { size_t len; char *buf; diff --git a/mm/swap_state.c b/mm/swap_state.c index adbc2b426c2f..132164f7d0a7 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -68,7 +68,7 @@ void show_swap_cache_info(void) * but sets SwapCache flag and private instead of mapping and index. */ static int __add_to_swap_cache(struct page *page, swp_entry_t entry, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { int error; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 13c3d82968ae..1150229b6366 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -395,7 +395,7 @@ void *vmap(struct page **pages, unsigned int count, EXPORT_SYMBOL(vmap); -void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot) +void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) { struct page **pages; unsigned int nr_pages, array_size, i; @@ -446,7 +446,7 @@ fail: * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. */ -void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot) +void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { struct vm_struct *area; diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index 71abc99ec815..223c7ad5bd0f 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -25,7 +25,7 @@ int atm_charge(struct atm_vcc *vcc,int truesize) struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct sock *sk = sk_atm(vcc); int guess = atm_guess_pdu2truesize(pdu_size); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index d3d6bc547212..59b2dd36baa7 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -372,7 +372,7 @@ static struct proto l2cap_proto = { .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 173f46e8cdae..35adce6482b6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -229,7 +229,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) d->rx_credits = RFCOMM_DEFAULT_CREDITS; } -struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio) +struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) { struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); if (!d) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index f49e7e938bfb..a2b30f0aedb7 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -284,7 +284,7 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct rfcomm_dlc *d; struct sock *sk; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 1bca860a6109..158a9c46d863 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -286,7 +286,7 @@ static inline void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *de skb->destructor = rfcomm_wfree; } -static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, unsigned int __nocast priority) +static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, gfp_t priority) { if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) { struct sk_buff *skb = alloc_skb(size, priority); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ce7ab7dfa0b2..997e42df115c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -418,7 +418,7 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; diff --git a/net/core/dev.c b/net/core/dev.c index 9066c874e273..a44eeef24edf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1132,7 +1132,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #endif /* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) +int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) { unsigned int size; u8 *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0e9431b59fb2..af9b1516e21f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -130,7 +130,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask, +struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int fclone) { struct sk_buff *skb; @@ -198,7 +198,7 @@ nodata: */ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct sk_buff *skb; u8 *data; @@ -361,7 +361,7 @@ void __kfree_skb(struct sk_buff *skb) * %GFP_ATOMIC. */ -struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; @@ -500,7 +500,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) * header is going to be modified. Use pskb_copy() instead. */ -struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb->data - skb->head; /* @@ -539,7 +539,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_ma * The returned buffer has a reference count of 1. */ -struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { /* * Allocate the copy buffer @@ -598,7 +598,7 @@ out: */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { int i; u8 *data; @@ -689,7 +689,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { /* * Allocate the copy buffer diff --git a/net/core/sock.c b/net/core/sock.c index 928d2a1d6d8e..1c52fe809eda 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -637,7 +637,7 @@ lenout: * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, unsigned int __nocast priority, +struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -704,7 +704,7 @@ void sk_free(struct sock *sk) module_put(owner); } -struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority) +struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); @@ -845,7 +845,7 @@ unsigned long sock_i_ino(struct sock *sk) * Allocate a skb from the socket's send buffer. */ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority) + gfp_t priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); @@ -861,7 +861,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, * Allocate a skb from the socket's receive buffer. */ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority) + gfp_t priority) { if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); @@ -876,7 +876,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, /* * Allocate a memory block from the socket's option memory buffer. */ -void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority) +void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { if ((unsigned)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 6530283eafca..c9a62cca22fc 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -91,7 +91,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) } struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, - const unsigned int __nocast priority) + const gfp_t priority) { struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 8ca51c9191f7..d0fd6c60c574 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -74,7 +74,7 @@ struct sk_buff; #ifdef CONFIG_IP_DCCP_ACKVEC extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, - const unsigned int __nocast priority); + const gfp_t priority); extern void dccp_ackvec_free(struct dccp_ackvec *av); extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, @@ -93,7 +93,7 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) } #else /* CONFIG_IP_DCCP_ACKVEC */ static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, - const unsigned int __nocast priority) + const gfp_t priority) { return NULL; } diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 13ad47ba1420..417d9d82df3e 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -36,7 +36,7 @@ struct dccp_li_hist_entry { static inline struct dccp_li_hist_entry * dccp_li_hist_entry_new(struct dccp_li_hist *hist, - const unsigned int __nocast prio) + const gfp_t prio) { return kmem_cache_alloc(hist->dccplih_slab, prio); } diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index b375ebdb7dcf..122e96737ff6 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -86,7 +86,7 @@ extern struct dccp_rx_hist_entry * static inline struct dccp_tx_hist_entry * dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const unsigned int __nocast prio) + const gfp_t prio) { struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, prio); @@ -137,7 +137,7 @@ static inline struct dccp_rx_hist_entry * const struct sock *sk, const u32 ndp, const struct sk_buff *skb, - const unsigned int __nocast prio) + const gfp_t prio) { struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, prio); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 34d4128d56d5..1186dc44cdff 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -452,8 +452,7 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, - unsigned int __nocast gfp) +static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) { struct dn_scp *scp; struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); @@ -805,8 +804,7 @@ static int dn_auto_bind(struct socket *sock) return rv; } -static int dn_confirm_accept(struct sock *sk, long *timeo, - unsigned int __nocast allocation) +static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) { struct dn_scp *scp = DN_SK(sk); DEFINE_WAIT(wait); diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index cd08244aa10c..c96c767b1f74 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -117,8 +117,7 @@ try_again: * The eventual aim is for each socket to have a cached header size * for its outgoing packets, and to set hdr from this when sk != NULL. */ -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, - unsigned int __nocast pri) +struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri) { struct sk_buff *skb; int hdr = 64; @@ -212,7 +211,7 @@ static void dn_nsp_rtt(struct sock *sk, long rtt) * Returns: The number of times the packet has been sent previously */ static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, - unsigned int __nocast gfp) + gfp_t gfp) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sk_buff *skb2; @@ -353,7 +352,7 @@ static unsigned short *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *sk } void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, - unsigned int __nocast gfp, int oth) + gfp_t gfp, int oth) { struct dn_scp *scp = DN_SK(sk); struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -520,7 +519,7 @@ static int dn_nsp_retrans_conn_conf(struct sock *sk) return 0; } -void dn_send_conn_conf(struct sock *sk, unsigned int __nocast gfp) +void dn_send_conn_conf(struct sock *sk, gfp_t gfp) { struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb = NULL; @@ -552,7 +551,7 @@ void dn_send_conn_conf(struct sock *sk, unsigned int __nocast gfp) static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, unsigned int __nocast gfp, + unsigned short reason, gfp_t gfp, struct dst_entry *dst, int ddl, unsigned char *dd, __u16 rem, __u16 loc) { @@ -595,7 +594,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, unsigned int __nocast gfp) + unsigned short reason, gfp_t gfp) { struct dn_scp *scp = DN_SK(sk); int ddl = 0; @@ -616,7 +615,7 @@ void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg, { struct dn_skb_cb *cb = DN_SKB_CB(skb); int ddl = 0; - unsigned int __nocast gfp = GFP_ATOMIC; + gfp_t gfp = GFP_ATOMIC; dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb->dst, ddl, NULL, cb->src_port, cb->dst_port); @@ -628,7 +627,7 @@ void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval) struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb; unsigned char *ptr; - unsigned int __nocast gfp = GFP_ATOMIC; + gfp_t gfp = GFP_ATOMIC; if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL) return; @@ -663,8 +662,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) unsigned char menuver; struct dn_skb_cb *cb; unsigned char type = 1; - unsigned int __nocast allocation = - (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; + gfp_t allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation); if (!skb) diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index ecdf9f7a538f..eed07bbbe6b6 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -207,7 +207,7 @@ void ieee80211_txb_free(struct ieee80211_txb *txb) } static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ieee80211_txb *txb; int i; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fe3c6d3d0c91..94468a76c5b4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -494,7 +494,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const unsigned int __nocast priority) + const gfp_t priority) { struct sock *newsk = sk_clone(sk, priority); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index b942ff3c8860..fc6f95aaa969 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -604,7 +604,7 @@ static struct file_operations ip_vs_app_fops = { /* * Replace a segment of data with a new segment */ -int ip_vs_skb_replace(struct sk_buff *skb, unsigned int __nocast pri, +int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, char *o_buf, int o_len, char *n_buf, int n_len) { struct iphdr *iph; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c5b911f9b662..8225e4257258 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1610,7 +1610,7 @@ void tcp_send_fin(struct sock *sk) * was unread data in the receive queue. This behavior is recommended * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM */ -void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority) +void tcp_send_active_reset(struct sock *sk, gfp_t priority) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; diff --git a/net/key/af_key.c b/net/key/af_key.c index bbf0f69181ba..39031684b65c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -185,7 +185,7 @@ static int pfkey_release(struct socket *sock) } static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, - unsigned int __nocast allocation, struct sock *sk) + gfp_t allocation, struct sock *sk) { int err = -ENOBUFS; @@ -217,7 +217,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 -static int pfkey_broadcast(struct sk_buff *skb, unsigned int __nocast allocation, +static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk) { struct sock *sk; @@ -1417,7 +1417,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, } static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, - unsigned int __nocast allocation) + gfp_t allocation) { struct sk_buff *skb; struct sadb_msg *hdr; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 042b24a8ca4c..c761c15da421 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -867,8 +867,7 @@ static void llc_sk_init(struct sock* sk) * Allocates a LLC sock and initializes it. Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, unsigned int __nocast priority, - struct proto *prot) +struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) { struct sock *sk = sk_alloc(family, priority, prot, 1); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 34d671974a4d..1caaca06f698 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -195,8 +195,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { - unsigned int __nocast allocation = - in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; + gfp_t allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; int err = 0; NETLINK_CB(skb).dst_group = group; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a64e1d5ce3ca..678c3f2c0d0b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -758,7 +758,7 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb) } static inline struct sk_buff *netlink_trim(struct sk_buff *skb, - unsigned int __nocast allocation) + gfp_t allocation) { int delta; @@ -880,7 +880,7 @@ out: } int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, - u32 group, unsigned int __nocast allocation) + u32 group, gfp_t allocation) { struct netlink_broadcast_data info; struct hlist_node *node; diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c index 86f777052633..c4aeb7d40266 100644 --- a/net/rxrpc/call.c +++ b/net/rxrpc/call.c @@ -1923,7 +1923,7 @@ int rxrpc_call_write_data(struct rxrpc_call *call, size_t sioc, struct kvec *siov, u8 rxhdr_flags, - unsigned int __nocast alloc_flags, + gfp_t alloc_flags, int dup_data, size_t *size_sent) { diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c index be4b2be58956..2ba14a75dbbe 100644 --- a/net/rxrpc/connection.c +++ b/net/rxrpc/connection.c @@ -522,7 +522,7 @@ int rxrpc_conn_newmsg(struct rxrpc_connection *conn, uint8_t type, int dcount, struct kvec diov[], - unsigned int __nocast alloc_flags, + gfp_t alloc_flags, struct rxrpc_message **_msg) { struct rxrpc_message *msg; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5b24ae0650d3..12b0f582a66b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a const struct sctp_endpoint *ep, const struct sock *sk, sctp_scope_t scope, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sock *sp; int i; @@ -273,7 +273,7 @@ fail_init: struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk, sctp_scope_t scope, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; @@ -479,7 +479,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, /* Add a transport address to an association. */ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const union sctp_addr *addr, - const unsigned int __nocast gfp, + const gfp_t gfp, const int peer_state) { struct sctp_transport *peer; @@ -1231,7 +1231,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) * local endpoint and the remote peer. */ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, - unsigned int __nocast gfp) + gfp_t gfp) { sctp_scope_t scope; int flags; @@ -1254,7 +1254,7 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, /* Build the association's bind address list from the cookie. */ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc, struct sctp_cookie *cookie, - unsigned int __nocast gfp) + gfp_t gfp) { int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length); int var_size3 = cookie->raw_addr_list_len; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index f71549710f2e..2b962627f631 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -53,7 +53,7 @@ /* Forward declarations for internal helpers. */ static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); @@ -64,7 +64,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); */ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags) { struct sctp_sockaddr_entry *addr; @@ -146,7 +146,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) /* Add an address to the bind address list in the SCTP_bind_addr structure. */ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sockaddr_entry *addr; @@ -200,7 +200,7 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) */ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int *addrs_len, - unsigned int __nocast gfp) + gfp_t gfp) { union sctp_params addrparms; union sctp_params retval; @@ -252,7 +252,7 @@ end_raw: * address parameters). */ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, - int addrs_len, __u16 port, unsigned int __nocast gfp) + int addrs_len, __u16 port, gfp_t gfp) { union sctp_addr_param *rawaddr; struct sctp_paramhdr *param; @@ -350,7 +350,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, /* Copy out addresses from the global local address list. */ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, union sctp_addr *addr, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags) { int error = 0; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 61da2937e641..83ef411772f4 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -62,7 +62,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) } /* Allocate and initialize datamsg. */ -SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(unsigned int __nocast gfp) +SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) { struct sctp_datamsg *msg; msg = kmalloc(sizeof(struct sctp_datamsg), gfp); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index e22ccd655965..96984f7a2d69 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -68,7 +68,7 @@ static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep); */ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sock *sp = sctp_sk(sk); memset(ep, 0, sizeof(struct sctp_endpoint)); @@ -138,8 +138,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Create a sctp_endpoint with all that boring stuff initialized. * Returns NULL if there isn't enough memory. */ -struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, - unsigned int __nocast gfp) +struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) { struct sctp_endpoint *ep; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f01d1c9002a1..26de4d3e1bd9 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -219,7 +219,7 @@ static void sctp_free_local_addr_list(void) /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, - unsigned int __nocast gfp, int copy_flags) + gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 3868a8d70cc0..10e82ec2ebd3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -78,7 +78,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, - unsigned int __nocast gfp); + gfp_t gfp); /* What was the inbound interface for this chunk? */ int sctp_chunk_iif(const struct sctp_chunk *chunk) @@ -174,7 +174,7 @@ void sctp_init_cause(struct sctp_chunk *chunk, __u16 cause_code, */ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, - unsigned int __nocast gfp, int vparam_len) + gfp_t gfp, int vparam_len) { sctp_inithdr_t init; union sctp_params addrs; @@ -261,7 +261,7 @@ nodata: struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk, - unsigned int __nocast gfp, int unkparam_len) + gfp_t gfp, int unkparam_len) { sctp_inithdr_t initack; struct sctp_chunk *retval; @@ -1234,7 +1234,7 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk) /* Create a CLOSED association to use with an incoming packet. */ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; struct sk_buff *skb; @@ -1349,7 +1349,7 @@ nodata: struct sctp_association *sctp_unpack_cookie( const struct sctp_endpoint *ep, const struct sctp_association *asoc, - struct sctp_chunk *chunk, unsigned int __nocast gfp, + struct sctp_chunk *chunk, gfp_t gfp, int *error, struct sctp_chunk **errp) { struct sctp_association *retval = NULL; @@ -1814,7 +1814,7 @@ int sctp_verify_init(const struct sctp_association *asoc, */ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, const union sctp_addr *peer_addr, - sctp_init_chunk_t *peer_init, unsigned int __nocast gfp) + sctp_init_chunk_t *peer_init, gfp_t gfp) { union sctp_params param; struct sctp_transport *transport; @@ -1985,7 +1985,7 @@ nomem: static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, - unsigned int __nocast gfp) + gfp_t gfp) { union sctp_addr addr; int i; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 39c970b5b198..f84173ea8ec1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -63,7 +63,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp); + gfp_t gfp); static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_state_t state, struct sctp_endpoint *ep, @@ -71,7 +71,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp); + gfp_t gfp); /******************************************************************** * Helper functions @@ -498,7 +498,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_init_chunk_t *peer_init, - unsigned int __nocast gfp) + gfp_t gfp) { int error; @@ -853,7 +853,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, - unsigned int __nocast gfp) + gfp_t gfp) { sctp_cmd_seq_t commands; const sctp_sm_table_entry_t *state_fn; @@ -898,7 +898,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp) + gfp_t gfp) { int error; @@ -986,7 +986,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp) + gfp_t gfp) { int error = 0; int force; diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 25037daf3fa0..cbe2513d2822 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -58,7 +58,7 @@ static inline size_t sctp_ssnmap_size(__u16 in, __u16 out) * Allocate room to store at least 'len' contiguous TSNs. */ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ssnmap *retval; int size; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index d2f04ebe5081..6bc27200e6ca 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -57,7 +57,7 @@ /* Initialize a new transport from provided memory. */ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, const union sctp_addr *addr, - unsigned int __nocast gfp) + gfp_t gfp) { /* Copy in the address. */ peer->ipaddr = *addr; @@ -122,7 +122,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Allocate and initialize a new transport. */ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_transport *transport; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 0abd5101107c..057e7fac3af0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -74,7 +74,7 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags) /* Create a new sctp_ulpevent. */ SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sk_buff *skb; @@ -136,7 +136,7 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event) struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( const struct sctp_association *asoc, __u16 flags, __u16 state, __u16 error, __u16 outbound, - __u16 inbound, unsigned int __nocast gfp) + __u16 inbound, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_assoc_change *sac; @@ -237,7 +237,7 @@ fail: struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( const struct sctp_association *asoc, const struct sockaddr_storage *aaddr, - int flags, int state, int error, unsigned int __nocast gfp) + int flags, int state, int error, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_paddr_change *spc; @@ -350,7 +350,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, unsigned int __nocast gfp) + __u16 flags, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_remote_error *sre; @@ -448,7 +448,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, __u32 error, unsigned int __nocast gfp) + __u16 flags, __u32 error, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_send_failed *ssf; @@ -557,7 +557,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, - __u16 flags, unsigned int __nocast gfp) + __u16 flags, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_shutdown_event *sse; @@ -620,7 +620,7 @@ fail: * 5.3.1.6 SCTP_ADAPTION_INDICATION */ struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( - const struct sctp_association *asoc, unsigned int __nocast gfp) + const struct sctp_association *asoc, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_adaption_event *sai; @@ -657,7 +657,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event = NULL; struct sk_buff *skb; @@ -719,7 +719,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, __u32 indication, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_pdapi_event *pd; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ec2c857eae7f..2080b2d28c98 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -100,7 +100,7 @@ void sctp_ulpq_free(struct sctp_ulpq *ulpq) /* Process an incoming DATA chunk. */ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sk_buff_head temp; sctp_data_chunk_t *hdr; @@ -792,7 +792,7 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) /* Partial deliver the first message as there is pressure on rwnd. */ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_association *asoc; @@ -816,7 +816,7 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, /* Renege some packets to make room for an incoming chunk. */ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; __u16 needed, freed; @@ -855,7 +855,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, /* Notify the application if an association is aborted and in * partial delivery mode. Send up any pending received messages. */ -void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, unsigned int __nocast gfp) +void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sock *sk; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ade730eaf401..54e60a657500 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -719,7 +719,7 @@ static void rpc_async_schedule(void *arg) void * rpc_malloc(struct rpc_task *task, size_t size) { - unsigned int __nocast gfp; + gfp_t gfp; if (task->tk_flags & RPC_TASK_SWAPPER) gfp = GFP_ATOMIC; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 061b44cc2451..cbb0ba34a600 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -225,7 +225,7 @@ expired: * SPD calls. */ -struct xfrm_policy *xfrm_policy_alloc(unsigned int __nocast gfp) +struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) { struct xfrm_policy *policy; diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 91124ddbdda9..e72cec77f0db 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -106,7 +106,7 @@ struct snd_mem_list { static void *snd_dma_hack_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned int __nocast flags) + gfp_t flags) { void *ret; u64 dma_mask, coherent_dma_mask; diff --git a/sound/core/memory.c b/sound/core/memory.c index 8fa888fc53a0..7d8e2eebba51 100644 --- a/sound/core/memory.c +++ b/sound/core/memory.c @@ -89,7 +89,7 @@ void snd_memory_done(void) } } -static void *__snd_kmalloc(size_t size, unsigned int __nocast flags, void *caller) +static void *__snd_kmalloc(size_t size, gfp_t flags, void *caller) { unsigned long cpu_flags; struct snd_alloc_track *t; @@ -111,12 +111,12 @@ static void *__snd_kmalloc(size_t size, unsigned int __nocast flags, void *calle } #define _snd_kmalloc(size, flags) __snd_kmalloc((size), (flags), __builtin_return_address(0)); -void *snd_hidden_kmalloc(size_t size, unsigned int __nocast flags) +void *snd_hidden_kmalloc(size_t size, gfp_t flags) { return _snd_kmalloc(size, flags); } -void *snd_hidden_kzalloc(size_t size, unsigned int __nocast flags) +void *snd_hidden_kzalloc(size_t size, gfp_t flags) { void *ret = _snd_kmalloc(size, flags); if (ret) @@ -125,7 +125,7 @@ void *snd_hidden_kzalloc(size_t size, unsigned int __nocast flags) } EXPORT_SYMBOL(snd_hidden_kzalloc); -void *snd_hidden_kcalloc(size_t n, size_t size, unsigned int __nocast flags) +void *snd_hidden_kcalloc(size_t n, size_t size, gfp_t flags) { void *ret = NULL; if (n != 0 && size > INT_MAX / n) @@ -190,7 +190,7 @@ void snd_hidden_vfree(void *obj) snd_wrapper_vfree(obj); } -char *snd_hidden_kstrdup(const char *s, unsigned int __nocast flags) +char *snd_hidden_kstrdup(const char *s, gfp_t flags) { int len; char *buf; diff --git a/sound/core/seq/instr/ainstr_iw.c b/sound/core/seq/instr/ainstr_iw.c index b3cee092b1a4..67c24c8e8e7b 100644 --- a/sound/core/seq/instr/ainstr_iw.c +++ b/sound/core/seq/instr/ainstr_iw.c @@ -58,7 +58,7 @@ static int snd_seq_iwffff_copy_env_from_stream(__u32 req_stype, iwffff_xenv_t *ex, char __user **data, long *len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { __u32 stype; iwffff_env_record_t *rp, *rp_last; diff --git a/sound/core/wrappers.c b/sound/core/wrappers.c index 508e6d67ee19..296b716f1376 100644 --- a/sound/core/wrappers.c +++ b/sound/core/wrappers.c @@ -27,7 +27,7 @@ #include #ifdef CONFIG_SND_DEBUG_MEMORY -void *snd_wrapper_kmalloc(size_t size, unsigned int __nocast flags) +void *snd_wrapper_kmalloc(size_t size, gfp_t flags) { return kmalloc(size, flags); } -- cgit v1.2.3 From 3dd083255ddcfa87751fa8e32f61a9547a15a541 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 9 Oct 2005 21:19:40 +0200 Subject: [PATCH] x86_64: Set up safe page tables during resume The following patch makes swsusp avoid the possible temporary corruption of page translation tables during resume on x86-64. This is achieved by creating a copy of the relevant page tables that will not be modified by swsusp and can be safely used by it on resume. The problem is that during resume on x86-64 swsusp may temporarily corrupt the page tables used for the direct mapping of RAM. If that happens, a page fault occurs and cannot be handled properly, which leads to the solid hang of the affected system. This leads to the loss of the system's state from before suspend and may result in the loss of data or the corruption of filesystems, so it is a serious issue. Also, it appears to happen quite often (for me, as often as 50% of the time). The problem is related to the fact that (at least) one of the PMD entries used in the direct memory mapping (starting at PAGE_OFFSET) points to a page table the physical address of which is much greater than the physical address of the PMD entry itself. Moreover, unfortunately, the physical address of the page table before suspend (i.e. the one stored in the suspend image) happens to be different to the physical address of the corresponding page table used during resume (i.e. the one that is valid right before swsusp_arch_resume() in arch/x86_64/kernel/suspend_asm.S is executed). Thus while the image is restored, the "offending" PMD entry gets overwritten, so it does not point to the right physical address any more (i.e. there's no page table at the address pointed to by it, because it points to the address the page table has been at during suspend). Consequently, if the PMD entry is used later on, and it _is_ used in the process of copying the image pages, a page fault occurs, but it cannot be handled in the normal way and the system hangs. In principle we can call create_resume_mapping() from swsusp_arch_resume() (ie. from suspend_asm.S), but then the memory allocations in create_resume_mapping(), resume_pud_mapping(), and resume_pmd_mapping() must be made carefully so that we use _only_ NosaveFree pages in them (the other pages are overwritten by the loop in swsusp_arch_resume()). Additionally, we are in atomic context at that time, so we cannot use GFP_KERNEL. Moreover, if one of the allocations fails, we should free all of the allocated pages, so we need to trace them somehow. All of this is done in the appended patch, except that the functions populating the page tables are located in arch/x86_64/kernel/suspend.c rather than in init.c. It may be done in a more elegan way in the future, with the help of some swsusp patches that are in the works now. [AK: move some externs into headers, renamed a function] Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/suspend.c | 127 +++++++++++++++++++++++++++++++++++++++ arch/x86_64/kernel/suspend_asm.S | 17 ++++-- include/linux/suspend.h | 2 + kernel/power/swsusp.c | 7 ++- 4 files changed, 144 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index ebb9abf3ce6d..f066c6ab3618 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include struct saved_context saved_context; @@ -140,4 +142,129 @@ void fix_processor_context(void) } +#ifdef CONFIG_SOFTWARE_SUSPEND +/* Defined in arch/x86_64/kernel/suspend_asm.S */ +extern int restore_image(void); +pgd_t *temp_level4_pgt; + +static void **pages; + +static inline void *__add_page(void) +{ + void **c; + + c = (void **)get_usable_page(GFP_ATOMIC); + if (c) { + *c = pages; + pages = c; + } + return c; +} + +static inline void *__next_page(void) +{ + void **c; + + c = pages; + if (c) { + pages = *c; + *c = NULL; + } + return c; +} + +/* + * Try to allocate as many usable pages as needed and daisy chain them. + * If one allocation fails, free the pages allocated so far + */ +static int alloc_usable_pages(unsigned long n) +{ + void *p; + + pages = NULL; + do + if (!__add_page()) + break; + while (--n); + if (n) { + p = __next_page(); + while (p) { + free_page((unsigned long)p); + p = __next_page(); + } + return -ENOMEM; + } + return 0; +} + +static void res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) +{ + long i, j; + + i = pud_index(address); + pud = pud + i; + for (; i < PTRS_PER_PUD; pud++, i++) { + unsigned long paddr; + pmd_t *pmd; + + paddr = address + i*PUD_SIZE; + if (paddr >= end) + break; + + pmd = (pmd_t *)__next_page(); + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { + unsigned long pe; + + if (paddr >= end) + break; + pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr; + pe &= __supported_pte_mask; + set_pmd(pmd, __pmd(pe)); + } + } +} + +static void set_up_temporary_mappings(void) +{ + unsigned long start, end, next; + + temp_level4_pgt = (pgd_t *)__next_page(); + + /* It is safe to reuse the original kernel mapping */ + set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), + init_level4_pgt[pgd_index(__START_KERNEL_map)]); + + /* Set up the direct mapping from scratch */ + start = (unsigned long)pfn_to_kaddr(0); + end = (unsigned long)pfn_to_kaddr(end_pfn); + + for (; start < end; start = next) { + pud_t *pud = (pud_t *)__next_page(); + next = start + PGDIR_SIZE; + if (next > end) + next = end; + res_phys_pud_init(pud, __pa(start), __pa(next)); + set_pgd(temp_level4_pgt + pgd_index(start), + mk_kernel_pgd(__pa(pud))); + } +} + +int swsusp_arch_resume(void) +{ + unsigned long n; + + n = ((end_pfn << PAGE_SHIFT) + PUD_SIZE - 1) >> PUD_SHIFT; + n += (n + PTRS_PER_PUD - 1) / PTRS_PER_PUD + 1; + pr_debug("swsusp_arch_resume(): pages needed = %lu\n", n); + if (alloc_usable_pages(n)) { + free_eaten_memory(); + return -ENOMEM; + } + /* We have got enough memory and from now on we cannot recover */ + set_up_temporary_mappings(); + restore_image(); + return 0; +} +#endif /* CONFIG_SOFTWARE_SUSPEND */ diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S index 4d659e97df10..320b6fb00cca 100644 --- a/arch/x86_64/kernel/suspend_asm.S +++ b/arch/x86_64/kernel/suspend_asm.S @@ -39,12 +39,13 @@ ENTRY(swsusp_arch_suspend) call swsusp_save ret -ENTRY(swsusp_arch_resume) - /* set up cr3 */ - leaq init_level4_pgt(%rip),%rax - subq $__START_KERNEL_map,%rax - movq %rax,%cr3 - +ENTRY(restore_image) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rdx + movq temp_level4_pgt(%rip), %rax + subq %rdx, %rax + movq %rax, %cr3 + /* Flush TLB */ movq mmu_cr4_features(%rip), %rax movq %rax, %rdx andq $~(1<<7), %rdx # PGE @@ -69,6 +70,10 @@ loop: movq pbe_next(%rdx), %rdx jmp loop done: + /* go back to the original page tables */ + leaq init_level4_pgt(%rip), %rax + subq $__START_KERNEL_map, %rax + movq %rax, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax movq %rax, %rdx diff --git a/include/linux/suspend.h b/include/linux/suspend.h index f2e96fdfaae0..ad15a54806d8 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -71,5 +71,7 @@ void restore_processor_state(void); struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); +extern unsigned long get_usable_page(unsigned gfp_mask); +extern void free_eaten_memory(void); #endif /* _LINUX_SWSUSP_H */ diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index acf79ac1cb6d..2d5c45676442 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1095,7 +1095,7 @@ static inline void eat_page(void *page) *eaten_memory = c; } -static unsigned long get_usable_page(unsigned gfp_mask) +unsigned long get_usable_page(unsigned gfp_mask) { unsigned long m; @@ -1109,7 +1109,7 @@ static unsigned long get_usable_page(unsigned gfp_mask) return m; } -static void free_eaten_memory(void) +void free_eaten_memory(void) { unsigned long m; void **c; @@ -1481,11 +1481,12 @@ static int read_suspend_image(void) /* Allocate memory for the image and read the data from swap */ error = check_pagedir(pagedir_nosave); - free_eaten_memory(); + if (!error) error = data_read(pagedir_nosave); if (error) { /* We fail cleanly */ + free_eaten_memory(); for_each_pbe (p, pagedir_nosave) if (p->address) { free_page(p->address); -- cgit v1.2.3 From 46113830a18847cff8da73005e57bc49c2f95a56 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 19:44:29 +0200 Subject: [PATCH] Fix signal sending in usbdevio on async URB completion If a process issues an URB from userspace and (starts to) terminate before the URB comes back, we run into the issue described above. This is because the urb saves a pointer to "current" when it is posted to the device, but there's no guarantee that this pointer is still valid afterwards. In fact, there are three separate issues: 1) the pointer to "current" can become invalid, since the task could be completely gone when the URB completion comes back from the device. 2) Even if the saved task pointer is still pointing to a valid task_struct, task_struct->sighand could have gone meanwhile. 3) Even if the process is perfectly fine, permissions may have changed, and we can no longer send it a signal. So what we do instead, is to save the PID and uid's of the process, and introduce a new kill_proc_info_as_uid() function. Signed-off-by: Harald Welte [ Fixed up types and added symbol exports ] Signed-off-by: Linus Torvalds --- drivers/usb/core/devio.c | 12 +++++++++--- include/linux/sched.h | 1 + kernel/signal.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index b4265aa7d45e..6c35dcbea664 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -30,6 +30,8 @@ * Revision history * 22.12.1999 0.1 Initial release (split from proc_usb.c) * 04.01.2000 0.2 Turned into its own filesystem + * 30.09.2005 0.3 Fix user-triggerable oops in async URB delivery + * (CAN-2005-3055) */ /*****************************************************************************/ @@ -58,7 +60,8 @@ static struct class *usb_device_class; struct async { struct list_head asynclist; struct dev_state *ps; - struct task_struct *task; + pid_t pid; + uid_t uid, euid; unsigned int signr; unsigned int ifnum; void __user *userbuffer; @@ -290,7 +293,8 @@ static void async_completed(struct urb *urb, struct pt_regs *regs) sinfo.si_errno = as->urb->status; sinfo.si_code = SI_ASYNCIO; sinfo.si_addr = as->userurb; - send_sig_info(as->signr, &sinfo, as->task); + kill_proc_info_as_uid(as->signr, &sinfo, as->pid, as->uid, + as->euid); } wake_up(&ps->wait); } @@ -988,7 +992,9 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, as->userbuffer = NULL; as->signr = uurb->signr; as->ifnum = ifnum; - as->task = current; + as->pid = current->pid; + as->uid = current->uid; + as->euid = current->euid; if (!(uurb->endpoint & USB_DIR_IN)) { if (copy_from_user(as->urb->transfer_buffer, uurb->buffer, as->urb->transfer_buffer_length)) { free_async(as); diff --git a/include/linux/sched.h b/include/linux/sched.h index c3ba31f210a9..27519df0f987 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1018,6 +1018,7 @@ extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp); extern int kill_pg_info(int, struct siginfo *, pid_t); extern int kill_proc_info(int, struct siginfo *, pid_t); +extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t); extern void do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); diff --git a/kernel/signal.c b/kernel/signal.c index cba193ceda0d..50c992643771 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1193,6 +1193,40 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) return error; } +/* like kill_proc_info(), but doesn't use uid/euid of "current" */ +int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid, + uid_t uid, uid_t euid) +{ + int ret = -EINVAL; + struct task_struct *p; + + if (!valid_signal(sig)) + return ret; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + if (!p) { + ret = -ESRCH; + goto out_unlock; + } + if ((!info || ((unsigned long)info != 1 && + (unsigned long)info != 2 && SI_FROMUSER(info))) + && (euid != p->suid) && (euid != p->uid) + && (uid != p->suid) && (uid != p->uid)) { + ret = -EPERM; + goto out_unlock; + } + if (sig && p->sighand) { + unsigned long flags; + spin_lock_irqsave(&p->sighand->siglock, flags); + ret = __group_send_sig_info(sig, info, p); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + } +out_unlock: + read_unlock(&tasklist_lock); + return ret; +} +EXPORT_SYMBOL_GPL(kill_proc_info_as_uid); /* * kill_something_info() interprets pid in interesting ways just like kill(2). -- cgit v1.2.3 From ebe0bbf06c9e03613bdcb6b5a704595a9344b7ff Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:19 -0700 Subject: [NETFILTER] nfnetlink: use highest bit of nfa_type to indicate nested TLV As Henrik Nordstrom pointed out, all our efforts with "split endian" (i.e. host byte order tags, net byte order values) are useless, unless a parser can determine whether an attribute is nested or not. This patch steals the highest bit of nfattr.nfa_type to indicate whether the data payload contains a nested nfattr (1) or not (0). This will break userspace compatibility, but luckily no kernel with nfnetlink was released so far. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 12 ++++++++---- net/netfilter/nfnetlink.c | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1d5b10ae2399..f08e870100f4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -41,11 +41,15 @@ enum nfnetlink_groups { struct nfattr { u_int16_t nfa_len; - u_int16_t nfa_type; + u_int16_t nfa_type; /* we use 15 bits for the type, and the highest + * bit to indicate whether the payload is nested */ } __attribute__ ((packed)); -/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time - * to put this in a generic file */ +/* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from + * rtnetlink.h, it's time to put this in a generic file */ + +#define NFNL_NFA_NEST 0x8000 +#define NFA_TYPE(attr) ((attr)->nfa_type & 0x7fff) #define NFA_ALIGNTO 4 #define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) @@ -59,7 +63,7 @@ struct nfattr #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) #define NFA_NEST(skb, type) \ ({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ - NFA_PUT(skb, type, 0, NULL); \ + NFA_PUT(skb, (NFNL_NFA_NEST | type), 0, NULL); \ __start; }) #define NFA_NEST_END(skb, start) \ ({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 1caaca06f698..4bc27a6334c1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -133,7 +133,7 @@ int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) memset(tb, 0, sizeof(struct nfattr *) * maxattr); while (NFA_OK(nfa, len)) { - unsigned flavor = nfa->nfa_type; + unsigned flavor = NFA_TYPE(nfa); if (flavor && flavor <= maxattr) tb[flavor-1] = nfa; nfa = NFA_NEXT(nfa, len); @@ -177,7 +177,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); while (NFA_OK(attr, attrlen)) { - unsigned flavor = attr->nfa_type; + unsigned flavor = NFA_TYPE(attr); if (flavor) { if (flavor > attr_count) return -EINVAL; -- cgit v1.2.3 From b3a91d037a2575040f9b6a483f60c407a3d80368 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:36 -0700 Subject: [NETFILTER] nat: remove bogus structure member When 'rustynat' was merged in 2.6.12, the use of the "helper" pointer of struct ipt_nat_info was obsoleted, but the pointer not removed from the struct. This patch removes the pointer, thereby yet again shrinking struct ip_conntrack. Discovered-by: Rusty Russell Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index e201ec6e9905..41a107de17cf 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -58,10 +58,6 @@ extern rwlock_t ip_nat_lock; struct ip_nat_info { struct list_head bysource; - - /* Helper (NULL if none). */ - struct ip_nat_helper *helper; - struct ip_nat_seq seq[IP_CT_DIR_MAX]; }; -- cgit v1.2.3 From 5bbc243aafff9ad653dc7a9fa7bcaf0b4631355a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:54:01 -0700 Subject: [NETFILTER]: Add missing include to ip_conntrack_tuple.h Without this #include, __be16 is not defined and userspace programs will break. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 20e43f018b7c..3232db11a4e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -1,6 +1,8 @@ #ifndef _IP_CONNTRACK_TUPLE_H #define _IP_CONNTRACK_TUPLE_H +#include + /* A `tuple' is a structure containing the information to uniquely identify a connection. ie. if two packets have the same tuple, they are in the same connection; if not, they are not. -- cgit v1.2.3 From e1c73b78e3706bd3c336d4730a01dd4081dfb7ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 20:55:49 -0700 Subject: [NETFILTER] ctnetlink: add one nesting level for TCP state To keep consistency, the TCP private protocol information is nested attributes under CTA_PROTOINFO_TCP. This way the sequence of attributes to access the TCP state information looks like here below: CTA_PROTOINFO CTA_PROTOINFO_TCP CTA_PROTOINFO_TCP_STATE instead of: CTA_PROTOINFO CTA_PROTOINFO_TCP_STATE Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 9 ++++++++- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 5c55751c78e4..fb5511030185 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -70,11 +70,18 @@ enum ctattr_l4proto { enum ctattr_protoinfo { CTA_PROTOINFO_UNSPEC, - CTA_PROTOINFO_TCP_STATE, + CTA_PROTOINFO_TCP, __CTA_PROTOINFO_MAX }; #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) +enum ctattr_protoinfo_tcp { + CTA_PROTOINFO_TCP_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_TCP_MAX +}; +#define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1) + enum ctattr_counters { CTA_COUNTERS_UNSPEC, CTA_COUNTERS_PACKETS, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 121760d6cc50..75e27e65c28f 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -341,11 +341,15 @@ static int tcp_print_conntrack(struct seq_file *s, static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct) { + struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + read_lock_bh(&tcp_lock); NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), &ct->proto.tcp.state); read_unlock_bh(&tcp_lock); + NFA_NEST_END(skb, nest_parms); + return 0; nfattr_failure: -- cgit v1.2.3 From a051a8f7306476af0a74370ad56e793cb6c43bf7 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 21:21:10 -0700 Subject: [NETFILTER]: Use only 32bit counters for CONNTRACK_ACCT Initially we used 64bit counters for conntrack-based accounting, since we had no event mechanism to tell userspace that our counters are about to overflow. With nfnetlink_conntrack, we now have such a event mechanism and thus can save 16bytes per connection. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 6 ++++-- include/linux/netfilter_ipv4/ip_conntrack.h | 8 ++++++-- net/ipv4/netfilter/ip_conntrack_core.c | 13 ++++++++----- net/ipv4/netfilter/ip_conntrack_netlink.c | 8 ++++---- 4 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index fb5511030185..116fcaced909 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -84,8 +84,10 @@ enum ctattr_protoinfo_tcp { enum ctattr_counters { CTA_COUNTERS_UNSPEC, - CTA_COUNTERS_PACKETS, - CTA_COUNTERS_BYTES, + CTA_COUNTERS_PACKETS, /* old 64bit counters */ + CTA_COUNTERS_BYTES, /* old 64bit counters */ + CTA_COUNTERS32_PACKETS, + CTA_COUNTERS32_BYTES, __CTA_COUNTERS_MAX }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 4ced38736813..d078bb91d9e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -117,6 +117,10 @@ enum ip_conntrack_events /* NAT info */ IPCT_NATINFO_BIT = 10, IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), + + /* Counter highest bit has been set */ + IPCT_COUNTER_FILLING_BIT = 11, + IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), }; enum ip_conntrack_expect_events { @@ -192,8 +196,8 @@ do { \ struct ip_conntrack_counter { - u_int64_t packets; - u_int64_t bytes; + u_int32_t packets; + u_int32_t bytes; }; struct ip_conntrack_helper; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index ea65dd3e517a..07a80b56e8dc 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1119,7 +1119,7 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, unsigned long extra_jiffies, int do_acct) { - int do_event = 0; + int event = 0; IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); IP_NF_ASSERT(skb); @@ -1129,13 +1129,13 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - do_event = 1; + event = IPCT_REFRESH; } else { /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - do_event = 1; + event = IPCT_REFRESH; } } @@ -1144,14 +1144,17 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, ct->counters[CTINFO2DIR(ctinfo)].packets++; ct->counters[CTINFO2DIR(ctinfo)].bytes += ntohs(skb->nh.iph->tot_len); + if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) + || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) + event |= IPCT_COUNTER_FILLING; } #endif write_unlock_bh(&ip_conntrack_lock); /* must be unlocked when calling event cache */ - if (do_event) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + if (event) + ip_conntrack_event_cache(event, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index eade2749915a..06ed91ee8ace 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, struct nfattr *nest_count = NFA_NEST(skb, type); u_int64_t tmp; - tmp = cpu_to_be64(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); - tmp = cpu_to_be64(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); NFA_NEST_END(skb, nest_count); -- cgit v1.2.3 From 339231537506846cb232a2f0cc4a2c662b2d5b07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 21:23:28 -0700 Subject: [NETFILTER] ctnetlink: allow userspace to change TCP state This patch adds the ability of changing the state a TCP connection. I know that this must be used with care but it's required to provide a complete conntrack creation via conntrack_netlink. So I'll document this aspect on the upcoming docs. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- .../linux/netfilter_ipv4/ip_conntrack_protocol.h | 3 +++ net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 23 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index b6b99be8632a..2c76b879e3dc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -52,6 +52,9 @@ struct ip_conntrack_protocol int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct); + /* convert nfnetlink attributes to protoinfo */ + int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct); + int (*tuple_to_nfattr)(struct sk_buff *skb, const struct ip_conntrack_tuple *t); int (*nfattr_to_tuple)(struct nfattr *tb[], diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 75e27e65c28f..d6701cafbcc2 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -356,6 +356,28 @@ nfattr_failure: read_unlock_bh(&tcp_lock); return -1; } + +static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; + +nfattr_failure: + return -1; +} #endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) @@ -1127,6 +1149,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, #endif -- cgit v1.2.3 From afb997c6163b33292d31a09d6aa5cbb03ffa5bf1 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 15:12:21 -0700 Subject: [NETPOLL]: wrong return for null netpoll_poll_lock() When netpoll is not being used, the macro that defines the removed routing netpoll_poll_lock defines the return as zero, but the real routine returns a `void *` Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/netpoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 5ade54a78dbb..ca5a8733000f 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -86,7 +86,7 @@ static inline void netpoll_poll_unlock(void *have) #else #define netpoll_rx(a) 0 -#define netpoll_poll_lock(a) 0 +#define netpoll_poll_lock(a) NULL #define netpoll_poll_unlock(a) #endif -- cgit v1.2.3 From c8923c6b852d3a97c1faad0566e38fca330375a7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 14:41:23 -0700 Subject: [NETFILTER]: Fix OOPSes on machines with discontiguous cpu numbering. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original patch by Harald Welte, with feedback from Herbert Xu and testing by Sébastien Bernard. EBTABLES, ARP tables, and IP/IP6 tables all assume that cpus are numbered linearly. That is not necessarily true. This patch fixes that up by calculating the largest possible cpu number, and allocating enough per-cpu structure space given that. Signed-off-by: David S. Miller --- arch/cris/arch-v32/kernel/smp.c | 2 ++ arch/sh/kernel/smp.c | 3 +++ include/linux/cpumask.h | 12 ++++++++++++ net/bridge/netfilter/ebtables.c | 27 +++++++++++++++++---------- net/ipv4/netfilter/arp_tables.c | 14 +++++++++----- net/ipv4/netfilter/ip_tables.c | 17 +++++++++++------ net/ipv6/netfilter/ip6_tables.c | 16 +++++++++++----- 7 files changed, 65 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 2c5cae04a95c..957f551ba5ce 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -15,6 +15,7 @@ #include #include #include +#include #define IPI_SCHEDULE 1 #define IPI_CALL 2 @@ -28,6 +29,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; /* CPU masks */ cpumask_t cpu_online_map = CPU_MASK_NONE; cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +EXPORT_SYMBOL(phys_cpu_present_map); /* Variables used during SMP boot */ volatile int cpu_now_booting = 0; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 56a39d69e080..5ecefc02896a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,8 @@ struct sh_cpuinfo cpu_data[NR_CPUS]; extern void per_cpu_trap_init(void); cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + cpumask_t cpu_online_map; static atomic_t cpus_booted = ATOMIC_INIT(0); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b15826f6e3a2..fe9778301d07 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -392,4 +392,16 @@ extern cpumask_t cpu_present_map; #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) +/* Find the highest possible smp_processor_id() */ +static inline unsigned int highest_possible_processor_id(void) +{ + unsigned int cpu, highest = 0; + + for_each_cpu_mask(cpu, cpu_possible_map) + highest = cpu; + + return highest; +} + + #endif /* __LINUX_CPUMASK_H */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c4540144f0f4..f8ffbf6e2333 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -26,6 +26,7 @@ #include #include #include +#include #include /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -823,10 +824,11 @@ static int translate_table(struct ebt_replace *repl, /* this will get free'd in do_replace()/ebt_register_table() if an error occurs */ newinfo->chainstack = (struct ebt_chainstack **) - vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack)); + vmalloc((highest_possible_processor_id()+1) + * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack) return -ENOMEM; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { newinfo->chainstack[i] = vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack[i]) { @@ -895,9 +897,12 @@ static void get_counters(struct ebt_counter *oldcounters, /* counters of cpu 0 */ memcpy(counters, oldcounters, - sizeof(struct ebt_counter) * nentries); + sizeof(struct ebt_counter) * nentries); + /* add other counters to those of cpu 0 */ - for (cpu = 1; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { + if (cpu == 0) + continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) { counters[i].pcnt += counter_base[i].pcnt; @@ -929,7 +934,8 @@ static int do_replace(void __user *user, unsigned int len) BUGPRINT("Entries_size never zero\n"); return -EINVAL; } - countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(tmp.nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); if (!newinfo) @@ -1022,7 +1028,7 @@ static int do_replace(void __user *user, unsigned int len) vfree(table->entries); if (table->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->chainstack[i]); vfree(table->chainstack); } @@ -1040,7 +1046,7 @@ free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1132,7 +1138,8 @@ int ebt_register_table(struct ebt_table *table) return -EINVAL; } - countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(table->table->nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); ret = -ENOMEM; @@ -1186,7 +1193,7 @@ free_unlock: up(&ebt_mutex); free_chainstack: if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1209,7 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table) up(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->private->chainstack[i]); vfree(table->private->chainstack); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fa1634256680..a7969286e6e7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -716,8 +716,10 @@ static int translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -767,7 +769,7 @@ static void get_counters(const struct arpt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -885,7 +887,8 @@ static int do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1158,7 +1161,8 @@ int arpt_register_table(struct arpt_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) { ret = -ENOMEM; return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index eef99a1b5de6..75c27e92f6ab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -921,8 +922,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -943,7 +946,7 @@ replace_table(struct ipt_table *table, struct ipt_entry *table_base; unsigned int i; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2da514b16d95..b03e90649eb5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -950,8 +951,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -973,6 +976,7 @@ replace_table(struct ip6t_table *table, unsigned int i; for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -1019,7 +1023,7 @@ get_counters(const struct ip6t_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1153,7 +1157,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1467,7 +1472,8 @@ int ip6t_register_table(struct ip6t_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; -- cgit v1.2.3 From e26148d934762b61133a64b6862f870624ff617d Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Fri, 14 Oct 2005 15:59:05 -0700 Subject: [PATCH] Fix copy-and-paste error in BSD accounting Fix copy and paste error in jiffies_to_AHZ conversion which leads to wrong BSD accounting information on alpha and ia64 when CONFIG_BSD_PROCESS_ACCT_V3 is turned on. Also update comment to match reorganised header files. Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acct.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acct.h b/include/linux/acct.h index 1993a3691768..19f70462b3be 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -162,13 +162,13 @@ typedef struct acct acct_t; #ifdef __KERNEL__ /* * Yet another set of HZ to *HZ helper functions. - * See for the original. + * See for the original. */ static inline u32 jiffies_to_AHZ(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0 - return x / (HZ / USER_HZ); + return x / (HZ / AHZ); #else u64 tmp = (u64)x * TICK_NSEC; do_div(tmp, (NSEC_PER_SEC / AHZ)); -- cgit v1.2.3 From 688ce17b8599abc548b406c00e4d18ae0dec954f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Oct 2005 00:17:33 -0700 Subject: [PATCH]: highest_possible_processor_id() has to be a macro ... otherwise, things like alpha and sparc64 break and break badly. They define cpu_possible_map to something else in smp.h *AFTER* having included cpumask.h. If that puppy is a macro, expansion will happen at the actual caller, when we'd already seen #define cpu_possible_map ... and we will get the right thing used. As an inline helper it will be tokenized before we get to that define and that's it; no matter what we define later, it won't affect anything. We get modules with dependency on cpu_possible_map instead of the right symbol (phys_cpu_present_map in case of sparc64), or outright link errors if they are built-in. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/cpumask.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe9778301d07..9bdba8169b41 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -393,15 +393,13 @@ extern cpumask_t cpu_present_map; #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) /* Find the highest possible smp_processor_id() */ -static inline unsigned int highest_possible_processor_id(void) -{ - unsigned int cpu, highest = 0; - - for_each_cpu_mask(cpu, cpu_possible_map) - highest = cpu; - - return highest; -} +#define highest_possible_processor_id() \ +({ \ + unsigned int cpu, highest = 0; \ + for_each_cpu_mask(cpu, cpu_possible_map) \ + highest = cpu; \ + highest; \ +}) #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From b24d18aa743dad0c42918157c5d717686269d3a8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 16 Oct 2005 20:29:20 -0700 Subject: [PATCH] list: add missing rcu_dereference on first element It seems that all the list_*_rcu primitives are missing a memory barrier on the very first dereference. For example, #define list_for_each_rcu(pos, head) \ for (pos = (head)->next; prefetch(pos->next), pos != (head); \ pos = rcu_dereference(pos->next)) It will go something like: pos = (head)->next prefetch(pos->next) pos != (head) do stuff We're missing a barrier here. pos = rcu_dereference(pos->next) fetch pos->next barrier given by rcu_dereference(pos->next) store pos Without the missing barrier, the pos->next value may turn out to be stale. In fact, if "do stuff" were also dereferencing pos and relying on list_for_each_rcu to provide the barrier then it may also break. So here is a patch to make sure that we have a barrier for the first element in the list. Signed-off-by: Herbert Xu Acked-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index e6ec59682274..084971f333fe 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -442,12 +442,14 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_rcu(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) #define __list_for_each_rcu(pos, head) \ - for (pos = (head)->next; pos != (head); \ - pos = rcu_dereference(pos->next)) + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) /** * list_for_each_safe_rcu - iterate over an rcu-protected list safe @@ -461,8 +463,9 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_safe_rcu(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = rcu_dereference(n), n = pos->next) + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -474,11 +477,11 @@ static inline void list_splice_init(struct list_head *list, * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = rcu_dereference(list_entry(pos->member.next, \ - typeof(*pos), member))) +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -492,8 +495,9 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = (pos)->next; prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) /* * Double linked lists with a single pointer list head. @@ -696,8 +700,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, pos = n) #define hlist_for_each_rcu(pos, head) \ - for ((pos) = (head)->first; pos && ({ prefetch((pos)->next); 1; }); \ - (pos) = rcu_dereference((pos)->next)) + for ((pos) = (head)->first; \ + rcu_dereference((pos)) && ({ prefetch((pos)->next); 1; }); \ + (pos) = (pos)->next) /** * hlist_for_each_entry - iterate over list of given type @@ -762,9 +767,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1;}) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) + pos = pos->next) #else #warning "don't include kernel headers in userspace" -- cgit v1.2.3 From 5ee832dbc6770135ec8d63296af0a4374557bb79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2005 20:01:21 +0200 Subject: [PATCH] rcu: keep rcu callback event counter This makes call_rcu() keep track of how many events there are on the RCU list, and cause a reschedule event when the list gets too long. This helps keep RCU event lists down. Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 1 + kernel/rcupdate.c | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4e65eb44adfd..70191a5a148f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -94,6 +94,7 @@ struct rcu_data { long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; struct rcu_head **nxttail; + long count; /* # of queued items */ struct rcu_head *curlist; struct rcu_head **curtail; struct rcu_head *donelist; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index dd99415b1551..2559d4b8f23f 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -109,6 +109,10 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + + if (unlikely(++rdp->count > 10000)) + set_need_resched(); + local_irq_restore(flags); } @@ -140,6 +144,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + rdp->count++; +/* + * Should we directly call rcu_do_batch() here ? + * if (unlikely(rdp->count > 10000)) + * rcu_do_batch(rdp); + */ local_irq_restore(flags); } @@ -157,6 +167,7 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; + rdp->count--; if (++count >= maxbatch) break; } -- cgit v1.2.3 From 4faa5285283fad081443e3612ca426a311bb6c7e Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2005 16:43:33 -0700 Subject: [PATCH] aio: revert lock_kiocb() lock_kiocb() was introduced to serialize retrying and cancellation. In the process of doing so it tried to sleep waiting for KIF_LOCKED while holding the ctx_lock spinlock. Recent fixes have ensured that multiple concurrent retries won't be attempted for a given iocb. Cancel has other problems and has no significant in-tree users that have been complaining about it. So for the immediate future we'll revert sleeping with the lock held and will address proper cancellation and retry serialization in the future. Signed-off-by: Zach Brown Acked-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 26 +------------------------- include/linux/aio.h | 7 ++++++- 2 files changed, 7 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index d6b1551342b7..9fe7216457d8 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -398,7 +398,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) if (unlikely(!req)) return NULL; - req->ki_flags = 1 << KIF_LOCKED; + req->ki_flags = 0; req->ki_users = 2; req->ki_key = 0; req->ki_ctx = ctx; @@ -547,25 +547,6 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id) return ioctx; } -static int lock_kiocb_action(void *param) -{ - schedule(); - return 0; -} - -static inline void lock_kiocb(struct kiocb *iocb) -{ - wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action, - TASK_UNINTERRUPTIBLE); -} - -static inline void unlock_kiocb(struct kiocb *iocb) -{ - kiocbClearLocked(iocb); - smp_mb__after_clear_bit(); - wake_up_bit(&iocb->ki_flags, KIF_LOCKED); -} - /* * use_mm * Makes the calling kernel thread take on the specified @@ -796,9 +777,7 @@ static int __aio_run_iocbs(struct kioctx *ctx) * Hold an extra reference while retrying i/o. */ iocb->ki_users++; /* grab extra reference */ - lock_kiocb(iocb); aio_run_iocb(iocb); - unlock_kiocb(iocb); if (__aio_put_req(ctx, iocb)) /* drop extra ref */ put_ioctx(ctx); } @@ -1542,7 +1521,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, spin_lock_irq(&ctx->ctx_lock); aio_run_iocb(req); - unlock_kiocb(req); if (!list_empty(&ctx->run_list)) { /* drain the run list */ while (__aio_run_iocbs(ctx)) @@ -1674,7 +1652,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (NULL != cancel) { struct io_event tmp; pr_debug("calling cancel\n"); - lock_kiocb(kiocb); memset(&tmp, 0, sizeof(tmp)); tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; tmp.data = kiocb->ki_user_data; @@ -1686,7 +1663,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (copy_to_user(result, &tmp, sizeof(tmp))) ret = -EFAULT; } - unlock_kiocb(kiocb); } else ret = -EINVAL; diff --git a/include/linux/aio.h b/include/linux/aio.h index 60def658b246..0decf66117c1 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -24,7 +24,12 @@ struct kioctx; #define KIOCB_SYNC_KEY (~0U) /* ki_flags bits */ -#define KIF_LOCKED 0 +/* + * This may be used for cancel/retry serialization in the future, but + * for now it's unused and we probably don't want modules to even + * think they can use it. + */ +/* #define KIF_LOCKED 0 */ #define KIF_KICKED 1 #define KIF_CANCELLED 2 -- cgit v1.2.3 From 8cbd6df1f0ce977ab7b61feffa59879bb5e0ed8f Mon Sep 17 00:00:00 2001 From: Albert Lee Date: Wed, 12 Oct 2005 15:06:27 +0800 Subject: [PATCH] libata CHS: calculate read/write commands and protocol on the fly (revise #6) - merge ata_prot_to_cmd() and ata_dev_set_protocol() as ata_rwcmd_protocol() - pave road for read/write multiple support - remove usage of pre-cached command and protocol values and call ata_rwcmd_protocol() instead Signed-off-by: Albert Lee ============== Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 105 ++++++++++++++++----------------------------- drivers/scsi/libata-scsi.c | 13 +++--- drivers/scsi/libata.h | 1 + include/linux/ata.h | 4 ++ include/linux/libata.h | 6 +-- 5 files changed, 50 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 55d4dee133af..19d3d717faf6 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -616,79 +616,53 @@ void ata_tf_from_fis(u8 *fis, struct ata_taskfile *tf) tf->hob_nsect = fis[13]; } -/** - * ata_prot_to_cmd - determine which read/write opcodes to use - * @protocol: ATA_PROT_xxx taskfile protocol - * @lba48: true is lba48 is present - * - * Given necessary input, determine which read/write commands - * to use to transfer data. - * - * LOCKING: - * None. - */ -static int ata_prot_to_cmd(int protocol, int lba48) -{ - int rcmd = 0, wcmd = 0; - - switch (protocol) { - case ATA_PROT_PIO: - if (lba48) { - rcmd = ATA_CMD_PIO_READ_EXT; - wcmd = ATA_CMD_PIO_WRITE_EXT; - } else { - rcmd = ATA_CMD_PIO_READ; - wcmd = ATA_CMD_PIO_WRITE; - } - break; - - case ATA_PROT_DMA: - if (lba48) { - rcmd = ATA_CMD_READ_EXT; - wcmd = ATA_CMD_WRITE_EXT; - } else { - rcmd = ATA_CMD_READ; - wcmd = ATA_CMD_WRITE; - } - break; - - default: - return -1; - } - - return rcmd | (wcmd << 8); -} +static const u8 ata_rw_cmds[] = { + /* pio multi */ + ATA_CMD_READ_MULTI, + ATA_CMD_WRITE_MULTI, + ATA_CMD_READ_MULTI_EXT, + ATA_CMD_WRITE_MULTI_EXT, + /* pio */ + ATA_CMD_PIO_READ, + ATA_CMD_PIO_WRITE, + ATA_CMD_PIO_READ_EXT, + ATA_CMD_PIO_WRITE_EXT, + /* dma */ + ATA_CMD_READ, + ATA_CMD_WRITE, + ATA_CMD_READ_EXT, + ATA_CMD_WRITE_EXT +}; /** - * ata_dev_set_protocol - set taskfile protocol and r/w commands - * @dev: device to examine and configure + * ata_rwcmd_protocol - set taskfile r/w commands and protocol + * @qc: command to examine and configure * - * Examine the device configuration, after we have - * read the identify-device page and configured the - * data transfer mode. Set internal state related to - * the ATA taskfile protocol (pio, pio mult, dma, etc.) - * and calculate the proper read/write commands to use. + * Examine the device configuration and tf->flags to calculate + * the proper read/write commands and protocol to use. * * LOCKING: * caller. */ -static void ata_dev_set_protocol(struct ata_device *dev) +void ata_rwcmd_protocol(struct ata_queued_cmd *qc) { - int pio = (dev->flags & ATA_DFLAG_PIO); - int lba48 = (dev->flags & ATA_DFLAG_LBA48); - int proto, cmd; + struct ata_taskfile *tf = &qc->tf; + struct ata_device *dev = qc->dev; - if (pio) - proto = dev->xfer_protocol = ATA_PROT_PIO; - else - proto = dev->xfer_protocol = ATA_PROT_DMA; + int index, lba48, write; + + lba48 = (tf->flags & ATA_TFLAG_LBA48) ? 2 : 0; + write = (tf->flags & ATA_TFLAG_WRITE) ? 1 : 0; - cmd = ata_prot_to_cmd(proto, lba48); - if (cmd < 0) - BUG(); + if (dev->flags & ATA_DFLAG_PIO) { + tf->protocol = ATA_PROT_PIO; + index = dev->multi_count ? 0 : 4; + } else { + tf->protocol = ATA_PROT_DMA; + index = 8; + } - dev->read_cmd = cmd & 0xff; - dev->write_cmd = (cmd >> 8) & 0xff; + tf->command = ata_rw_cmds[index + lba48 + write]; } static const char * xfer_mode_str[] = { @@ -1641,7 +1615,7 @@ static void ata_host_set_dma(struct ata_port *ap, u8 xfer_mode, */ static void ata_set_mode(struct ata_port *ap) { - unsigned int i, xfer_shift; + unsigned int xfer_shift; u8 xfer_mode; int rc; @@ -1670,11 +1644,6 @@ static void ata_set_mode(struct ata_port *ap) if (ap->ops->post_set_mode) ap->ops->post_set_mode(ap); - for (i = 0; i < 2; i++) { - struct ata_device *dev = &ap->device[i]; - ata_dev_set_protocol(dev); - } - return; err_out: diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index ea7a4d8a6fc9..90bf22204668 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -742,15 +742,10 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) u32 n_block; tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; - tf->protocol = qc->dev->xfer_protocol; - if (scsicmd[0] == READ_10 || scsicmd[0] == READ_6 || - scsicmd[0] == READ_16) { - tf->command = qc->dev->read_cmd; - } else { - tf->command = qc->dev->write_cmd; + if (scsicmd[0] == WRITE_10 || scsicmd[0] == WRITE_6 || + scsicmd[0] == WRITE_16) tf->flags |= ATA_TFLAG_WRITE; - } /* Calculate the SCSI LBA and transfer length. */ switch (scsicmd[0]) { @@ -812,6 +807,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) tf->device |= (block >> 24) & 0xf; } + ata_rwcmd_protocol(qc); + qc->nsect = n_block; tf->nsect = n_block & 0xff; @@ -828,6 +825,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) if ((block >> 28) || (n_block > 256)) goto out_of_range; + ata_rwcmd_protocol(qc); + /* Convert LBA to CHS */ track = (u32)block / dev->sectors; cyl = track / dev->heads; diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index a18f2ac1d4a1..67d752ca8ae2 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -42,6 +42,7 @@ extern int atapi_enabled; extern int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat); extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap, struct ata_device *dev); +extern void ata_rwcmd_protocol(struct ata_queued_cmd *qc); extern void ata_qc_free(struct ata_queued_cmd *qc); extern int ata_qc_issue(struct ata_queued_cmd *qc); extern int ata_check_atapi_dma(struct ata_queued_cmd *qc); diff --git a/include/linux/ata.h b/include/linux/ata.h index ecb7346d0c16..630908c9378b 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -128,6 +128,10 @@ enum { ATA_CMD_PIO_READ_EXT = 0x24, ATA_CMD_PIO_WRITE = 0x30, ATA_CMD_PIO_WRITE_EXT = 0x34, + ATA_CMD_READ_MULTI = 0xC4, + ATA_CMD_READ_MULTI_EXT = 0x29, + ATA_CMD_WRITE_MULTI = 0xC5, + ATA_CMD_WRITE_MULTI_EXT = 0x39, ATA_CMD_SET_FEATURES = 0xEF, ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, diff --git a/include/linux/libata.h b/include/linux/libata.h index 7929cfc9318d..0261c55f3483 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -283,10 +283,8 @@ struct ata_device { u8 xfer_mode; unsigned int xfer_shift; /* ATA_SHIFT_xxx */ - /* cache info about current transfer mode */ - u8 xfer_protocol; /* taskfile xfer protocol */ - u8 read_cmd; /* opcode to use on read */ - u8 write_cmd; /* opcode to use on write */ + unsigned int multi_count; /* sectors count for + READ/WRITE MULTIPLE */ /* for CHS addressing */ u16 cylinders; /* Number of cylinders */ -- cgit v1.2.3 From 5e5ce5be6f0161d2a069a4f8a1154fe639c5c02f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:11 -0700 Subject: RPC: allow call_encode() to delay transmission of an RPC call. Currently, call_encode will cause the entire RPC call to abort if it returns an error. This is unnecessarily rigid, and gets in the way of attempts to allow the NFSv4 layer to order RPC calls that carry sequence ids. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 23 ++++++++++++----------- net/sunrpc/xprt.c | 8 ++++++++ 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 99cad3ead81d..068e1fb0868b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -211,6 +211,7 @@ int xprt_reserve_xprt(struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); +void xprt_abort_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a5f7029b1daa..534274056329 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -678,13 +678,11 @@ call_allocate(struct rpc_task *task) static void call_encode(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; struct xdr_buf *sndbuf = &req->rq_snd_buf; struct xdr_buf *rcvbuf = &req->rq_rcv_buf; unsigned int bufsiz; kxdrproc_t encode; - int status; u32 *p; dprintk("RPC: %4d call_encode (status %d)\n", @@ -712,12 +710,9 @@ call_encode(struct rpc_task *task) rpc_exit(task, -EIO); return; } - if (encode && (status = rpcauth_wrap_req(task, encode, req, p, - task->tk_msg.rpc_argp)) < 0) { - printk(KERN_WARNING "%s: can't encode arguments: %d\n", - clnt->cl_protname, -status); - rpc_exit(task, status); - } + if (encode != NULL) + task->tk_status = rpcauth_wrap_req(task, encode, req, p, + task->tk_msg.rpc_argp); } /* @@ -865,10 +860,12 @@ call_transmit(struct rpc_task *task) if (task->tk_status != 0) return; /* Encode here so that rpcsec_gss can use correct sequence number. */ - if (!task->tk_rqstp->rq_bytes_sent) + if (task->tk_rqstp->rq_bytes_sent == 0) { call_encode(task); - if (task->tk_status < 0) - return; + /* Did the encode result in an error condition? */ + if (task->tk_status != 0) + goto out_nosend; + } xprt_transmit(task); if (task->tk_status < 0) return; @@ -876,6 +873,10 @@ call_transmit(struct rpc_task *task) task->tk_action = NULL; rpc_wake_up_task(task); } + return; +out_nosend: + /* release socket write lock before attempting to handle error */ + xprt_abort_transmit(task); } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 215be0d0ef6b..1ba55dc38b7a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -709,6 +709,14 @@ out_unlock: return err; } +void +xprt_abort_transmit(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + xprt_release_write(xprt, task); +} + /** * xprt_transmit - send an RPC request on a transport * @task: controlling RPC task -- cgit v1.2.3 From cee54fc944422c44e476736c045a9e8053cb0644 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:12 -0700 Subject: NFSv4: Add functions to order RPC calls NFSv4 file state-changing functions such as OPEN, CLOSE, LOCK,... are all labelled with "sequence identifiers" in order to prevent the server from reordering RPC requests, as this could cause its file state to become out of sync with the client. Currently the NFS client code enforces this ordering locally using semaphores to restrict access to structures until the RPC call is done. This, of course, only works with synchronous RPC calls, since the user process must first grab the semaphore. By dropping semaphores, and instead teaching the RPC engine to hold the RPC calls until they are ready to be sent, we can extend this process to work nicely with asynchronous RPC calls too. This patch adds a new list called "rpc_sequence" that defines the order of the RPC calls to be sent. We add one such list for each state_owner. When an RPC call is ready to be sent, it checks if it is top of the rpc_sequence list. If so, it proceeds. If not, it goes back to sleep, and loops until it hits top of the list. Once the RPC call has completed, it can then bump the sequence id counter, and remove itself from the rpc_sequence list, and then wake up the next sleeper. Note that the state_owner sequence ids and lock_owner sequence ids are all indexed to the same rpc_sequence list, so OPEN, LOCK,... requests are all ordered w.r.t. each other. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 42 ++++++++++++-- fs/nfs/nfs4proc.c | 111 +++++++++++++++++++++++------------- fs/nfs/nfs4state.c | 145 +++++++++++++++++++++++++++++++++++++++--------- fs/nfs/nfs4xdr.c | 43 +++++++++++--- include/linux/nfs_xdr.h | 15 ++--- 5 files changed, 273 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ec1a22d7b876..6ac6708484f5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -92,6 +92,35 @@ struct nfs4_client { unsigned char cl_id_uniquifier; }; +/* + * struct rpc_sequence ensures that RPC calls are sent in the exact + * order that they appear on the list. + */ +struct rpc_sequence { + struct rpc_wait_queue wait; /* RPC call delay queue */ + spinlock_t lock; /* Protects the list */ + struct list_head list; /* Defines sequence of RPC calls */ +}; + +#define NFS_SEQID_CONFIRMED 1 +struct nfs_seqid_counter { + struct rpc_sequence *sequence; + int flags; + u32 counter; +}; + +struct nfs_seqid { + struct list_head list; + struct nfs_seqid_counter *sequence; + struct rpc_task *task; +}; + +static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) +{ + if (seqid_mutating_err(-status)) + seqid->flags |= NFS_SEQID_CONFIRMED; +} + /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only @@ -106,12 +135,13 @@ struct nfs4_state_owner { struct nfs4_client *so_client; u32 so_id; /* 32-bit identifier, unique */ struct semaphore so_sema; - u32 so_seqid; /* protected by so_sema */ atomic_t so_count; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; struct list_head so_delegations; + struct nfs_seqid_counter so_seqid; + struct rpc_sequence so_sequence; }; /* @@ -132,7 +162,7 @@ struct nfs4_lock_state { fl_owner_t ls_owner; /* POSIX lock owner */ #define NFS_LOCK_INITIALIZED 1 int ls_flags; - u32 ls_seqid; + struct nfs_seqid_counter ls_seqid; u32 ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; @@ -224,12 +254,16 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); -extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern void nfs4_schedule_state_recovery(struct nfs4_client *); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); +extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); +extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); +extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_free_seqid(struct nfs_seqid *seqid); + extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9701ca8c9428..9ba89e7cdd28 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -218,7 +218,6 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st struct nfs_delegation *delegation = NFS_I(inode)->delegation; struct nfs_openargs o_arg = { .fh = NFS_FH(inode), - .seqid = sp->so_seqid, .id = sp->so_id, .open_flags = state->state, .clientid = server->nfs4_state->cl_clientid, @@ -245,8 +244,13 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st } o_arg.u.delegation_type = delegation->type; } + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (o_arg.seqid == NULL) + return -ENOMEM; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + /* Confirm the sequence as being established */ + nfs_confirm_seqid(&sp->so_seqid, status); + nfs_increment_open_seqid(status, o_arg.seqid); if (status == 0) { memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); if (o_res.delegation_type != 0) { @@ -256,6 +260,7 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st nfs_async_inode_return_delegation(inode, &o_res.stateid); } } + nfs_free_seqid(o_arg.seqid); clear_bit(NFS_DELEGATED_STATE, &state->flags); /* Ensure we update the inode attributes */ NFS_CACHEINV(inode); @@ -307,16 +312,20 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state goto out; if (state->state == 0) goto out; - arg.seqid = sp->so_seqid; + arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + status = -ENOMEM; + if (arg.seqid == NULL) + goto out; arg.open_flags = state->state; memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + nfs_increment_open_seqid(status, arg.seqid); if (status >= 0) { memcpy(state->stateid.data, res.stateid.data, sizeof(state->stateid.data)); clear_bit(NFS_DELEGATED_STATE, &state->flags); } + nfs_free_seqid(arg.seqid); out: up(&sp->so_sema); dput(parent); @@ -345,11 +354,11 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) return err; } -static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) { struct nfs_open_confirmargs arg = { .fh = fh, - .seqid = sp->so_seqid, + .seqid = seqid, .stateid = *stateid, }; struct nfs_open_confirmres res; @@ -362,7 +371,9 @@ static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nf int status; status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + /* Confirm the sequence as being established */ + nfs_confirm_seqid(&sp->so_seqid, status); + nfs_increment_open_seqid(status, seqid); if (status >= 0) memcpy(stateid, &res.stateid, sizeof(*stateid)); return status; @@ -380,21 +391,21 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, stru int status; /* Update sequence id. The caller must serialize! */ - o_arg->seqid = sp->so_seqid; o_arg->id = sp->so_id; o_arg->clientid = sp->so_client->cl_clientid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + nfs_increment_open_seqid(status, o_arg->seqid); if (status != 0) goto out; update_changeattr(dir, &o_res->cinfo); if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(server->client, &o_res->fh, - sp, &o_res->stateid); + sp, &o_res->stateid, o_arg->seqid); if (status != 0) goto out; } + nfs_confirm_seqid(&sp->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); out: @@ -465,6 +476,10 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st set_bit(NFS_DELEGATED_STATE, &state->flags); goto out; } + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + status = -ENOMEM; + if (o_arg.seqid == NULL) + goto out; status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_nodeleg; @@ -490,6 +505,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); } out_nodeleg: + nfs_free_seqid(o_arg.seqid); clear_bit(NFS_DELEGATED_STATE, &state->flags); out: dput(parent); @@ -667,6 +683,9 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st /* Serialization for the sequence id */ down(&sp->so_sema); + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (o_arg.seqid == NULL) + return -ENOMEM; status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_err; @@ -681,6 +700,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st update_open_stateid(state, &o_res.stateid, flags); if (o_res.delegation_type != 0) nfs_inode_set_delegation(inode, cred, &o_res); + nfs_free_seqid(o_arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); @@ -690,6 +710,7 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); + nfs_free_seqid(o_arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); } @@ -718,7 +739,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * It is actually a sign of a bug on the client or on the server. * * If we receive a BAD_SEQID error in the particular case of - * doing an OPEN, we assume that nfs4_increment_seqid() will + * doing an OPEN, we assume that nfs_increment_open_seqid() will * have unhashed the old state_owner for us, and that we can * therefore safely retry using a new one. We should still warn * the user though... @@ -799,7 +820,7 @@ static void nfs4_close_done(struct rpc_task *task) /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ - nfs4_increment_seqid(task->tk_status, sp); + nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: memcpy(&state->stateid, &calldata->res.stateid, @@ -818,6 +839,7 @@ static void nfs4_close_done(struct rpc_task *task) } state->state = calldata->arg.open_flags; nfs4_put_open_state(state); + nfs_free_seqid(calldata->arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&server->nfs4_state->cl_sem); @@ -865,7 +887,11 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) calldata->state = state; calldata->arg.fh = NFS_FH(inode); /* Serialization for the sequence id */ - calldata->arg.seqid = state->owner->so_seqid; + calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); + if (calldata->arg.seqid == NULL) { + kfree(calldata); + return -ENOMEM; + } calldata->arg.open_flags = mode; memcpy(&calldata->arg.stateid, &state->stateid, sizeof(calldata->arg.stateid)); @@ -2729,15 +2755,19 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock /* We might have lost the locks! */ if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) goto out; - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); + status = -ENOMEM; + if (luargs.seqid == NULL) + goto out; + memcpy(luargs.stateid.data, lsp->ls_stateid.data, sizeof(luargs.stateid.data)); arg.u.locku = &luargs; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); + nfs_increment_lock_seqid(status, luargs.seqid); if (status == 0) - memcpy(&lsp->ls_stateid, &res.u.stateid, - sizeof(lsp->ls_stateid)); + memcpy(lsp->ls_stateid.data, res.u.stateid.data, + sizeof(lsp->ls_stateid.data)); + nfs_free_seqid(luargs.seqid); out: up(&state->lock_sema); if (status == 0) @@ -2783,9 +2813,13 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r .reclaim = reclaim, .new_lock_owner = 0, }; - int status; + struct nfs_seqid *lock_seqid; + int status = -ENOMEM; - if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { + lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (lock_seqid == NULL) + return -ENOMEM; + if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { .lock_owner = { @@ -2793,39 +2827,40 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r }, }; - otl.lock_seqid = lsp->ls_seqid; + otl.lock_seqid = lock_seqid; otl.lock_owner.id = lsp->ls_id; memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); largs.u.open_lock = &otl; largs.new_lock_owner = 1; arg.u.lock = &largs; down(&owner->so_sema); - otl.open_seqid = owner->so_seqid; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment open_owner seqid on success, and - * seqid mutating errors */ - nfs4_increment_seqid(status, owner); - up(&owner->so_sema); - if (status == 0) { - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - lsp->ls_seqid++; + otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid); + if (otl.open_seqid != NULL) { + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment seqid on success, and seqid mutating errors */ + nfs_increment_open_seqid(status, otl.open_seqid); + nfs_free_seqid(otl.open_seqid); } + up(&owner->so_sema); + if (status == 0) + nfs_confirm_seqid(&lsp->ls_seqid, 0); } else { - struct nfs_exist_lock el = { - .seqid = lsp->ls_seqid, - }; + struct nfs_exist_lock el; memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); largs.u.exist_lock = ⪙ arg.u.lock = &largs; + el.seqid = lock_seqid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment seqid on success, and * seqid mutating errors*/ - nfs4_increment_lock_seqid(status, lsp); } + /* increment seqid on success, and seqid mutating errors*/ + nfs_increment_lock_seqid(status, lock_seqid); /* save the returned stateid. */ - if (status == 0) - memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - else if (status == -NFS4ERR_DENIED) + if (status == 0) { + memcpy(lsp->ls_stateid.data, res.u.stateid.data, sizeof(lsp->ls_stateid.data)); + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; + nfs_free_seqid(lock_seqid); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index afe587d82f1e..f535c219cf3a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -264,13 +264,16 @@ nfs4_alloc_state_owner(void) { struct nfs4_state_owner *sp; - sp = kmalloc(sizeof(*sp),GFP_KERNEL); + sp = kzalloc(sizeof(*sp),GFP_KERNEL); if (!sp) return NULL; init_MUTEX(&sp->so_sema); - sp->so_seqid = 0; /* arbitrary */ INIT_LIST_HEAD(&sp->so_states); INIT_LIST_HEAD(&sp->so_delegations); + rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); + sp->so_seqid.sequence = &sp->so_sequence; + spin_lock_init(&sp->so_sequence.lock); + INIT_LIST_HEAD(&sp->so_sequence.list); atomic_set(&sp->so_count, 1); return sp; } @@ -553,12 +556,10 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f struct nfs4_lock_state *lsp; struct nfs4_client *clp = state->owner->so_client; - lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) return NULL; - lsp->ls_flags = 0; - lsp->ls_seqid = 0; /* arbitrary */ - memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + lsp->ls_seqid.sequence = &state->owner->so_sequence; atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; spin_lock(&clp->cl_lock); @@ -673,29 +674,102 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f nfs4_put_lock_state(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. -*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) +{ + struct rpc_sequence *sequence = counter->sequence; + struct nfs_seqid *new; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new != NULL) { + new->sequence = counter; + new->task = NULL; + spin_lock(&sequence->lock); + list_add_tail(&new->list, &sequence->list); + spin_unlock(&sequence->lock); + } + return new; +} + +void nfs_free_seqid(struct nfs_seqid *seqid) { - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; + struct rpc_sequence *sequence = seqid->sequence->sequence; + struct rpc_task *next = NULL; + + spin_lock(&sequence->lock); + list_del(&seqid->list); + if (!list_empty(&sequence->list)) { + next = list_entry(sequence->list.next, struct nfs_seqid, list)->task; + if (next) + rpc_wake_up_task(next); + } + spin_unlock(&sequence->lock); + kfree(seqid); } /* -* Called with sp->so_sema and clp->cl_sem held. -* -* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or -* failed with a seqid incrementing error - -* see comments nfs_fs.h:seqid_mutating_error() -*/ -void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) -{ - if (status == NFS_OK || seqid_mutating_err(-status)) - sp->so_seqid++; - /* If the server returns BAD_SEQID, unhash state_owner here */ - if (status == -NFS4ERR_BAD_SEQID) + * Called with sp->so_sema and clp->cl_sem held. + * + * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) +{ + switch (status) { + case 0: + break; + case -NFS4ERR_BAD_SEQID: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_BADXDR: + case -NFS4ERR_RESOURCE: + case -NFS4ERR_NOFILEHANDLE: + /* Non-seqid mutating errors */ + return; + }; + /* + * Note: no locking needed as we are guaranteed to be first + * on the sequence list + */ + seqid->sequence->counter++; +} + +void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) +{ + if (status == -NFS4ERR_BAD_SEQID) { + struct nfs4_state_owner *sp = container_of(seqid->sequence, + struct nfs4_state_owner, so_seqid); nfs4_drop_state_owner(sp); + } + return nfs_increment_seqid(status, seqid); +} + +/* + * Called with ls->lock_sema and clp->cl_sem held. + * + * Increment the seqid if the LOCK/LOCKU succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) +{ + return nfs_increment_seqid(status, seqid); +} + +int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) +{ + struct rpc_sequence *sequence = seqid->sequence->sequence; + int status = 0; + + spin_lock(&sequence->lock); + if (sequence->list.next != &seqid->list) { + seqid->task = task; + rpc_sleep_on(&sequence->wait, task, NULL, NULL); + status = -EAGAIN; + } + spin_unlock(&sequence->lock); + return status; } static int reclaimer(void *); @@ -791,8 +865,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n if (state->state == 0) continue; status = ops->recover_open(sp, state); - list_for_each_entry(lock, &state->lock_states, ls_locks) - lock->ls_flags &= ~NFS_LOCK_INITIALIZED; if (status >= 0) { status = nfs4_reclaim_locks(ops, state); if (status < 0) @@ -831,6 +903,26 @@ out_err: return status; } +static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + struct nfs4_state *state; + struct nfs4_lock_state *lock; + + /* Reset all sequence ids to zero */ + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + sp->so_seqid.counter = 0; + sp->so_seqid.flags = 0; + list_for_each_entry(state, &sp->so_states, open_states) { + list_for_each_entry(lock, &state->lock_states, ls_locks) { + lock->ls_seqid.counter = 0; + lock->ls_seqid.flags = 0; + lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + } + } + } +} + static int reclaimer(void *ptr) { struct reclaimer_args *args = (struct reclaimer_args *)ptr; @@ -864,6 +956,7 @@ restart_loop: default: ops = &nfs4_network_partition_recovery_ops; }; + nfs4_state_mark_reclaim(clp); status = __nfs4_init_client(clp); if (status) goto out_error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6c564ef9489e..fcd28a29a2f8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -604,7 +604,7 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_CLOSE); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); return 0; @@ -732,9 +732,9 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) struct nfs_open_to_lock *ol = opargs->u.open_lock; RESERVE_SPACE(40); - WRITE32(ol->open_seqid); + WRITE32(ol->open_seqid->sequence->counter); WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); - WRITE32(ol->lock_seqid); + WRITE32(ol->lock_seqid->sequence->counter); WRITE64(ol->lock_owner.clientid); WRITE32(4); WRITE32(ol->lock_owner.id); @@ -744,7 +744,7 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) RESERVE_SPACE(20); WRITEMEM(&el->stateid, sizeof(el->stateid)); - WRITE32(el->seqid); + WRITE32(el->seqid->sequence->counter); } return 0; @@ -775,7 +775,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) RESERVE_SPACE(44); WRITE32(OP_LOCKU); WRITE32(arg->type); - WRITE32(opargs->seqid); + WRITE32(opargs->seqid->sequence->counter); WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); WRITE64(arg->offset); WRITE64(arg->length); @@ -826,7 +826,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena */ RESERVE_SPACE(8); WRITE32(OP_OPEN); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); RESERVE_SPACE(16); WRITE64(arg->clientid); @@ -941,7 +941,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_OPEN_CONFIRM); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); return 0; } @@ -953,7 +953,7 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_OPEN_DOWNGRADE); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); return 0; } @@ -1416,6 +1416,9 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1437,6 +1440,9 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1464,6 +1470,9 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1485,6 +1494,9 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1506,6 +1518,9 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1525,8 +1540,17 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka struct compound_hdr hdr = { .nops = 2, }; + struct nfs_lock_opargs *opargs = args->u.lock; + struct nfs_seqid *seqid; int status; + if (opargs->new_lock_owner) + seqid = opargs->u.open_lock->lock_seqid; + else + seqid = opargs->u.exist_lock->seqid; + status = nfs_wait_on_sequence(seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1569,6 +1593,9 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lock }; int status; + status = nfs_wait_on_sequence(args->u.locku->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a2bf6914ff1b..d578912bf9a9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -96,12 +96,13 @@ struct nfs4_change_info { u64 after; }; +struct nfs_seqid; /* * Arguments to the open call. */ struct nfs_openargs { const struct nfs_fh * fh; - __u32 seqid; + struct nfs_seqid * seqid; int open_flags; __u64 clientid; __u32 id; @@ -136,7 +137,7 @@ struct nfs_openres { struct nfs_open_confirmargs { const struct nfs_fh * fh; nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; }; struct nfs_open_confirmres { @@ -149,7 +150,7 @@ struct nfs_open_confirmres { struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; int open_flags; }; @@ -165,15 +166,15 @@ struct nfs_lowner { }; struct nfs_open_to_lock { - __u32 open_seqid; + struct nfs_seqid * open_seqid; nfs4_stateid open_stateid; - __u32 lock_seqid; + struct nfs_seqid * lock_seqid; struct nfs_lowner lock_owner; }; struct nfs_exist_lock { nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; }; struct nfs_lock_opargs { @@ -186,7 +187,7 @@ struct nfs_lock_opargs { }; struct nfs_locku_opargs { - __u32 seqid; + struct nfs_seqid * seqid; nfs4_stateid stateid; }; -- cgit v1.2.3 From 9512135df14f8293b9bc5e8fb22d4279dee5ff66 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:12 -0700 Subject: NFSv4: Fix a potential CLOSE race Once the state_owner and lock_owner semaphores get removed, it will be possible for other OPEN requests to reopen the same file if they have lower sequence ids than our CLOSE call. This patch ensures that we recheck the file state once nfs_wait_on_sequence() has completed waiting. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 110 +++++++++++++++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 6 ++- fs/nfs/nfs4xdr.c | 14 ++---- include/linux/nfs_xdr.h | 2 +- 4 files changed, 87 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9ba89e7cdd28..5154ddf6d9a5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -189,6 +189,21 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf nfsi->change_attr = cinfo->after; } +/* Helper for asynchronous RPC calls */ +static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin, + rpc_action tk_exit, void *calldata) +{ + struct rpc_task *task; + + if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC))) + return -ENOMEM; + + task->tk_calldata = calldata; + task->tk_action = tk_begin; + rpc_execute(task); + return 0; +} + static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) { struct inode *inode = state->inode; @@ -810,11 +825,24 @@ struct nfs4_closedata { struct nfs_closeres res; }; +static void nfs4_free_closedata(struct nfs4_closedata *calldata) +{ + struct nfs4_state *state = calldata->state; + struct nfs4_state_owner *sp = state->owner; + struct nfs_server *server = NFS_SERVER(calldata->inode); + + nfs4_put_open_state(calldata->state); + nfs_free_seqid(calldata->arg.seqid); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&server->nfs4_state->cl_sem); + kfree(calldata); +} + static void nfs4_close_done(struct rpc_task *task) { struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; struct nfs4_state *state = calldata->state; - struct nfs4_state_owner *sp = state->owner; struct nfs_server *server = NFS_SERVER(calldata->inode); /* hmm. we are done with the inode, and in the process of freeing @@ -838,25 +866,46 @@ static void nfs4_close_done(struct rpc_task *task) } } state->state = calldata->arg.open_flags; - nfs4_put_open_state(state); - nfs_free_seqid(calldata->arg.seqid); - up(&sp->so_sema); - nfs4_put_state_owner(sp); - up_read(&server->nfs4_state->cl_sem); - kfree(calldata); + nfs4_free_closedata(calldata); } -static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) +static void nfs4_close_begin(struct rpc_task *task) { + struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_state *state = calldata->state; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &calldata->arg, .rpc_resp = &calldata->res, - .rpc_cred = calldata->state->owner->so_cred, + .rpc_cred = state->owner->so_cred, }; - if (calldata->arg.open_flags != 0) + int mode = 0; + int status; + + status = nfs_wait_on_sequence(calldata->arg.seqid, task); + if (status != 0) + return; + /* Don't reorder reads */ + smp_rmb(); + /* Recalculate the new open mode in case someone reopened the file + * while we were waiting in line to be scheduled. + */ + if (state->nreaders != 0) + mode |= FMODE_READ; + if (state->nwriters != 0) + mode |= FMODE_WRITE; + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + state->state = mode; + if (mode == state->state) { + nfs4_free_closedata(calldata); + task->tk_exit = NULL; + rpc_exit(task, 0); + return; + } + if (mode != 0) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata); + calldata->arg.open_flags = mode; + rpc_call_setup(task, &msg, 0); } /* @@ -873,35 +922,30 @@ static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata * int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) { struct nfs4_closedata *calldata; - int status; + int status = -ENOMEM; - /* Tell caller we're done */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - state->state = mode; - return 0; - } - calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL); + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) - return -ENOMEM; + goto out; calldata->inode = inode; calldata->state = state; calldata->arg.fh = NFS_FH(inode); + calldata->arg.stateid = &state->stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); - if (calldata->arg.seqid == NULL) { - kfree(calldata); - return -ENOMEM; - } - calldata->arg.open_flags = mode; - memcpy(&calldata->arg.stateid, &state->stateid, - sizeof(calldata->arg.stateid)); - status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); - /* - * Return -EINPROGRESS on success in order to indicate to the - * caller that an asynchronous RPC call has been launched, and - * that it will release the semaphores on completion. - */ - return (status == 0) ? -EINPROGRESS : status; + if (calldata->arg.seqid == NULL) + goto out_free_calldata; + + status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_close_begin, + nfs4_close_done, calldata); + if (status == 0) + goto out; + + nfs_free_seqid(calldata->arg.seqid); +out_free_calldata: + kfree(calldata); +out: + return status; } struct inode * diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f535c219cf3a..59c93f37e1b2 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -518,7 +518,11 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) newstate |= FMODE_WRITE; if (state->state == newstate) goto out; - if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + state->state = newstate; + goto out; + } + if (nfs4_do_close(inode, state, newstate) == 0) return; } out: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fcd28a29a2f8..934ec50ea6bf 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -602,10 +602,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid->data)); WRITE32(OP_CLOSE); WRITE32(arg->seqid->sequence->counter); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); return 0; } @@ -950,9 +950,9 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid->data)); WRITE32(OP_OPEN_DOWNGRADE); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); return 0; @@ -1416,9 +1416,6 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1518,9 +1515,6 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d578912bf9a9..cac0df950c66 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -149,7 +149,7 @@ struct nfs_open_confirmres { */ struct nfs_closeargs { struct nfs_fh * fh; - nfs4_stateid stateid; + nfs4_stateid * stateid; struct nfs_seqid * seqid; int open_flags; }; -- cgit v1.2.3 From faf5f49c2d9c0af2847837c232a432cc146e203b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:15 -0700 Subject: NFSv4: Make NFS clean up byte range locks asynchronously Currently we fail to do so if the process was signalled. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 161 +++++++++++++++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4xdr.c | 9 +-- include/linux/nfs_xdr.h | 2 +- 5 files changed, 116 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d4fcb5d0ce6c..2215cdee43ae 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -249,6 +249,7 @@ extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f57dba815099..612a9a14aed3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -58,9 +58,9 @@ static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); +static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); +static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -2422,7 +2422,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen } static int -nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { struct nfs4_client *clp = server->nfs4_state; @@ -2500,7 +2500,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs4_client *clp = server->nfs4_state; int ret = errorcode; @@ -2763,68 +2763,127 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) return res; } -static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +struct nfs4_unlockdata { + struct nfs_lockargs arg; + struct nfs_locku_opargs luargs; + struct nfs_lockres res; + struct nfs4_lock_state *lsp; + struct nfs_open_context *ctx; + atomic_t refcount; + struct completion completion; +}; + +static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata) { - struct inode *inode = state->inode; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_lockargs arg = { - .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), - }; - struct nfs_lockres res = { - .server = server, - }; + if (atomic_dec_and_test(&calldata->refcount)) { + nfs_free_seqid(calldata->luargs.seqid); + nfs4_put_lock_state(calldata->lsp); + put_nfs_open_context(calldata->ctx); + kfree(calldata); + } +} + +static void nfs4_locku_complete(struct nfs4_unlockdata *calldata) +{ + complete(&calldata->completion); + nfs4_locku_release_calldata(calldata); +} + +static void nfs4_locku_done(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + + nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid); + switch (task->tk_status) { + case 0: + memcpy(calldata->lsp->ls_stateid.data, + calldata->res.u.stateid.data, + sizeof(calldata->lsp->ls_stateid.data)); + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(calldata->res.server->nfs4_state); + break; + default: + if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) { + rpc_restart_call(task); + return; + } + } + nfs4_locku_complete(calldata); +} + +static void nfs4_locku_begin(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = state->owner->so_cred, + .rpc_argp = &calldata->arg, + .rpc_resp = &calldata->res, + .rpc_cred = calldata->lsp->ls_state->owner->so_cred, }; + int status; + + status = nfs_wait_on_sequence(calldata->luargs.seqid, task); + if (status != 0) + return; + if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { + nfs4_locku_complete(calldata); + task->tk_exit = NULL; + rpc_exit(task, 0); + return; + } + rpc_call_setup(task, &msg, 0); +} + +static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_unlockdata *calldata; + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp; - struct nfs_locku_opargs luargs; int status; - + status = nfs4_set_lock_state(state, request); if (status != 0) - goto out; + return status; lsp = request->fl_u.nfs4_fl.owner; /* We might have lost the locks! */ if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) - goto out; - luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); - status = -ENOMEM; - if (luargs.seqid == NULL) - goto out; - memcpy(luargs.stateid.data, lsp->ls_stateid.data, sizeof(luargs.stateid.data)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs_increment_lock_seqid(status, luargs.seqid); - - if (status == 0) - memcpy(lsp->ls_stateid.data, res.u.stateid.data, - sizeof(lsp->ls_stateid.data)); - nfs_free_seqid(luargs.seqid); -out: + return 0; + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + return -ENOMEM; + calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (calldata->luargs.seqid == NULL) { + kfree(calldata); + return -ENOMEM; + } + calldata->luargs.stateid = &lsp->ls_stateid; + calldata->arg.fh = NFS_FH(inode); + calldata->arg.type = nfs4_lck_type(cmd, request); + calldata->arg.offset = request->fl_start; + calldata->arg.length = nfs4_lck_length(request); + calldata->arg.u.locku = &calldata->luargs; + calldata->res.server = server; + calldata->lsp = lsp; + atomic_inc(&lsp->ls_count); + + /* Ensure we don't close file until we're done freeing locks! */ + calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data); + + atomic_set(&calldata->refcount, 2); + init_completion(&calldata->completion); + + status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin, + nfs4_locku_done, calldata); if (status == 0) - do_vfs_lock(request->fl_file, request); + wait_for_completion_interruptible(&calldata->completion); + do_vfs_lock(request->fl_file, request); + nfs4_locku_release_calldata(calldata); return status; } -static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) -{ - struct nfs4_exception exception = { }; - int err; - - do { - err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_unlck(state, cmd, request), - &exception); - } while (exception.retry); - return err; -} - static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) { struct inode *inode = state->inode; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bb3574361958..23834c8fb740 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -600,7 +600,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ * Release reference to lock_state, and free it if we see that * it is no longer in use */ -static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) +void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { struct nfs4_state *state; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 934ec50ea6bf..4706192cfb07 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -776,7 +776,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) WRITE32(OP_LOCKU); WRITE32(arg->type); WRITE32(opargs->seqid->sequence->counter); - WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); + WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data)); WRITE64(arg->offset); WRITE64(arg->length); @@ -1587,9 +1587,6 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lock }; int status; - status = nfs_wait_on_sequence(args->u.locku->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -2934,8 +2931,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) status = decode_op_hdr(xdr, OP_LOCKU); if (status == 0) { - READ_BUF(sizeof(nfs4_stateid)); - COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + READ_BUF(sizeof(res->u.stateid.data)); + COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); } return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cac0df950c66..849f95c5fae4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,7 +188,7 @@ struct nfs_lock_opargs { struct nfs_locku_opargs { struct nfs_seqid * seqid; - nfs4_stateid stateid; + nfs4_stateid * stateid; }; struct nfs_lockargs { -- cgit v1.2.3 From 06735b3454824bd561decbde46111f144e905923 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:15 -0700 Subject: NFSv4: Fix up handling of open_to_lock sequence ids Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 69 +++++++++++++++++++++++-------------------------- fs/nfs/nfs4xdr.c | 32 ++++++++++------------- include/linux/nfs_xdr.h | 19 +++----------- 3 files changed, 49 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 612a9a14aed3..35da15342e05 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2889,11 +2889,23 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; + struct nfs_lock_opargs largs = { + .lock_stateid = &lsp->ls_stateid, + .open_stateid = &state->stateid, + .lock_owner = { + .clientid = server->nfs4_state->cl_clientid, + .id = lsp->ls_id, + }, + .reclaim = reclaim, + }; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), .offset = request->fl_start, .length = nfs4_lck_length(request), + .u = { + .lock = &largs, + }, }; struct nfs_lockres res = { .server = server, @@ -2904,56 +2916,39 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; - struct nfs_lock_opargs largs = { - .reclaim = reclaim, - .new_lock_owner = 0, - }; - struct nfs_seqid *lock_seqid; int status = -ENOMEM; - lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); - if (lock_seqid == NULL) + largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (largs.lock_seqid == NULL) return -ENOMEM; if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { struct nfs4_state_owner *owner = state->owner; - struct nfs_open_to_lock otl = { - .lock_owner = { - .clientid = server->nfs4_state->cl_clientid, - }, - }; - - otl.lock_seqid = lock_seqid; - otl.lock_owner.id = lsp->ls_id; - memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); - largs.u.open_lock = &otl; + + largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid); + if (largs.open_seqid == NULL) + goto out; largs.new_lock_owner = 1; - arg.u.lock = &largs; - otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid); - if (otl.open_seqid != NULL) { - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment seqid on success, and seqid mutating errors */ - nfs_increment_open_seqid(status, otl.open_seqid); - nfs_free_seqid(otl.open_seqid); + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment open seqid on success, and seqid mutating errors */ + if (largs.new_lock_owner != 0) { + nfs_increment_open_seqid(status, largs.open_seqid); + if (status == 0) + nfs_confirm_seqid(&lsp->ls_seqid, 0); } - if (status == 0) - nfs_confirm_seqid(&lsp->ls_seqid, 0); - } else { - struct nfs_exist_lock el; - memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); - largs.u.exist_lock = ⪙ - arg.u.lock = &largs; - el.seqid = lock_seqid; + nfs_free_seqid(largs.open_seqid); + } else status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - } - /* increment seqid on success, and seqid mutating errors*/ - nfs_increment_lock_seqid(status, lock_seqid); + /* increment lock seqid on success, and seqid mutating errors*/ + nfs_increment_lock_seqid(status, largs.lock_seqid); /* save the returned stateid. */ if (status == 0) { - memcpy(lsp->ls_stateid.data, res.u.stateid.data, sizeof(lsp->ls_stateid.data)); + memcpy(lsp->ls_stateid.data, res.u.stateid.data, + sizeof(lsp->ls_stateid.data)); lsp->ls_flags |= NFS_LOCK_INITIALIZED; } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; - nfs_free_seqid(lock_seqid); +out: + nfs_free_seqid(largs.lock_seqid); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4706192cfb07..c5c75235c5b8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -729,22 +729,18 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) WRITE64(arg->length); WRITE32(opargs->new_lock_owner); if (opargs->new_lock_owner){ - struct nfs_open_to_lock *ol = opargs->u.open_lock; - RESERVE_SPACE(40); - WRITE32(ol->open_seqid->sequence->counter); - WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); - WRITE32(ol->lock_seqid->sequence->counter); - WRITE64(ol->lock_owner.clientid); + WRITE32(opargs->open_seqid->sequence->counter); + WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data)); + WRITE32(opargs->lock_seqid->sequence->counter); + WRITE64(opargs->lock_owner.clientid); WRITE32(4); - WRITE32(ol->lock_owner.id); + WRITE32(opargs->lock_owner.id); } else { - struct nfs_exist_lock *el = opargs->u.exist_lock; - RESERVE_SPACE(20); - WRITEMEM(&el->stateid, sizeof(el->stateid)); - WRITE32(el->seqid->sequence->counter); + WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data)); + WRITE32(opargs->lock_seqid->sequence->counter); } return 0; @@ -1535,16 +1531,14 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka .nops = 2, }; struct nfs_lock_opargs *opargs = args->u.lock; - struct nfs_seqid *seqid; int status; - if (opargs->new_lock_owner) - seqid = opargs->u.open_lock->lock_seqid; - else - seqid = opargs->u.exist_lock->seqid; - status = nfs_wait_on_sequence(seqid, req->rq_task); + status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task); if (status != 0) goto out; + /* Do we need to do an open_to_lock_owner? */ + if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED) + opargs->new_lock_owner = 0; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -2908,8 +2902,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) status = decode_op_hdr(xdr, OP_LOCK); if (status == 0) { - READ_BUF(sizeof(nfs4_stateid)); - COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + READ_BUF(sizeof(res->u.stateid.data)); + COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); } else if (status == -NFS4ERR_DENIED) return decode_lock_denied(xdr, &res->u.denied); return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 849f95c5fae4..57efcc27f20b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -165,25 +165,14 @@ struct nfs_lowner { u32 id; }; -struct nfs_open_to_lock { - struct nfs_seqid * open_seqid; - nfs4_stateid open_stateid; +struct nfs_lock_opargs { struct nfs_seqid * lock_seqid; + nfs4_stateid * lock_stateid; + struct nfs_seqid * open_seqid; + nfs4_stateid * open_stateid; struct nfs_lowner lock_owner; -}; - -struct nfs_exist_lock { - nfs4_stateid stateid; - struct nfs_seqid * seqid; -}; - -struct nfs_lock_opargs { __u32 reclaim; __u32 new_lock_owner; - union { - struct nfs_open_to_lock *open_lock; - struct nfs_exist_lock *exist_lock; - } u; }; struct nfs_locku_opargs { -- cgit v1.2.3 From 834f2a4a1554dc5b2598038b3fe8703defcbe467 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:16 -0700 Subject: VFS: Allow the filesystem to return a full file pointer on open intent This is needed by NFSv4 for atomicity reasons: our open command is in fact a lookup+open, so we need to be able to propagate open context information from lookup() into the resulting struct file's private_data field. Signed-off-by: Trond Myklebust --- fs/exec.c | 12 +++---- fs/namei.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++---- fs/open.c | 79 +++++++++++++++++++++++++++++++++++-------- include/linux/namei.h | 8 +++++ 4 files changed, 165 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index a04a575ad433..d2208f7c87db 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __user * library) struct nameidata nd; int error; - nd.intent.open.flags = FMODE_READ; - error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); if (error) goto out; @@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __user * library) if (error) goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __user * library) out: return error; exit: + release_open_intent(&nd); path_release(&nd); goto out; } @@ -490,8 +490,7 @@ struct file *open_exec(const char *name) int err; struct file *file; - nd.intent.open.flags = FMODE_READ; - err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ); file = ERR_PTR(err); if (!err) { @@ -504,7 +503,7 @@ struct file *open_exec(const char *name) err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { @@ -516,6 +515,7 @@ out: return file; } } + release_open_intent(&nd); path_release(&nd); } goto out; diff --git a/fs/namei.c b/fs/namei.c index aa62dbda93ac..0d1dff7d3d95 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,18 @@ void path_release_on_umount(struct nameidata *nd) mntput_no_expire(nd->mnt); } +/** + * release_open_intent - free up open intent resources + * @nd: pointer to nameidata + */ +void release_open_intent(struct nameidata *nd) +{ + if (nd->intent.open.file->f_dentry == NULL) + put_filp(nd->intent.open.file); + else + fput(nd->intent.open.file); +} + /* * Internal lookup() using the new generic dcache. * SMP-safe @@ -1052,6 +1065,70 @@ out: return retval; } +static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags, int create_mode) +{ + struct file *filp = get_empty_filp(); + int err; + + if (filp == NULL) + return -ENFILE; + nd->intent.open.file = filp; + nd->intent.open.flags = open_flags; + nd->intent.open.create_mode = create_mode; + err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd); + if (IS_ERR(nd->intent.open.file)) { + if (err == 0) { + err = PTR_ERR(nd->intent.open.file); + path_release(nd); + } + } else if (err != 0) + release_open_intent(nd); + return err; +} + +/** + * path_lookup_open - lookup a file path with open intent + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + */ +int path_lookup_open(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) +{ + return __path_lookup_intent_open(name, lookup_flags, nd, + open_flags, 0); +} + +/** + * path_lookup_create - lookup a file path with open + create intent + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + * @create_mode: create intent flags + */ +int path_lookup_create(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags, int create_mode) +{ + return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, + open_flags, create_mode); +} + +int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) +{ + char *tmp = getname(name); + int err = PTR_ERR(tmp); + + if (!IS_ERR(tmp)) { + err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0); + putname(tmp); + } + return err; +} + /* * Restricted form of lookup. Doesn't follow links, single-component only, * needs parent already locked. Doesn't follow mounts. @@ -1416,27 +1493,27 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) */ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) { - int acc_mode, error = 0; + int acc_mode, error; struct path path; struct dentry *dir; int count = 0; acc_mode = ACC_MODE(flag); + /* O_TRUNC implies we need access checks for write permissions */ + if (flag & O_TRUNC) + acc_mode |= MAY_WRITE; + /* Allow the LSM permission hook to distinguish append access from general write access. */ if (flag & O_APPEND) acc_mode |= MAY_APPEND; - /* Fill in the open() intent data */ - nd->intent.open.flags = flag; - nd->intent.open.create_mode = mode; - /* * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { - error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); + error = path_lookup_open(pathname, lookup_flags(flag), nd, flag); if (error) return error; goto ok; @@ -1445,7 +1522,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) /* * Create - we need to know the parent. */ - error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); + error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode); if (error) return error; @@ -1520,6 +1597,8 @@ ok: exit_dput: dput_path(&path, nd); exit: + if (!IS_ERR(nd->intent.open.file)) + release_open_intent(nd); path_release(nd); return error; diff --git a/fs/open.c b/fs/open.c index f0d90cf0495c..8d06ec911fd9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -739,7 +739,8 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) } static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct file *f) + int flags, struct file *f, + int (*open)(struct inode *, struct file *)) { struct inode *inode; int error; @@ -761,11 +762,14 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_op = fops_get(inode->i_fop); file_move(f, &inode->i_sb->s_files); - if (f->f_op && f->f_op->open) { - error = f->f_op->open(inode,f); + if (!open && f->f_op) + open = f->f_op->open; + if (open) { + error = open(inode, f); if (error) goto cleanup_all; } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); @@ -814,28 +818,75 @@ struct file *filp_open(const char * filename, int flags, int mode) { int namei_flags, error; struct nameidata nd; - struct file *f; namei_flags = flags; if ((namei_flags+1) & O_ACCMODE) namei_flags++; - if (namei_flags & O_TRUNC) - namei_flags |= 2; - - error = -ENFILE; - f = get_empty_filp(); - if (f == NULL) - return ERR_PTR(error); error = open_namei(filename, namei_flags, mode, &nd); if (!error) - return __dentry_open(nd.dentry, nd.mnt, flags, f); + return nameidata_to_filp(&nd, flags); - put_filp(f); return ERR_PTR(error); } EXPORT_SYMBOL(filp_open); +/** + * lookup_instantiate_filp - instantiates the open intent filp + * @nd: pointer to nameidata + * @dentry: pointer to dentry + * @open: open callback + * + * Helper for filesystems that want to use lookup open intents and pass back + * a fully instantiated struct file to the caller. + * This function is meant to be called from within a filesystem's + * lookup method. + * Note that in case of error, nd->intent.open.file is destroyed, but the + * path information remains valid. + * If the open callback is set to NULL, then the standard f_op->open() + * filesystem callback is substituted. + */ +struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)) +{ + if (IS_ERR(nd->intent.open.file)) + goto out; + if (IS_ERR(dentry)) + goto out_err; + nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), + nd->intent.open.flags - 1, + nd->intent.open.file, + open); +out: + return nd->intent.open.file; +out_err: + release_open_intent(nd); + nd->intent.open.file = (struct file *)dentry; + goto out; +} +EXPORT_SYMBOL_GPL(lookup_instantiate_filp); + +/** + * nameidata_to_filp - convert a nameidata to an open filp. + * @nd: pointer to nameidata + * @flags: open flags + * + * Note that this function destroys the original nameidata + */ +struct file *nameidata_to_filp(struct nameidata *nd, int flags) +{ + struct file *filp; + + /* Pick up the filp from the open intent */ + filp = nd->intent.open.file; + /* Has the filesystem initialised the file for us? */ + if (filp->f_dentry == NULL) + filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); + else + path_release(nd); + return filp; +} + struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) { int error; @@ -846,7 +897,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) if (f == NULL) return ERR_PTR(error); - return __dentry_open(dentry, mnt, flags, f); + return __dentry_open(dentry, mnt, flags, f, NULL); } EXPORT_SYMBOL(dentry_open); diff --git a/include/linux/namei.h b/include/linux/namei.h index 7db67b008cac..1c975d0d9e94 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -8,6 +8,7 @@ struct vfsmount; struct open_intent { int flags; int create_mode; + struct file *file; }; enum { MAX_NESTED_LINKS = 5 }; @@ -65,6 +66,13 @@ extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); +extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); +extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags); +extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); +extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); +extern void release_open_intent(struct nameidata *); + extern struct dentry * lookup_one_len(const char *, struct dentry *, int); extern struct dentry * lookup_hash(struct qstr *, struct dentry *); -- cgit v1.2.3 From 02a913a73b52071e93f4b76db3e86138d19efffd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:17 -0700 Subject: NFSv4: Eliminate nfsv4 open race... Make NFSv4 return the fully initialized file pointer with the stateid that it created in the lookup w/intent. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 29 +++++----- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4_fs.h | 4 +- fs/nfs/nfs4proc.c | 137 ++++++++++++++++++++---------------------------- fs/nfs/proc.c | 2 +- include/linux/nfs_xdr.h | 2 +- 6 files changed, 74 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c70eabd6d179..a6e251f21fd8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -914,7 +914,6 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd) static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *res = NULL; - struct inode *inode = NULL; int error; /* Check that we are indeed trying to open this file */ @@ -928,8 +927,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL */ - if (nd->intent.open.flags & O_EXCL) - goto no_entry; + if (nd->intent.open.flags & O_EXCL) { + d_add(dentry, NULL); + goto out; + } /* Open the file on the server */ lock_kernel(); @@ -943,18 +944,18 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry if (nd->intent.open.flags & O_CREAT) { nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); nfs_end_data_update(dir); } else - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); + if (IS_ERR(res)) { + error = PTR_ERR(res); switch (error) { /* Make a negative dentry */ case -ENOENT: - inode = NULL; - break; + res = NULL; + goto out; /* This turned out not to be a regular file */ case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) @@ -962,13 +963,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry /* case -EISDIR: */ /* case -EINVAL: */ default: - res = ERR_PTR(error); goto out; } - } -no_entry: - res = d_add_unique(dentry, inode); - if (res != NULL) + } else if (res != NULL) dentry = res; nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -1012,7 +1009,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) */ lock_kernel(); verifier = nfs_save_change_attribute(dir); - ret = nfs4_open_revalidate(dir, dentry, openflags); + ret = nfs4_open_revalidate(dir, dentry, openflags, nd); if (!ret) nfs_set_verifier(dentry, verifier); unlock_kernel(); @@ -1135,7 +1132,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); nfs_end_data_update(dir); if (error != 0) goto out_err; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index edc95514046d..df80477c5af4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -299,7 +299,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2215cdee43ae..8a3788199052 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -215,8 +215,8 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); -extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); +extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 35da15342e05..c9ecb8119632 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -947,12 +948,26 @@ out: return status; } -struct inode * +static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +{ + struct file *filp; + + filp = lookup_instantiate_filp(nd, dentry, NULL); + if (!IS_ERR(filp)) { + struct nfs_open_context *ctx; + ctx = (struct nfs_open_context *)filp->private_data; + ctx->state = state; + } else + nfs4_close_state(state, nd->intent.open.flags); +} + +struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; + struct dentry *res; if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -966,16 +981,23 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); if (IS_ERR(cred)) - return (struct inode *)cred; + return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); put_rpccred(cred); - if (IS_ERR(state)) - return (struct inode *)state; - return state->inode; + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) + d_add(dentry, NULL); + return (struct dentry *)state; + } + res = d_add_unique(dentry, state->inode); + if (res != NULL) + dentry = res; + nfs4_intent_set_file(nd, dentry, state); + return res; } int -nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { struct rpc_cred *cred; struct nfs4_state *state; @@ -988,18 +1010,30 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) if (IS_ERR(state)) state = nfs4_do_open(dir, dentry, openflags, NULL, cred); put_rpccred(cred); - if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) - return 1; - if (IS_ERR(state)) - return 0; + if (IS_ERR(state)) { + switch (PTR_ERR(state)) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + lookup_instantiate_filp(nd, (struct dentry *)state, NULL); + return 1; + case -ENOENT: + if (dentry->d_inode == NULL) + return 1; + } + goto out_drop; + } inode = state->inode; + iput(inode); if (inode == dentry->d_inode) { - iput(inode); + nfs4_intent_set_file(nd, dentry, state); return 1; } - d_drop(dentry); nfs4_close_state(state, openflags); - iput(inode); +out_drop: + d_drop(dentry); return 0; } @@ -1500,7 +1534,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata) static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs4_state *state; struct rpc_cred *cred; @@ -1522,13 +1556,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(state->inode), sattr, state); - if (status == 0) { + if (status == 0) nfs_setattr_update_inode(state->inode, sattr); - goto out; - } - } else if (flags != 0) - goto out; - nfs4_close_state(state, flags); + } + if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) + nfs4_intent_set_file(nd, dentry, state); + else + nfs4_close_state(state, flags); out: return status; } @@ -2175,65 +2209,6 @@ nfs4_proc_renew(struct nfs4_client *clp) return 0; } -/* - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this open method is prone to inefficiency and race conditions - * due to the lookup, potential create, and open VFS calls from sys_open() - * placed on the wire. - */ -static int -nfs4_proc_file_open(struct inode *inode, struct file *filp) -{ - struct dentry *dentry = filp->f_dentry; - struct nfs_open_context *ctx; - struct nfs4_state *state = NULL; - struct rpc_cred *cred; - int status = -ENOMEM; - - dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", - (int)dentry->d_parent->d_name.len, - dentry->d_parent->d_name.name, - (int)dentry->d_name.len, dentry->d_name.name); - - - /* Find our open stateid */ - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); - ctx = alloc_nfs_open_context(dentry, cred); - put_rpccred(cred); - if (unlikely(ctx == NULL)) - return -ENOMEM; - status = -EIO; /* ERACE actually */ - state = nfs4_find_state(inode, cred, filp->f_mode); - if (unlikely(state == NULL)) - goto no_state; - ctx->state = state; - nfs4_close_state(state, filp->f_mode); - ctx->mode = filp->f_mode; - nfs_file_set_open_context(filp, ctx); - put_nfs_open_context(ctx); - if (filp->f_mode & FMODE_WRITE) - nfs_begin_data_update(inode); - return 0; -no_state: - printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - put_nfs_open_context(ctx); - return status; -} - -/* - * Release our state - */ -static int -nfs4_proc_file_release(struct inode *inode, struct file *filp) -{ - if (filp->f_mode & FMODE_WRITE) - nfs_end_data_update(inode); - nfs_file_clear_open_context(filp); - return 0; -} - static inline int nfs4_server_supports_acls(struct nfs_server *server) { return (server->caps & NFS_CAP_ACLS) @@ -3145,8 +3120,8 @@ struct nfs_rpc_ops nfs_v4_clientops = { .read_setup = nfs4_proc_read_setup, .write_setup = nfs4_proc_write_setup, .commit_setup = nfs4_proc_commit_setup, - .file_open = nfs4_proc_file_open, - .file_release = nfs4_proc_file_release, + .file_open = nfs_open, + .file_release = nfs_release, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index be23c3fb9260..8fef86523d7f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -216,7 +216,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 57efcc27f20b..60086dac11d5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -712,7 +712,7 @@ struct nfs_rpc_ops { int (*write) (struct nfs_write_data *); int (*commit) (struct nfs_write_data *); int (*create) (struct inode *, struct dentry *, - struct iattr *, int); + struct iattr *, int, struct nameidata *); int (*remove) (struct inode *, struct qstr *); int (*unlink_setup) (struct rpc_message *, struct dentry *, struct qstr *); -- cgit v1.2.3 From 293f1eb551a77fe5c8956a559a3c0baea95cd9bc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:54:37 -0400 Subject: SUNRPC: Add support for privacy to generic gss-api code. Add support for privacy to generic gss-api code. This is dead code until we have both a mechanism that supports privacy and code in the client or server that uses it. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_api.h | 22 ++++++++++++++++++++++ net/sunrpc/auth_gss/gss_mech_switch.c | 22 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 689262f63059..e896752ffbf9 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -48,6 +48,17 @@ u32 gss_verify_mic( struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); +u32 gss_wrap( + struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); +u32 gss_unwrap( + struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *inbuf); u32 gss_delete_sec_context( struct gss_ctx **ctx_id); @@ -93,6 +104,17 @@ struct gss_api_ops { struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); + u32 (*gss_wrap)( + struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); + u32 (*gss_unwrap)( + struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *buf); void (*gss_delete_sec_context)( void *internal_ctx_id); }; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 58aeaddd8c79..06d97cb3481a 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -276,6 +276,28 @@ gss_verify_mic(struct gss_ctx *context_handle, qstate); } +u32 +gss_wrap(struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *buf, + struct page **inpages) +{ + return ctx_id->mech_type->gm_ops + ->gss_wrap(ctx_id, qop, offset, buf, inpages); +} + +u32 +gss_unwrap(struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *buf) +{ + return ctx_id->mech_type->gm_ops + ->gss_unwrap(ctx_id, qop, offset, buf); +} + + /* gss_delete_sec_context: free all resources associated with context_handle. * Note this differs from the RFC 2744-specified prototype in that we don't * bother returning an output token, since it would never be used anyway. */ -- cgit v1.2.3 From ead5e1c26fdcd969cf40c49cb0589d56879d240d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:54:43 -0400 Subject: SUNRPC: Provide a callback to allow free pages allocated during xdr encoding For privacy, we need to allocate pages to store the encrypted data (passed in pages can't be used without the risk of corrupting data in the page cache). So we need a way to free that memory after the request has been transmitted. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++++- net/sunrpc/xprt.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 068e1fb0868b..3b8b6e823c70 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -73,7 +73,10 @@ struct rpc_rqst { int rq_cong; /* has incremented xprt->cong */ int rq_received; /* receive completed */ u32 rq_seqno; /* gss seq no. used on req. */ - + int rq_enc_pages_num; + struct page **rq_enc_pages; /* scratch pages for use by + gss privacy code */ + void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; struct xdr_buf rq_private_buf; /* The receive buffer diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1ba55dc38b7a..6dda3860351f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -839,6 +839,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_task = task; req->rq_xprt = xprt; req->rq_xid = xprt_alloc_xid(xprt); + req->rq_release_snd_buf = NULL; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, ntohl(req->rq_xid)); } @@ -867,6 +868,8 @@ void xprt_release(struct rpc_task *task) xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); task->tk_rqstp = NULL; + if (req->rq_release_snd_buf) + req->rq_release_snd_buf(req); memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %4d release request %p\n", task->tk_pid, req); -- cgit v1.2.3 From 24b2605becc10ca63c4c30808fa59a8abbf68727 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:54:53 -0400 Subject: RPCSEC_GSS: cleanup au_rslack calculation Various xdr encode routines use au_rslack to guess where the reply argument will end up, so we can set up the xdr_buf to recieve data into the right place for zero copy. Currently we calculate the au_rslack estimate when we check the verifier. Normally this only depends on the verifier size. In the integrity case we add a few bytes to allow for a length and sequence number. It's a bit simpler to calculate only the verifier size when we check the verifier, and delay the full calculation till we unwrap. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 7 ++++++- net/sunrpc/auth_gss/auth_gss.c | 20 ++++++-------------- 2 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 04ebc24db348..b68c11a2d6dd 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -66,7 +66,12 @@ struct rpc_cred_cache { struct rpc_auth { unsigned int au_cslack; /* call cred size estimate */ - unsigned int au_rslack; /* reply verf size guess */ + /* guess at number of u32's auth adds before + * reply data; normally the verifier size: */ + unsigned int au_rslack; + /* for gss, used to calculate au_rslack: */ + unsigned int au_verfsize; + unsigned int au_flags; /* various flags */ struct rpc_authops * au_ops; /* operations */ rpc_authflavor_t au_flavor; /* pseudoflavor (note may diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d2b08f16c257..dc95b797ca65 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -886,8 +886,6 @@ static u32 * gss_validate(struct rpc_task *task, u32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; - struct gss_cred *gss_cred = container_of(cred, struct gss_cred, - gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); u32 seq, qop_state; struct kvec iov; @@ -915,18 +913,9 @@ gss_validate(struct rpc_task *task, u32 *p) cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; if (maj_stat) goto out_bad; - switch (gss_cred->gc_service) { - case RPC_GSS_SVC_NONE: - /* verifier data, flavor, length: */ - task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; - break; - case RPC_GSS_SVC_INTEGRITY: - /* verifier data, flavor, length, length, sequence number: */ - task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; - break; - case RPC_GSS_SVC_PRIVACY: - goto out_bad; - } + /* We leave it to unwrap to calculate au_rslack. For now we just + * calculate the length of the verifier: */ + task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2; gss_put_ctx(ctx); dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", task->tk_pid); @@ -1067,6 +1056,7 @@ gss_unwrap_resp(struct rpc_task *task, struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + u32 *savedp = p; int status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) @@ -1082,6 +1072,8 @@ gss_unwrap_resp(struct rpc_task *task, case RPC_GSS_SVC_PRIVACY: break; } + /* take into account extra slack for integrity and privacy cases: */ + task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp); out_decode: status = decode(rqstp, p, obj); out: -- cgit v1.2.3 From bfa91516b57483fc9c81d8d90325fd2c3c16ac48 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:55:08 -0400 Subject: RPCSEC_GSS: krb5 pre-privacy cleanup The code this was originally derived from processed wrap and mic tokens using the same functions. This required some contortions, and more would be required with the addition of xdr_buf's, so it's better to separate out the two code paths. In preparation for adding privacy support, remove the last vestiges of the old wrap token code. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 5 ++--- net/sunrpc/auth_gss/gss_krb5_mech.c | 5 ++--- net/sunrpc/auth_gss/gss_krb5_seal.c | 38 ++++++----------------------------- net/sunrpc/auth_gss/gss_krb5_unseal.c | 30 ++++++--------------------- 4 files changed, 16 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index ffe31d2eb9ec..cb35833e2ae3 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -121,13 +121,12 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, struct xdr_buf *input_message_buffer, - struct xdr_netobj *output_message_buffer, int toktype); + struct xdr_netobj *output_message_buffer); u32 krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, - struct xdr_buf *message_buffer, - int *qop_state, int toktype); + struct xdr_buf *message_buffer, int *qop_state); u32 krb5_encrypt(struct crypto_tfm * key, diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 462c5b86b073..8b9066fdfda5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -199,8 +199,7 @@ gss_verify_mic_kerberos(struct gss_ctx *ctx, int qop_state; struct krb5_ctx *kctx = ctx->internal_ctx_id; - maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, - KG_TOK_MIC_MSG); + maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state); if (!maj_stat && qop_state) *qstate = qop_state; @@ -216,7 +215,7 @@ gss_get_mic_kerberos(struct gss_ctx *ctx, u32 err = 0; struct krb5_ctx *kctx = ctx->internal_ctx_id; - err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); + err = krb5_make_token(kctx, qop, message, mic_token); dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index afeeb8715a77..2511834e6e52 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -70,22 +70,12 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static inline int -gss_krb5_padding(int blocksize, int length) { - /* Most of the code is block-size independent but in practice we - * use only 8: */ - BUG_ON(blocksize != 8); - return 8 - (length & 7); -} - u32 krb5_make_token(struct krb5_ctx *ctx, int qop_req, - struct xdr_buf *text, struct xdr_netobj *token, - int toktype) + struct xdr_buf *text, struct xdr_netobj *token) { s32 checksum_type; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; - int blocksize = 0, tmsglen; unsigned char *ptr, *krb5_hdr, *msg_start; s32 now; @@ -111,21 +101,13 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, goto out_err; } - if (toktype == KG_TOK_WRAP_MSG) { - blocksize = crypto_tfm_alg_blocksize(ctx->enc); - tmsglen = blocksize + text->len - + gss_krb5_padding(blocksize, blocksize + text->len); - } else { - tmsglen = 0; - } - - token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); + token->len = g_token_size(&ctx->mech_used, 22); ptr = token->data; - g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr); + g_make_token_header(&ctx->mech_used, 22, &ptr); - *ptr++ = (unsigned char) ((toktype>>8)&0xff); - *ptr++ = (unsigned char) (toktype&0xff); + *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); + *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ krb5_hdr = ptr - 2; @@ -133,17 +115,9 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - if (toktype == KG_TOK_WRAP_MSG) - *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); - if (toktype == KG_TOK_WRAP_MSG) { - /* XXX removing support for now */ + if (make_checksum(checksum_type, krb5_hdr, 8, text, &md5cksum)) goto out_err; - } else { /* Sign only. */ - if (make_checksum(checksum_type, krb5_hdr, 8, text, - &md5cksum)) - goto out_err; - } switch (ctx->signalg) { case SGN_ALG_DES_MAC_MD5: diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 8767fc53183d..19eba3df6607 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -68,20 +68,13 @@ #endif -/* message_buffer is an input if toktype is MIC and an output if it is WRAP: - * If toktype is MIC: read_token is a mic token, and message_buffer is the - * data that the mic was supposedly taken over. - * If toktype is WRAP: read_token is a wrap token, and message_buffer is used - * to return the decrypted data. - */ +/* read_token is a mic token, and message_buffer is the data that the mic was + * supposedly taken over. */ -/* XXX will need to change prototype and/or just split into a separate function - * when we add privacy (because read_token will be in pages too). */ u32 krb5_read_token(struct krb5_ctx *ctx, struct xdr_netobj *read_token, - struct xdr_buf *message_buffer, - int *qop_state, int toktype) + struct xdr_buf *message_buffer, int *qop_state) { int signalg; int sealalg; @@ -100,16 +93,12 @@ krb5_read_token(struct krb5_ctx *ctx, read_token->len)) goto out; - if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff))) + if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || + (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) goto out; /* XXX sanity-check bodysize?? */ - if (toktype == KG_TOK_WRAP_MSG) { - /* XXX gone */ - goto out; - } - /* get the sign and seal algorithms */ signalg = ptr[0] + (ptr[1] << 8); @@ -120,14 +109,7 @@ krb5_read_token(struct krb5_ctx *ctx, if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) goto out; - if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || - ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff))) - goto out; - - /* in the current spec, there is only one valid seal algorithm per - key type, so a simple comparison is ok */ - - if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg)) + if (sealalg != 0xffff) goto out; /* there are several mappings of seal algorithms to sign algorithms, -- cgit v1.2.3 From 14ae162c24d985593d5b19437d7f3d8fd0062b59 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:55:13 -0400 Subject: RPCSEC_GSS: Add support for privacy to krb5 rpcsec_gss mechanism. Add support for privacy to the krb5 rpcsec_gss mechanism. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 18 +- net/sunrpc/auth_gss/Makefile | 2 +- net/sunrpc/auth_gss/gss_krb5_crypto.c | 156 +++++++++++++- net/sunrpc/auth_gss/gss_krb5_mech.c | 7 + net/sunrpc/auth_gss/gss_krb5_seal.c | 4 +- net/sunrpc/auth_gss/gss_krb5_unseal.c | 2 +- net/sunrpc/auth_gss/gss_krb5_wrap.c | 370 ++++++++++++++++++++++++++++++++++ 7 files changed, 552 insertions(+), 7 deletions(-) create mode 100644 net/sunrpc/auth_gss/gss_krb5_wrap.c (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index cb35833e2ae3..7f93c2d5ebdb 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -116,7 +116,7 @@ enum seal_alg { s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum); + int body_offset, struct xdr_netobj *cksum); u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, @@ -128,6 +128,15 @@ krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, struct xdr_buf *message_buffer, int *qop_state); +u32 +gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset, + struct xdr_buf *outbuf, struct page **pages); + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset, + struct xdr_buf *buf); + + u32 krb5_encrypt(struct crypto_tfm * key, void *iv, void *in, void *out, int length); @@ -136,6 +145,13 @@ u32 krb5_decrypt(struct crypto_tfm * key, void *iv, void *in, void *out, int length); +int +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset, + struct page **pages); + +int +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset); + s32 krb5_make_seq_num(struct crypto_tfm * key, int direction, diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index fe1b874084bc..f3431a7e33da 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o + gss_krb5_seqnum.o gss_krb5_wrap.o obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 2baf93f8b8f5..3f3d5437f02d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -218,7 +218,7 @@ checksummer(struct scatterlist *sg, void *data) /* checksum the plaintext data and hdrlen bytes of the token header */ s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum) + int body_offset, struct xdr_netobj *cksum) { char *cksumname; struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ @@ -243,7 +243,8 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, crypto_digest_init(tfm); buf_to_sg(sg, header, hdrlen); crypto_digest_update(tfm, sg, 1); - process_xdr_buf(body, 0, body->len, checksummer, tfm); + process_xdr_buf(body, body_offset, body->len - body_offset, + checksummer, tfm); crypto_digest_final(tfm, cksum->data); code = 0; out: @@ -252,3 +253,154 @@ out: } EXPORT_SYMBOL(make_checksum); + +struct encryptor_desc { + u8 iv[8]; /* XXX hard-coded blocksize */ + struct crypto_tfm *tfm; + int pos; + struct xdr_buf *outbuf; + struct page **pages; + struct scatterlist infrags[4]; + struct scatterlist outfrags[4]; + int fragno; + int fraglen; +}; + +static int +encryptor(struct scatterlist *sg, void *data) +{ + struct encryptor_desc *desc = data; + struct xdr_buf *outbuf = desc->outbuf; + struct page *in_page; + int thislen = desc->fraglen + sg->length; + int fraglen, ret; + int page_pos; + + /* Worst case is 4 fragments: head, end of page 1, start + * of page 2, tail. Anything more is a bug. */ + BUG_ON(desc->fragno > 3); + desc->infrags[desc->fragno] = *sg; + desc->outfrags[desc->fragno] = *sg; + + page_pos = desc->pos - outbuf->head[0].iov_len; + if (page_pos >= 0 && page_pos < outbuf->page_len) { + /* pages are not in place: */ + int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; + in_page = desc->pages[i]; + } else { + in_page = sg->page; + } + desc->infrags[desc->fragno].page = in_page; + desc->fragno++; + desc->fraglen += sg->length; + desc->pos += sg->length; + + fraglen = thislen & 7; /* XXX hardcoded blocksize */ + thislen -= fraglen; + + if (thislen == 0) + return 0; + + ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, + thislen, desc->iv); + if (ret) + return ret; + if (fraglen) { + desc->outfrags[0].page = sg->page; + desc->outfrags[0].offset = sg->offset + sg->length - fraglen; + desc->outfrags[0].length = fraglen; + desc->infrags[0] = desc->outfrags[0]; + desc->infrags[0].page = in_page; + desc->fragno = 1; + desc->fraglen = fraglen; + } else { + desc->fragno = 0; + desc->fraglen = 0; + } + return 0; +} + +int +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, + struct page **pages) +{ + int ret; + struct encryptor_desc desc; + + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + + memset(desc.iv, 0, sizeof(desc.iv)); + desc.tfm = tfm; + desc.pos = offset; + desc.outbuf = buf; + desc.pages = pages; + desc.fragno = 0; + desc.fraglen = 0; + + ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); + return ret; +} + +EXPORT_SYMBOL(gss_encrypt_xdr_buf); + +struct decryptor_desc { + u8 iv[8]; /* XXX hard-coded blocksize */ + struct crypto_tfm *tfm; + struct scatterlist frags[4]; + int fragno; + int fraglen; +}; + +static int +decryptor(struct scatterlist *sg, void *data) +{ + struct decryptor_desc *desc = data; + int thislen = desc->fraglen + sg->length; + int fraglen, ret; + + /* Worst case is 4 fragments: head, end of page 1, start + * of page 2, tail. Anything more is a bug. */ + BUG_ON(desc->fragno > 3); + desc->frags[desc->fragno] = *sg; + desc->fragno++; + desc->fraglen += sg->length; + + fraglen = thislen & 7; /* XXX hardcoded blocksize */ + thislen -= fraglen; + + if (thislen == 0) + return 0; + + ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, + thislen, desc->iv); + if (ret) + return ret; + if (fraglen) { + desc->frags[0].page = sg->page; + desc->frags[0].offset = sg->offset + sg->length - fraglen; + desc->frags[0].length = fraglen; + desc->fragno = 1; + desc->fraglen = fraglen; + } else { + desc->fragno = 0; + desc->fraglen = 0; + } + return 0; +} + +int +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) +{ + struct decryptor_desc desc; + + /* XXXJBF: */ + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + + memset(desc.iv, 0, sizeof(desc.iv)); + desc.tfm = tfm; + desc.fragno = 0; + desc.fraglen = 0; + return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); +} + +EXPORT_SYMBOL(gss_decrypt_xdr_buf); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 8b9066fdfda5..37a9ad97ccd4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -226,6 +226,8 @@ static struct gss_api_ops gss_kerberos_ops = { .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, .gss_verify_mic = gss_verify_mic_kerberos, + .gss_wrap = gss_wrap_kerberos, + .gss_unwrap = gss_unwrap_kerberos, .gss_delete_sec_context = gss_delete_sec_context_kerberos, }; @@ -240,6 +242,11 @@ static struct pf_desc gss_kerberos_pfs[] = { .service = RPC_GSS_SVC_INTEGRITY, .name = "krb5i", }, + [2] = { + .pseudoflavor = RPC_AUTH_GSS_KRB5P, + .service = RPC_GSS_SVC_PRIVACY, + .name = "krb5p", + }, }; static struct gss_api_mech gss_kerberos_mech = { diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 2511834e6e52..fb852d9ab06f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -116,8 +116,8 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - if (make_checksum(checksum_type, krb5_hdr, 8, text, &md5cksum)) - goto out_err; + if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) + goto out_err; switch (ctx->signalg) { case SGN_ALG_DES_MAC_MD5: diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 19eba3df6607..c3d6d1bc100c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -136,7 +136,7 @@ krb5_read_token(struct krb5_ctx *ctx, switch (signalg) { case SGN_ALG_DES_MAC_MD5: ret = make_checksum(checksum_type, ptr - 2, 8, - message_buffer, &md5cksum); + message_buffer, 0, &md5cksum); if (ret) goto out; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c new file mode 100644 index 000000000000..ddcde6e42b23 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -0,0 +1,370 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +static inline int +gss_krb5_padding(int blocksize, int length) +{ + /* Most of the code is block-size independent but currently we + * use only 8: */ + BUG_ON(blocksize != 8); + return 8 - (length & 7); +} + +static inline void +gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize) +{ + int padding = gss_krb5_padding(blocksize, buf->len - offset); + char *p; + struct kvec *iov; + + if (buf->page_len || buf->tail[0].iov_len) + iov = &buf->tail[0]; + else + iov = &buf->head[0]; + p = iov->iov_base + iov->iov_len; + iov->iov_len += padding; + buf->len += padding; + memset(p, padding, padding); +} + +static inline int +gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) +{ + u8 *ptr; + u8 pad; + int len = buf->len; + + if (len <= buf->head[0].iov_len) { + pad = *(u8 *)(buf->head[0].iov_base + len - 1); + if (pad > buf->head[0].iov_len) + return -EINVAL; + buf->head[0].iov_len -= pad; + goto out; + } else + len -= buf->head[0].iov_len; + if (len <= buf->page_len) { + int last = (buf->page_base + len - 1) + >>PAGE_CACHE_SHIFT; + int offset = (buf->page_base + len - 1) + & (PAGE_CACHE_SIZE - 1); + ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA); + pad = *(ptr + offset); + kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); + goto out; + } else + len -= buf->page_len; + BUG_ON(len > buf->tail[0].iov_len); + pad = *(u8 *)(buf->tail[0].iov_base + len - 1); +out: + /* XXX: NOTE: we do not adjust the page lengths--they represent + * a range of data in the real filesystem page cache, and we need + * to know that range so the xdr code can properly place read data. + * However adjusting the head length, as we do above, is harmless. + * In the case of a request that fits into a single page, the server + * also uses length and head length together to determine the original + * start of the request to copy the request for deferal; so it's + * easier on the server if we adjust head and tail length in tandem. + * It's not really a problem that we don't fool with the page and + * tail lengths, though--at worst badly formed xdr might lead the + * server to attempt to parse the padding. + * XXX: Document all these weird requirements for gss mechanism + * wrap/unwrap functions. */ + if (pad > blocksize) + return -EINVAL; + if (buf->len > pad) + buf->len -= pad; + else + return -EINVAL; + return 0; +} + +static inline void +make_confounder(char *p, int blocksize) +{ + static u64 i = 0; + u64 *q = (u64 *)p; + + /* rfc1964 claims this should be "random". But all that's really + * necessary is that it be unique. And not even that is necessary in + * our case since our "gssapi" implementation exists only to support + * rpcsec_gss, so we know that the only buffers we will ever encrypt + * already begin with a unique sequence number. Just to hedge my bets + * I'll make a half-hearted attempt at something unique, but ensuring + * uniqueness would mean worrying about atomicity and rollover, and I + * don't care enough. */ + + BUG_ON(blocksize != 8); + *q = i++; +} + +/* Assumptions: the head and tail of inbuf are ours to play with. + * The pages, however, may be real pages in the page cache and we replace + * them with scratch pages from **pages before writing to them. */ +/* XXX: obviously the above should be documentation of wrap interface, + * and shouldn't be in this kerberos-specific file. */ + +/* XXX factor out common code with seal/unseal. */ + +u32 +gss_wrap_kerberos(struct gss_ctx *ctx, u32 qop, int offset, + struct xdr_buf *buf, struct page **pages) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + int blocksize = 0, plainlen; + unsigned char *ptr, *krb5_hdr, *msg_start; + s32 now; + int headlen; + struct page **tmp_pages; + + dprintk("RPC: gss_wrap_kerberos\n"); + + now = get_seconds(); + + if (qop != 0) + goto out_err; + + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_krb5_seal: kctx->signalg %d not" + " supported\n", kctx->signalg); + goto out_err; + } + if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) { + dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n", + kctx->sealalg); + goto out_err; + } + + blocksize = crypto_tfm_alg_blocksize(kctx->enc); + gss_krb5_add_padding(buf, offset, blocksize); + BUG_ON((buf->len - offset) % blocksize); + plainlen = blocksize + buf->len - offset; + + headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - + (buf->len - offset); + + ptr = buf->head[0].iov_base + offset; + /* shift data to make room for header. */ + /* XXX Would be cleverer to encrypt while copying. */ + /* XXX bounds checking, slack, etc. */ + memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset); + buf->head[0].iov_len += headlen; + buf->len += headlen; + BUG_ON((buf->len - offset - headlen) % blocksize); + + g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); + + + *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); + *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); + + /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ + krb5_hdr = ptr - 2; + msg_start = krb5_hdr + 24; + /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); + + *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); + memset(krb5_hdr + 4, 0xff, 4); + *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); + + make_confounder(msg_start, blocksize); + + /* XXXJBF: UGH!: */ + tmp_pages = buf->pages; + buf->pages = pages; + if (make_checksum(checksum_type, krb5_hdr, 8, buf, + offset + headlen - blocksize, &md5cksum)) + goto out_err; + buf->pages = tmp_pages; + + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + goto out_err; + memcpy(krb5_hdr + 16, + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); + + dprintk("RPC: make_seal_token: cksum data: \n"); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); + break; + default: + BUG(); + } + + kfree(md5cksum.data); + + /* XXX would probably be more efficient to compute checksum + * and encrypt at the same time: */ + if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, + kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) + goto out_err; + + if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, + pages)) + goto out_err; + + kctx->seq_send++; + + return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); +out_err: + if (md5cksum.data) kfree(md5cksum.data); + return GSS_S_FAILURE; +} + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx, u32 *qop, int offset, + struct xdr_buf *buf) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + int signalg; + int sealalg; + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + s32 now; + int direction; + s32 seqnum; + unsigned char *ptr; + int bodysize; + u32 ret = GSS_S_DEFECTIVE_TOKEN; + void *data_start, *orig_start; + int data_len; + int blocksize; + + dprintk("RPC: gss_unwrap_kerberos\n"); + + ptr = (u8 *)buf->head[0].iov_base + offset; + if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, + buf->len - offset)) + goto out; + + if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || + (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) + goto out; + + /* XXX sanity-check bodysize?? */ + + /* get the sign and seal algorithms */ + + signalg = ptr[0] + (ptr[1] << 8); + sealalg = ptr[2] + (ptr[3] << 8); + + /* Sanity checks */ + + if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + goto out; + + if (sealalg == 0xffff) + goto out; + + /* in the current spec, there is only one valid seal algorithm per + key type, so a simple comparison is ok */ + + if (sealalg != kctx->sealalg) + goto out; + + /* there are several mappings of seal algorithms to sign algorithms, + but few enough that we can try them all. */ + + if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) || + (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || + (kctx->sealalg == SEAL_ALG_DES3KD && + signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) + goto out; + + if (gss_decrypt_xdr_buf(kctx->enc, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base)) + goto out; + + /* compute the checksum of the message */ + + /* initialize the the cksum */ + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + ret = make_checksum(checksum_type, ptr - 2, 8, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum); + if (ret) + goto out; + + ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len); + if (ret) + goto out; + + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { + ret = GSS_S_BAD_SIG; + goto out; + } + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + /* it got through unscathed. Make sure the context is unexpired */ + + if (qop) + *qop = GSS_C_QOP_DEFAULT; + + now = get_seconds(); + + ret = GSS_S_CONTEXT_EXPIRED; + if (now > kctx->endtime) + goto out; + + /* do sequencing checks */ + + ret = GSS_S_BAD_SIG; + if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) + goto out; + + if ((kctx->initiate && direction != 0xff) || + (!kctx->initiate && direction != 0)) + goto out; + + /* Copy the data back to the right position. XXX: Would probably be + * better to copy and encrypt at the same time. */ + + blocksize = crypto_tfm_alg_blocksize(kctx->enc); + data_start = ptr + 22 + blocksize; + orig_start = buf->head[0].iov_base + offset; + data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; + memmove(orig_start, data_start, data_len); + buf->head[0].iov_len -= (data_start - orig_start); + buf->len -= (data_start - orig_start); + + ret = GSS_S_DEFECTIVE_TOKEN; + if (gss_krb5_remove_padding(buf, blocksize)) + goto out; + + ret = GSS_S_COMPLETE; +out: + if (md5cksum.data) kfree(md5cksum.data); + return ret; +} -- cgit v1.2.3 From 00fd6e14255fe7a249315746386d640bc4e9e758 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:55:18 -0400 Subject: RPCSEC_GSS remove all qop parameters Not only are the qop parameters that are passed around throughout the gssapi unused by any currently implemented mechanism, but there appears to be some doubt as to whether they will ever be used. Let's just kill them off for now. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_api.h | 13 ++----------- include/linux/sunrpc/gss_err.h | 10 ---------- include/linux/sunrpc/gss_krb5.h | 8 ++++---- include/linux/sunrpc/gss_spkm3.h | 4 ++-- net/sunrpc/auth_gss/auth_gss.c | 20 +++++++------------- net/sunrpc/auth_gss/gss_krb5_mech.c | 12 ++++-------- net/sunrpc/auth_gss/gss_krb5_seal.c | 5 +---- net/sunrpc/auth_gss/gss_krb5_unseal.c | 5 +---- net/sunrpc/auth_gss/gss_krb5_wrap.c | 11 ++--------- net/sunrpc/auth_gss/gss_mech_switch.c | 14 ++++---------- net/sunrpc/auth_gss/gss_spkm3_mech.c | 21 ++++++++------------- net/sunrpc/auth_gss/gss_spkm3_seal.c | 4 +--- net/sunrpc/auth_gss/gss_spkm3_unseal.c | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 9 ++++----- 14 files changed, 41 insertions(+), 97 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index e896752ffbf9..9b8bcf125c18 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -40,23 +40,19 @@ int gss_import_sec_context( struct gss_ctx **ctx_id); u32 gss_get_mic( struct gss_ctx *ctx_id, - u32 qop, struct xdr_buf *message, struct xdr_netobj *mic_token); u32 gss_verify_mic( struct gss_ctx *ctx_id, struct xdr_buf *message, - struct xdr_netobj *mic_token, - u32 *qstate); + struct xdr_netobj *mic_token); u32 gss_wrap( struct gss_ctx *ctx_id, - u32 qop, int offset, struct xdr_buf *outbuf, struct page **inpages); u32 gss_unwrap( struct gss_ctx *ctx_id, - u32 *qop, int offset, struct xdr_buf *inbuf); u32 gss_delete_sec_context( @@ -67,7 +63,6 @@ char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); struct pf_desc { u32 pseudoflavor; - u32 qop; u32 service; char *name; char *auth_domain_name; @@ -96,23 +91,19 @@ struct gss_api_ops { struct gss_ctx *ctx_id); u32 (*gss_get_mic)( struct gss_ctx *ctx_id, - u32 qop, struct xdr_buf *message, struct xdr_netobj *mic_token); u32 (*gss_verify_mic)( struct gss_ctx *ctx_id, struct xdr_buf *message, - struct xdr_netobj *mic_token, - u32 *qstate); + struct xdr_netobj *mic_token); u32 (*gss_wrap)( struct gss_ctx *ctx_id, - u32 qop, int offset, struct xdr_buf *outbuf, struct page **inpages); u32 (*gss_unwrap)( struct gss_ctx *ctx_id, - u32 *qop, int offset, struct xdr_buf *buf); void (*gss_delete_sec_context)( diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h index 92608a2e574c..a6807867bd21 100644 --- a/include/linux/sunrpc/gss_err.h +++ b/include/linux/sunrpc/gss_err.h @@ -65,16 +65,6 @@ typedef unsigned int OM_uint32; #define GSS_C_MECH_CODE 2 -/* - * Define the default Quality of Protection for per-message services. Note - * that an implementation that offers multiple levels of QOP may either reserve - * a value (for example zero, as assumed here) to mean "default protection", or - * alternatively may simply equate GSS_C_QOP_DEFAULT to a specific explicit - * QOP value. However a value of 0 should always be interpreted by a GSSAPI - * implementation as a request for the default protection level. - */ -#define GSS_C_QOP_DEFAULT 0 - /* * Expiration time of 2^32-1 seconds means infinite lifetime for a * credential or security context diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 7f93c2d5ebdb..a7bda4edb853 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -119,21 +119,21 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum); u32 -krb5_make_token(struct krb5_ctx *context_handle, int qop_req, +krb5_make_token(struct krb5_ctx *context_handle, struct xdr_buf *input_message_buffer, struct xdr_netobj *output_message_buffer); u32 krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, - struct xdr_buf *message_buffer, int *qop_state); + struct xdr_buf *message_buffer); u32 -gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset, +gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, struct xdr_buf *outbuf, struct page **pages); u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset, +gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, struct xdr_buf *buf); diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h index b5c9968c3c17..0beb2cf00a84 100644 --- a/include/linux/sunrpc/gss_spkm3.h +++ b/include/linux/sunrpc/gss_spkm3.h @@ -41,9 +41,9 @@ struct spkm3_ctx { #define SPKM_WRAP_TOK 5 #define SPKM_DEL_TOK 6 -u32 spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, struct xdr_buf * text, struct xdr_netobj * token, int toktype); +u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype); -u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int *qop_state, int toktype); +u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype); #define CKSUMTYPE_RSA_MD5 0x0007 diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5e4872058ec7..f44f46f1d8e0 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -854,9 +854,7 @@ gss_marshal(struct rpc_task *task, u32 *p) *p++ = htonl(RPC_AUTH_GSS); mic.data = (u8 *)(p + 1); - maj_stat = gss_get_mic(ctx->gc_gss_ctx, - GSS_C_QOP_DEFAULT, - &verf_buf, &mic); + maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) { cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; } else if (maj_stat != 0) { @@ -888,7 +886,7 @@ gss_validate(struct rpc_task *task, u32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 seq, qop_state; + u32 seq; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; @@ -909,7 +907,7 @@ gss_validate(struct rpc_task *task, u32 *p) mic.data = (u8 *)p; mic.len = len; - maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state); + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; if (maj_stat) @@ -961,8 +959,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, p = iov->iov_base + iov->iov_len; mic.data = (u8 *)(p + 1); - maj_stat = gss_get_mic(ctx->gc_gss_ctx, - GSS_C_QOP_DEFAULT, &integ_buf, &mic); + maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); status = -EIO; /* XXX? */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; @@ -1057,8 +1054,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); snd_buf->tail[0].iov_base = tmp; } - maj_stat = gss_wrap(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, offset, - snd_buf, inpages); + maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* RPC_SLACK_SPACE should prevent this ever happening: */ BUG_ON(snd_buf->len > snd_buf->buflen); status = -EIO; @@ -1150,8 +1146,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) return status; - maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, - &mic, NULL); + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; if (maj_stat != GSS_S_COMPLETE) @@ -1176,8 +1171,7 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, /* remove padding: */ rcv_buf->len = offset + opaque_len; - maj_stat = gss_unwrap(ctx->gc_gss_ctx, NULL, - offset, rcv_buf); + maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; if (maj_stat != GSS_S_COMPLETE) diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 37a9ad97ccd4..9ffac2c50b94 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -193,15 +193,12 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { static u32 gss_verify_mic_kerberos(struct gss_ctx *ctx, struct xdr_buf *message, - struct xdr_netobj *mic_token, - u32 *qstate) { + struct xdr_netobj *mic_token) +{ u32 maj_stat = 0; - int qop_state; struct krb5_ctx *kctx = ctx->internal_ctx_id; - maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state); - if (!maj_stat && qop_state) - *qstate = qop_state; + maj_stat = krb5_read_token(kctx, mic_token, message); dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); return maj_stat; @@ -209,13 +206,12 @@ gss_verify_mic_kerberos(struct gss_ctx *ctx, static u32 gss_get_mic_kerberos(struct gss_ctx *ctx, - u32 qop, struct xdr_buf *message, struct xdr_netobj *mic_token) { u32 err = 0; struct krb5_ctx *kctx = ctx->internal_ctx_id; - err = krb5_make_token(kctx, qop, message, mic_token); + err = krb5_make_token(kctx, message, mic_token); dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index fb852d9ab06f..15227c727c8b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -71,7 +71,7 @@ #endif u32 -krb5_make_token(struct krb5_ctx *ctx, int qop_req, +krb5_make_token(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token) { s32 checksum_type; @@ -83,9 +83,6 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, now = get_seconds(); - if (qop_req != 0) - goto out_err; - switch (ctx->signalg) { case SGN_ALG_DES_MAC_MD5: checksum_type = CKSUMTYPE_RSA_MD5; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index c3d6d1bc100c..bcf978627a71 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -74,7 +74,7 @@ u32 krb5_read_token(struct krb5_ctx *ctx, struct xdr_netobj *read_token, - struct xdr_buf *message_buffer, int *qop_state) + struct xdr_buf *message_buffer) { int signalg; int sealalg; @@ -157,9 +157,6 @@ krb5_read_token(struct krb5_ctx *ctx, /* it got through unscathed. Make sure the context is unexpired */ - if (qop_state) - *qop_state = GSS_C_QOP_DEFAULT; - now = get_seconds(); ret = GSS_S_CONTEXT_EXPIRED; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index ddcde6e42b23..af777cf9f251 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -116,7 +116,7 @@ make_confounder(char *p, int blocksize) /* XXX factor out common code with seal/unseal. */ u32 -gss_wrap_kerberos(struct gss_ctx *ctx, u32 qop, int offset, +gss_wrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf, struct page **pages) { struct krb5_ctx *kctx = ctx->internal_ctx_id; @@ -132,9 +132,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, u32 qop, int offset, now = get_seconds(); - if (qop != 0) - goto out_err; - switch (kctx->signalg) { case SGN_ALG_DES_MAC_MD5: checksum_type = CKSUMTYPE_RSA_MD5; @@ -229,8 +226,7 @@ out_err: } u32 -gss_unwrap_kerberos(struct gss_ctx *ctx, u32 *qop, int offset, - struct xdr_buf *buf) +gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) { struct krb5_ctx *kctx = ctx->internal_ctx_id; int signalg; @@ -328,9 +324,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, u32 *qop, int offset, /* it got through unscathed. Make sure the context is unexpired */ - if (qop) - *qop = GSS_C_QOP_DEFAULT; - now = get_seconds(); ret = GSS_S_CONTEXT_EXPIRED; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 06d97cb3481a..b048bf672da2 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -250,13 +250,11 @@ gss_import_sec_context(const void *input_token, size_t bufsize, u32 gss_get_mic(struct gss_ctx *context_handle, - u32 qop, struct xdr_buf *message, struct xdr_netobj *mic_token) { return context_handle->mech_type->gm_ops ->gss_get_mic(context_handle, - qop, message, mic_token); } @@ -266,35 +264,31 @@ gss_get_mic(struct gss_ctx *context_handle, u32 gss_verify_mic(struct gss_ctx *context_handle, struct xdr_buf *message, - struct xdr_netobj *mic_token, - u32 *qstate) + struct xdr_netobj *mic_token) { return context_handle->mech_type->gm_ops ->gss_verify_mic(context_handle, message, - mic_token, - qstate); + mic_token); } u32 gss_wrap(struct gss_ctx *ctx_id, - u32 qop, int offset, struct xdr_buf *buf, struct page **inpages) { return ctx_id->mech_type->gm_ops - ->gss_wrap(ctx_id, qop, offset, buf, inpages); + ->gss_wrap(ctx_id, offset, buf, inpages); } u32 gss_unwrap(struct gss_ctx *ctx_id, - u32 *qop, int offset, struct xdr_buf *buf) { return ctx_id->mech_type->gm_ops - ->gss_unwrap(ctx_id, qop, offset, buf); + ->gss_unwrap(ctx_id, offset, buf); } diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 6c97d61baa9b..39b3edc14694 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -224,18 +224,13 @@ gss_delete_sec_context_spkm3(void *internal_ctx) { static u32 gss_verify_mic_spkm3(struct gss_ctx *ctx, struct xdr_buf *signbuf, - struct xdr_netobj *checksum, - u32 *qstate) { + struct xdr_netobj *checksum) +{ u32 maj_stat = 0; - int qop_state = 0; struct spkm3_ctx *sctx = ctx->internal_ctx_id; dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); - maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state, - SPKM_MIC_TOK); - - if (!maj_stat && qop_state) - *qstate = qop_state; + maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); return maj_stat; @@ -243,15 +238,15 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx, static u32 gss_get_mic_spkm3(struct gss_ctx *ctx, - u32 qop, struct xdr_buf *message_buffer, - struct xdr_netobj *message_token) { + struct xdr_netobj *message_token) +{ u32 err = 0; struct spkm3_ctx *sctx = ctx->internal_ctx_id; dprintk("RPC: gss_get_mic_spkm3\n"); - err = spkm3_make_token(sctx, qop, message_buffer, + err = spkm3_make_token(sctx, message_buffer, message_token, SPKM_MIC_TOK); return err; } @@ -264,8 +259,8 @@ static struct gss_api_ops gss_spkm3_ops = { }; static struct pf_desc gss_spkm3_pfs[] = { - {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"}, - {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, + {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"}, + {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, }; static struct gss_api_mech gss_spkm3_mech = { diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 25339868d462..148201e929d0 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -51,7 +51,7 @@ */ u32 -spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, +spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype) { @@ -68,8 +68,6 @@ spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, dprintk("RPC: spkm3_make_token\n"); now = jiffies; - if (qop_req != 0) - goto out_err; if (ctx->ctx_id.len != 16) { dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 65ce81bf0bc4..c3c0d9586103 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c @@ -52,7 +52,7 @@ u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, /* checksum */ struct xdr_buf *message_buffer, /* signbuf */ - int *qop_state, int toktype) + int toktype) { s32 code; struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index e3308195374e..e4ada15ed856 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -566,8 +566,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, if (rqstp->rq_deferred) /* skip verification of revisited request */ return SVC_OK; - if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) - != GSS_S_COMPLETE) { + if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) { *authp = rpcsec_gsserr_credproblem; return SVC_DENIED; } @@ -604,7 +603,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) xdr_buf_from_iov(&iov, &verf_data); p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; mic.data = (u8 *)(p + 1); - maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); + maj_stat = gss_get_mic(ctx_id, &verf_data, &mic); if (maj_stat != GSS_S_COMPLETE) return -1; *p++ = htonl(mic.len); @@ -710,7 +709,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) goto out; - maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); + maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); if (maj_stat != GSS_S_COMPLETE) goto out; if (ntohl(svc_getu32(&buf->head[0])) != seq) @@ -1012,7 +1011,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) resv = &resbuf->tail[0]; } mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; - if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) + if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) goto out_err; svc_putu32(resv, htonl(mic.len)); memset(mic.data + mic.len, 0, -- cgit v1.2.3 From a0857d03b21fa54653c9d2fe7a315381176015b4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:55:23 -0400 Subject: RPCSEC_GSS: krb5 cleanup Remove some senseless wrappers. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 12 ++++-------- net/sunrpc/auth_gss/gss_krb5_mech.c | 28 ---------------------------- net/sunrpc/auth_gss/gss_krb5_seal.c | 5 +++-- net/sunrpc/auth_gss/gss_krb5_unseal.c | 6 +++--- 4 files changed, 10 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index a7bda4edb853..2c3601d31045 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -118,15 +118,11 @@ s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum); -u32 -krb5_make_token(struct krb5_ctx *context_handle, - struct xdr_buf *input_message_buffer, - struct xdr_netobj *output_message_buffer); +u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, + struct xdr_netobj *); -u32 -krb5_read_token(struct krb5_ctx *context_handle, - struct xdr_netobj *input_token_buffer, - struct xdr_buf *message_buffer); +u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *, + struct xdr_netobj *); u32 gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 9ffac2c50b94..5f1f806a0b11 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -190,34 +190,6 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { kfree(kctx); } -static u32 -gss_verify_mic_kerberos(struct gss_ctx *ctx, - struct xdr_buf *message, - struct xdr_netobj *mic_token) -{ - u32 maj_stat = 0; - struct krb5_ctx *kctx = ctx->internal_ctx_id; - - maj_stat = krb5_read_token(kctx, mic_token, message); - - dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); - return maj_stat; -} - -static u32 -gss_get_mic_kerberos(struct gss_ctx *ctx, - struct xdr_buf *message, - struct xdr_netobj *mic_token) { - u32 err = 0; - struct krb5_ctx *kctx = ctx->internal_ctx_id; - - err = krb5_make_token(kctx, message, mic_token); - - dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); - - return err; -} - static struct gss_api_ops gss_kerberos_ops = { .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 15227c727c8b..13f8ae979454 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -71,9 +71,10 @@ #endif u32 -krb5_make_token(struct krb5_ctx *ctx, - struct xdr_buf *text, struct xdr_netobj *token) +gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, + struct xdr_netobj *token) { + struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; s32 checksum_type; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; unsigned char *ptr, *krb5_hdr, *msg_start; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index bcf978627a71..2030475d98ed 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -72,10 +72,10 @@ * supposedly taken over. */ u32 -krb5_read_token(struct krb5_ctx *ctx, - struct xdr_netobj *read_token, - struct xdr_buf *message_buffer) +gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, + struct xdr_buf *message_buffer, struct xdr_netobj *read_token) { + struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; int signalg; int sealalg; s32 checksum_type; -- cgit v1.2.3 From 3359b54c8c07338f3a863d1109b42eebccdcf379 Mon Sep 17 00:00:00 2001 From: "Seth, Rohit" Date: Tue, 18 Oct 2005 14:15:12 -0700 Subject: [PATCH] Handle spurious page fault for hugetlb region The hugetlb pages are currently pre-faulted. At the time of mmap of hugepages, we populate the new PTEs. It is possible that HW has already cached some of the unused PTEs internally. These stale entries never get a chance to be purged in existing control flow. This patch extends the check in page fault code for hugepages. Check if a faulted address falls with in size for the hugetlb file backing it. We return VM_FAULT_MINOR for these cases (assuming that the arch specific page-faulting code purges the stale entry for the archs that need it). Signed-off-by: Rohit Seth [ This is apparently arguably an ia64 port bug. But the code won't hurt, and for now it fixes a real problem on some ia64 machines ] Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 13 +++++++++++++ mm/memory.c | 14 ++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e670b0d13fe0..42cb7d70f9ac 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -155,11 +155,24 @@ static inline void set_file_hugepages(struct file *file) { file->f_op = &hugetlbfs_file_operations; } + +static inline int valid_hugetlb_file_off(struct vm_area_struct *vma, + unsigned long address) +{ + struct inode *inode = vma->vm_file->f_dentry->d_inode; + loff_t file_off = address - vma->vm_start; + + file_off += (vma->vm_pgoff << PAGE_SHIFT); + + return (file_off < inode->i_size); +} + #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() #define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS) +#define valid_hugetlb_file_off(vma, address) 0 #endif /* !CONFIG_HUGETLBFS */ diff --git a/mm/memory.c b/mm/memory.c index ae8161f1f459..8c88b973abc5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2045,8 +2045,18 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, inc_page_state(pgfault); - if (is_vm_hugetlb_page(vma)) - return VM_FAULT_SIGBUS; /* mapping truncation does this. */ + if (unlikely(is_vm_hugetlb_page(vma))) { + if (valid_hugetlb_file_off(vma, address)) + /* We get here only if there was a stale(zero) TLB entry + * (because of HW prefetching). + * Low-level arch code (if needed) should have already + * purged the stale entry as part of this fault handling. + * Here we just return. + */ + return VM_FAULT_MINOR; + else + return VM_FAULT_SIGBUS; /* mapping truncation does this. */ + } /* * We need the page table lock to synchronize with kswapd -- cgit v1.2.3 From 281dd25cdc0d6903929b79183816d151ea626341 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Wed, 19 Oct 2005 15:52:18 -0700 Subject: [PATCH] swiotlb: make sure initial DMA allocations really are in DMA memory This introduces a limit parameter to the core bootmem allocator; The new parameter indicates that physical memory allocated by the bootmem allocator should be within the requested limit. We also introduce alloc_bootmem_low_pages_limit, alloc_bootmem_node_limit, alloc_bootmem_low_pages_node_limit apis, but alloc_bootmem_low_pages_limit is the only api used for swiotlb. The existing alloc_bootmem_low_pages() api could instead have been changed and made to pass right limit to the core allocator. But that would make the patch more intrusive for 2.6.14, as other arches use alloc_bootmem_low_pages(). We may be done that post 2.6.14 as a cleanup. With this, swiotlb gets memory within 4G for both x86_64 and ia64 arches. Signed-off-by: Yasunori Goto Cc: Ravikiran G Thirumalai Signed-off-by: Linus Torvalds --- arch/ia64/lib/swiotlb.c | 4 ++-- include/linux/bootmem.h | 32 ++++++++++++++++++++++++++++++-- mm/bootmem.c | 31 +++++++++++++++++++++---------- 3 files changed, 53 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c index dbc0b3e449c5..a604efc7f6c9 100644 --- a/arch/ia64/lib/swiotlb.c +++ b/arch/ia64/lib/swiotlb.c @@ -123,8 +123,8 @@ swiotlb_init_with_default_size (size_t default_size) /* * Get IO TLB memory from the low pages */ - io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * - (1 << IO_TLB_SHIFT)); + io_tlb_start = alloc_bootmem_low_pages_limit(io_tlb_nslabs * + (1 << IO_TLB_SHIFT), 0x100000000); if (!io_tlb_start) panic("Cannot allocate SWIOTLB buffer"); io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 82bd8842d11c..3b03b0b868dd 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -43,7 +43,7 @@ typedef struct bootmem_data { extern unsigned long __init bootmem_bootmap_pages (unsigned long); extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); extern void __init free_bootmem (unsigned long addr, unsigned long size); -extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); +extern void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE extern void __init reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ @@ -54,6 +54,16 @@ extern void __init reserve_bootmem (unsigned long addr, unsigned long size); __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem((x), PAGE_SIZE, 0) + +#define alloc_bootmem_limit(x, limit) \ + __alloc_bootmem_limit((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS), (limit)) +#define alloc_bootmem_low_limit(x, limit) \ + __alloc_bootmem_limit((x), SMP_CACHE_BYTES, 0, (limit)) +#define alloc_bootmem_pages_limit(x, limit) \ + __alloc_bootmem_limit((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS), (limit)) +#define alloc_bootmem_low_pages_limit(x, limit) \ + __alloc_bootmem_limit((x), PAGE_SIZE, 0, (limit)) + #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern unsigned long __init free_all_bootmem (void); @@ -61,7 +71,7 @@ extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long f extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); -extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +extern void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) @@ -69,6 +79,14 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) + +#define alloc_bootmem_node_limit(pgdat, x, limit) \ + __alloc_bootmem_node_limit((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS), (limit)) +#define alloc_bootmem_pages_node_limit(pgdat, x, limit) \ + __alloc_bootmem_node_limit((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS), (limit)) +#define alloc_bootmem_low_pages_node_limit(pgdat, x, limit) \ + __alloc_bootmem_node_limit((pgdat), (x), PAGE_SIZE, 0, (limit)) + #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP @@ -105,5 +123,15 @@ extern void *__init alloc_large_system_hash(const char *tablename, #endif extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? */ +static inline void *__alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) +{ + return __alloc_bootmem_limit(size, align, goal, 0); +} + +static inline void *__alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, + unsigned long goal) +{ + return __alloc_bootmem_node_limit(pgdat, size, align, goal, 0); +} #endif /* _LINUX_BOOTMEM_H */ diff --git a/mm/bootmem.c b/mm/bootmem.c index c1330cc19783..a58699b6579e 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -154,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, */ static void * __init __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, - unsigned long align, unsigned long goal) + unsigned long align, unsigned long goal, unsigned long limit) { unsigned long offset, remaining_size, areasize, preferred; - unsigned long i, start = 0, incr, eidx; + unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; void *ret; if(!size) { @@ -166,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, } BUG_ON(align & (align-1)); - eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + if (limit && bdata->node_boot_start >= limit) + return NULL; + + limit >>=PAGE_SHIFT; + if (limit && end_pfn > limit) + end_pfn = limit; + + eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); offset = 0; if (align && (bdata->node_boot_start & (align - 1UL)) != 0) @@ -178,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, * first, then we try to allocate lower pages. */ if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { + ((goal >> PAGE_SHIFT) < end_pfn)) { preferred = goal - bdata->node_boot_start; if (bdata->last_success >= preferred) - preferred = bdata->last_success; + if (!limit || (limit && limit > bdata->last_success)) + preferred = bdata->last_success; } else preferred = 0; @@ -382,14 +390,15 @@ unsigned long __init free_all_bootmem (void) return(free_all_bootmem_core(NODE_DATA(0))); } -void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, + unsigned long limit) { pg_data_t *pgdat = pgdat_list; void *ptr; for_each_pgdat(pgdat) if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, - align, goal))) + align, goal, limit))) return(ptr); /* @@ -400,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned return NULL; } -void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) + +void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { void *ptr; - ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal); + ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit); if (ptr) return (ptr); - return __alloc_bootmem(size, align, goal); + return __alloc_bootmem_limit(size, align, goal, limit); } -- cgit v1.2.3 From ac9b9c667c2e1194e22ebe0a441ae1c37aaa9b90 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 20 Oct 2005 16:24:28 +0100 Subject: [PATCH] Fix handling spurious page fault for hugetlb region This reverts commit 3359b54c8c07338f3a863d1109b42eebccdcf379 and replaces it with a cleaner version that is purely based on page table operations, so that the synchronization between inode size and hugetlb mappings becomes moot. Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 16 +++------------- mm/hugetlb.c | 22 ++++++++++++++++++++++ mm/memory.c | 14 ++------------ 3 files changed, 27 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 42cb7d70f9ac..d664330d900e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -25,6 +25,8 @@ int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); struct page *alloc_huge_page(void); void free_huge_page(struct page *); +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access); extern unsigned long max_huge_pages; extern const unsigned long hugetlb_zero, hugetlb_infinity; @@ -99,6 +101,7 @@ static inline unsigned long hugetlb_total_pages(void) do { } while (0) #define alloc_huge_page() ({ NULL; }) #define free_huge_page(p) ({ (void)(p); BUG(); }) +#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) #ifndef HPAGE_MASK #define HPAGE_MASK 0 /* Keep the compiler happy */ @@ -155,24 +158,11 @@ static inline void set_file_hugepages(struct file *file) { file->f_op = &hugetlbfs_file_operations; } - -static inline int valid_hugetlb_file_off(struct vm_area_struct *vma, - unsigned long address) -{ - struct inode *inode = vma->vm_file->f_dentry->d_inode; - loff_t file_off = address - vma->vm_start; - - file_off += (vma->vm_pgoff << PAGE_SHIFT); - - return (file_off < inode->i_size); -} - #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() #define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS) -#define valid_hugetlb_file_off(vma, address) 0 #endif /* !CONFIG_HUGETLBFS */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a1b30d45459e..61d380678030 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -394,6 +394,28 @@ out: return ret; } +/* + * On ia64 at least, it is possible to receive a hugetlb fault from a + * stale zero entry left in the TLB from earlier hardware prefetching. + * Low-level arch code should already have flushed the stale entry as + * part of its fault handling, but we do need to accept this minor fault + * and return successfully. Whereas the "normal" case is that this is + * an access to a hugetlb page which has been truncated off since mmap. + */ +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access) +{ + int ret = VM_FAULT_SIGBUS; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pte = huge_pte_offset(mm, address); + if (pte && !pte_none(*pte)) + ret = VM_FAULT_MINOR; + spin_unlock(&mm->page_table_lock); + return ret; +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i) diff --git a/mm/memory.c b/mm/memory.c index 8c88b973abc5..1db40e935e55 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2045,18 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, inc_page_state(pgfault); - if (unlikely(is_vm_hugetlb_page(vma))) { - if (valid_hugetlb_file_off(vma, address)) - /* We get here only if there was a stale(zero) TLB entry - * (because of HW prefetching). - * Low-level arch code (if needed) should have already - * purged the stale entry as part of this fault handling. - * Here we just return. - */ - return VM_FAULT_MINOR; - else - return VM_FAULT_SIGBUS; /* mapping truncation does this. */ - } + if (unlikely(is_vm_hugetlb_page(vma))) + return hugetlb_fault(mm, vma, address, write_access); /* * We need the page table lock to synchronize with kswapd -- cgit v1.2.3 From 11e29e21514517f3022a1f30998ac4c7b1197658 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 21 Oct 2005 18:46:32 -0400 Subject: libata: handle early device PIO modes correctly --- drivers/scsi/libata-core.c | 31 +++++++++++++++++++++++++++---- include/linux/ata.h | 13 +++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 175d4646333d..09639e7aaa71 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1082,6 +1082,31 @@ static inline void ata_dump_id(struct ata_device *dev) dev->id[93]); } +/* + * Compute the PIO modes available for this device. This is not as + * trivial as it seems if we must consider early devices correctly. + * + * FIXME: pre IDE drive timing (do we care ?). + */ + +static unsigned int ata_pio_modes(struct ata_device *adev) +{ + u16 modes; + + /* Usual case. Word 53 indicates word 88 is valid */ + if (adev->id[ATA_ID_FIELD_VALID] & (1 << 2)) { + modes = adev->id[ATA_ID_PIO_MODES] & 0x03; + modes <<= 3; + modes |= 0x7; + return modes; + } + + /* If word 88 isn't valid then Word 51 holds the PIO timing number + for the maximum. Turn it into a mask and return it */ + modes = (2 << (adev->id[ATA_ID_OLD_PIO_MODES] & 0xFF)) - 1 ; + return modes; +} + /** * ata_dev_identify - obtain IDENTIFY x DEVICE page * @ap: port on which device we wish to probe resides @@ -1215,10 +1240,8 @@ retry: xfer_modes = dev->id[ATA_ID_UDMA_MODES]; if (!xfer_modes) xfer_modes = (dev->id[ATA_ID_MWDMA_MODES]) << ATA_SHIFT_MWDMA; - if (!xfer_modes) { - xfer_modes = (dev->id[ATA_ID_PIO_MODES]) << (ATA_SHIFT_PIO + 3); - xfer_modes |= (0x7 << ATA_SHIFT_PIO); - } + if (!xfer_modes) + xfer_modes = ata_pio_modes(dev); ata_dump_id(dev); diff --git a/include/linux/ata.h b/include/linux/ata.h index 630908c9378b..33276d1d05d2 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -42,13 +42,18 @@ enum { ATA_SECT_SIZE = 512, ATA_ID_WORDS = 256, - ATA_ID_PROD_OFS = 27, - ATA_ID_FW_REV_OFS = 23, ATA_ID_SERNO_OFS = 10, - ATA_ID_MAJOR_VER = 80, - ATA_ID_PIO_MODES = 64, + ATA_ID_FW_REV_OFS = 23, + ATA_ID_PROD_OFS = 27, + ATA_ID_OLD_PIO_MODES = 51, + ATA_ID_FIELD_VALID = 53, ATA_ID_MWDMA_MODES = 63, + ATA_ID_PIO_MODES = 64, + ATA_ID_EIDE_DMA_MIN = 65, + ATA_ID_EIDE_PIO = 67, + ATA_ID_EIDE_PIO_IORDY = 68, ATA_ID_UDMA_MODES = 88, + ATA_ID_MAJOR_VER = 80, ATA_ID_PIO4 = (1 << 1), ATA_PCI_CTL_OFS = 2, -- cgit v1.2.3 From 452503f993feffe96e8cc9fbff4888b96e2c5e40 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 21 Oct 2005 19:01:32 -0400 Subject: Add ide-timing functionality to libata. This is needed for full AMD and VIA drivers and possibly more. Functions to turn actual clocking and cycle timings into register values. Also to merge shared timings to compute an optimal timing set. Built from the drivers/ide version by Vojtech Pavlik Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 149 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/libata.h | 39 ++++++++++++ 2 files changed, 188 insertions(+) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 09639e7aaa71..9269fd9b814f 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1538,6 +1538,152 @@ void ata_port_disable(struct ata_port *ap) ap->flags |= ATA_FLAG_PORT_DISABLED; } +/* + * This mode timing computation functionality is ported over from + * drivers/ide/ide-timing.h and was originally written by Vojtech Pavlik + */ +/* + * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). + * These were taken from ATA/ATAPI-6 standard, rev 0a, except + * for PIO 5, which is a nonstandard extension and UDMA6, which + * is currently supported only by Maxtor drives. + */ + +static const struct ata_timing ata_timing[] = { + + { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 15 }, + { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 20 }, + { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 30 }, + { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 45 }, + + { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 60 }, + { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, + { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, + +/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 150 }, */ + + { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, + { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, + { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, + + { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 240, 0 }, + { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, + { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, + +/* { XFER_PIO_5, 20, 50, 30, 100, 50, 30, 100, 0 }, */ + { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, + { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, + + { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 240, 0 }, + { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 383, 0 }, + { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 600, 0 }, + +/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960, 0 }, */ + + { 0xFF } +}; + +#define ENOUGH(v,unit) (((v)-1)/(unit)+1) +#define EZ(v,unit) ((v)?ENOUGH(v,unit):0) + +static void ata_timing_quantize(const struct ata_timing *t, struct ata_timing *q, int T, int UT) +{ + q->setup = EZ(t->setup * 1000, T); + q->act8b = EZ(t->act8b * 1000, T); + q->rec8b = EZ(t->rec8b * 1000, T); + q->cyc8b = EZ(t->cyc8b * 1000, T); + q->active = EZ(t->active * 1000, T); + q->recover = EZ(t->recover * 1000, T); + q->cycle = EZ(t->cycle * 1000, T); + q->udma = EZ(t->udma * 1000, UT); +} + +void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b, + struct ata_timing *m, unsigned int what) +{ + if (what & ATA_TIMING_SETUP ) m->setup = max(a->setup, b->setup); + if (what & ATA_TIMING_ACT8B ) m->act8b = max(a->act8b, b->act8b); + if (what & ATA_TIMING_REC8B ) m->rec8b = max(a->rec8b, b->rec8b); + if (what & ATA_TIMING_CYC8B ) m->cyc8b = max(a->cyc8b, b->cyc8b); + if (what & ATA_TIMING_ACTIVE ) m->active = max(a->active, b->active); + if (what & ATA_TIMING_RECOVER) m->recover = max(a->recover, b->recover); + if (what & ATA_TIMING_CYCLE ) m->cycle = max(a->cycle, b->cycle); + if (what & ATA_TIMING_UDMA ) m->udma = max(a->udma, b->udma); +} + +static const struct ata_timing* ata_timing_find_mode(unsigned short speed) +{ + const struct ata_timing *t; + + for (t = ata_timing; t->mode != speed; t++) + if (t->mode != 0xFF) + return NULL; + return t; +} + +int ata_timing_compute(struct ata_device *adev, unsigned short speed, + struct ata_timing *t, int T, int UT) +{ + const struct ata_timing *s; + struct ata_timing p; + + /* + * Find the mode. + */ + + if (!(s = ata_timing_find_mode(speed))) + return -EINVAL; + + /* + * If the drive is an EIDE drive, it can tell us it needs extended + * PIO/MW_DMA cycle timing. + */ + + if (adev->id[ATA_ID_FIELD_VALID] & 2) { /* EIDE drive */ + memset(&p, 0, sizeof(p)); + if(speed >= XFER_PIO_0 && speed <= XFER_SW_DMA_0) { + if (speed <= XFER_PIO_2) p.cycle = p.cyc8b = adev->id[ATA_ID_EIDE_PIO]; + else p.cycle = p.cyc8b = adev->id[ATA_ID_EIDE_PIO_IORDY]; + } else if(speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) { + p.cycle = adev->id[ATA_ID_EIDE_DMA_MIN]; + } + ata_timing_merge(&p, t, t, ATA_TIMING_CYCLE | ATA_TIMING_CYC8B); + } + + /* + * Convert the timing to bus clock counts. + */ + + ata_timing_quantize(s, t, T, UT); + + /* + * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, S.M.A.R.T + * and some other commands. We have to ensure that the DMA cycle timing is + * slower/equal than the fastest PIO timing. + */ + + if (speed > XFER_PIO_4) { + ata_timing_compute(adev, adev->pio_mode, &p, T, UT); + ata_timing_merge(&p, t, t, ATA_TIMING_ALL); + } + + /* + * Lenghten active & recovery time so that cycle time is correct. + */ + + if (t->act8b + t->rec8b < t->cyc8b) { + t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2; + t->rec8b = t->cyc8b - t->act8b; + } + + if (t->active + t->recover < t->cycle) { + t->active += (t->cycle - (t->active + t->recover)) / 2; + t->recover = t->cycle - t->active; + } + + return 0; +} + static struct { unsigned int shift; u8 base; @@ -4764,6 +4910,9 @@ EXPORT_SYMBOL_GPL(ata_dev_id_string); EXPORT_SYMBOL_GPL(ata_dev_config); EXPORT_SYMBOL_GPL(ata_scsi_simulate); +EXPORT_SYMBOL_GPL(ata_timing_compute); +EXPORT_SYMBOL_GPL(ata_timing_merge); + #ifdef CONFIG_PCI EXPORT_SYMBOL_GPL(pci_test_config_bits); EXPORT_SYMBOL_GPL(ata_pci_host_stop); diff --git a/include/linux/libata.h b/include/linux/libata.h index 0261c55f3483..0e214f8c8f9f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -388,6 +388,19 @@ struct ata_port_info { struct ata_port_operations *port_ops; }; +struct ata_timing { + unsigned short mode; /* ATA mode */ + unsigned short setup; /* t1 */ + unsigned short act8b; /* t2 for 8-bit I/O */ + unsigned short rec8b; /* t2i for 8-bit I/O */ + unsigned short cyc8b; /* t0 for 8-bit I/O */ + unsigned short active; /* t2 or tD */ + unsigned short recover; /* t2i or tK */ + unsigned short cycle; /* t0 */ + unsigned short udma; /* t2CYCTYP/2 */ +}; + +#define FIT(v,vmin,vmax) max_t(short,min_t(short,v,vmax),vmin) extern void ata_port_probe(struct ata_port *); extern void __sata_phy_reset(struct ata_port *ap); @@ -451,6 +464,32 @@ extern int ata_std_bios_param(struct scsi_device *sdev, sector_t capacity, int geom[]); extern int ata_scsi_slave_config(struct scsi_device *sdev); +/* + * Timing helpers + */ +extern int ata_timing_compute(struct ata_device *, unsigned short, + struct ata_timing *, int, int); +extern void ata_timing_merge(const struct ata_timing *, + const struct ata_timing *, struct ata_timing *, + unsigned int); + +enum { + ATA_TIMING_SETUP = (1 << 0), + ATA_TIMING_ACT8B = (1 << 1), + ATA_TIMING_REC8B = (1 << 2), + ATA_TIMING_CYC8B = (1 << 3), + ATA_TIMING_8BIT = ATA_TIMING_ACT8B | ATA_TIMING_REC8B | + ATA_TIMING_CYC8B, + ATA_TIMING_ACTIVE = (1 << 4), + ATA_TIMING_RECOVER = (1 << 5), + ATA_TIMING_CYCLE = (1 << 6), + ATA_TIMING_UDMA = (1 << 7), + ATA_TIMING_ALL = ATA_TIMING_SETUP | ATA_TIMING_ACT8B | + ATA_TIMING_REC8B | ATA_TIMING_CYC8B | + ATA_TIMING_ACTIVE | ATA_TIMING_RECOVER | + ATA_TIMING_CYCLE | ATA_TIMING_UDMA, +}; + #ifdef CONFIG_PCI struct pci_bits { -- cgit v1.2.3 From cf482935c6abe5245e481213c6e6df808c976f56 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sat, 22 Oct 2005 00:19:33 -0400 Subject: libata: turn on block layer clustering --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0e214f8c8f9f..634b5aa0a615 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -91,7 +91,7 @@ enum { ATA_SHT_EMULATED = 1, ATA_SHT_CMD_PER_LUN = 1, ATA_SHT_THIS_ID = -1, - ATA_SHT_USE_CLUSTERING = 0, + ATA_SHT_USE_CLUSTERING = 1, /* struct ata_device stuff */ ATA_DFLAG_LBA48 = (1 << 0), /* device supports LBA48 */ -- cgit v1.2.3 From 057ace5e79da9ebf2aa82833cfea825533ac06fb Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sat, 22 Oct 2005 14:27:05 -0400 Subject: libata: const-ification bombing run Enforce access rules where appropriate. If the compiler is smart enough, this may buy us an optimization or two as a side effect. --- drivers/scsi/ahci.c | 2 +- drivers/scsi/ata_piix.c | 4 +-- drivers/scsi/libata-core.c | 68 ++++++++++++++++++++++----------------------- drivers/scsi/libata-scsi.c | 24 ++++++++-------- drivers/scsi/libata.h | 2 +- drivers/scsi/pdc_adma.c | 2 +- drivers/scsi/sata_mv.c | 2 +- drivers/scsi/sata_nv.c | 2 +- drivers/scsi/sata_promise.c | 12 ++++---- drivers/scsi/sata_qstor.c | 2 +- drivers/scsi/sata_sil.c | 2 +- drivers/scsi/sata_sil24.c | 2 +- drivers/scsi/sata_sis.c | 2 +- drivers/scsi/sata_svw.c | 4 +-- drivers/scsi/sata_sx4.c | 10 +++---- drivers/scsi/sata_uli.c | 2 +- drivers/scsi/sata_via.c | 2 +- drivers/scsi/sata_vsc.c | 4 +-- include/linux/ata.h | 6 ++-- include/linux/libata.h | 32 ++++++++++----------- 20 files changed, 93 insertions(+), 93 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index cfa22e4ee547..fe8187d6f58b 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -216,7 +216,7 @@ static Scsi_Host_Template ahci_sht = { .ordered_flush = 1, }; -static struct ata_port_operations ahci_ops = { +static const struct ata_port_operations ahci_ops = { .port_disable = ata_port_disable, .check_status = ahci_check_status, diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index d71cef767cec..be021478f416 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -147,7 +147,7 @@ static Scsi_Host_Template piix_sht = { .ordered_flush = 1, }; -static struct ata_port_operations piix_pata_ops = { +static const struct ata_port_operations piix_pata_ops = { .port_disable = ata_port_disable, .set_piomode = piix_set_piomode, .set_dmamode = piix_set_dmamode, @@ -177,7 +177,7 @@ static struct ata_port_operations piix_pata_ops = { .host_stop = ata_host_stop, }; -static struct ata_port_operations piix_sata_ops = { +static const struct ata_port_operations piix_sata_ops = { .port_disable = ata_port_disable, .tf_load = ata_tf_load, diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 9269fd9b814f..d55f12dacfcb 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -67,9 +67,9 @@ static void ata_dev_reread_id(struct ata_port *ap, struct ata_device *dev); static void ata_dev_init_params(struct ata_port *ap, struct ata_device *dev); static void ata_set_mode(struct ata_port *ap); static void ata_dev_set_xfermode(struct ata_port *ap, struct ata_device *dev); -static unsigned int ata_get_mode_mask(struct ata_port *ap, int shift); +static unsigned int ata_get_mode_mask(const struct ata_port *ap, int shift); static int fgb(u32 bitmap); -static int ata_choose_xfer_mode(struct ata_port *ap, +static int ata_choose_xfer_mode(const struct ata_port *ap, u8 *xfer_mode_out, unsigned int *xfer_shift_out); static void __ata_qc_complete(struct ata_queued_cmd *qc); @@ -97,7 +97,7 @@ MODULE_VERSION(DRV_VERSION); * Inherited from caller. */ -static void ata_tf_load_pio(struct ata_port *ap, struct ata_taskfile *tf) +static void ata_tf_load_pio(struct ata_port *ap, const struct ata_taskfile *tf) { struct ata_ioports *ioaddr = &ap->ioaddr; unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR; @@ -155,7 +155,7 @@ static void ata_tf_load_pio(struct ata_port *ap, struct ata_taskfile *tf) * Inherited from caller. */ -static void ata_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf) +static void ata_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf) { struct ata_ioports *ioaddr = &ap->ioaddr; unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR; @@ -224,7 +224,7 @@ static void ata_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf) * LOCKING: * Inherited from caller. */ -void ata_tf_load(struct ata_port *ap, struct ata_taskfile *tf) +void ata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) { if (ap->flags & ATA_FLAG_MMIO) ata_tf_load_mmio(ap, tf); @@ -244,7 +244,7 @@ void ata_tf_load(struct ata_port *ap, struct ata_taskfile *tf) * spin_lock_irqsave(host_set lock) */ -static void ata_exec_command_pio(struct ata_port *ap, struct ata_taskfile *tf) +static void ata_exec_command_pio(struct ata_port *ap, const struct ata_taskfile *tf) { DPRINTK("ata%u: cmd 0x%X\n", ap->id, tf->command); @@ -265,7 +265,7 @@ static void ata_exec_command_pio(struct ata_port *ap, struct ata_taskfile *tf) * spin_lock_irqsave(host_set lock) */ -static void ata_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf) +static void ata_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf) { DPRINTK("ata%u: cmd 0x%X\n", ap->id, tf->command); @@ -285,7 +285,7 @@ static void ata_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf) * LOCKING: * spin_lock_irqsave(host_set lock) */ -void ata_exec_command(struct ata_port *ap, struct ata_taskfile *tf) +void ata_exec_command(struct ata_port *ap, const struct ata_taskfile *tf) { if (ap->flags & ATA_FLAG_MMIO) ata_exec_command_mmio(ap, tf); @@ -305,7 +305,7 @@ void ata_exec_command(struct ata_port *ap, struct ata_taskfile *tf) * Obtains host_set lock. */ -static inline void ata_exec(struct ata_port *ap, struct ata_taskfile *tf) +static inline void ata_exec(struct ata_port *ap, const struct ata_taskfile *tf) { unsigned long flags; @@ -328,7 +328,7 @@ static inline void ata_exec(struct ata_port *ap, struct ata_taskfile *tf) * Obtains host_set lock. */ -static void ata_tf_to_host(struct ata_port *ap, struct ata_taskfile *tf) +static void ata_tf_to_host(struct ata_port *ap, const struct ata_taskfile *tf) { ap->ops->tf_load(ap, tf); @@ -348,7 +348,7 @@ static void ata_tf_to_host(struct ata_port *ap, struct ata_taskfile *tf) * spin_lock_irqsave(host_set lock) */ -void ata_tf_to_host_nolock(struct ata_port *ap, struct ata_taskfile *tf) +void ata_tf_to_host_nolock(struct ata_port *ap, const struct ata_taskfile *tf) { ap->ops->tf_load(ap, tf); ap->ops->exec_command(ap, tf); @@ -558,7 +558,7 @@ u8 ata_chk_err(struct ata_port *ap) * Inherited from caller. */ -void ata_tf_to_fis(struct ata_taskfile *tf, u8 *fis, u8 pmp) +void ata_tf_to_fis(const struct ata_taskfile *tf, u8 *fis, u8 pmp) { fis[0] = 0x27; /* Register - Host to Device FIS */ fis[1] = (pmp & 0xf) | (1 << 7); /* Port multiplier number, @@ -599,7 +599,7 @@ void ata_tf_to_fis(struct ata_taskfile *tf, u8 *fis, u8 pmp) * Inherited from caller. */ -void ata_tf_from_fis(u8 *fis, struct ata_taskfile *tf) +void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf) { tf->command = fis[2]; /* status */ tf->feature = fis[3]; /* error */ @@ -845,7 +845,7 @@ static unsigned int ata_devchk(struct ata_port *ap, * the event of failure. */ -unsigned int ata_dev_classify(struct ata_taskfile *tf) +unsigned int ata_dev_classify(const struct ata_taskfile *tf) { /* Apple's open source Darwin code hints that some devices only * put a proper signature into the LBA mid/high registers, @@ -937,7 +937,7 @@ static u8 ata_dev_try_classify(struct ata_port *ap, unsigned int device) * caller. */ -void ata_dev_id_string(u16 *id, unsigned char *s, +void ata_dev_id_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len) { unsigned int c; @@ -1054,7 +1054,7 @@ void ata_dev_select(struct ata_port *ap, unsigned int device, * caller. */ -static inline void ata_dump_id(struct ata_device *dev) +static inline void ata_dump_id(const struct ata_device *dev) { DPRINTK("49==0x%04x " "53==0x%04x " @@ -1089,7 +1089,7 @@ static inline void ata_dump_id(struct ata_device *dev) * FIXME: pre IDE drive timing (do we care ?). */ -static unsigned int ata_pio_modes(struct ata_device *adev) +static unsigned int ata_pio_modes(const struct ata_device *adev) { u16 modes; @@ -1352,7 +1352,7 @@ err_out: } -static inline u8 ata_dev_knobble(struct ata_port *ap) +static inline u8 ata_dev_knobble(const struct ata_port *ap) { return ((ap->cbl == ATA_CBL_SATA) && (!ata_id_is_sata(ap->device->id))); } @@ -1684,7 +1684,7 @@ int ata_timing_compute(struct ata_device *adev, unsigned short speed, return 0; } -static struct { +static const struct { unsigned int shift; u8 base; } xfer_mode_classes[] = { @@ -2093,7 +2093,8 @@ err_out: DPRINTK("EXIT\n"); } -static void ata_pr_blacklisted(struct ata_port *ap, struct ata_device *dev) +static void ata_pr_blacklisted(const struct ata_port *ap, + const struct ata_device *dev) { printk(KERN_WARNING "ata%u: dev %u is on DMA blacklist, disabling DMA\n", ap->id, dev->devno); @@ -2131,7 +2132,7 @@ static const char * ata_dma_blacklist [] = { "_NEC DV5800A", }; -static int ata_dma_blacklisted(struct ata_port *ap, struct ata_device *dev) +static int ata_dma_blacklisted(const struct ata_device *dev) { unsigned char model_num[40]; char *s; @@ -2156,9 +2157,9 @@ static int ata_dma_blacklisted(struct ata_port *ap, struct ata_device *dev) return 0; } -static unsigned int ata_get_mode_mask(struct ata_port *ap, int shift) +static unsigned int ata_get_mode_mask(const struct ata_port *ap, int shift) { - struct ata_device *master, *slave; + const struct ata_device *master, *slave; unsigned int mask; master = &ap->device[0]; @@ -2170,14 +2171,14 @@ static unsigned int ata_get_mode_mask(struct ata_port *ap, int shift) mask = ap->udma_mask; if (ata_dev_present(master)) { mask &= (master->id[ATA_ID_UDMA_MODES] & 0xff); - if (ata_dma_blacklisted(ap, master)) { + if (ata_dma_blacklisted(master)) { mask = 0; ata_pr_blacklisted(ap, master); } } if (ata_dev_present(slave)) { mask &= (slave->id[ATA_ID_UDMA_MODES] & 0xff); - if (ata_dma_blacklisted(ap, slave)) { + if (ata_dma_blacklisted(slave)) { mask = 0; ata_pr_blacklisted(ap, slave); } @@ -2187,14 +2188,14 @@ static unsigned int ata_get_mode_mask(struct ata_port *ap, int shift) mask = ap->mwdma_mask; if (ata_dev_present(master)) { mask &= (master->id[ATA_ID_MWDMA_MODES] & 0x07); - if (ata_dma_blacklisted(ap, master)) { + if (ata_dma_blacklisted(master)) { mask = 0; ata_pr_blacklisted(ap, master); } } if (ata_dev_present(slave)) { mask &= (slave->id[ATA_ID_MWDMA_MODES] & 0x07); - if (ata_dma_blacklisted(ap, slave)) { + if (ata_dma_blacklisted(slave)) { mask = 0; ata_pr_blacklisted(ap, slave); } @@ -2258,7 +2259,7 @@ static int fgb(u32 bitmap) * Zero on success, negative on error. */ -static int ata_choose_xfer_mode(struct ata_port *ap, +static int ata_choose_xfer_mode(const struct ata_port *ap, u8 *xfer_mode_out, unsigned int *xfer_shift_out) { @@ -4144,7 +4145,7 @@ static void ata_host_remove(struct ata_port *ap, unsigned int do_unregister) static void ata_host_init(struct ata_port *ap, struct Scsi_Host *host, struct ata_host_set *host_set, - struct ata_probe_ent *ent, unsigned int port_no) + const struct ata_probe_ent *ent, unsigned int port_no) { unsigned int i; @@ -4203,7 +4204,7 @@ static void ata_host_init(struct ata_port *ap, struct Scsi_Host *host, * */ -static struct ata_port * ata_host_add(struct ata_probe_ent *ent, +static struct ata_port * ata_host_add(const struct ata_probe_ent *ent, struct ata_host_set *host_set, unsigned int port_no) { @@ -4251,7 +4252,7 @@ err_out: * */ -int ata_device_add(struct ata_probe_ent *ent) +int ata_device_add(const struct ata_probe_ent *ent) { unsigned int count = 0, i; struct device *dev = ent->dev; @@ -4470,7 +4471,7 @@ void ata_std_ports(struct ata_ioports *ioaddr) } static struct ata_probe_ent * -ata_probe_ent_alloc(struct device *dev, struct ata_port_info *port) +ata_probe_ent_alloc(struct device *dev, const struct ata_port_info *port) { struct ata_probe_ent *probe_ent; @@ -4570,7 +4571,6 @@ static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, stru if (!probe_ent) return NULL; - probe_ent->legacy_mode = 1; probe_ent->n_ports = 1; probe_ent->hard_port_no = port_num; @@ -4783,7 +4783,7 @@ void ata_pci_remove_one (struct pci_dev *pdev) } /* move to PCI subsystem */ -int pci_test_config_bits(struct pci_dev *pdev, struct pci_bits *bits) +int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) { unsigned long tmp = 0; diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 90bf22204668..58858886d751 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -44,9 +44,9 @@ #include "libata.h" -typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc, u8 *scsicmd); +typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc, const u8 *scsicmd); static struct ata_device * -ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev); +ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev); static void ata_scsi_invalid_field(struct scsi_cmnd *cmd, @@ -418,7 +418,7 @@ int ata_scsi_error(struct Scsi_Host *host) */ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc, - u8 *scsicmd) + const u8 *scsicmd) { struct ata_taskfile *tf = &qc->tf; @@ -485,7 +485,7 @@ invalid_fld: * Zero on success, non-zero on error. */ -static unsigned int ata_scsi_flush_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) +static unsigned int ata_scsi_flush_xlat(struct ata_queued_cmd *qc, const u8 *scsicmd) { struct ata_taskfile *tf = &qc->tf; @@ -512,7 +512,7 @@ static unsigned int ata_scsi_flush_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) * @plen: the transfer length */ -static void scsi_6_lba_len(u8 *scsicmd, u64 *plba, u32 *plen) +static void scsi_6_lba_len(const u8 *scsicmd, u64 *plba, u32 *plen) { u64 lba = 0; u32 len = 0; @@ -539,7 +539,7 @@ static void scsi_6_lba_len(u8 *scsicmd, u64 *plba, u32 *plen) * @plen: the transfer length */ -static void scsi_10_lba_len(u8 *scsicmd, u64 *plba, u32 *plen) +static void scsi_10_lba_len(const u8 *scsicmd, u64 *plba, u32 *plen) { u64 lba = 0; u32 len = 0; @@ -569,7 +569,7 @@ static void scsi_10_lba_len(u8 *scsicmd, u64 *plba, u32 *plen) * @plen: the transfer length */ -static void scsi_16_lba_len(u8 *scsicmd, u64 *plba, u32 *plen) +static void scsi_16_lba_len(const u8 *scsicmd, u64 *plba, u32 *plen) { u64 lba = 0; u32 len = 0; @@ -608,7 +608,7 @@ static void scsi_16_lba_len(u8 *scsicmd, u64 *plba, u32 *plen) * Zero on success, non-zero on error. */ -static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) +static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc, const u8 *scsicmd) { struct ata_taskfile *tf = &qc->tf; struct ata_device *dev = qc->dev; @@ -734,7 +734,7 @@ nothing_to_do: * Zero on success, non-zero on error. */ -static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) +static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicmd) { struct ata_taskfile *tf = &qc->tf; struct ata_device *dev = qc->dev; @@ -1688,7 +1688,7 @@ static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat) * Zero on success, non-zero on failure. */ -static unsigned int atapi_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) +static unsigned int atapi_xlat(struct ata_queued_cmd *qc, const u8 *scsicmd) { struct scsi_cmnd *cmd = qc->scsicmd; struct ata_device *dev = qc->dev; @@ -1757,7 +1757,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc, u8 *scsicmd) */ static struct ata_device * -ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev) +ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev) { struct ata_device *dev; @@ -1914,7 +1914,7 @@ void ata_scsi_simulate(u16 *id, void (*done)(struct scsi_cmnd *)) { struct ata_scsi_args args; - u8 *scsicmd = cmd->cmnd; + const u8 *scsicmd = cmd->cmnd; args.id = id; args.cmd = cmd; diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 67d752ca8ae2..3d60190584ba 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -48,7 +48,7 @@ extern int ata_qc_issue(struct ata_queued_cmd *qc); extern int ata_check_atapi_dma(struct ata_queued_cmd *qc); extern void ata_dev_select(struct ata_port *ap, unsigned int device, unsigned int wait, unsigned int can_sleep); -extern void ata_tf_to_host_nolock(struct ata_port *ap, struct ata_taskfile *tf); +extern void ata_tf_to_host_nolock(struct ata_port *ap, const struct ata_taskfile *tf); extern void swap_buf_le16(u16 *buf, unsigned int buf_words); diff --git a/drivers/scsi/pdc_adma.c b/drivers/scsi/pdc_adma.c index 53b8db4be1a9..9820f272f889 100644 --- a/drivers/scsi/pdc_adma.c +++ b/drivers/scsi/pdc_adma.c @@ -158,7 +158,7 @@ static Scsi_Host_Template adma_ata_sht = { .bios_param = ata_std_bios_param, }; -static struct ata_port_operations adma_ata_ops = { +static const struct ata_port_operations adma_ata_ops = { .port_disable = ata_port_disable, .tf_load = ata_tf_load, .tf_read = ata_tf_read, diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c index 9b6213928f7a..422e0b6f603a 100644 --- a/drivers/scsi/sata_mv.c +++ b/drivers/scsi/sata_mv.c @@ -290,7 +290,7 @@ static Scsi_Host_Template mv_sht = { .ordered_flush = 1, }; -static struct ata_port_operations mv_ops = { +static const struct ata_port_operations mv_ops = { .port_disable = ata_port_disable, .tf_load = ata_tf_load, diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index 8866530bc491..1a56d6c79ddd 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -238,7 +238,7 @@ static Scsi_Host_Template nv_sht = { .ordered_flush = 1, }; -static struct ata_port_operations nv_ops = { +static const struct ata_port_operations nv_ops = { .port_disable = ata_port_disable, .tf_load = ata_tf_load, .tf_read = ata_tf_read, diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index 9bf8cbd29901..eee93b0016df 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -87,8 +87,8 @@ static void pdc_port_stop(struct ata_port *ap); static void pdc_pata_phy_reset(struct ata_port *ap); static void pdc_sata_phy_reset(struct ata_port *ap); static void pdc_qc_prep(struct ata_queued_cmd *qc); -static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf); -static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf); +static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf); +static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf); static void pdc_irq_clear(struct ata_port *ap); static int pdc_qc_issue_prot(struct ata_queued_cmd *qc); @@ -113,7 +113,7 @@ static Scsi_Host_Template pdc_ata_sht = { .ordered_flush = 1, }; -static struct ata_port_operations pdc_sata_ops = { +static const struct ata_port_operations pdc_sata_ops = { .port_disable = ata_port_disable, .tf_load = pdc_tf_load_mmio, .tf_read = ata_tf_read, @@ -136,7 +136,7 @@ static struct ata_port_operations pdc_sata_ops = { .host_stop = ata_pci_host_stop, }; -static struct ata_port_operations pdc_pata_ops = { +static const struct ata_port_operations pdc_pata_ops = { .port_disable = ata_port_disable, .tf_load = pdc_tf_load_mmio, .tf_read = ata_tf_read, @@ -546,7 +546,7 @@ static int pdc_qc_issue_prot(struct ata_queued_cmd *qc) return ata_qc_issue_prot(qc); } -static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf) +static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf) { WARN_ON (tf->protocol == ATA_PROT_DMA || tf->protocol == ATA_PROT_NODATA); @@ -554,7 +554,7 @@ static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf) } -static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf) +static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf) { WARN_ON (tf->protocol == ATA_PROT_DMA || tf->protocol == ATA_PROT_NODATA); diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index e1c1dae27c52..250dafa6bc36 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -147,7 +147,7 @@ static Scsi_Host_Template qs_ata_sht = { .bios_param = ata_std_bios_param, }; -static struct ata_port_operations qs_ata_ops = { +static const struct ata_port_operations qs_ata_ops = { .port_disable = ata_port_disable, .tf_load = ata_tf_load, .tf_read = ata_tf_read, diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index f6f0184e1ac8..3a056173fb95 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -150,7 +150,7 @@ static Scsi_Host_Template sil_sht = { .ordered_flush = 1, }; -static struct ata_port_operations sil_ops = { +static const struct ata_port_operations sil_ops = { .port_disable = ata_port_disable, .dev_config = sil_dev_config, .tf_load = ata_tf_load, diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c index 19857814d69f..32d730bd5bb6 100644 --- a/drivers/scsi/sata_sil24.c +++ b/drivers/scsi/sata_sil24.c @@ -275,7 +275,7 @@ static Scsi_Host_Template sil24_sht = { .ordered_flush = 1, /* NCQ not supported yet */ }; -static struct ata_port_operations sil24_ops = { +static const struct ata_port_operations sil24_ops = { .port_disable = ata_port_disable, .check_status = sil24_check_status, diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index 0761a3234fcf..057f7b98b6c4 100644 --- a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c @@ -102,7 +102,7 @@ static Scsi_Host_Template sis_sht = { .ordered_flush = 1, }; -static struct ata_port_operations sis_ops = { +static const struct ata_port_operations sis_ops = { .port_disable = ata_port_disable, .tf_load = ata_tf_load, .tf_read = ata_tf_read, diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index d89d968bedac..e0f9570bc6dd 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c @@ -102,7 +102,7 @@ static void k2_sata_scr_write (struct ata_port *ap, unsigned int sc_reg, } -static void k2_sata_tf_load(struct ata_port *ap, struct ata_taskfile *tf) +static void k2_sata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) { struct ata_ioports *ioaddr = &ap->ioaddr; unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR; @@ -297,7 +297,7 @@ static Scsi_Host_Template k2_sata_sht = { }; -static struct ata_port_operations k2_sata_ops = { +static const struct ata_port_operations k2_sata_ops = { .port_disable = ata_port_disable, .tf_load = k2_sata_tf_load, .tf_read = k2_sata_tf_read, diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index d6d350a0b5e9..af08f4f650c1 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -157,8 +157,8 @@ static void pdc_20621_phy_reset (struct ata_port *ap); static int pdc_port_start(struct ata_port *ap); static void pdc_port_stop(struct ata_port *ap); static void pdc20621_qc_prep(struct ata_queued_cmd *qc); -static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf); -static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf); +static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf); +static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf); static void pdc20621_host_stop(struct ata_host_set *host_set); static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe); static int pdc20621_detect_dimm(struct ata_probe_ent *pe); @@ -196,7 +196,7 @@ static Scsi_Host_Template pdc_sata_sht = { .ordered_flush = 1, }; -static struct ata_port_operations pdc_20621_ops = { +static const struct ata_port_operations pdc_20621_ops = { .port_disable = ata_port_disable, .tf_load = pdc_tf_load_mmio, .tf_read = ata_tf_read, @@ -899,7 +899,7 @@ out: DPRINTK("EXIT\n"); } -static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf) +static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf) { WARN_ON (tf->protocol == ATA_PROT_DMA || tf->protocol == ATA_PROT_NODATA); @@ -907,7 +907,7 @@ static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf) } -static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf) +static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf) { WARN_ON (tf->protocol == ATA_PROT_DMA || tf->protocol == ATA_PROT_NODATA); diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index 9c06f2abe7f7..d68dc7d3422c 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c @@ -90,7 +90,7 @@ static Scsi_Host_Template uli_sht = { .ordered_flush = 1, }; -static struct ata_port_operations uli_ops = { +static const struct ata_port_operations uli_ops = { .port_disable = ata_port_disable, .tf_load = ata_tf_load, diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c index 565872479b9a..80e291a909a9 100644 --- a/drivers/scsi/sata_via.c +++ b/drivers/scsi/sata_via.c @@ -109,7 +109,7 @@ static Scsi_Host_Template svia_sht = { .ordered_flush = 1, }; -static struct ata_port_operations svia_sata_ops = { +static const struct ata_port_operations svia_sata_ops = { .port_disable = ata_port_disable, .tf_load = ata_tf_load, diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index 877b9fda3965..5af05fdf8544 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -115,7 +115,7 @@ static void vsc_intr_mask_update(struct ata_port *ap, u8 ctl) } -static void vsc_sata_tf_load(struct ata_port *ap, struct ata_taskfile *tf) +static void vsc_sata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) { struct ata_ioports *ioaddr = &ap->ioaddr; unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR; @@ -231,7 +231,7 @@ static Scsi_Host_Template vsc_sata_sht = { }; -static struct ata_port_operations vsc_sata_ops = { +static const struct ata_port_operations vsc_sata_ops = { .port_disable = ata_port_disable, .tf_load = vsc_sata_tf_load, .tf_read = vsc_sata_tf_read, diff --git a/include/linux/ata.h b/include/linux/ata.h index 33276d1d05d2..d2873b732bb1 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -261,7 +261,7 @@ struct ata_taskfile { ((u64) (id)[(n) + 1] << 16) | \ ((u64) (id)[(n) + 0]) ) -static inline int ata_id_current_chs_valid(u16 *id) +static inline int ata_id_current_chs_valid(const u16 *id) { /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command has not been issued to the device then the values of @@ -273,7 +273,7 @@ static inline int ata_id_current_chs_valid(u16 *id) id[56]; /* sectors in current translation */ } -static inline int atapi_cdb_len(u16 *dev_id) +static inline int atapi_cdb_len(const u16 *dev_id) { u16 tmp = dev_id[0] & 0x3; switch (tmp) { @@ -283,7 +283,7 @@ static inline int atapi_cdb_len(u16 *dev_id) } } -static inline int is_atapi_taskfile(struct ata_taskfile *tf) +static inline int is_atapi_taskfile(const struct ata_taskfile *tf) { return (tf->protocol == ATA_PROT_ATAPI) || (tf->protocol == ATA_PROT_ATAPI_NODATA) || diff --git a/include/linux/libata.h b/include/linux/libata.h index 634b5aa0a615..00a8a5738858 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -202,7 +202,7 @@ struct ata_ioports { struct ata_probe_ent { struct list_head node; struct device *dev; - struct ata_port_operations *port_ops; + const struct ata_port_operations *port_ops; Scsi_Host_Template *sht; struct ata_ioports port[ATA_MAX_PORTS]; unsigned int n_ports; @@ -225,7 +225,7 @@ struct ata_host_set { void __iomem *mmio_base; unsigned int n_ports; void *private_data; - struct ata_port_operations *ops; + const struct ata_port_operations *ops; struct ata_port * ports[0]; }; @@ -294,7 +294,7 @@ struct ata_device { struct ata_port { struct Scsi_Host *host; /* our co-allocated scsi host */ - struct ata_port_operations *ops; + const struct ata_port_operations *ops; unsigned long flags; /* ATA_FLAG_xxx */ unsigned int id; /* unique id req'd by scsi midlyr */ unsigned int port_no; /* unique port #; from zero */ @@ -341,10 +341,10 @@ struct ata_port_operations { void (*set_piomode) (struct ata_port *, struct ata_device *); void (*set_dmamode) (struct ata_port *, struct ata_device *); - void (*tf_load) (struct ata_port *ap, struct ata_taskfile *tf); + void (*tf_load) (struct ata_port *ap, const struct ata_taskfile *tf); void (*tf_read) (struct ata_port *ap, struct ata_taskfile *tf); - void (*exec_command)(struct ata_port *ap, struct ata_taskfile *tf); + void (*exec_command)(struct ata_port *ap, const struct ata_taskfile *tf); u8 (*check_status)(struct ata_port *ap); u8 (*check_altstatus)(struct ata_port *ap); u8 (*check_err)(struct ata_port *ap); @@ -385,7 +385,7 @@ struct ata_port_info { unsigned long pio_mask; unsigned long mwdma_mask; unsigned long udma_mask; - struct ata_port_operations *port_ops; + const struct ata_port_operations *port_ops; }; struct ata_timing { @@ -413,7 +413,7 @@ extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_i unsigned int n_ports); extern void ata_pci_remove_one (struct pci_dev *pdev); #endif /* CONFIG_PCI */ -extern int ata_device_add(struct ata_probe_ent *ent); +extern int ata_device_add(const struct ata_probe_ent *ent); extern void ata_host_set_remove(struct ata_host_set *host_set); extern int ata_scsi_detect(Scsi_Host_Template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); @@ -426,16 +426,16 @@ extern int ata_ratelimit(void); /* * Default driver ops implementations */ -extern void ata_tf_load(struct ata_port *ap, struct ata_taskfile *tf); +extern void ata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf); extern void ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf); -extern void ata_tf_to_fis(struct ata_taskfile *tf, u8 *fis, u8 pmp); -extern void ata_tf_from_fis(u8 *fis, struct ata_taskfile *tf); +extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 *fis, u8 pmp); +extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); extern void ata_noop_dev_select (struct ata_port *ap, unsigned int device); extern void ata_std_dev_select (struct ata_port *ap, unsigned int device); extern u8 ata_check_status(struct ata_port *ap); extern u8 ata_altstatus(struct ata_port *ap); extern u8 ata_chk_err(struct ata_port *ap); -extern void ata_exec_command(struct ata_port *ap, struct ata_taskfile *tf); +extern void ata_exec_command(struct ata_port *ap, const struct ata_taskfile *tf); extern int ata_port_start (struct ata_port *ap); extern void ata_port_stop (struct ata_port *ap); extern void ata_host_stop (struct ata_host_set *host_set); @@ -446,8 +446,8 @@ extern void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); -extern unsigned int ata_dev_classify(struct ata_taskfile *tf); -extern void ata_dev_id_string(u16 *id, unsigned char *s, +extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); +extern void ata_dev_id_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_dev_config(struct ata_port *ap, unsigned int i); extern void ata_bmdma_setup (struct ata_queued_cmd *qc); @@ -502,7 +502,7 @@ struct pci_bits { extern void ata_pci_host_stop (struct ata_host_set *host_set); extern struct ata_probe_ent * ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int portmask); -extern int pci_test_config_bits(struct pci_dev *pdev, struct pci_bits *bits); +extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); #endif /* CONFIG_PCI */ @@ -512,7 +512,7 @@ static inline unsigned int ata_tag_valid(unsigned int tag) return (tag < ATA_MAX_QUEUE) ? 1 : 0; } -static inline unsigned int ata_dev_present(struct ata_device *dev) +static inline unsigned int ata_dev_present(const struct ata_device *dev) { return ((dev->class == ATA_DEV_ATA) || (dev->class == ATA_DEV_ATAPI)); @@ -711,7 +711,7 @@ static inline unsigned int sata_dev_present(struct ata_port *ap) return ((scr_read(ap, SCR_STATUS) & 0xf) == 0x3) ? 1 : 0; } -static inline int ata_try_flush_cache(struct ata_device *dev) +static inline int ata_try_flush_cache(const struct ata_device *dev) { return ata_id_wcache_enabled(dev->id) || ata_id_has_flush(dev->id) || -- cgit v1.2.3 From 8d3b35914aa54232b27e6a2b57d84092aadc5e86 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 23 Oct 2005 12:57:18 -0700 Subject: [PATCH] inotify/idr leak fix Fix a bug which was reported and diagnosed by Stefan Jones IDR trees include a cache of idr_layer objects. There's no way to destroy this cache, so when we discard an overall idr tree we end up leaking some memory. Add and use idr_destroy() for this. v9fs and infiniband also need to use idr_destroy() to avoid leaks. Or, we make the cache global, like radix_tree_preload(). Which is probably better. Later. Cc: Eric Van Hensbergen Cc: Roland Dreier Cc: Robert Love Cc: John McCutchan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inotify.c | 1 + include/linux/idr.h | 1 + lib/idr.c | 13 +++++++++++++ 3 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/fs/inotify.c b/fs/inotify.c index a37e9fb1da58..9fbaebfdf40b 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -176,6 +176,7 @@ static inline void put_inotify_dev(struct inotify_device *dev) if (atomic_dec_and_test(&dev->count)) { atomic_dec(&dev->user->inotify_devs); free_uid(dev->user); + idr_destroy(&dev->idr); kfree(dev); } } diff --git a/include/linux/idr.h b/include/linux/idr.h index ca3b7e462576..3d5de45f961b 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -75,4 +75,5 @@ int idr_pre_get(struct idr *idp, unsigned gfp_mask); int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); void idr_remove(struct idr *idp, int id); +void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); diff --git a/lib/idr.c b/lib/idr.c index 6415d053e2bf..d4df21debc4d 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -345,6 +345,19 @@ void idr_remove(struct idr *idp, int id) } EXPORT_SYMBOL(idr_remove); +/** + * idr_destroy - release all cached layers within an idr tree + * idp: idr handle + */ +void idr_destroy(struct idr *idp) +{ + while (idp->id_free_cnt) { + struct idr_layer *p = alloc_layer(idp); + kmem_cache_free(idr_layer_cache, p); + } +} +EXPORT_SYMBOL(idr_destroy); + /** * idr_find - return pointer for given id * @idp: idr handle -- cgit v1.2.3 From add7b58e7558dd2894f3c17ca8574099fcba5c15 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 24 Oct 2005 22:11:57 +0100 Subject: [SERIAL] support the Exsys EX-4055 4S four-port card Tested by Wolfgang Denk with this device: 00:0f.0 Network controller: PLX Technology, Inc. PCI <-> IOBus Bridge (rev 01) Subsystem: Exsys EX-4055 4S(16C550) RS-232 Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- SERR- Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 25 ++++++++++++++++++++++++- include/linux/pci_ids.h | 1 + 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 0e21f583690e..5da61ebbac88 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -226,8 +226,10 @@ static int __devinit pci_plx9050_init(struct pci_dev *dev) } irq_config = 0x41; - if (dev->vendor == PCI_VENDOR_ID_PANACOM) + if (dev->vendor == PCI_VENDOR_ID_PANACOM || + dev->subsystem_vendor == PCI_SUBVENDOR_ID_EXSYS) { irq_config = 0x43; + } if ((dev->vendor == PCI_VENDOR_ID_PLX) && (dev->device == PCI_DEVICE_ID_PLX_ROMULUS)) { /* @@ -661,6 +663,15 @@ static struct pci_serial_quirk pci_serial_quirks[] = { /* * PLX */ + { + .vendor = PCI_VENDOR_ID_PLX, + .device = PCI_DEVICE_ID_PLX_9050, + .subvendor = PCI_SUBVENDOR_ID_EXSYS, + .subdevice = PCI_SUBDEVICE_ID_EXSYS_4055, + .init = pci_plx9050_init, + .setup = pci_default_setup, + .exit = __devexit_p(pci_plx9050_exit), + }, { .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050, @@ -927,6 +938,7 @@ enum pci_board_num_t { pbn_panacom, pbn_panacom2, pbn_panacom4, + pbn_exsys_4055, pbn_plx_romulus, pbn_oxsemi, pbn_intel_i960, @@ -1292,6 +1304,13 @@ static struct pciserial_board pci_boards[] __devinitdata = { .reg_shift = 7, }, + [pbn_exsys_4055] = { + .flags = FL_BASE2, + .num_ports = 4, + .base_baud = 115200, + .uart_offset = 8, + }, + /* I think this entry is broken - the first_offset looks wrong --rmk */ [pbn_plx_romulus] = { .flags = FL_BASE2, @@ -1853,6 +1872,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_SUBVENDOR_ID_CHASE_PCIRAS, PCI_SUBDEVICE_ID_CHASE_PCIRAS8, 0, 0, pbn_b2_8_460800 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, + PCI_SUBVENDOR_ID_EXSYS, + PCI_SUBDEVICE_ID_EXSYS_4055, 0, 0, + pbn_exsys_4055 }, /* * Megawolf Romulus PCI Serial Card, from Mike Hudson * (Exoray@isys.ca) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f74ed9462475..ca0691c319ab 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2696,6 +2696,7 @@ #define PCI_SUBVENDOR_ID_EXSYS 0xd84d #define PCI_SUBDEVICE_ID_EXSYS_4014 0x4014 +#define PCI_SUBDEVICE_ID_EXSYS_4055 0x4055 #define PCI_VENDOR_ID_TIGERJET 0xe159 #define PCI_DEVICE_ID_TIGERJET_300 0x0001 -- cgit v1.2.3 From 551f8f0e87becb415c522adccab524a7a05ca83a Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 24 Oct 2005 22:16:38 +0100 Subject: [SERIAL] new hp diva console port Add the new ID 0x132a and configure the new PCI Diva console port. This device supports only 1 single console UART. Signed-off-by: Andrew Morton Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 1 + include/linux/pci_ids.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 5da61ebbac88..5c3c03932d6d 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -152,6 +152,7 @@ static int __devinit pci_hp_diva_init(struct pci_dev *dev) rc = 4; break; case PCI_DEVICE_ID_HP_DIVA_POWERBAR: + case PCI_DEVICE_ID_HP_DIVA_HURRICANE: rc = 1; break; } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ca0691c319ab..71834f05504f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -723,6 +723,7 @@ #define PCI_DEVICE_ID_HP_DIVA_EVEREST 0x1282 #define PCI_DEVICE_ID_HP_DIVA_AUX 0x1290 #define PCI_DEVICE_ID_HP_DIVA_RMP3 0x1301 +#define PCI_DEVICE_ID_HP_DIVA_HURRICANE 0x132a #define PCI_DEVICE_ID_HP_CISS 0x3210 #define PCI_DEVICE_ID_HP_CISSA 0x3220 #define PCI_DEVICE_ID_HP_CISSB 0x3222 -- cgit v1.2.3 From c83c24861882758b9731e8550225cd1e52a4cd1c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 18 Oct 2005 22:07:41 -0700 Subject: [SK_BUFF] kernel-doc: fix skbuff warnings Add kernel-doc to skbuff.h, skbuff.c to eliminate kernel-doc warnings. Signed-off-by: Randy Dunlap Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/skbuff.h | 3 ++- net/core/skbuff.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8f5d9e7f8734..b756935da9c8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -171,7 +171,6 @@ enum { * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list - * @list: List we are on * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by @@ -190,6 +189,7 @@ enum { * @cloned: Head may be cloned (check refcnt to be sure) * @nohdr: Payload reference only, must not modify header * @pkt_type: Packet class + * @fclone: skbuff clone status * @ip_summed: Driver fed us an IP checksum * @priority: Packet queueing priority * @users: User count - see {datagram,tcp}.c @@ -202,6 +202,7 @@ enum { * @destructor: Destruct function * @nfmark: Can be used for communication between hooks * @nfct: Associated connection, if any + * @ipvs_property: skbuff is owned by ipvs * @nfctinfo: Relationship of this skb to the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @tc_index: Traffic control index diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 02cd4cde2112..ef9d46b91eb9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -122,6 +122,8 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask + * @fclone: allocate from fclone cache instead of head cache + * and allocate a cloned (child) skb * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. -- cgit v1.2.3 From 0e574af1be5f569a5d7f2800333b0bfb358a5e34 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:38 -0400 Subject: NFS: Cleanup initialisation of struct nfs_fattr Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 1 + fs/nfs/nfs3proc.c | 58 +++++++++++++++++++++++++------------------------- fs/nfs/nfs4proc.c | 43 +++++++++++++++++-------------------- fs/nfs/proc.c | 26 +++++++++++----------- fs/nfs/read.c | 1 + fs/nfs/write.c | 2 ++ include/linux/nfs_fs.h | 5 +++++ 7 files changed, 71 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index eb50c19fc253..b8a73045e9a0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -532,6 +532,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) my_entry.eof = 0; my_entry.fh = &fh; my_entry.fattr = &fattr; + nfs_fattr_init(&fattr); desc->entry = &my_entry; while(!desc->entry->eof) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e4a1cd48195e..4b1b48b139f6 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -78,7 +78,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("%s: call fsinfo\n", __FUNCTION__); - info->fattr->valid = 0; + nfs_fattr_init(info->fattr); status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { @@ -98,7 +98,7 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("NFS call getattr\n"); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0); dprintk("NFS reply getattr: %d\n", status); @@ -117,7 +117,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, int status; dprintk("NFS call setattr\n"); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); @@ -143,8 +143,8 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, int status; dprintk("NFS call lookup %s\n", name->name); - dir_attr.valid = 0; - fattr->valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0); if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, @@ -174,7 +174,6 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) int status; dprintk("NFS call access\n"); - fattr.valid = 0; if (mode & MAY_READ) arg.access |= NFS3_ACCESS_READ; @@ -189,6 +188,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) if (mode & MAY_EXEC) arg.access |= NFS3_ACCESS_EXECUTE; } + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); if (status == 0) { @@ -217,7 +217,7 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page, int status; dprintk("NFS call readlink\n"); - fattr.valid = 0; + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, &args, &fattr, 0); nfs_refresh_inode(inode, &fattr); @@ -240,7 +240,7 @@ static int nfs3_proc_read(struct nfs_read_data *rdata) dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -263,7 +263,7 @@ static int nfs3_proc_write(struct nfs_write_data *wdata) dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -285,7 +285,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata) dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -329,8 +329,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, sattr->ia_mode &= ~current->fs->umask; again: - dir_attr.valid = 0; - fattr.valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); @@ -401,7 +401,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name) int status; dprintk("NFS call remove %s\n", name->name); - dir_attr.valid = 0; + nfs_fattr_init(&dir_attr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply remove: %d\n", status); @@ -422,7 +422,7 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr ptr->arg.fh = NFS_FH(dir->d_inode); ptr->arg.name = name->name; ptr->arg.len = name->len; - ptr->res.valid = 0; + nfs_fattr_init(&ptr->res); msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; msg->rpc_argp = &ptr->arg; msg->rpc_resp = &ptr->res; @@ -465,8 +465,8 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - old_dir_attr.valid = 0; - new_dir_attr.valid = 0; + nfs_fattr_init(&old_dir_attr); + nfs_fattr_init(&new_dir_attr); status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); nfs_refresh_inode(old_dir, &old_dir_attr); nfs_refresh_inode(new_dir, &new_dir_attr); @@ -491,8 +491,8 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) int status; dprintk("NFS call link %s\n", name->name); - dir_attr.valid = 0; - fattr.valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); nfs_refresh_inode(inode, &fattr); @@ -524,8 +524,8 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, if (path->len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; dprintk("NFS call symlink %s -> %s\n", name->name, path->name); - dir_attr.valid = 0; - fattr->valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply symlink: %d\n", status); @@ -552,11 +552,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); - dir_attr.valid = 0; - fattr.valid = 0; sattr->ia_mode &= ~current->fs->umask; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); if (status != 0) @@ -582,7 +582,7 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name) int status; dprintk("NFS call rmdir %s\n", name->name); - dir_attr.valid = 0; + nfs_fattr_init(&dir_attr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply rmdir: %d\n", status); @@ -634,7 +634,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dprintk("NFS call readdir%s %d\n", plus? "plus" : "", (unsigned int) cookie); - dir_attr.valid = 0; + nfs_fattr_init(&dir_attr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply readdir: %d\n", status); @@ -676,8 +676,8 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, sattr->ia_mode &= ~current->fs->umask; - dir_attr.valid = 0; - fattr.valid = 0; + nfs_fattr_init(&dir_attr); + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); if (status != 0) @@ -698,7 +698,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("NFS call fsstat\n"); - stat->fattr->valid = 0; + nfs_fattr_init(stat->fattr); status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); dprintk("NFS reply statfs: %d\n", status); return status; @@ -711,7 +711,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("NFS call fsinfo\n"); - info->fattr->valid = 0; + nfs_fattr_init(info->fattr); status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; @@ -724,7 +724,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("NFS call pathconf\n"); - info->fattr->valid = 0; + nfs_fattr_init(info->fattr); status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); dprintk("NFS reply pathconf: %d\n", status); return status; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9c1da34036aa..2a759e8e387c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -494,9 +494,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(dir); struct nfs_delegation *delegation = NFS_I(inode)->delegation; - struct nfs_fattr f_attr = { - .valid = 0, - }; + struct nfs_fattr f_attr; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .open_flags = state->state, @@ -522,6 +520,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st status = -ENOMEM; if (o_arg.seqid == NULL) goto out; + nfs_fattr_init(&f_attr); status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_nodeleg; @@ -692,9 +691,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st struct nfs4_client *clp = server->nfs4_state; struct inode *inode = NULL; int status; - struct nfs_fattr f_attr = { - .valid = 0, - }; + struct nfs_fattr f_attr; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .open_flags = flags, @@ -726,6 +723,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (o_arg.seqid == NULL) return -ENOMEM; + nfs_fattr_init(&f_attr); status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_err; @@ -824,7 +822,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, }; int status; - fattr->valid = 0; + nfs_fattr_init(fattr); if (state != NULL) { msg.rpc_cred = state->owner->so_cred; @@ -1107,13 +1105,12 @@ static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fh static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { - struct nfs_fattr * fattr = info->fattr; struct nfs4_lookup_root_arg args = { .bitmask = nfs4_fattr_bitmap, }; struct nfs4_lookup_res res = { .server = server, - .fattr = fattr, + .fattr = info->fattr, .fh = fhandle, }; struct rpc_message msg = { @@ -1121,7 +1118,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - fattr->valid = 0; + nfs_fattr_init(info->fattr); return rpc_call_sync(server->client, &msg, 0); } @@ -1184,7 +1181,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, q.len = p - q.name; do { - fattr->valid = 0; + nfs_fattr_init(fattr); status = nfs4_handle_exception(server, rpc_call_sync(server->client, &msg, 0), &exception); @@ -1221,7 +1218,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; - fattr->valid = 0; + nfs_fattr_init(fattr); return rpc_call_sync(server->client, &msg, 0); } @@ -1263,7 +1260,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct nfs4_state *state; int status; - fattr->valid = 0; + nfs_fattr_init(fattr); cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); if (IS_ERR(cred)) @@ -1309,7 +1306,7 @@ static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, .rpc_resp = &res, }; - fattr->valid = 0; + nfs_fattr_init(fattr); dprintk("NFS call lookup %s\n", name->name); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); @@ -1458,7 +1455,7 @@ static int _nfs4_proc_read(struct nfs_read_data *rdata) dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(server->client, &msg, flags); if (!status) renew_lease(server, timestamp); @@ -1495,7 +1492,7 @@ static int _nfs4_proc_write(struct nfs_write_data *wdata) dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(server->client, &msg, rpcflags); dprintk("NFS reply write: %d\n", status); return status; @@ -1529,7 +1526,7 @@ static int _nfs4_proc_commit(struct nfs_write_data *cdata) dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply commit: %d\n", status); return status; @@ -1769,7 +1766,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, if (path->len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; arg.u.symlink = path; - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) @@ -1818,7 +1815,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, }; int status; - fattr.valid = 0; + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) { @@ -1916,7 +1913,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, int status; int mode = sattr->ia_mode; - fattr.valid = 0; + nfs_fattr_init(&fattr); BUG_ON(!(sattr->ia_valid & ATTR_MODE)); BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); @@ -1969,7 +1966,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = fsstat, }; - fsstat->fattr->valid = 0; + nfs_fattr_init(fsstat->fattr); return rpc_call_sync(server->client, &msg, 0); } @@ -2016,7 +2013,7 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { - fsinfo->fattr->valid = 0; + nfs_fattr_init(fsinfo->fattr); return nfs4_do_fsinfo(server, fhandle, fsinfo); } @@ -2039,7 +2036,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle return 0; } - pathconf->fattr->valid = 0; + nfs_fattr_init(pathconf->fattr); return rpc_call_sync(server->client, &msg, 0); } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 8fef86523d7f..5ef28f08f424 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -61,7 +61,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("%s: call getattr\n", __FUNCTION__); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) @@ -93,7 +93,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("NFS call getattr\n"); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0); dprintk("NFS reply getattr: %d\n", status); @@ -112,7 +112,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, int status; dprintk("NFS call setattr\n"); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); @@ -136,7 +136,7 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name, int status; dprintk("NFS call lookup %s\n", name->name); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0); dprintk("NFS reply lookup: %d\n", status); return status; @@ -174,7 +174,7 @@ static int nfs_proc_read(struct nfs_read_data *rdata) dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) { nfs_refresh_inode(inode, fattr); @@ -203,7 +203,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata) dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); - fattr->valid = 0; + nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) { nfs_refresh_inode(inode, fattr); @@ -232,7 +232,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, }; int status; - fattr.valid = 0; + nfs_fattr_init(&fattr); dprintk("NFS call create %s\n", dentry->d_name.name); status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); if (status == 0) @@ -273,12 +273,12 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */ } - fattr.valid = 0; + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); if (status == -EINVAL && S_ISFIFO(mode)) { sattr->ia_mode = mode; - fattr.valid = 0; + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); } if (status == 0) @@ -391,7 +391,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, if (path->len > NFS2_MAXPATHLEN) return -ENAMETOOLONG; dprintk("NFS call symlink %s -> %s\n", name->name, path->name); - fattr->valid = 0; + nfs_fattr_init(fattr); fhandle->size = 0; status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); dprintk("NFS reply symlink: %d\n", status); @@ -416,7 +416,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); - fattr.valid = 0; + nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -484,7 +484,7 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("NFS call statfs\n"); - stat->fattr->valid = 0; + nfs_fattr_init(stat->fattr); status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); dprintk("NFS reply statfs: %d\n", status); if (status) @@ -507,7 +507,7 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, int status; dprintk("NFS call fsinfo\n"); - info->fattr->valid = 0; + nfs_fattr_init(info->fattr); status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); dprintk("NFS reply fsinfo: %d\n", status); if (status) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9758ebd49905..43b03b19731b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -215,6 +215,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.fattr = &data->fattr; data->res.count = count; data->res.eof = 0; + nfs_fattr_init(&data->fattr); NFS_PROTO(inode)->read_setup(data); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5130eda231d7..819a65f5071f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -870,6 +870,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->res.fattr = &data->fattr; data->res.count = count; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); NFS_PROTO(inode)->write_setup(data, how); @@ -1237,6 +1238,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); NFS_PROTO(inode)->commit_setup(data, how); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7bac2785c6e4..8120fd68dee5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -316,6 +316,11 @@ extern void nfs_file_clear_open_context(struct file *filp); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern u32 root_nfs_parse_addr(char *name); /*__init*/ +static inline void nfs_fattr_init(struct nfs_fattr *fattr) +{ + fattr->valid = 0; +} + /* * linux/fs/nfs/file.c */ -- cgit v1.2.3 From 913a70fc170530f7e1ff0693595155457cc6d0ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:38 -0400 Subject: NFS: Convert cache_change_attribute into a jiffy-based value Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 ++++---- include/linux/nfs_fs.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 65d5ab45ddc5..449df8c8aa31 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1135,7 +1135,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). */ - if (verifier == nfsi->cache_change_attribute) + if (time_after_eq(verifier, nfsi->cache_change_attribute)) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); @@ -1202,7 +1202,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); /* This ensures we revalidate child dentries */ - nfsi->cache_change_attribute++; + nfsi->cache_change_attribute = jiffies; } spin_unlock(&inode->i_lock); @@ -1242,7 +1242,7 @@ void nfs_end_data_update(struct inode *inode) nfsi->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); } - nfsi->cache_change_attribute ++; + nfsi->cache_change_attribute = jiffies; atomic_dec(&nfsi->data_updates); } @@ -1391,7 +1391,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign /* Do we perhaps have any outstanding writes? */ if (nfsi->npages == 0) { /* No, but did we race with nfs_end_data_update()? */ - if (verifier == nfsi->cache_change_attribute) { + if (time_after_eq(verifier, nfsi->cache_change_attribute)) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_DATA; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8120fd68dee5..abf890f5fbfb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -280,7 +280,7 @@ static inline long nfs_save_change_attribute(struct inode *inode) static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) { return !nfs_caches_unstable(inode) - && chattr == NFS_I(inode)->cache_change_attribute; + && time_after_eq(chattr, NFS_I(inode)->cache_change_attribute); } /* -- cgit v1.2.3 From 33801147a8fda6b04d7e9afe1d42f1c01d3d6837 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:39 -0400 Subject: NFS: Optimise inode attribute cache updates Allow nfs_refresh_inode() also to update attributes on the inode if the RPC call was sent after the last call to nfs_update_inode(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 54 ++++++++++++++++++++++++++++++++++++------------- fs/nfs/nfs2xdr.c | 1 - fs/nfs/nfs3xdr.c | 1 - fs/nfs/nfs4xdr.c | 4 +--- include/linux/nfs_fs.h | 2 ++ include/linux/nfs_xdr.h | 2 +- 6 files changed, 44 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 449df8c8aa31..b7d4f8f13ac2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -785,7 +785,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) else init_special_inode(inode, inode->i_mode, fattr->rdev); - nfsi->read_cache_jiffies = fattr->timestamp; + nfsi->read_cache_jiffies = fattr->time_start; + nfsi->last_updated = jiffies; inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; inode->i_ctime = fattr->ctime; @@ -1120,14 +1121,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; } + spin_lock(&inode->i_lock); status = nfs_update_inode(inode, &fattr, verifier); if (status) { + spin_unlock(&inode->i_lock); dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); goto out; } - spin_lock(&inode->i_lock); cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; @@ -1247,7 +1249,7 @@ void nfs_end_data_update(struct inode *inode) } /** - * nfs_refresh_inode - verify consistency of the inode attribute cache + * nfs_check_inode_attributes - verify consistency of the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes * @@ -1255,13 +1257,12 @@ void nfs_end_data_update(struct inode *inode) * so that fattr carries weak cache consistency data, then it may * also update the ctime/mtime/change_attribute. */ -int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; int data_unstable; - spin_lock(&inode->i_lock); /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); @@ -1325,11 +1326,40 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; - nfsi->read_cache_jiffies = fattr->timestamp; - spin_unlock(&inode->i_lock); + nfsi->read_cache_jiffies = fattr->time_start; return 0; } +/** + * nfs_refresh_inode - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * Check that an RPC call that returned attributes has not overlapped with + * other recent updates of the inode metadata, then decide whether it is + * safe to do a full update of the inode attributes, or whether just to + * call nfs_check_inode_attributes. + */ +int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int status; + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + spin_lock(&inode->i_lock); + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + if (nfs_verify_change_attribute(inode, fattr->time_start)) + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + if (time_after(fattr->time_start, nfsi->last_updated)) + status = nfs_update_inode(inode, fattr, fattr->time_start); + else + status = nfs_check_inode_attributes(inode, fattr); + + spin_unlock(&inode->i_lock); + return status; +} + /* * Many nfs protocol calls return the new file attributes after * an operation. Here we update the inode to reflect the state @@ -1365,20 +1395,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign goto out_err; } - spin_lock(&inode->i_lock); - /* * Make sure the inode's type hasn't changed. */ - if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { - spin_unlock(&inode->i_lock); + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; - } /* * Update the read time so we don't revalidate too often. */ - nfsi->read_cache_jiffies = fattr->timestamp; + nfsi->read_cache_jiffies = fattr->time_start; + nfsi->last_updated = jiffies; /* Are we racing with known updates of the metadata on the server? */ data_unstable = ! (nfs_verify_change_attribute(inode, verifier) || @@ -1467,7 +1494,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign if (!nfs_have_delegation(inode, FMODE_READ)) nfsi->cache_validity |= invalid; - spin_unlock(&inode->i_lock); return 0; out_changed: /* diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index d91b69044a4d..59049e864ca7 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -143,7 +143,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; fattr->rdev = 0; } - fattr->timestamp = jiffies; return p; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index db4a904810a4..0498bd36602c 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -174,7 +174,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) /* Update the mode bits */ fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); - fattr->timestamp = jiffies; return p; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cd762648fa9a..8b21de8a06fa 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2799,10 +2799,8 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons goto xdr_error; if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) goto xdr_error; - if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) { + if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; - fattr->timestamp = jiffies; - } xdr_error: if (status != 0) printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index abf890f5fbfb..faeaad666ca8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -141,6 +141,7 @@ struct nfs_inode { unsigned long attrtimeo_timestamp; __u64 change_attr; /* v4 only */ + unsigned long last_updated; /* "Generation counter" for the attribute cache. This is * bumped whenever we update the metadata on the * server. @@ -319,6 +320,7 @@ extern u32 root_nfs_parse_addr(char *name); /*__init*/ static inline void nfs_fattr_init(struct nfs_fattr *fattr) { fattr->valid = 0; + fattr->time_start = jiffies; } /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 60086dac11d5..aeaee7e7c51d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -41,7 +41,7 @@ struct nfs_fattr { __u32 bitmap[2]; /* NFSv4 returned attribute bitmap */ __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ - unsigned long timestamp; + unsigned long time_start; }; #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ -- cgit v1.2.3 From decf491f3076190262d4c649bed877650623903a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:39 -0400 Subject: NFS: Don't let nfs_end_data_update() clobber attribute update information Since we almost always call nfs_end_data_update() after we called nfs_refresh_inode(), we now end up marking the inode metadata as needing revalidation immediately after having updated it. This patch rearranges things so that we mark the inode as needing revalidation _before_ we call nfs_refresh_inode() on those operations that need it. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 38 ++++++++++++++++++++++++++++++++------ fs/nfs/nfs3proc.c | 30 +++++++++++++++--------------- fs/nfs/nfs4proc.c | 3 +++ fs/nfs/proc.c | 16 +++++++++++++--- include/linux/nfs_fs.h | 15 ++++++++++----- 5 files changed, 73 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b7d4f8f13ac2..6b3156e15350 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1236,13 +1236,12 @@ void nfs_end_data_update(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); if (!nfs_have_delegation(inode, FMODE_READ)) { - /* Mark the attribute cache for revalidation */ - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + /* Directories and symlinks: invalidate page cache */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { + spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); + } } nfsi->cache_change_attribute = jiffies; atomic_dec(&nfsi->data_updates); @@ -1360,6 +1359,33 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) return status; } +/** + * nfs_post_op_update_inode - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. + */ +int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int status = 0; + + spin_lock(&inode->i_lock); + if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + goto out; + } + status = nfs_update_inode(inode, fattr, fattr->time_start); + if (time_after_eq(fattr->time_start, nfsi->cache_change_attribute)) + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); + nfsi->cache_change_attribute = jiffies; +out: + spin_unlock(&inode->i_lock); + return status; +} + /* * Many nfs protocol calls return the new file attributes after * an operation. Here we update the inode to reflect the state diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4b1b48b139f6..92c870d19ccd 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -266,7 +266,7 @@ static int nfs3_proc_write(struct nfs_write_data *wdata) nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) - nfs_refresh_inode(inode, fattr); + nfs_post_op_update_inode(inode, fattr); dprintk("NFS reply write: %d\n", status); return status < 0? status : wdata->res.count; } @@ -288,7 +288,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata) nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) - nfs_refresh_inode(inode, fattr); + nfs_post_op_update_inode(inode, fattr); dprintk("NFS reply commit: %d\n", status); return status; } @@ -332,7 +332,7 @@ again: nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); /* If the server doesn't support the exclusive creation semantics, * try again with simple 'guarded' mode. */ @@ -403,7 +403,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name) dprintk("NFS call remove %s\n", name->name); nfs_fattr_init(&dir_attr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply remove: %d\n", status); return status; } @@ -439,7 +439,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task) return 1; if (msg->rpc_argp) { dir_attr = (struct nfs_fattr*)msg->rpc_resp; - nfs_refresh_inode(dir->d_inode, dir_attr); + nfs_post_op_update_inode(dir->d_inode, dir_attr); kfree(msg->rpc_argp); } return 0; @@ -468,8 +468,8 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, nfs_fattr_init(&old_dir_attr); nfs_fattr_init(&new_dir_attr); status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); - nfs_refresh_inode(old_dir, &old_dir_attr); - nfs_refresh_inode(new_dir, &new_dir_attr); + nfs_post_op_update_inode(old_dir, &old_dir_attr); + nfs_post_op_update_inode(new_dir, &new_dir_attr); dprintk("NFS reply rename: %d\n", status); return status; } @@ -494,8 +494,8 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); - nfs_refresh_inode(inode, &fattr); + nfs_post_op_update_inode(dir, &dir_attr); + nfs_post_op_update_inode(inode, &fattr); dprintk("NFS reply link: %d\n", status); return status; } @@ -527,7 +527,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply symlink: %d\n", status); return status; } @@ -558,7 +558,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -584,7 +584,7 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name) dprintk("NFS call rmdir %s\n", name->name); nfs_fattr_init(&dir_attr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply rmdir: %d\n", status); return status; } @@ -679,7 +679,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); - nfs_refresh_inode(dir, &dir_attr); + nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; status = nfs_instantiate(dentry, &fh, &fattr); @@ -775,7 +775,7 @@ nfs3_write_done(struct rpc_task *task) return; data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs_refresh_inode(data->inode, data->res.fattr); + nfs_post_op_update_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } @@ -819,7 +819,7 @@ nfs3_commit_done(struct rpc_task *task) return; data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs_refresh_inode(data->inode, data->res.fattr); + nfs_post_op_update_inode(data->inode, data->res.fattr); nfs_commit_done(task); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2a759e8e387c..3274f2d354f3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -187,8 +187,11 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf { struct nfs_inode *nfsi = NFS_I(inode); + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (cinfo->before == nfsi->change_attr && cinfo->atomic) nfsi->change_attr = cinfo->after; + spin_unlock(&inode->i_lock); } /* Helper for asynchronous RPC calls */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 5ef28f08f424..a48a003242c0 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -206,7 +206,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata) nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) { - nfs_refresh_inode(inode, fattr); + nfs_post_op_update_inode(inode, fattr); wdata->res.count = wdata->args.count; wdata->verf.committed = NFS_FILE_SYNC; } @@ -275,6 +275,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + nfs_mark_for_revalidate(dir); if (status == -EINVAL && S_ISFIFO(mode)) { sattr->ia_mode = mode; @@ -305,6 +306,7 @@ nfs_proc_remove(struct inode *dir, struct qstr *name) dprintk("NFS call remove %s\n", name->name); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_mark_for_revalidate(dir); dprintk("NFS reply remove: %d\n", status); return status; @@ -331,8 +333,10 @@ nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task) { struct rpc_message *msg = &task->tk_msg; - if (msg->rpc_argp) + if (msg->rpc_argp) { + nfs_mark_for_revalidate(dir->d_inode); kfree(msg->rpc_argp); + } return 0; } @@ -352,6 +356,8 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0); + nfs_mark_for_revalidate(old_dir); + nfs_mark_for_revalidate(new_dir); dprintk("NFS reply rename: %d\n", status); return status; } @@ -369,6 +375,7 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) dprintk("NFS call link %s\n", name->name); status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0); + nfs_mark_for_revalidate(dir); dprintk("NFS reply link: %d\n", status); return status; } @@ -394,6 +401,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, nfs_fattr_init(fattr); fhandle->size = 0; status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); + nfs_mark_for_revalidate(dir); dprintk("NFS reply symlink: %d\n", status); return status; } @@ -418,6 +426,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) dprintk("NFS call mkdir %s\n", dentry->d_name.name); nfs_fattr_init(&fattr); status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); + nfs_mark_for_revalidate(dir); if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); dprintk("NFS reply mkdir: %d\n", status); @@ -436,6 +445,7 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name) dprintk("NFS call rmdir %s\n", name->name); status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0); + nfs_mark_for_revalidate(dir); dprintk("NFS reply rmdir: %d\n", status); return status; } @@ -579,7 +589,7 @@ nfs_write_done(struct rpc_task *task) struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; if (task->tk_status >= 0) - nfs_refresh_inode(data->inode, data->res.fattr); + nfs_post_op_update_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index faeaad666ca8..325fe7ae49bb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -241,13 +241,17 @@ static inline int nfs_caches_unstable(struct inode *inode) return atomic_read(&NFS_I(inode)->data_updates) != 0; } +static inline void nfs_mark_for_revalidate(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); +} + static inline void NFS_CACHEINV(struct inode *inode) { - if (!nfs_caches_unstable(inode)) { - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; - spin_unlock(&inode->i_lock); - } + if (!nfs_caches_unstable(inode)) + nfs_mark_for_revalidate(inode); } static inline int nfs_server_capable(struct inode *inode, int cap) @@ -291,6 +295,7 @@ extern void nfs_zap_caches(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); +extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); -- cgit v1.2.3 From 56ae19f38f10aad4f27f7e12138a29b295dff07a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:40 -0400 Subject: NFSv4: Add directory post-op attributes to the CREATE operations. Since the directory attributes change every time we CREATE a file, we might as well pick up the new directory attributes in the same compound. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 28 ++++++++++++++++---- fs/nfs/nfs4xdr.c | 69 ++++++++++++++++++++++++++++++++++++++++++------- include/linux/nfs_xdr.h | 2 ++ 3 files changed, 84 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3274f2d354f3..f363fd6c7f4d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -443,7 +443,11 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, stru nfs_increment_open_seqid(status, o_arg->seqid); if (status != 0) goto out; - update_changeattr(dir, &o_res->cinfo); + if (o_arg->open_flags & O_CREAT) { + update_changeattr(dir, &o_res->cinfo); + nfs_post_op_update_inode(dir, o_res->dir_attr); + } else + nfs_refresh_inode(dir, o_res->dir_attr); if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(server->client, &o_res->fh, sp, &o_res->stateid, o_arg->seqid); @@ -497,7 +501,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(dir); struct nfs_delegation *delegation = NFS_I(inode)->delegation; - struct nfs_fattr f_attr; + struct nfs_fattr f_attr, dir_attr; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .open_flags = state->state, @@ -507,6 +511,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st }; struct nfs_openres o_res = { .f_attr = &f_attr, + .dir_attr = &dir_attr, .server = server, }; int status = 0; @@ -524,6 +529,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st if (o_arg.seqid == NULL) goto out; nfs_fattr_init(&f_attr); + nfs_fattr_init(&dir_attr); status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_nodeleg; @@ -694,7 +700,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st struct nfs4_client *clp = server->nfs4_state; struct inode *inode = NULL; int status; - struct nfs_fattr f_attr; + struct nfs_fattr f_attr, dir_attr; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .open_flags = flags, @@ -705,6 +711,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st }; struct nfs_openres o_res = { .f_attr = &f_attr, + .dir_attr = &dir_attr, .server = server, }; @@ -727,6 +734,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st if (o_arg.seqid == NULL) return -ENOMEM; nfs_fattr_init(&f_attr); + nfs_fattr_init(&dir_attr); status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_err; @@ -1746,6 +1754,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); + struct nfs_fattr dir_fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, @@ -1758,6 +1767,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, .server = server, .fh = fhandle, .fattr = fattr, + .dir_fattr = &dir_fattr, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], @@ -1770,10 +1780,12 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, return -ENAMETOOLONG; arg.u.symlink = path; nfs_fattr_init(fattr); + nfs_fattr_init(&dir_fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) update_changeattr(dir, &res.dir_cinfo); + nfs_post_op_update_inode(dir, res.dir_fattr); return status; } @@ -1797,7 +1809,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, { struct nfs_server *server = NFS_SERVER(dir); struct nfs_fh fhandle; - struct nfs_fattr fattr; + struct nfs_fattr fattr, dir_fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, @@ -1810,6 +1822,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, .server = server, .fh = &fhandle, .fattr = &fattr, + .dir_fattr = &dir_fattr, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], @@ -1819,10 +1832,12 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, int status; nfs_fattr_init(&fattr); + nfs_fattr_init(&dir_fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) { update_changeattr(dir, &res.dir_cinfo); + nfs_post_op_update_inode(dir, res.dir_fattr); status = nfs_instantiate(dentry, &fhandle, &fattr); } return status; @@ -1895,7 +1910,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, { struct nfs_server *server = NFS_SERVER(dir); struct nfs_fh fh; - struct nfs_fattr fattr; + struct nfs_fattr fattr, dir_fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, @@ -1907,6 +1922,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, .server = server, .fh = &fh, .fattr = &fattr, + .dir_fattr = &dir_fattr, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], @@ -1917,6 +1933,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, int mode = sattr->ia_mode; nfs_fattr_init(&fattr); + nfs_fattr_init(&dir_fattr); BUG_ON(!(sattr->ia_valid & ATTR_MODE)); BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); @@ -1938,6 +1955,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) { update_changeattr(dir, &res.dir_cinfo); + nfs_post_op_update_inode(dir, res.dir_fattr); status = nfs_instantiate(dentry, &fh, &fattr); } return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8b21de8a06fa..7f91d613d31a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -95,6 +95,8 @@ static int nfs_stat_to_errno(int); #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) +#define encode_restorefh_maxsz (op_encode_hdr_maxsz) +#define decode_restorefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) @@ -336,14 +338,20 @@ static int nfs_stat_to_errno(int); decode_getfh_maxsz) #define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ + encode_savefh_maxsz + \ encode_create_maxsz + \ + encode_getfh_maxsz + \ encode_getattr_maxsz + \ - encode_getfh_maxsz) + encode_restorefh_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ + decode_savefh_maxsz + \ decode_create_maxsz + \ + decode_getfh_maxsz + \ decode_getattr_maxsz + \ - decode_getfh_maxsz) + decode_restorefh_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) @@ -1112,6 +1120,17 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client return 0; } +static int +encode_restorefh(struct xdr_stream *xdr) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(OP_RESTOREFH); + + return 0; +} + static int encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) { @@ -1358,7 +1377,7 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 4, + .nops = 7, }; int status; @@ -1366,10 +1385,16 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct n encode_compound_hdr(&xdr, &hdr); if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) goto out; + if ((status = encode_savefh(&xdr)) != 0) + goto out; if ((status = encode_create(&xdr, args)) != 0) goto out; if ((status = encode_getfh(&xdr)) != 0) goto out; + if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + goto out; + if ((status = encode_restorefh(&xdr)) != 0) + goto out; status = encode_getfattr(&xdr, args->bitmask); out: return status; @@ -1429,7 +1454,7 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 4, + .nops = 7, }; int status; @@ -1439,6 +1464,9 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_savefh(&xdr); if (status) goto out; status = encode_open(&xdr, args); @@ -1448,6 +1476,12 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena if (status) goto out; status = encode_getfattr(&xdr, args->bitmask); + if (status) + goto out; + status = encode_restorefh(&xdr); + if (status) + goto out; + status = encode_getfattr(&xdr, args->bitmask); out: return status; } @@ -3218,6 +3252,12 @@ static int decode_renew(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_RENEW); } +static int +decode_restorefh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_RESTOREFH); +} + static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, size_t *acl_len) { @@ -3510,13 +3550,17 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_ goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; + if ((status = decode_savefh(&xdr)) != 0) + goto out; if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0) goto out; if ((status = decode_getfh(&xdr, res->fh)) != 0) goto out; - status = decode_getfattr(&xdr, res->fattr, res->server); - if (status == NFS4ERR_DELAY) - status = 0; + if (decode_getfattr(&xdr, res->fattr, res->server) != 0) + goto out; + if ((status = decode_restorefh(&xdr)) != 0) + goto out; + decode_getfattr(&xdr, res->dir_fattr, res->server); out: return status; } @@ -3654,15 +3698,20 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_ope status = decode_putfh(&xdr); if (status) goto out; + status = decode_savefh(&xdr); + if (status) + goto out; status = decode_open(&xdr, res); if (status) goto out; status = decode_getfh(&xdr, &res->fh); if (status) goto out; - status = decode_getfattr(&xdr, res->f_attr, res->server); - if (status == NFS4ERR_DELAY) - status = 0; + if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) + goto out; + if ((status = decode_restorefh(&xdr)) != 0) + goto out; + decode_getfattr(&xdr, res->dir_attr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index aeaee7e7c51d..6485b8b41b83 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -124,6 +124,7 @@ struct nfs_openres { struct nfs4_change_info cinfo; __u32 rflags; struct nfs_fattr * f_attr; + struct nfs_fattr * dir_attr; const struct nfs_server *server; int delegation_type; nfs4_stateid delegation; @@ -540,6 +541,7 @@ struct nfs4_create_res { struct nfs_fh * fh; struct nfs_fattr * fattr; struct nfs4_change_info dir_cinfo; + struct nfs_fattr * dir_fattr; }; struct nfs4_fsinfo_arg { -- cgit v1.2.3 From 516a6af641bb50c608329a5bd751acd0d65cc4ab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:41 -0400 Subject: NFS: Add optional post-op getattr instruction to the NFSv4 file close. "Optional" means that the close call will not fail if the getattr at the end of the compound fails. If it does succeed, try to refresh inode attributes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++++- fs/nfs/nfs4xdr.c | 34 ++++++++++++++++++++++++++++------ include/linux/nfs_xdr.h | 3 +++ 3 files changed, 39 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f363fd6c7f4d..7be3d2d15d6f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -865,6 +865,7 @@ struct nfs4_closedata { struct nfs4_state *state; struct nfs_closeargs arg; struct nfs_closeres res; + struct nfs_fattr fattr; }; static void nfs4_free_closedata(struct nfs4_closedata *calldata) @@ -904,6 +905,7 @@ static void nfs4_close_done(struct rpc_task *task) return; } } + nfs_refresh_inode(calldata->inode, calldata->res.fattr); state->state = calldata->arg.open_flags; nfs4_free_closedata(calldata); } @@ -941,6 +943,7 @@ static void nfs4_close_begin(struct rpc_task *task) rpc_exit(task, 0); return; } + nfs_fattr_init(calldata->res.fattr); if (mode != 0) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; calldata->arg.open_flags = mode; @@ -960,6 +963,7 @@ static void nfs4_close_begin(struct rpc_task *task) */ int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_closedata *calldata; int status = -ENOMEM; @@ -974,8 +978,11 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); if (calldata->arg.seqid == NULL) goto out_free_calldata; + calldata->arg.bitmask = server->attr_bitmask; + calldata->res.fattr = &calldata->fattr; + calldata->res.server = server; - status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_close_begin, + status = nfs4_call_async(server->client, nfs4_close_begin, nfs4_close_done, calldata); if (status == 0) goto out; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7f91d613d31a..cd9e26cfa868 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -198,17 +198,21 @@ static int nfs_stat_to_errno(int); #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + 7) + op_encode_hdr_maxsz + 7 + \ + encode_getattr_maxsz) #define NFS4_dec_open_downgrade_sz \ (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4) + op_decode_hdr_maxsz + 4 + \ + decode_getattr_maxsz) #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + 5) + op_encode_hdr_maxsz + 5 + \ + encode_getattr_maxsz) #define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4) + op_decode_hdr_maxsz + 4 + \ + decode_getattr_maxsz) #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ @@ -1433,7 +1437,7 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; @@ -1443,6 +1447,9 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos if(status) goto out; status = encode_close(&xdr, args); + if (status != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); out: return status; } @@ -1541,7 +1548,7 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; @@ -1551,6 +1558,9 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct if (status) goto out; status = encode_open_downgrade(&xdr, args); + if (status != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); out: return status; } @@ -3403,6 +3413,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, stru if (status) goto out; status = decode_open_downgrade(&xdr, res); + if (status != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -3678,6 +3691,15 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_cl if (status) goto out; status = decode_close(&xdr, res); + if (status != 0) + goto out; + /* + * Note: Server may do delete on close for this file + * in which case the getattr call will fail with + * an ESTALE error. Shouldn't be a problem, + * though, since fattr->valid will remain unset. + */ + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6485b8b41b83..4f03dc21cf4a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -153,10 +153,13 @@ struct nfs_closeargs { nfs4_stateid * stateid; struct nfs_seqid * seqid; int open_flags; + const u32 * bitmask; }; struct nfs_closeres { nfs4_stateid stateid; + struct nfs_fattr * fattr; + const struct nfs_server *server; }; /* * * Arguments to the lock,lockt, and locku call. -- cgit v1.2.3 From 91ba2eeec5e8e86e054937eb3bf5aec5b22b1830 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:42 -0400 Subject: NFSv4: Add post-op attributes to nfs4_proc_link() Optimise attribute revalidation when hardlinking. Add post-op attributes for the directory and the original inode. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 22 +++++++++++++++++----- fs/nfs/nfs4xdr.c | 34 ++++++++++++++++++++++++++++------ include/linux/nfs_xdr.h | 9 +++++++++ 3 files changed, 54 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7be3d2d15d6f..04995e39e867 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1724,22 +1724,34 @@ static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_link_arg arg = { .fh = NFS_FH(inode), .dir_fh = NFS_FH(dir), .name = name, + .bitmask = server->attr_bitmask, + }; + struct nfs_fattr fattr, dir_attr; + struct nfs4_link_res res = { + .server = server, + .fattr = &fattr, + .dir_attr = &dir_attr, }; - struct nfs4_change_info cinfo = { }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], .rpc_argp = &arg, - .rpc_resp = &cinfo, + .rpc_resp = &res, }; int status; - status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - if (!status) - update_changeattr(dir, &cinfo); + nfs_fattr_init(res.fattr); + nfs_fattr_init(res.dir_attr); + status = rpc_call_sync(server->client, &msg, 0); + if (!status) { + update_changeattr(dir, &res.cinfo); + nfs_post_op_update_inode(dir, res.dir_attr); + nfs_refresh_inode(inode, res.fattr); + } return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cd9e26cfa868..f624b693ce21 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -324,12 +324,18 @@ static int nfs_stat_to_errno(int); encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ - encode_link_maxsz) + encode_link_maxsz + \ + decode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + decode_getattr_maxsz) #define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ - decode_link_maxsz) + decode_link_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_symlink_maxsz + \ @@ -1357,7 +1363,7 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 4, + .nops = 7, }; int status; @@ -1369,7 +1375,13 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs goto out; if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) goto out; - status = encode_link(&xdr, args->name); + if ((status = encode_link(&xdr, args->name)) != 0) + goto out; + if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + goto out; + if ((status = encode_restorefh(&xdr)) != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); out: return status; } @@ -3529,7 +3541,7 @@ out: /* * Decode LINK response */ -static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo) +static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_link_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -3544,7 +3556,17 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_ch goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; - status = decode_link(&xdr, cinfo); + if ((status = decode_link(&xdr, &res->cinfo)) != 0) + goto out; + /* + * Note order: OP_LINK leaves the directory as the current + * filehandle. + */ + if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0) + goto out; + if ((status = decode_restorefh(&xdr)) != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4f03dc21cf4a..89238b799cfd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -566,8 +566,17 @@ struct nfs4_link_arg { const struct nfs_fh * fh; const struct nfs_fh * dir_fh; const struct qstr * name; + const u32 * bitmask; +}; + +struct nfs4_link_res { + const struct nfs_server * server; + struct nfs_fattr * fattr; + struct nfs4_change_info cinfo; + struct nfs_fattr * dir_attr; }; + struct nfs4_lookup_arg { const struct nfs_fh * dir_fh; const struct qstr * name; -- cgit v1.2.3 From 6caf2c8276d371679a798058e8fdf49f5ff831a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:43 -0400 Subject: NFSv4: Add post-op attributes to nfs4_proc_rename() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++++++++++++-- fs/nfs/nfs4xdr.c | 29 ++++++++++++++++++++++++----- include/linux/nfs_xdr.h | 4 ++++ 3 files changed, 41 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 04995e39e867..f96bc12c0fa0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1685,13 +1685,20 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task) static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { + struct nfs_server *server = NFS_SERVER(old_dir); struct nfs4_rename_arg arg = { .old_dir = NFS_FH(old_dir), .new_dir = NFS_FH(new_dir), .old_name = old_name, .new_name = new_name, + .bitmask = server->attr_bitmask, + }; + struct nfs_fattr old_fattr, new_fattr; + struct nfs4_rename_res res = { + .server = server, + .old_fattr = &old_fattr, + .new_fattr = &new_fattr, }; - struct nfs4_rename_res res = { }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], .rpc_argp = &arg, @@ -1699,11 +1706,15 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, }; int status; - status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); + nfs_fattr_init(res.old_fattr); + nfs_fattr_init(res.new_fattr); + status = rpc_call_sync(server->client, &msg, 0); if (!status) { update_changeattr(old_dir, &res.old_cinfo); + nfs_post_op_update_inode(old_dir, res.old_fattr); update_changeattr(new_dir, &res.new_cinfo); + nfs_post_op_update_inode(new_dir, res.new_fattr); } return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f624b693ce21..2a07755bd347 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -314,12 +314,18 @@ static int nfs_stat_to_errno(int); encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ - encode_rename_maxsz) + encode_rename_maxsz + \ + encode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ - decode_rename_maxsz) + decode_rename_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ @@ -1339,7 +1345,7 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 4, + .nops = 7, }; int status; @@ -1351,7 +1357,13 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct n goto out; if ((status = encode_putfh(&xdr, args->new_dir)) != 0) goto out; - status = encode_rename(&xdr, args->old_name, args->new_name); + if ((status = encode_rename(&xdr, args->old_name, args->new_name)) != 0) + goto out; + if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + goto out; + if ((status = encode_restorefh(&xdr)) != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); out: return status; } @@ -3533,7 +3545,14 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_ goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; - status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo); + if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) + goto out; + /* Current FH is target directory */ + if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0) + goto out; + if ((status = decode_restorefh(&xdr)) != 0) + goto out; + decode_getfattr(&xdr, res->old_fattr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 89238b799cfd..6f0804280824 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -630,11 +630,15 @@ struct nfs4_rename_arg { const struct nfs_fh * new_dir; const struct qstr * old_name; const struct qstr * new_name; + const u32 * bitmask; }; struct nfs4_rename_res { + const struct nfs_server * server; struct nfs4_change_info old_cinfo; + struct nfs_fattr * old_fattr; struct nfs4_change_info new_cinfo; + struct nfs_fattr * new_fattr; }; struct nfs4_setclientid { -- cgit v1.2.3 From 16e429596dec4d28e16812b3a9be27f18412c567 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:44 -0400 Subject: NFSv4: Add post-op attributes to nfs4_proc_remove() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 27 +++++++++++++++++++++------ fs/nfs/nfs4xdr.c | 25 +++++++++++++++++-------- include/linux/nfs_xdr.h | 7 +++++++ 3 files changed, 45 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f96bc12c0fa0..bab47c4cb41c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1614,11 +1614,17 @@ out: static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_remove_arg args = { .fh = NFS_FH(dir), .name = name, + .bitmask = server->attr_bitmask, + }; + struct nfs_fattr dir_attr; + struct nfs4_remove_res res = { + .server = server, + .dir_attr = &dir_attr, }; - struct nfs4_change_info res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE], .rpc_argp = &args, @@ -1626,9 +1632,12 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) }; int status; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (status == 0) - update_changeattr(dir, &res); + nfs_fattr_init(res.dir_attr); + status = rpc_call_sync(server->client, &msg, 0); + if (status == 0) { + update_changeattr(dir, &res.cinfo); + nfs_post_op_update_inode(dir, res.dir_attr); + } return status; } @@ -1646,12 +1655,14 @@ static int nfs4_proc_remove(struct inode *dir, struct qstr *name) struct unlink_desc { struct nfs4_remove_arg args; - struct nfs4_change_info res; + struct nfs4_remove_res res; + struct nfs_fattr dir_attr; }; static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name) { + struct nfs_server *server = NFS_SERVER(dir->d_inode); struct unlink_desc *up; up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL); @@ -1660,6 +1671,9 @@ static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, up->args.fh = NFS_FH(dir->d_inode); up->args.name = name; + up->args.bitmask = server->attr_bitmask; + up->res.server = server; + up->res.dir_attr = &up->dir_attr; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; msg->rpc_argp = &up->args; @@ -1674,7 +1688,8 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task) if (msg->rpc_resp != NULL) { up = container_of(msg->rpc_resp, struct unlink_desc, res); - update_changeattr(dir->d_inode, &up->res); + update_changeattr(dir->d_inode, &up->res.cinfo); + nfs_post_op_update_inode(dir->d_inode, up->res.dir_attr); kfree(up); msg->rpc_resp = NULL; msg->rpc_argp = NULL; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 2a07755bd347..3ee3a1669d28 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -306,10 +306,12 @@ static int nfs_stat_to_errno(int); decode_getfh_maxsz) #define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_remove_maxsz) + encode_remove_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 5) + op_decode_hdr_maxsz + 5 + \ + decode_getattr_maxsz) #define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ @@ -1327,14 +1329,18 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh)) == 0) - status = encode_remove(&xdr, args->name); + if ((status = encode_putfh(&xdr, args->fh)) != 0) + goto out; + if ((status = encode_remove(&xdr, args->name)) != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: return status; } @@ -3512,7 +3518,7 @@ out: /* * Decode REMOVE response */ -static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo) +static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_remove_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -3521,8 +3527,11 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; - if ((status = decode_putfh(&xdr)) == 0) - status = decode_remove(&xdr, cinfo); + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_remove(&xdr, &res->cinfo)) != 0) + goto out; + decode_getfattr(&xdr, res->dir_attr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6f0804280824..deeba7e2c518 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -623,6 +623,13 @@ struct nfs4_readlink { struct nfs4_remove_arg { const struct nfs_fh * fh; const struct qstr * name; + const u32 * bitmask; +}; + +struct nfs4_remove_res { + const struct nfs_server * server; + struct nfs4_change_info cinfo; + struct nfs_fattr * dir_attr; }; struct nfs4_rename_arg { -- cgit v1.2.3 From 4f9838c7ecd14f31f701f64fa65ded132fc0db8a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2005 22:12:44 -0400 Subject: NFSv4: Add post-op attributes to NFSv4 write and commit callbacks. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 13 ++++++++++++- fs/nfs/nfs4xdr.c | 28 ++++++++++++++++++++++------ include/linux/nfs_xdr.h | 2 ++ 3 files changed, 36 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bab47c4cb41c..933e13b383f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2169,8 +2169,10 @@ nfs4_write_done(struct rpc_task *task) rpc_restart_call(task); return; } - if (task->tk_status >= 0) + if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); + nfs_post_op_update_inode(inode, data->res.fattr); + } /* Call back common NFS writeback processing */ nfs_writeback_done(task); } @@ -2186,6 +2188,7 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) .rpc_cred = data->cred, }; struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(inode); int stable; int flags; @@ -2197,6 +2200,8 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) } else stable = NFS_UNSTABLE; data->args.stable = stable; + data->args.bitmask = server->attr_bitmask; + data->res.server = server; data->timestamp = jiffies; @@ -2218,6 +2223,8 @@ nfs4_commit_done(struct rpc_task *task) rpc_restart_call(task); return; } + if (task->tk_status >= 0) + nfs_post_op_update_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_commit_done(task); } @@ -2233,8 +2240,12 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how) .rpc_cred = data->cred, }; struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(inode); int flags; + data->args.bitmask = server->attr_bitmask; + data->res.server = server; + /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3ee3a1669d28..6f1bf182e0e0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -159,16 +159,20 @@ static int nfs_stat_to_errno(int); op_decode_hdr_maxsz + 2) #define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + 8) + op_encode_hdr_maxsz + 8 + \ + encode_getattr_maxsz) #define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4) + op_decode_hdr_maxsz + 4 + \ + decode_getattr_maxsz) #define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + 3) + op_encode_hdr_maxsz + 3 + \ + encode_getattr_maxsz) #define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 2) + op_decode_hdr_maxsz + 2 + \ + decode_getattr_maxsz) #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + \ @@ -1799,7 +1803,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; @@ -1809,6 +1813,9 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writ if (status) goto out; status = encode_write(&xdr, args); + if (status) + goto out; + status = encode_getfattr(&xdr, args->bitmask); out: return status; } @@ -1820,7 +1827,7 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_wri { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; @@ -1830,6 +1837,9 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_wri if (status) goto out; status = encode_commit(&xdr, args); + if (status) + goto out; + status = encode_getfattr(&xdr, args->bitmask); out: return status; } @@ -4001,6 +4011,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_wr if (status) goto out; status = decode_write(&xdr, res); + if (status) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); if (!status) status = res->count; out: @@ -4024,6 +4037,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_w if (status) goto out; status = decode_commit(&xdr, res); + if (status) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index deeba7e2c518..40718669b9c8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -256,6 +256,7 @@ struct nfs_writeargs { enum nfs3_stable_how stable; unsigned int pgbase; struct page ** pages; + const u32 * bitmask; }; struct nfs_writeverf { @@ -267,6 +268,7 @@ struct nfs_writeres { struct nfs_fattr * fattr; struct nfs_writeverf * verf; __u32 count; + const struct nfs_server *server; }; /* -- cgit v1.2.3 From 20e5c81fcff89535dced2ed71cf24c6c648ff40e Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Thu, 13 Oct 2005 21:48:42 +0200 Subject: [patch] remove gendisk->stamp_idle field struct gendisk has these two fields: stamp, stamp_idle. Update to stamp_idle is always in sync with stamp and they are always the same. Therefore, it does not add any value in having two fields tracking same timestamp. Suggest to remove it. Also, we should only update gendisk stats with non-zero value. Advantage is that we don't have to needlessly calculate memory address, and then add zero to the content. Signed-off-by: Ken Chen Signed-off-by: Jens Axboe --- drivers/block/ll_rw_blk.c | 11 +++++------ fs/partitions/check.c | 2 +- include/linux/genhd.h | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index baedac522945..c42071fd2e95 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2433,13 +2433,12 @@ void disk_round_stats(struct gendisk *disk) { unsigned long now = jiffies; - __disk_stat_add(disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); + if (disk->in_flight) { + __disk_stat_add(disk, time_in_queue, + disk->in_flight * (now - disk->stamp)); + __disk_stat_add(disk, io_ticks, (now - disk->stamp)); + } disk->stamp = now; - - if (disk->in_flight) - __disk_stat_add(disk, io_ticks, (now - disk->stamp_idle)); - disk->stamp_idle = now; } /* diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 77e178f13162..1e848648a322 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -430,7 +430,7 @@ void del_gendisk(struct gendisk *disk) disk->flags &= ~GENHD_FL_UP; unlink_gendisk(disk); disk_stat_set_all(disk, 0); - disk->stamp = disk->stamp_idle = 0; + disk->stamp = 0; devfs_remove_disk(disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 01796c41c951..142e1c1e0689 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -119,7 +119,7 @@ struct gendisk { int policy; atomic_t sync_io; /* RAID */ - unsigned long stamp, stamp_idle; + unsigned long stamp; int in_flight; #ifdef CONFIG_SMP struct disk_stats *dkstats; -- cgit v1.2.3 From 8922e16cf6269e668123acb1ae1fdc62b7a3a4fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Oct 2005 16:23:44 +0200 Subject: [PATCH] 01/05 Implement generic dispatch queue Implements generic dispatch queue which can replace all dispatch queues implemented by each iosched. This reduces code duplication, eases enforcing semantics over dispatch queue, and simplifies specific ioscheds. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/elevator.c | 242 ++++++++++++++++++++++++++++++++-------------- drivers/block/ll_rw_blk.c | 23 +++-- include/linux/blkdev.h | 17 ++-- include/linux/elevator.h | 16 ++- 4 files changed, 201 insertions(+), 97 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 4144f30d82a9..a27555908d35 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -40,6 +40,11 @@ static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); +static inline sector_t rq_last_sector(struct request *rq) +{ + return rq->sector + rq->nr_sectors; +} + /* * can we safely merge with this request? */ @@ -143,6 +148,9 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e, INIT_LIST_HEAD(&q->queue_head); q->last_merge = NULL; q->elevator = eq; + q->last_sector = 0; + q->boundary_rq = NULL; + q->max_back_kb = 0; if (eq->ops->elevator_init_fn) ret = eq->ops->elevator_init_fn(q, eq); @@ -225,6 +233,48 @@ void elevator_exit(elevator_t *e) kfree(e); } +/* + * Insert rq into dispatch queue of q. Queue lock must be held on + * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be + * appended to the dispatch queue. To be used by specific elevators. + */ +void elv_dispatch_insert(request_queue_t *q, struct request *rq, int sort) +{ + sector_t boundary; + unsigned max_back; + struct list_head *entry; + + if (!sort) { + /* Specific elevator is performing sort. Step away. */ + q->last_sector = rq_last_sector(rq); + q->boundary_rq = rq; + list_add_tail(&rq->queuelist, &q->queue_head); + return; + } + + boundary = q->last_sector; + max_back = q->max_back_kb * 2; + boundary = boundary > max_back ? boundary - max_back : 0; + + list_for_each_prev(entry, &q->queue_head) { + struct request *pos = list_entry_rq(entry); + + if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) + break; + if (rq->sector >= boundary) { + if (pos->sector < boundary) + continue; + } else { + if (pos->sector >= boundary) + break; + } + if (rq->sector >= pos->sector) + break; + } + + list_add(&rq->queuelist, entry); +} + int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { elevator_t *e = q->elevator; @@ -255,13 +305,7 @@ void elv_merge_requests(request_queue_t *q, struct request *rq, e->ops->elevator_merge_req_fn(q, rq, next); } -/* - * For careful internal use by the block layer. Essentially the same as - * a requeue in that it tells the io scheduler that this request is not - * active in the driver or hardware anymore, but we don't want the request - * added back to the scheduler. Function is not exported. - */ -void elv_deactivate_request(request_queue_t *q, struct request *rq) +void elv_requeue_request(request_queue_t *q, struct request *rq) { elevator_t *e = q->elevator; @@ -269,19 +313,14 @@ void elv_deactivate_request(request_queue_t *q, struct request *rq) * it already went through dequeue, we need to decrement the * in_flight count again */ - if (blk_account_rq(rq)) + if (blk_account_rq(rq)) { q->in_flight--; + if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn) + e->ops->elevator_deactivate_req_fn(q, rq); + } rq->flags &= ~REQ_STARTED; - if (e->ops->elevator_deactivate_req_fn) - e->ops->elevator_deactivate_req_fn(q, rq); -} - -void elv_requeue_request(request_queue_t *q, struct request *rq) -{ - elv_deactivate_request(q, rq); - /* * if this is the flush, requeue the original instead and drop the flush */ @@ -290,55 +329,89 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) rq = rq->end_io_data; } - /* - * the request is prepped and may have some resources allocated. - * allowing unprepped requests to pass this one may cause resource - * deadlock. turn on softbarrier. - */ - rq->flags |= REQ_SOFTBARRIER; - - /* - * if iosched has an explicit requeue hook, then use that. otherwise - * just put the request at the front of the queue - */ - if (q->elevator->ops->elevator_requeue_req_fn) - q->elevator->ops->elevator_requeue_req_fn(q, rq); - else - __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); } void __elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { - /* - * barriers implicitly indicate back insertion - */ - if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) && - where == ELEVATOR_INSERT_SORT) - where = ELEVATOR_INSERT_BACK; + if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { + /* + * barriers implicitly indicate back insertion + */ + if (where == ELEVATOR_INSERT_SORT) + where = ELEVATOR_INSERT_BACK; + + /* + * this request is scheduling boundary, update last_sector + */ + if (blk_fs_request(rq)) { + q->last_sector = rq_last_sector(rq); + q->boundary_rq = rq; + } + } if (plug) blk_plug_device(q); rq->q = q; - if (!test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) { - q->elevator->ops->elevator_add_req_fn(q, rq, where); - - if (blk_queue_plugged(q)) { - int nrq = q->rq.count[READ] + q->rq.count[WRITE] - - q->in_flight; - - if (nrq >= q->unplug_thresh) - __generic_unplug_device(q); - } - } else + if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) { /* * if drain is set, store the request "locally". when the drain * is finished, the requests will be handed ordered to the io * scheduler */ list_add_tail(&rq->queuelist, &q->drain_list); + return; + } + + switch (where) { + case ELEVATOR_INSERT_FRONT: + rq->flags |= REQ_SOFTBARRIER; + + list_add(&rq->queuelist, &q->queue_head); + break; + + case ELEVATOR_INSERT_BACK: + rq->flags |= REQ_SOFTBARRIER; + + while (q->elevator->ops->elevator_dispatch_fn(q, 1)) + ; + list_add_tail(&rq->queuelist, &q->queue_head); + /* + * We kick the queue here for the following reasons. + * - The elevator might have returned NULL previously + * to delay requests and returned them now. As the + * queue wasn't empty before this request, ll_rw_blk + * won't run the queue on return, resulting in hang. + * - Usually, back inserted requests won't be merged + * with anything. There's no point in delaying queue + * processing. + */ + blk_remove_plug(q); + q->request_fn(q); + break; + + case ELEVATOR_INSERT_SORT: + BUG_ON(!blk_fs_request(rq)); + rq->flags |= REQ_SORTED; + q->elevator->ops->elevator_add_req_fn(q, rq); + break; + + default: + printk(KERN_ERR "%s: bad insertion point %d\n", + __FUNCTION__, where); + BUG(); + } + + if (blk_queue_plugged(q)) { + int nrq = q->rq.count[READ] + q->rq.count[WRITE] + - q->in_flight; + + if (nrq >= q->unplug_thresh) + __generic_unplug_device(q); + } } void elv_add_request(request_queue_t *q, struct request *rq, int where, @@ -353,13 +426,19 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where, static inline struct request *__elv_next_request(request_queue_t *q) { - struct request *rq = q->elevator->ops->elevator_next_req_fn(q); + struct request *rq; + + if (unlikely(list_empty(&q->queue_head) && + !q->elevator->ops->elevator_dispatch_fn(q, 0))) + return NULL; + + rq = list_entry_rq(q->queue_head.next); /* * if this is a barrier write and the device has to issue a * flush sequence to support it, check how far we are */ - if (rq && blk_fs_request(rq) && blk_barrier_rq(rq)) { + if (blk_fs_request(rq) && blk_barrier_rq(rq)) { BUG_ON(q->ordered == QUEUE_ORDERED_NONE); if (q->ordered == QUEUE_ORDERED_FLUSH && @@ -376,16 +455,34 @@ struct request *elv_next_request(request_queue_t *q) int ret; while ((rq = __elv_next_request(q)) != NULL) { - /* - * just mark as started even if we don't start it, a request - * that has been delayed should not be passed by new incoming - * requests - */ - rq->flags |= REQ_STARTED; + if (!(rq->flags & REQ_STARTED)) { + elevator_t *e = q->elevator; + + /* + * This is the first time the device driver + * sees this request (possibly after + * requeueing). Notify IO scheduler. + */ + if (blk_sorted_rq(rq) && + e->ops->elevator_activate_req_fn) + e->ops->elevator_activate_req_fn(q, rq); + + /* + * just mark as started even if we don't start + * it, a request that has been delayed should + * not be passed by new incoming requests + */ + rq->flags |= REQ_STARTED; + } if (rq == q->last_merge) q->last_merge = NULL; + if (!q->boundary_rq || q->boundary_rq == rq) { + q->last_sector = rq_last_sector(rq); + q->boundary_rq = NULL; + } + if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) break; @@ -396,9 +493,9 @@ struct request *elv_next_request(request_queue_t *q) /* * the request may have been (partially) prepped. * we need to keep this request in the front to - * avoid resource deadlock. turn on softbarrier. + * avoid resource deadlock. REQ_STARTED will + * prevent other fs requests from passing this one. */ - rq->flags |= REQ_SOFTBARRIER; rq = NULL; break; } else if (ret == BLKPREP_KILL) { @@ -421,16 +518,16 @@ struct request *elv_next_request(request_queue_t *q) return rq; } -void elv_remove_request(request_queue_t *q, struct request *rq) +void elv_dequeue_request(request_queue_t *q, struct request *rq) { - elevator_t *e = q->elevator; + BUG_ON(list_empty(&rq->queuelist)); + + list_del_init(&rq->queuelist); /* * the time frame between a request being removed from the lists * and to it is freed is accounted as io that is in progress at - * the driver side. note that we only account requests that the - * driver has seen (REQ_STARTED set), to avoid false accounting - * for request-request merges + * the driver side. */ if (blk_account_rq(rq)) q->in_flight++; @@ -444,19 +541,19 @@ void elv_remove_request(request_queue_t *q, struct request *rq) */ if (rq == q->last_merge) q->last_merge = NULL; - - if (e->ops->elevator_remove_req_fn) - e->ops->elevator_remove_req_fn(q, rq); } int elv_queue_empty(request_queue_t *q) { elevator_t *e = q->elevator; + if (!list_empty(&q->queue_head)) + return 0; + if (e->ops->elevator_queue_empty_fn) return e->ops->elevator_queue_empty_fn(q); - return list_empty(&q->queue_head); + return 1; } struct request *elv_latter_request(request_queue_t *q, struct request *rq) @@ -528,11 +625,11 @@ void elv_completed_request(request_queue_t *q, struct request *rq) /* * request is released from the driver, io must be done */ - if (blk_account_rq(rq)) + if (blk_account_rq(rq)) { q->in_flight--; - - if (e->ops->elevator_completed_req_fn) - e->ops->elevator_completed_req_fn(q, rq); + if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) + e->ops->elevator_completed_req_fn(q, rq); + } } int elv_register_queue(struct request_queue *q) @@ -705,11 +802,12 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) return len; } +EXPORT_SYMBOL(elv_dispatch_insert); EXPORT_SYMBOL(elv_add_request); EXPORT_SYMBOL(__elv_add_request); EXPORT_SYMBOL(elv_requeue_request); EXPORT_SYMBOL(elv_next_request); -EXPORT_SYMBOL(elv_remove_request); +EXPORT_SYMBOL(elv_dequeue_request); EXPORT_SYMBOL(elv_queue_empty); EXPORT_SYMBOL(elv_completed_request); EXPORT_SYMBOL(elevator_exit); diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 4e2b1b06b411..d2a66fd309c3 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -353,6 +353,8 @@ static void blk_pre_flush_end_io(struct request *flush_rq) struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; + elv_completed_request(q, flush_rq); + rq->flags |= REQ_BAR_PREFLUSH; if (!flush_rq->errors) @@ -369,6 +371,8 @@ static void blk_post_flush_end_io(struct request *flush_rq) struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; + elv_completed_request(q, flush_rq); + rq->flags |= REQ_BAR_POSTFLUSH; q->end_flush_fn(q, flush_rq); @@ -408,8 +412,6 @@ struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) if (!list_empty(&rq->queuelist)) blkdev_dequeue_request(rq); - elv_deactivate_request(q, rq); - flush_rq->end_io_data = rq; flush_rq->end_io = blk_pre_flush_end_io; @@ -1040,6 +1042,7 @@ EXPORT_SYMBOL(blk_queue_invalidate_tags); static char *rq_flags[] = { "REQ_RW", "REQ_FAILFAST", + "REQ_SORTED", "REQ_SOFTBARRIER", "REQ_HARDBARRIER", "REQ_CMD", @@ -2456,6 +2459,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req) if (unlikely(--req->ref_count)) return; + elv_completed_request(q, req); + req->rq_status = RQ_INACTIVE; req->rl = NULL; @@ -2466,8 +2471,6 @@ static void __blk_put_request(request_queue_t *q, struct request *req) if (rl) { int rw = rq_data_dir(req); - elv_completed_request(q, req); - BUG_ON(!list_empty(&req->queuelist)); blk_free_request(q, req); @@ -2477,14 +2480,14 @@ static void __blk_put_request(request_queue_t *q, struct request *req) void blk_put_request(struct request *req) { + unsigned long flags; + request_queue_t *q = req->q; + /* - * if req->rl isn't set, this request didnt originate from the - * block layer, so it's safe to just disregard it + * Gee, IDE calls in w/ NULL q. Fix IDE and remove the + * following if (q) test. */ - if (req->rl) { - unsigned long flags; - request_queue_t *q = req->q; - + if (q) { spin_lock_irqsave(q->queue_lock, flags); __blk_put_request(q, req); spin_unlock_irqrestore(q->queue_lock, flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index efdc9b5bc05c..2c7b9154927a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -203,6 +203,7 @@ struct request { enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ __REQ_FAILFAST, /* no low level driver retries */ + __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ __REQ_HARDBARRIER, /* may not be passed by drive either */ __REQ_CMD, /* is a regular fs rw request */ @@ -235,6 +236,7 @@ enum rq_flag_bits { #define REQ_RW (1 << __REQ_RW) #define REQ_FAILFAST (1 << __REQ_FAILFAST) +#define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) #define REQ_CMD (1 << __REQ_CMD) @@ -332,6 +334,13 @@ struct request_queue prepare_flush_fn *prepare_flush_fn; end_flush_fn *end_flush_fn; + /* + * Dispatch queue sorting + */ + sector_t last_sector; + struct request *boundary_rq; + unsigned int max_back_kb; + /* * Auto-unplugging state */ @@ -454,6 +463,7 @@ enum { #define blk_pm_request(rq) \ ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME)) +#define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) #define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH) #define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH) @@ -611,12 +621,7 @@ extern void end_request(struct request *req, int uptodate); static inline void blkdev_dequeue_request(struct request *req) { - BUG_ON(list_empty(&req->queuelist)); - - list_del_init(&req->queuelist); - - if (req->rl) - elv_remove_request(req->q, req); + elv_dequeue_request(req->q, req); } /* diff --git a/include/linux/elevator.h b/include/linux/elevator.h index ea6bbc2d7407..76f4f6920744 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -8,18 +8,17 @@ typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struc typedef void (elevator_merged_fn) (request_queue_t *, struct request *); -typedef struct request *(elevator_next_req_fn) (request_queue_t *); +typedef int (elevator_dispatch_fn) (request_queue_t *, int); -typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, int); +typedef void (elevator_add_req_fn) (request_queue_t *, struct request *); typedef int (elevator_queue_empty_fn) (request_queue_t *); -typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *); -typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *); typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, int); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); +typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); @@ -31,10 +30,9 @@ struct elevator_ops elevator_merged_fn *elevator_merged_fn; elevator_merge_req_fn *elevator_merge_req_fn; - elevator_next_req_fn *elevator_next_req_fn; + elevator_dispatch_fn *elevator_dispatch_fn; elevator_add_req_fn *elevator_add_req_fn; - elevator_remove_req_fn *elevator_remove_req_fn; - elevator_requeue_req_fn *elevator_requeue_req_fn; + elevator_activate_req_fn *elevator_activate_req_fn; elevator_deactivate_req_fn *elevator_deactivate_req_fn; elevator_queue_empty_fn *elevator_queue_empty_fn; @@ -81,15 +79,15 @@ struct elevator_queue /* * block elevator interface */ +extern void elv_dispatch_insert(request_queue_t *, struct request *, int); extern void elv_add_request(request_queue_t *, struct request *, int, int); extern void __elv_add_request(request_queue_t *, struct request *, int, int); extern int elv_merge(request_queue_t *, struct request **, struct bio *); extern void elv_merge_requests(request_queue_t *, struct request *, struct request *); extern void elv_merged_request(request_queue_t *, struct request *); -extern void elv_remove_request(request_queue_t *, struct request *); +extern void elv_dequeue_request(request_queue_t *, struct request *); extern void elv_requeue_request(request_queue_t *, struct request *); -extern void elv_deactivate_request(request_queue_t *, struct request *); extern int elv_queue_empty(request_queue_t *); extern struct request *elv_next_request(struct request_queue *q); extern struct request *elv_former_request(request_queue_t *, struct request *); -- cgit v1.2.3 From 1b47f531e244e339b648dfff071c086f936e49e1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 Oct 2005 16:37:00 +0200 Subject: [PATCH] generic dispatch fixes - Split elv_dispatch_insert() into two functions - Rename rq_last_sector() to rq_end_sector() Signed-off-by: Jens Axboe --- drivers/block/elevator.c | 33 ++++++++++++--------------------- include/linux/blkdev.h | 13 ++++++++++++- include/linux/elevator.h | 4 +++- 3 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index a27555908d35..237e43d760c8 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -40,11 +40,6 @@ static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); -static inline sector_t rq_last_sector(struct request *rq) -{ - return rq->sector + rq->nr_sectors; -} - /* * can we safely merge with this request? */ @@ -148,7 +143,7 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e, INIT_LIST_HEAD(&q->queue_head); q->last_merge = NULL; q->elevator = eq; - q->last_sector = 0; + q->end_sector = 0; q->boundary_rq = NULL; q->max_back_kb = 0; @@ -233,29 +228,25 @@ void elevator_exit(elevator_t *e) kfree(e); } +void elv_dispatch_insert_tail(request_queue_t *q, struct request *rq) +{ +} + /* * Insert rq into dispatch queue of q. Queue lock must be held on * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be * appended to the dispatch queue. To be used by specific elevators. */ -void elv_dispatch_insert(request_queue_t *q, struct request *rq, int sort) +void elv_dispatch_sort(request_queue_t *q, struct request *rq) { sector_t boundary; unsigned max_back; struct list_head *entry; - if (!sort) { - /* Specific elevator is performing sort. Step away. */ - q->last_sector = rq_last_sector(rq); - q->boundary_rq = rq; - list_add_tail(&rq->queuelist, &q->queue_head); - return; - } - - boundary = q->last_sector; + boundary = q->end_sector; max_back = q->max_back_kb * 2; boundary = boundary > max_back ? boundary - max_back : 0; - + list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); @@ -343,10 +334,10 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, where = ELEVATOR_INSERT_BACK; /* - * this request is scheduling boundary, update last_sector + * this request is scheduling boundary, update end_sector */ if (blk_fs_request(rq)) { - q->last_sector = rq_last_sector(rq); + q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } } @@ -479,7 +470,7 @@ struct request *elv_next_request(request_queue_t *q) q->last_merge = NULL; if (!q->boundary_rq || q->boundary_rq == rq) { - q->last_sector = rq_last_sector(rq); + q->end_sector = rq_end_sector(rq); q->boundary_rq = NULL; } @@ -802,7 +793,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) return len; } -EXPORT_SYMBOL(elv_dispatch_insert); +EXPORT_SYMBOL(elv_dispatch_sort); EXPORT_SYMBOL(elv_add_request); EXPORT_SYMBOL(__elv_add_request); EXPORT_SYMBOL(elv_requeue_request); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c7b9154927a..a2e496f285a4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -337,7 +337,7 @@ struct request_queue /* * Dispatch queue sorting */ - sector_t last_sector; + sector_t end_sector; struct request *boundary_rq; unsigned int max_back_kb; @@ -624,6 +624,17 @@ static inline void blkdev_dequeue_request(struct request *req) elv_dequeue_request(req->q, req); } +/* + * This should be in elevator.h, but that requires pulling in rq and q + */ +static inline void elv_dispatch_add_tail(struct request_queue *q, + struct request *rq) +{ + q->end_sector = rq_end_sector(rq); + q->boundary_rq = rq; + list_add_tail(&rq->queuelist, &q->queue_head); +} + /* * Access functions for manipulating queue properties */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 76f4f6920744..f9b5ec38121a 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -79,7 +79,7 @@ struct elevator_queue /* * block elevator interface */ -extern void elv_dispatch_insert(request_queue_t *, struct request *, int); +extern void elv_dispatch_sort(request_queue_t *, struct request *); extern void elv_add_request(request_queue_t *, struct request *, int, int); extern void __elv_add_request(request_queue_t *, struct request *, int, int); extern int elv_merge(request_queue_t *, struct request **, struct bio *); @@ -140,4 +140,6 @@ enum { ELV_MQUEUE_MUST, }; +#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) + #endif -- cgit v1.2.3 From 06b86245c052963029bfd9020ca1f08ceb66f85a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Oct 2005 16:46:23 +0200 Subject: [PATCH] 03/05 move last_merge handlin into generic elevator code Currently, both generic elevator code and specific ioscheds participate in the management and usage of last_merge. This and the following patches move last_merge handling into generic elevator code. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/elevator.c | 43 ++++++++++++++++++------------------------- include/linux/blkdev.h | 3 +++ 2 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 29d6c8237ab3..415144372c75 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -83,15 +83,6 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio) } EXPORT_SYMBOL(elv_try_merge); -inline int elv_try_last_merge(request_queue_t *q, struct bio *bio) -{ - if (q->last_merge) - return elv_try_merge(q->last_merge, bio); - - return ELEVATOR_NO_MERGE; -} -EXPORT_SYMBOL(elv_try_last_merge); - static struct elevator_type *elevator_find(const char *name) { struct elevator_type *e = NULL; @@ -239,6 +230,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) unsigned max_back; struct list_head *entry; + if (q->last_merge == rq) + q->last_merge = NULL; + boundary = q->end_sector; max_back = q->max_back_kb * 2; boundary = boundary > max_back ? boundary - max_back : 0; @@ -265,6 +259,15 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { elevator_t *e = q->elevator; + int ret; + + if (q->last_merge) { + ret = elv_try_merge(q->last_merge, bio); + if (ret != ELEVATOR_NO_MERGE) { + *req = q->last_merge; + return ret; + } + } if (e->ops->elevator_merge_fn) return e->ops->elevator_merge_fn(q, req, bio); @@ -278,6 +281,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq) if (e->ops->elevator_merged_fn) e->ops->elevator_merged_fn(q, rq); + + q->last_merge = rq; } void elv_merge_requests(request_queue_t *q, struct request *rq, @@ -285,11 +290,10 @@ void elv_merge_requests(request_queue_t *q, struct request *rq, { elevator_t *e = q->elevator; - if (q->last_merge == next) - q->last_merge = NULL; - if (e->ops->elevator_merge_req_fn) e->ops->elevator_merge_req_fn(q, rq, next); + + q->last_merge = rq; } void elv_requeue_request(request_queue_t *q, struct request *rq) @@ -384,6 +388,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, BUG_ON(!blk_fs_request(rq)); rq->flags |= REQ_SORTED; q->elevator->ops->elevator_add_req_fn(q, rq); + if (q->last_merge == NULL && rq_mergeable(rq)) + q->last_merge = rq; break; default: @@ -462,9 +468,6 @@ struct request *elv_next_request(request_queue_t *q) rq->flags |= REQ_STARTED; } - if (rq == q->last_merge) - q->last_merge = NULL; - if (!q->boundary_rq || q->boundary_rq == rq) { q->end_sector = rq_end_sector(rq); q->boundary_rq = NULL; @@ -518,16 +521,6 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq) */ if (blk_account_rq(rq)) q->in_flight++; - - /* - * the main clearing point for q->last_merge is on retrieval of - * request by driver (it calls elv_next_request()), but it _can_ - * also happen here if a request is added to the queue but later - * deleted without ever being given to driver (merged with another - * request). - */ - if (rq == q->last_merge) - q->last_merge = NULL; } int elv_queue_empty(request_queue_t *q) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a2e496f285a4..bd55b1df8997 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -630,6 +630,9 @@ static inline void blkdev_dequeue_request(struct request *req) static inline void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) { + if (q->last_merge == rq) + q->last_merge = NULL; + q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; list_add_tail(&rq->queuelist, &q->queue_head); -- cgit v1.2.3 From cb19833dccb32f97cacbfff834b53523915f13f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 24 Oct 2005 08:35:58 +0200 Subject: [BLOCK] kill generic max_back_kb handling This patch kills max_back_kb handling from elv_dispatch_sort() and kills max_back_kb field from struct request_queue. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/elevator.c | 6 +----- include/linux/blkdev.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 415144372c75..af2388e73f61 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -136,7 +136,6 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e, q->elevator = eq; q->end_sector = 0; q->boundary_rq = NULL; - q->max_back_kb = 0; if (eq->ops->elevator_init_fn) ret = eq->ops->elevator_init_fn(q, eq); @@ -227,16 +226,13 @@ void elevator_exit(elevator_t *e) void elv_dispatch_sort(request_queue_t *q, struct request *rq) { sector_t boundary; - unsigned max_back; struct list_head *entry; if (q->last_merge == rq) q->last_merge = NULL; boundary = q->end_sector; - max_back = q->max_back_kb * 2; - boundary = boundary > max_back ? boundary - max_back : 0; - + list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bd55b1df8997..159dbcd2eb59 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -339,7 +339,6 @@ struct request_queue */ sector_t end_sector; struct request *boundary_rq; - unsigned int max_back_kb; /* * Auto-unplugging state -- cgit v1.2.3 From cb98fc8bb9c141009e2bda99c0db39d387e142cf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Oct 2005 08:29:39 +0200 Subject: [BLOCK] Reimplement elevator switch This patch reimplements elevator switch. This patch assumes generic dispatch queue patchset is applied. * Each request is tagged with REQ_ELVPRIV flag if it has its elevator private data set. * Requests which doesn't have REQ_ELVPRIV flag set never enter iosched. They are always directly back inserted to dispatch queue. Of course, elevator_put_req_fn is called only for requests which have its REQ_ELVPRIV set. * Request queue maintains the current number of requests which have its elevator data set (elevator_set_req_fn called) in q->rq->elvpriv. * If a request queue has QUEUE_FLAG_BYPASS set, elevator private data is not allocated for new requests. To switch to another iosched, we set QUEUE_FLAG_BYPASS and wait until elvpriv goes to zero; then, we attach the new iosched and clears QUEUE_FLAG_BYPASS. New implementation is much simpler and main code paths are less cluttered, IMHO. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/elevator.c | 78 ++++++++++++------------- drivers/block/ll_rw_blk.c | 142 +++++++++------------------------------------- include/linux/blkdev.h | 10 ++-- 3 files changed, 66 insertions(+), 164 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index af2388e73f61..272d93946621 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -131,11 +132,7 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e, eq->ops = &e->ops; eq->elevator_type = e; - INIT_LIST_HEAD(&q->queue_head); - q->last_merge = NULL; q->elevator = eq; - q->end_sector = 0; - q->boundary_rq = NULL; if (eq->ops->elevator_init_fn) ret = eq->ops->elevator_init_fn(q, eq); @@ -184,6 +181,12 @@ int elevator_init(request_queue_t *q, char *name) struct elevator_queue *eq; int ret = 0; + INIT_LIST_HEAD(&q->queue_head); + q->last_merge = NULL; + q->end_sector = 0; + q->boundary_rq = NULL; + q->max_back_kb = 0; + elevator_setup_default(); if (!name) @@ -336,23 +339,14 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } - } + } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) + where = ELEVATOR_INSERT_BACK; if (plug) blk_plug_device(q); rq->q = q; - if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) { - /* - * if drain is set, store the request "locally". when the drain - * is finished, the requests will be handed ordered to the io - * scheduler - */ - list_add_tail(&rq->queuelist, &q->drain_list); - return; - } - switch (where) { case ELEVATOR_INSERT_FRONT: rq->flags |= REQ_SOFTBARRIER; @@ -659,25 +653,36 @@ EXPORT_SYMBOL_GPL(elv_unregister); * switch to new_e io scheduler. be careful not to introduce deadlocks - * we don't free the old io scheduler, before we have allocated what we * need for the new one. this way we have a chance of going back to the old - * one, if the new one fails init for some reason. we also do an intermediate - * switch to noop to ensure safety with stack-allocated requests, since they - * don't originate from the block layer allocator. noop is safe here, because - * it never needs to touch the elevator itself for completion events. DRAIN - * flags will make sure we don't touch it for additions either. + * one, if the new one fails init for some reason. */ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) { - elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL); - struct elevator_type *noop_elevator = NULL; - elevator_t *old_elevator; + elevator_t *old_elevator, *e; + /* + * Allocate new elevator + */ + e = kmalloc(sizeof(elevator_t), GFP_KERNEL); if (!e) goto error; /* - * first step, drain requests from the block freelist + * Turn on BYPASS and drain all requests w/ elevator private data */ - blk_wait_queue_drained(q, 0); + spin_lock_irq(q->queue_lock); + + set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + + while (q->elevator->ops->elevator_dispatch_fn(q, 1)) + ; + + while (q->rq.elvpriv) { + spin_unlock_irq(q->queue_lock); + msleep(100); + spin_lock_irq(q->queue_lock); + } + + spin_unlock_irq(q->queue_lock); /* * unregister old elevator data @@ -685,18 +690,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) elv_unregister_queue(q); old_elevator = q->elevator; - /* - * next step, switch to noop since it uses no private rq structures - * and doesn't allocate any memory for anything. then wait for any - * non-fs requests in-flight - */ - noop_elevator = elevator_get("noop"); - spin_lock_irq(q->queue_lock); - elevator_attach(q, noop_elevator, e); - spin_unlock_irq(q->queue_lock); - - blk_wait_queue_drained(q, 1); - /* * attach and start new elevator */ @@ -707,11 +700,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) goto fail_register; /* - * finally exit old elevator and start queue again + * finally exit old elevator and turn off BYPASS. */ elevator_exit(old_elevator); - blk_finish_queue_drain(q); - elevator_put(noop_elevator); + clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); return; fail_register: @@ -720,13 +712,13 @@ fail_register: * one again (along with re-adding the sysfs dir) */ elevator_exit(e); + e = NULL; fail: q->elevator = old_elevator; elv_register_queue(q); - blk_finish_queue_drain(q); + clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + kfree(e); error: - if (noop_elevator) - elevator_put(noop_elevator); elevator_put(new_e); printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name); } diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index d2a66fd309c3..f7c9931cb380 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); blk_queue_activity_fn(q, NULL, NULL); - - INIT_LIST_HEAD(&q->drain_list); } EXPORT_SYMBOL(blk_queue_make_request); @@ -1050,6 +1048,7 @@ static char *rq_flags[] = { "REQ_STARTED", "REQ_DONTPREP", "REQ_QUEUED", + "REQ_ELVPRIV", "REQ_PC", "REQ_BLOCK_PC", "REQ_SENSE", @@ -1640,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q) rl->count[READ] = rl->count[WRITE] = 0; rl->starved[READ] = rl->starved[WRITE] = 0; + rl->elvpriv = 0; init_waitqueue_head(&rl->wait[READ]); init_waitqueue_head(&rl->wait[WRITE]); - init_waitqueue_head(&rl->drain); rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep, q->node); @@ -1785,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(request_queue_t *q, struct request *rq) { - elv_put_request(q, rq); + if (rq->flags & REQ_ELVPRIV) + elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); } static inline struct request * -blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) +blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, + int priv, int gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -1803,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) */ rq->flags = rw; - if (!elv_set_request(q, rq, bio, gfp_mask)) - return rq; + if (priv) { + if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { + mempool_free(rq, q->rq.rq_pool); + return NULL; + } + rq->flags |= REQ_ELVPRIV; + } - mempool_free(rq, q->rq.rq_pool); - return NULL; + return rq; } /* @@ -1863,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. */ -static void freed_request(request_queue_t *q, int rw) +static void freed_request(request_queue_t *q, int rw, int priv) { struct request_list *rl = &q->rq; rl->count[rw]--; + if (priv) + rl->elvpriv--; __freed_request(q, rw); if (unlikely(rl->starved[rw ^ 1])) __freed_request(q, rw ^ 1); - - if (!rl->count[READ] && !rl->count[WRITE]) { - smp_mb(); - if (unlikely(waitqueue_active(&rl->drain))) - wake_up(&rl->drain); - } } #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) @@ -1893,9 +1894,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, struct request *rq = NULL; struct request_list *rl = &q->rq; struct io_context *ioc = current_io_context(GFP_ATOMIC); - - if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) - goto out; + int priv; if (rl->count[rw]+1 >= q->nr_requests) { /* @@ -1940,9 +1939,14 @@ get_rq: rl->starved[rw] = 0; if (rl->count[rw] >= queue_congestion_on_threshold(q)) set_queue_congested(q, rw); + + priv = !test_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + if (priv) + rl->elvpriv++; + spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw, bio, gfp_mask); + rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); if (!rq) { /* * Allocation failed presumably due to memory. Undo anything @@ -1952,7 +1956,7 @@ get_rq: * wait queue, but this is pretty rare. */ spin_lock_irq(q->queue_lock); - freed_request(q, rw); + freed_request(q, rw, priv); /* * in the very unlikely event that allocation failed and no @@ -2470,11 +2474,12 @@ static void __blk_put_request(request_queue_t *q, struct request *req) */ if (rl) { int rw = rq_data_dir(req); + int priv = req->flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); blk_free_request(q, req); - freed_request(q, rw); + freed_request(q, rw, priv); } } @@ -2802,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio) } } -void blk_finish_queue_drain(request_queue_t *q) -{ - struct request_list *rl = &q->rq; - struct request *rq; - int requeued = 0; - - spin_lock_irq(q->queue_lock); - clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); - - while (!list_empty(&q->drain_list)) { - rq = list_entry_rq(q->drain_list.next); - - list_del_init(&rq->queuelist); - elv_requeue_request(q, rq); - requeued++; - } - - if (requeued) - q->request_fn(q); - - spin_unlock_irq(q->queue_lock); - - wake_up(&rl->wait[0]); - wake_up(&rl->wait[1]); - wake_up(&rl->drain); -} - -static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch) -{ - int wait = rl->count[READ] + rl->count[WRITE]; - - if (dispatch) - wait += !list_empty(&q->queue_head); - - return wait; -} - -/* - * We rely on the fact that only requests allocated through blk_alloc_request() - * have io scheduler private data structures associated with them. Any other - * type of request (allocated on stack or through kmalloc()) should not go - * to the io scheduler core, but be attached to the queue head instead. - */ -void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch) -{ - struct request_list *rl = &q->rq; - DEFINE_WAIT(wait); - - spin_lock_irq(q->queue_lock); - set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); - - while (wait_drain(q, rl, wait_dispatch)) { - prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE); - - if (wait_drain(q, rl, wait_dispatch)) { - __generic_unplug_device(q); - spin_unlock_irq(q->queue_lock); - io_schedule(); - spin_lock_irq(q->queue_lock); - } - - finish_wait(&rl->drain, &wait); - } - - spin_unlock_irq(q->queue_lock); -} - -/* - * block waiting for the io scheduler being started again. - */ -static inline void block_wait_queue_running(request_queue_t *q) -{ - DEFINE_WAIT(wait); - - while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) { - struct request_list *rl = &q->rq; - - prepare_to_wait_exclusive(&rl->drain, &wait, - TASK_UNINTERRUPTIBLE); - - /* - * re-check the condition. avoids using prepare_to_wait() - * in the fast path (queue is running) - */ - if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) - io_schedule(); - - finish_wait(&rl->drain, &wait); - } -} - static void handle_bad_sector(struct bio *bio) { char b[BDEVNAME_SIZE]; @@ -2988,8 +2902,6 @@ end_io: if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) goto end_io; - block_wait_queue_running(q); - /* * If this device has partitions, remap block n * of partition p to block n+start(p) of the disk. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 159dbcd2eb59..6186d5e2110f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -107,9 +107,9 @@ typedef void (rq_end_io_fn)(struct request *); struct request_list { int count[2]; int starved[2]; + int elvpriv; mempool_t *rq_pool; wait_queue_head_t wait[2]; - wait_queue_head_t drain; }; #define BLK_MAX_CDB 16 @@ -211,6 +211,7 @@ enum rq_flag_bits { __REQ_STARTED, /* drive already may have started this one */ __REQ_DONTPREP, /* don't call prep for this one */ __REQ_QUEUED, /* uses queueing */ + __REQ_ELVPRIV, /* elevator private data attached */ /* * for ATA/ATAPI devices */ @@ -244,6 +245,7 @@ enum rq_flag_bits { #define REQ_STARTED (1 << __REQ_STARTED) #define REQ_DONTPREP (1 << __REQ_DONTPREP) #define REQ_QUEUED (1 << __REQ_QUEUED) +#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) #define REQ_PC (1 << __REQ_PC) #define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC) #define REQ_SENSE (1 << __REQ_SENSE) @@ -413,8 +415,6 @@ struct request_queue unsigned int sg_reserved_size; int node; - struct list_head drain_list; - /* * reserved for flush operations */ @@ -442,7 +442,7 @@ enum { #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ -#define QUEUE_FLAG_DRAIN 8 /* draining queue for sched switch */ +#define QUEUE_FLAG_BYPASS 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */ #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) @@ -668,8 +668,6 @@ extern void blk_dump_rq_flags(struct request *, char *); extern void generic_unplug_device(request_queue_t *); extern void __generic_unplug_device(request_queue_t *); extern long nr_blockdev_pages(void); -extern void blk_wait_queue_drained(request_queue_t *, int); -extern void blk_finish_queue_drain(request_queue_t *); int blk_get_queue(request_queue_t *); request_queue_t *blk_alloc_queue(int gfp_mask); -- cgit v1.2.3 From 64521d1a3ba7cc7fe4ab5640da83d88144f05340 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Oct 2005 08:30:39 +0200 Subject: [BLOCK] elevator switch fixes/cleanup - 100msec sleep is a little excessive, lots of requests can complete in that timeframe. Use 10msec instead. - Rename QUEUE_FLAG_BYPASS to QUEUE_FLAG_ELVSWITCH to indicate what is going on. Signed-off-by: Jens Axboe --- drivers/block/elevator.c | 8 ++++---- drivers/block/ll_rw_blk.c | 2 +- include/linux/blkdev.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 272d93946621..cf9bedbb761b 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -671,14 +671,14 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) */ spin_lock_irq(q->queue_lock); - set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); while (q->elevator->ops->elevator_dispatch_fn(q, 1)) ; while (q->rq.elvpriv) { spin_unlock_irq(q->queue_lock); - msleep(100); + msleep(10); spin_lock_irq(q->queue_lock); } @@ -703,7 +703,7 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) * finally exit old elevator and turn off BYPASS. */ elevator_exit(old_elevator); - clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); return; fail_register: @@ -716,7 +716,7 @@ fail_register: fail: q->elevator = old_elevator; elv_register_queue(q); - clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); kfree(e); error: elevator_put(new_e); diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index f7c9931cb380..fc12d019b19d 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1940,7 +1940,7 @@ get_rq: if (rl->count[rw] >= queue_congestion_on_threshold(q)) set_queue_congested(q, rw); - priv = !test_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); if (priv) rl->elvpriv++; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6186d5e2110f..7d9d14d6d8f9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -442,7 +442,7 @@ enum { #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ -#define QUEUE_FLAG_BYPASS 8 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */ #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) -- cgit v1.2.3 From af4ca457eaf2d6682059c18463eb106e2ce58198 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 02:55:38 -0400 Subject: [PATCH] gfp_t: infrastructure Beginning of gfp_t annotations: - -Wbitwise added to CHECKFLAGS - old __bitwise renamed to __bitwise__ - __bitwise defined to either __bitwise__ or nothing, depending on __CHECK_ENDIAN__ being defined - gfp_t switched from __nocast to __bitwise__ - force cast to gfp_t added to __GFP_... constants - new helper - gfp_zone(); extracts zone bits out of gfp_t value and casts the result to int Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- Makefile | 2 +- fs/buffer.c | 2 +- include/linux/gfp.h | 39 ++++++++++++++++++++------------------- include/linux/types.h | 9 +++++++-- mm/mempolicy.c | 6 +++--- mm/page_alloc.c | 4 ++-- 6 files changed, 34 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/Makefile b/Makefile index 1fa7e5343464..f1d121f23025 100644 --- a/Makefile +++ b/Makefile @@ -334,7 +334,7 @@ KALLSYMS = scripts/kallsyms PERL = perl CHECK = sparse -CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ $(CF) +CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF) MODFLAGS = -DMODULE CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) diff --git a/fs/buffer.c b/fs/buffer.c index 1216c0d3c8ce..9657696fd6d7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -502,7 +502,7 @@ static void free_more_memory(void) yield(); for_each_pgdat(pgdat) { - zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones; + zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; if (*zones) try_to_free_pages(zones, GFP_NOFS); } diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 3010e172394d..c3779432a723 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -12,8 +12,8 @@ struct vm_area_struct; * GFP bitmasks.. */ /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ -#define __GFP_DMA 0x01u -#define __GFP_HIGHMEM 0x02u +#define __GFP_DMA ((__force gfp_t)0x01u) +#define __GFP_HIGHMEM ((__force gfp_t)0x02u) /* * Action modifiers - doesn't change the zoning @@ -26,24 +26,24 @@ struct vm_area_struct; * * __GFP_NORETRY: The VM implementation must not retry indefinitely. */ -#define __GFP_WAIT 0x10u /* Can wait and reschedule? */ -#define __GFP_HIGH 0x20u /* Should access emergency pools? */ -#define __GFP_IO 0x40u /* Can start physical IO? */ -#define __GFP_FS 0x80u /* Can call down to low-level FS? */ -#define __GFP_COLD 0x100u /* Cache-cold page required */ -#define __GFP_NOWARN 0x200u /* Suppress page allocation failure warning */ -#define __GFP_REPEAT 0x400u /* Retry the allocation. Might fail */ -#define __GFP_NOFAIL 0x800u /* Retry for ever. Cannot fail */ -#define __GFP_NORETRY 0x1000u /* Do not retry. Might fail */ -#define __GFP_NO_GROW 0x2000u /* Slab internal usage */ -#define __GFP_COMP 0x4000u /* Add compound page metadata */ -#define __GFP_ZERO 0x8000u /* Return zeroed page on success */ -#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ -#define __GFP_NORECLAIM 0x20000u /* No realy zone reclaim during allocation */ -#define __GFP_HARDWALL 0x40000u /* Enforce hardwall cpuset memory allocs */ +#define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */ +#define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */ +#define __GFP_IO ((__force gfp_t)0x40u) /* Can start physical IO? */ +#define __GFP_FS ((__force gfp_t)0x80u) /* Can call down to low-level FS? */ +#define __GFP_COLD ((__force gfp_t)0x100u) /* Cache-cold page required */ +#define __GFP_NOWARN ((__force gfp_t)0x200u) /* Suppress page allocation failure warning */ +#define __GFP_REPEAT ((__force gfp_t)0x400u) /* Retry the allocation. Might fail */ +#define __GFP_NOFAIL ((__force gfp_t)0x800u) /* Retry for ever. Cannot fail */ +#define __GFP_NORETRY ((__force gfp_t)0x1000u)/* Do not retry. Might fail */ +#define __GFP_NO_GROW ((__force gfp_t)0x2000u)/* Slab internal usage */ +#define __GFP_COMP ((__force gfp_t)0x4000u)/* Add compound page metadata */ +#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ +#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ +#define __GFP_NORECLAIM ((__force gfp_t)0x20000u) /* No realy zone reclaim during allocation */ +#define __GFP_HARDWALL ((__force gfp_t)0x40000u) /* Enforce hardwall cpuset memory allocs */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ -#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) +#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* if you forget to add the bitmask here kernel will crash, period */ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ @@ -64,6 +64,7 @@ struct vm_area_struct; #define GFP_DMA __GFP_DMA +#define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK)) /* * There is only one page-allocator function, and two main namespaces to @@ -94,7 +95,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, return NULL; return __alloc_pages(gfp_mask, order, - NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK)); + NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask)); } #ifdef CONFIG_NUMA diff --git a/include/linux/types.h b/include/linux/types.h index 0aee34f9da9f..21b9ce803644 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -151,7 +151,12 @@ typedef unsigned long sector_t; */ #ifdef __CHECKER__ -#define __bitwise __attribute__((bitwise)) +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ +#endif +#ifdef __CHECK_ENDIAN__ +#define __bitwise __bitwise__ #else #define __bitwise #endif @@ -166,7 +171,7 @@ typedef __u64 __bitwise __be64; #endif #ifdef __KERNEL__ -typedef unsigned __nocast gfp_t; +typedef unsigned __bitwise__ gfp_t; #endif struct ustat { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 37af443eb094..1d5c64df1653 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -700,7 +700,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy) case MPOL_BIND: /* Lower zones don't get a policy applied */ /* Careful: current->mems_allowed might have moved */ - if ((gfp & GFP_ZONEMASK) >= policy_zone) + if (gfp_zone(gfp) >= policy_zone) if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist)) return policy->v.zonelist; /*FALL THROUGH*/ @@ -712,7 +712,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy) nd = 0; BUG(); } - return NODE_DATA(nd)->node_zonelists + (gfp & GFP_ZONEMASK); + return NODE_DATA(nd)->node_zonelists + gfp_zone(gfp); } /* Do dynamic interleaving for a process */ @@ -757,7 +757,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, unsigned ni struct page *page; BUG_ON(!node_online(nid)); - zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK); + zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp); page = __alloc_pages(gfp, order, zl); if (page && page_zone(page) == zl->zones[0]) { zone_pcp(zl->zones[0],get_cpu())->interleave_hit++; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e1d3d77f4aee..aa43ae3ab8c9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1089,7 +1089,7 @@ static unsigned int nr_free_zone_pages(int offset) */ unsigned int nr_free_buffer_pages(void) { - return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK); + return nr_free_zone_pages(gfp_zone(GFP_USER)); } /* @@ -1097,7 +1097,7 @@ unsigned int nr_free_buffer_pages(void) */ unsigned int nr_free_pagecache_pages(void) { - return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK); + return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); } #ifdef CONFIG_HIGHMEM -- cgit v1.2.3 From 6daa0e28627abf362138244a620a821a9027d816 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:18:50 -0400 Subject: [PATCH] gfp_t: mm/* (easy parts) Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- include/linux/pagemap.h | 6 +++--- include/linux/slab.h | 2 +- include/linux/swap.h | 4 ++-- mm/filemap.c | 8 ++++---- mm/mempool.c | 2 +- mm/shmem.c | 4 ++-- mm/slab.c | 8 ++++---- mm/vmscan.c | 8 ++++---- 9 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 097b3a3c693d..e1649578fb0c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -747,7 +747,7 @@ extern unsigned long do_mremap(unsigned long addr, * The callback will be passed nr_to_scan == 0 when the VM is querying the * cache size, so a fastpath for that case is appropriate. */ -typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask); +typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask); /* * Add an aging callback. The int is the number of 'seeks' it takes diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index acbf31c154f8..efbae53fb078 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -69,7 +69,7 @@ extern struct page * find_lock_page(struct address_space *mapping, extern struct page * find_trylock_page(struct address_space *mapping, unsigned long index); extern struct page * find_or_create_page(struct address_space *mapping, - unsigned long index, unsigned int gfp_mask); + unsigned long index, gfp_t gfp_mask); unsigned find_get_pages(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, @@ -92,9 +92,9 @@ extern int read_cache_pages(struct address_space *mapping, struct list_head *pages, filler_t *filler, void *data); int add_to_page_cache(struct page *page, struct address_space *mapping, - unsigned long index, int gfp_mask); + unsigned long index, gfp_t gfp_mask); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - unsigned long index, int gfp_mask); + unsigned long index, gfp_t gfp_mask); extern void remove_from_page_cache(struct page *page); extern void __remove_from_page_cache(struct page *page); diff --git a/include/linux/slab.h b/include/linux/slab.h index 5fc04a16ecb0..09b9aa60063d 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -121,7 +121,7 @@ extern unsigned int ksize(const void *); extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); extern void *kmalloc_node(size_t size, gfp_t flags, int node); #else -static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) +static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) { return kmem_cache_alloc(cachep, flags); } diff --git a/include/linux/swap.h b/include/linux/swap.h index a7bf1a3b1496..20c975642cab 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -171,8 +171,8 @@ extern int rotate_reclaimable_page(struct page *page); extern void swap_setup(void); /* linux/mm/vmscan.c */ -extern int try_to_free_pages(struct zone **, unsigned int); -extern int zone_reclaim(struct zone *, unsigned int, unsigned int); +extern int try_to_free_pages(struct zone **, gfp_t); +extern int zone_reclaim(struct zone *, gfp_t, unsigned int); extern int shrink_all_memory(int); extern int vm_swappiness; diff --git a/mm/filemap.c b/mm/filemap.c index b5346576e58d..1c31b2fd2ca5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -377,7 +377,7 @@ int filemap_write_and_wait_range(struct address_space *mapping, * This function does not add the page to the LRU. The caller must do that. */ int add_to_page_cache(struct page *page, struct address_space *mapping, - pgoff_t offset, int gfp_mask) + pgoff_t offset, gfp_t gfp_mask) { int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); @@ -401,7 +401,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, EXPORT_SYMBOL(add_to_page_cache); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t offset, int gfp_mask) + pgoff_t offset, gfp_t gfp_mask) { int ret = add_to_page_cache(page, mapping, offset, gfp_mask); if (ret == 0) @@ -591,7 +591,7 @@ EXPORT_SYMBOL(find_lock_page); * memory exhaustion. */ struct page *find_or_create_page(struct address_space *mapping, - unsigned long index, unsigned int gfp_mask) + unsigned long index, gfp_t gfp_mask) { struct page *page, *cached_page = NULL; int err; @@ -683,7 +683,7 @@ struct page * grab_cache_page_nowait(struct address_space *mapping, unsigned long index) { struct page *page = find_get_page(mapping, index); - unsigned int gfp_mask; + gfp_t gfp_mask; if (page) { if (!TestSetPageLocked(page)) diff --git a/mm/mempool.c b/mm/mempool.c index 9e377ea700b2..1a99b80480d3 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -205,7 +205,7 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask) void *element; unsigned long flags; wait_queue_t wait; - unsigned int gfp_temp; + gfp_t gfp_temp; might_sleep_if(gfp_mask & __GFP_WAIT); diff --git a/mm/shmem.c b/mm/shmem.c index ea064d89cda9..55e04a0734c1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -85,7 +85,7 @@ enum sgp_type { static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp, int *type); -static inline struct page *shmem_dir_alloc(unsigned int gfp_mask) +static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) { /* * The above definition of ENTRIES_PER_PAGE, and the use of @@ -898,7 +898,7 @@ struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry, } static struct page * -shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info, +shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, unsigned long idx) { struct vm_area_struct pvma; diff --git a/mm/slab.c b/mm/slab.c index d05c678bceb3..d30423f167a2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -386,7 +386,7 @@ struct kmem_cache_s { unsigned int gfporder; /* force GFP flags, e.g. GFP_DMA */ - unsigned int gfpflags; + gfp_t gfpflags; size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ @@ -2117,7 +2117,7 @@ static void cache_init_objs(kmem_cache_t *cachep, slabp->free = 0; } -static void kmem_flagcheck(kmem_cache_t *cachep, unsigned int flags) +static void kmem_flagcheck(kmem_cache_t *cachep, gfp_t flags) { if (flags & SLAB_DMA) { if (!(cachep->gfpflags & GFP_DMA)) @@ -2152,7 +2152,7 @@ static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) struct slab *slabp; void *objp; size_t offset; - unsigned int local_flags; + gfp_t local_flags; unsigned long ctor_flags; struct kmem_list3 *l3; @@ -2546,7 +2546,7 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) /* * A interface to enable slab creation on nodeid */ -static void *__cache_alloc_node(kmem_cache_t *cachep, int flags, int nodeid) +static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) { struct list_head *entry; struct slab *slabp; diff --git a/mm/vmscan.c b/mm/vmscan.c index 64f9570cff56..843c87d1e61f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -70,7 +70,7 @@ struct scan_control { unsigned int priority; /* This context's GFP mask */ - unsigned int gfp_mask; + gfp_t gfp_mask; int may_writepage; @@ -186,7 +186,7 @@ EXPORT_SYMBOL(remove_shrinker); * * Returns the number of slab objects which we shrunk. */ -static int shrink_slab(unsigned long scanned, unsigned int gfp_mask, +static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) { struct shrinker *shrinker; @@ -926,7 +926,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc) * holds filesystem locks which prevent writeout this might not work, and the * allocation attempt will fail. */ -int try_to_free_pages(struct zone **zones, unsigned int gfp_mask) +int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) { int priority; int ret = 0; @@ -1338,7 +1338,7 @@ module_init(kswapd_init) /* * Try to free up some pages from this zone through reclaim. */ -int zone_reclaim(struct zone *zone, unsigned int gfp_mask, unsigned int order) +int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) { struct scan_control sc; int nr_pages = 1 << order; -- cgit v1.2.3 From fd4f2df24bc23e6b8fc069765b425c7dacf52347 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:18:50 -0400 Subject: [PATCH] gfp_t: lib/* Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/idr.h | 2 +- include/linux/kobject.h | 2 +- include/linux/radix-tree.h | 2 +- include/linux/textsearch.h | 4 ++-- lib/idr.c | 2 +- lib/kobject.c | 2 +- lib/kobject_uevent.c | 4 ++-- lib/textsearch.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 3d5de45f961b..7fb3ff9c7b0e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -71,7 +71,7 @@ struct idr { */ void *idr_find(struct idr *idp, int id); -int idr_pre_get(struct idr *idp, unsigned gfp_mask); +int idr_pre_get(struct idr *idp, gfp_t gfp_mask); int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); void idr_remove(struct idr *idp, int id); diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 3b22304f12fd..7f7403aa4a41 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -65,7 +65,7 @@ extern void kobject_unregister(struct kobject *); extern struct kobject * kobject_get(struct kobject *); extern void kobject_put(struct kobject *); -extern char * kobject_get_path(struct kobject *, int); +extern char * kobject_get_path(struct kobject *, gfp_t); struct kobj_type { void (*release)(struct kobject *); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 045d4761febc..9f0f9281f42a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -24,7 +24,7 @@ struct radix_tree_root { unsigned int height; - unsigned int gfp_mask; + gfp_t gfp_mask; struct radix_tree_node *rnode; }; diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 515046d1b2f4..fc5bb4e91a58 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -40,7 +40,7 @@ struct ts_state struct ts_ops { const char *name; - struct ts_config * (*init)(const void *, unsigned int, int); + struct ts_config * (*init)(const void *, unsigned int, gfp_t); unsigned int (*find)(struct ts_config *, struct ts_state *); void (*destroy)(struct ts_config *); @@ -148,7 +148,7 @@ static inline unsigned int textsearch_get_pattern_len(struct ts_config *conf) extern int textsearch_register(struct ts_ops *); extern int textsearch_unregister(struct ts_ops *); extern struct ts_config *textsearch_prepare(const char *, const void *, - unsigned int, int, int); + unsigned int, gfp_t, int); extern void textsearch_destroy(struct ts_config *conf); extern unsigned int textsearch_find_continuous(struct ts_config *, struct ts_state *, diff --git a/lib/idr.c b/lib/idr.c index d4df21debc4d..6414b2fb482d 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -72,7 +72,7 @@ static void free_layer(struct idr *idp, struct idr_layer *p) * If the system is REALLY out of memory this function returns 0, * otherwise 1. */ -int idr_pre_get(struct idr *idp, unsigned gfp_mask) +int idr_pre_get(struct idr *idp, gfp_t gfp_mask) { while (idp->id_free_cnt < IDR_FREE_MAX) { struct idr_layer *new; diff --git a/lib/kobject.c b/lib/kobject.c index dd0917dd9fa9..253d3004ace9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -100,7 +100,7 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) * @kobj: kobject in question, with which to build the path * @gfp_mask: the allocation type used to allocate the path */ -char *kobject_get_path(struct kobject *kobj, int gfp_mask) +char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) { char *path; int len; diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 04ca4429ddfa..7ef6f6a17aa6 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -62,7 +62,7 @@ static struct sock *uevent_sock; * @gfp_mask: */ static int send_uevent(const char *signal, const char *obj, - char **envp, int gfp_mask) + char **envp, gfp_t gfp_mask) { struct sk_buff *skb; char *pos; @@ -98,7 +98,7 @@ static int send_uevent(const char *signal, const char *obj, } static int do_kobject_uevent(struct kobject *kobj, enum kobject_action action, - struct attribute *attr, int gfp_mask) + struct attribute *attr, gfp_t gfp_mask) { char *path; char *attrpath; diff --git a/lib/textsearch.c b/lib/textsearch.c index 1e934c196f0f..6f3093efbd7b 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -254,7 +254,7 @@ unsigned int textsearch_find_continuous(struct ts_config *conf, * parameters or a ERR_PTR(). */ struct ts_config *textsearch_prepare(const char *algo, const void *pattern, - unsigned int len, int gfp_mask, int flags) + unsigned int len, gfp_t gfp_mask, int flags) { int err = -ENOENT; struct ts_config *conf; -- cgit v1.2.3 From 7d877f3bda870ab5f001bd92528654471d5966b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:20:43 -0400 Subject: [PATCH] gfp_t: net/* Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/security.h | 2 +- include/net/sock.h | 2 +- net/core/sock.c | 2 +- net/dccp/output.c | 2 +- net/netlink/af_netlink.c | 2 +- security/dummy.c | 2 +- security/selinux/hooks.c | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 627382e74057..dac956ed98f0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1210,7 +1210,7 @@ struct security_operations { int (*socket_shutdown) (struct socket * sock, int how); int (*socket_sock_rcv_skb) (struct sock * sk, struct sk_buff * skb); int (*socket_getpeersec) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len); - int (*sk_alloc_security) (struct sock *sk, int family, int priority); + int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ }; diff --git a/include/net/sock.h b/include/net/sock.h index ecb75526cba0..e0498bd36004 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -207,7 +207,7 @@ struct sock { struct sk_buff_head sk_write_queue; int sk_wmem_queued; int sk_forward_alloc; - unsigned int sk_allocation; + gfp_t sk_allocation; int sk_sndbuf; int sk_route_caps; unsigned long sk_flags; diff --git a/net/core/sock.c b/net/core/sock.c index 1c52fe809eda..9602ceb3bac9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -940,7 +940,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, int noblock, int *errcode) { struct sk_buff *skb; - unsigned int gfp_mask; + gfp_t gfp_mask; long timeo; int err; diff --git a/net/dccp/output.c b/net/dccp/output.c index 29250749f16f..d59f86f7ceab 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -495,7 +495,7 @@ void dccp_send_close(struct sock *sk, const int active) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC; + const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC; skb = alloc_skb(sk->sk_prot->max_header, prio); if (skb == NULL) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 678c3f2c0d0b..291df2e4c492 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -827,7 +827,7 @@ struct netlink_broadcast_data { int failure; int congested; int delivered; - unsigned int allocation; + gfp_t allocation; struct sk_buff *skb, *skb2; }; diff --git a/security/dummy.c b/security/dummy.c index 9623a61dfc76..3d34f3de7e82 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -768,7 +768,7 @@ static int dummy_socket_getpeersec(struct socket *sock, char __user *optval, return -ENOPROTOOPT; } -static inline int dummy_sk_alloc_security (struct sock *sk, int family, int priority) +static inline int dummy_sk_alloc_security (struct sock *sk, int family, gfp_t priority) { return 0; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b13be15165f5..447a1e0f48cb 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -262,7 +262,7 @@ static void superblock_free_security(struct super_block *sb) } #ifdef CONFIG_SECURITY_NETWORK -static int sk_alloc_security(struct sock *sk, int family, int priority) +static int sk_alloc_security(struct sock *sk, int family, gfp_t priority) { struct sk_security_struct *ssec; @@ -3380,7 +3380,7 @@ out: return err; } -static int selinux_sk_alloc_security(struct sock *sk, int family, int priority) +static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) { return sk_alloc_security(sk, family, priority); } -- cgit v1.2.3 From 27496a8c67bef4d789d8e3c8317ca35813a507ae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:20:48 -0400 Subject: [PATCH] gfp_t: fs/* - ->releasepage() annotated (s/int/gfp_t), instances updated - missing gfp_t in fs/* added - fixed misannotation from the original sweep caught by bitwise checks: XFS used __nocast both for gfp_t and for flags used by XFS allocator. The latter left with unsigned int __nocast; we might want to add a different type for those but for now let's leave them alone. That, BTW, is a case when __nocast use had been actively confusing - it had been used in the same code for two different and similar types, with no way to catch misuses. Switch of gfp_t to bitwise had caught that immediately... One tricky bit is left alone to be dealt with later - mapping->flags is a mix of gfp_t and error indications. Left alone for now. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/afs/file.c | 4 ++-- fs/bio.c | 4 ++-- fs/buffer.c | 2 +- fs/dcache.c | 2 +- fs/dquot.c | 2 +- fs/ext3/inode.c | 2 +- fs/hfs/inode.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/inode.c | 2 +- fs/jbd/journal.c | 2 +- fs/jbd/transaction.c | 2 +- fs/jfs/jfs_metapage.c | 4 ++-- fs/mbcache.c | 6 +++--- fs/reiserfs/fix_node.c | 2 +- fs/reiserfs/inode.c | 2 +- fs/xfs/linux-2.6/kmem.c | 22 +++++++++++----------- fs/xfs/linux-2.6/kmem.h | 18 +++++++++--------- fs/xfs/linux-2.6/xfs_aops.c | 2 +- fs/xfs/linux-2.6/xfs_buf.c | 8 ++++---- include/linux/bio.h | 2 +- include/linux/buffer_head.h | 2 +- include/linux/fs.h | 2 +- include/linux/jbd.h | 4 ++-- include/linux/mbcache.h | 2 +- include/linux/reiserfs_fs.h | 2 +- 25 files changed, 52 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/fs/afs/file.c b/fs/afs/file.c index 23c125128024..0d576987ec67 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -29,7 +29,7 @@ static int afs_file_release(struct inode *inode, struct file *file); static int afs_file_readpage(struct file *file, struct page *page); static int afs_file_invalidatepage(struct page *page, unsigned long offset); -static int afs_file_releasepage(struct page *page, int gfp_flags); +static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); static ssize_t afs_file_write(struct file *file, const char __user *buf, size_t size, loff_t *off); @@ -279,7 +279,7 @@ static int afs_file_invalidatepage(struct page *page, unsigned long offset) /* * release a page and cleanup its private data */ -static int afs_file_releasepage(struct page *page, int gfp_flags) +static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) { struct cachefs_page *pageio; diff --git a/fs/bio.c b/fs/bio.c index 7d81a93afd48..460554b07ff9 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -778,7 +778,7 @@ static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err) static struct bio *__bio_map_kern(request_queue_t *q, void *data, - unsigned int len, unsigned int gfp_mask) + unsigned int len, gfp_t gfp_mask) { unsigned long kaddr = (unsigned long)data; unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -825,7 +825,7 @@ static struct bio *__bio_map_kern(request_queue_t *q, void *data, * device. Returns an error pointer in case of error. */ struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len, - unsigned int gfp_mask) + gfp_t gfp_mask) { struct bio *bio; diff --git a/fs/buffer.c b/fs/buffer.c index 9657696fd6d7..b1667986442f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1571,7 +1571,7 @@ static inline void discard_buffer(struct buffer_head * bh) * * NOTE: @gfp_mask may go away, and this function may become non-blocking. */ -int try_to_release_page(struct page *page, int gfp_mask) +int try_to_release_page(struct page *page, gfp_t gfp_mask) { struct address_space * const mapping = page->mapping; diff --git a/fs/dcache.c b/fs/dcache.c index fb10386c59be..e90512ed35a4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -689,7 +689,7 @@ void shrink_dcache_anon(struct hlist_head *head) * * In this case we return -1 to tell the caller that we baled. */ -static int shrink_dcache_memory(int nr, unsigned int gfp_mask) +static int shrink_dcache_memory(int nr, gfp_t gfp_mask) { if (nr) { if (!(gfp_mask & __GFP_FS)) diff --git a/fs/dquot.c b/fs/dquot.c index b9732335bcdc..05f3327d64a3 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -500,7 +500,7 @@ static void prune_dqcache(int count) * more memory */ -static int shrink_dqcache_memory(int nr, unsigned int gfp_mask) +static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) { if (nr) { spin_lock(&dq_list_lock); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index b5177c90d6f1..8b38f2232796 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1434,7 +1434,7 @@ static int ext3_invalidatepage(struct page *page, unsigned long offset) return journal_invalidatepage(journal, page, offset); } -static int ext3_releasepage(struct page *page, int wait) +static int ext3_releasepage(struct page *page, gfp_t wait) { journal_t *journal = EXT3_JOURNAL(page->mapping->host); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index f1570b9f9de3..3f680c5675bf 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -46,7 +46,7 @@ static sector_t hfs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, hfs_get_block); } -static int hfs_releasepage(struct page *page, int mask) +static int hfs_releasepage(struct page *page, gfp_t mask) { struct inode *inode = page->mapping->host; struct super_block *sb = inode->i_sb; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d5642705f633..f205773ddfbe 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -40,7 +40,7 @@ static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, hfsplus_get_block); } -static int hfsplus_releasepage(struct page *page, int mask) +static int hfsplus_releasepage(struct page *page, gfp_t mask) { struct inode *inode = page->mapping->host; struct super_block *sb = inode->i_sb; diff --git a/fs/inode.c b/fs/inode.c index f80a79ff156b..7d3316527767 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -475,7 +475,7 @@ static void prune_icache(int nr_to_scan) * This function is passed the number of inodes to scan, and it returns the * total number of remaining possibly-reclaimable inodes. */ -static int shrink_icache_memory(int nr, unsigned int gfp_mask) +static int shrink_icache_memory(int nr, gfp_t gfp_mask) { if (nr) { /* diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 7ae2c4fe506b..e4b516ac4989 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1606,7 +1606,7 @@ int journal_blocks_per_page(struct inode *inode) * Simple support for retrying memory allocations. Introduced to help to * debug different VM deadlock avoidance strategies. */ -void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry) +void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) { return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 49bbc2be3d72..13cb05bf6048 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1621,7 +1621,7 @@ out: * while the data is part of a transaction. Yes? */ int journal_try_to_free_buffers(journal_t *journal, - struct page *page, int unused_gfp_mask) + struct page *page, gfp_t unused_gfp_mask) { struct buffer_head *head; struct buffer_head *bh; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 13d7e3f1feb4..eeb37d70e650 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -198,7 +198,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) } } -static inline struct metapage *alloc_metapage(unsigned int gfp_mask) +static inline struct metapage *alloc_metapage(gfp_t gfp_mask) { return mempool_alloc(metapage_mempool, gfp_mask); } @@ -534,7 +534,7 @@ add_failed: return -EIO; } -static int metapage_releasepage(struct page *page, int gfp_mask) +static int metapage_releasepage(struct page *page, gfp_t gfp_mask) { struct metapage *mp; int busy = 0; diff --git a/fs/mbcache.c b/fs/mbcache.c index b002a088857d..298997f17475 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -116,7 +116,7 @@ mb_cache_indexes(struct mb_cache *cache) * What the mbcache registers as to get shrunk dynamically. */ -static int mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask); +static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); static inline int @@ -140,7 +140,7 @@ __mb_cache_entry_unhash(struct mb_cache_entry *ce) static inline void -__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask) +__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) { struct mb_cache *cache = ce->e_cache; @@ -193,7 +193,7 @@ forget: * Returns the number of objects which are present in the cache. */ static int -mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask) +mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(free_list); struct list_head *l, *ltmp; diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 2706e2adffab..45829889dcdc 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -2022,7 +2022,7 @@ static int get_neighbors(struct tree_balance *p_s_tb, int n_h) } #ifdef CONFIG_REISERFS_CHECK -void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s) +void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s) { void *vp; static size_t malloced; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d76ee6c4f9b8..5f82352b97e1 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2842,7 +2842,7 @@ static int reiserfs_set_page_dirty(struct page *page) * even in -o notail mode, we can't be sure an old mount without -o notail * didn't create files with tails. */ -static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) +static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { struct inode *inode = page->mapping->host; struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index d2653b589b1c..3c92162dc728 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -45,11 +45,11 @@ void * -kmem_alloc(size_t size, gfp_t flags) +kmem_alloc(size_t size, unsigned int __nocast flags) { - int retries = 0; - unsigned int lflags = kmem_flags_convert(flags); - void *ptr; + int retries = 0; + gfp_t lflags = kmem_flags_convert(flags); + void *ptr; do { if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) @@ -67,7 +67,7 @@ kmem_alloc(size_t size, gfp_t flags) } void * -kmem_zalloc(size_t size, gfp_t flags) +kmem_zalloc(size_t size, unsigned int __nocast flags) { void *ptr; @@ -90,7 +90,7 @@ kmem_free(void *ptr, size_t size) void * kmem_realloc(void *ptr, size_t newsize, size_t oldsize, - gfp_t flags) + unsigned int __nocast flags) { void *new; @@ -105,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, } void * -kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags) +kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) { - int retries = 0; - unsigned int lflags = kmem_flags_convert(flags); - void *ptr; + int retries = 0; + gfp_t lflags = kmem_flags_convert(flags); + void *ptr; do { ptr = kmem_cache_alloc(zone, lflags); @@ -124,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, gfp_t flags) +kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) { void *ptr; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index ee7010f085bc..f4bb78c268c0 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline unsigned int kmem_flags_convert(gfp_t flags) +static __inline gfp_t kmem_flags_convert(unsigned int __nocast flags) { - unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ + gfp_t lflags = __GFP_NOWARN; /* we'll report problems, if need be */ #ifdef DEBUG if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { @@ -125,16 +125,16 @@ kmem_zone_destroy(kmem_zone_t *zone) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, gfp_t); -extern void *kmem_zone_alloc(kmem_zone_t *, gfp_t); +extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_alloc(size_t, gfp_t); -extern void *kmem_realloc(void *, size_t, size_t, gfp_t); -extern void *kmem_zalloc(size_t, gfp_t); +extern void *kmem_alloc(size_t, unsigned int __nocast); +extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); +extern void *kmem_zalloc(size_t, unsigned int __nocast); extern void kmem_free(void *, size_t); typedef struct shrinker *kmem_shaker_t; -typedef int (*kmem_shake_func_t)(int, unsigned int); +typedef int (*kmem_shake_func_t)(int, gfp_t); static __inline kmem_shaker_t kmem_shake_register(kmem_shake_func_t sfunc) @@ -149,7 +149,7 @@ kmem_shake_deregister(kmem_shaker_t shrinker) } static __inline int -kmem_shake_allow(unsigned int gfp_mask) +kmem_shake_allow(gfp_t gfp_mask) { return (gfp_mask & __GFP_WAIT); } diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c6c077978fe3..7aa398724706 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1296,7 +1296,7 @@ linvfs_invalidate_page( STATIC int linvfs_release_page( struct page *page, - int gfp_mask) + gfp_t gfp_mask) { struct inode *inode = page->mapping->host; int dirty, delalloc, unmapped, unwritten; diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e82cf72ac599..ba4767c04adf 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -64,7 +64,7 @@ STATIC kmem_cache_t *pagebuf_zone; STATIC kmem_shaker_t pagebuf_shake; -STATIC int xfsbufd_wakeup(int, unsigned int); +STATIC int xfsbufd_wakeup(int, gfp_t); STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); STATIC struct workqueue_struct *xfslogd_workqueue; @@ -383,7 +383,7 @@ _pagebuf_lookup_pages( size_t blocksize = bp->pb_target->pbr_bsize; size_t size = bp->pb_count_desired; size_t nbytes, offset; - int gfp_mask = pb_to_gfp(flags); + gfp_t gfp_mask = pb_to_gfp(flags); unsigned short page_count, i; pgoff_t first; loff_t end; @@ -1749,8 +1749,8 @@ STATIC int xfsbufd_force_sleep; STATIC int xfsbufd_wakeup( - int priority, - unsigned int mask) + int priority, + gfp_t mask) { if (xfsbufd_force_sleep) return 0; diff --git a/include/linux/bio.h b/include/linux/bio.h index 3344b4e8e43a..685fd3720df5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -301,7 +301,7 @@ extern struct bio *bio_map_user_iov(struct request_queue *, struct sg_iovec *, int, int); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, - unsigned int); + gfp_t); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6a1d154c0825..88af42f5e04a 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -188,7 +188,7 @@ extern int buffer_heads_over_limit; * Generic address_space_operations implementations for buffer_head-backed * address_spaces. */ -int try_to_release_page(struct page * page, int gfp_mask); +int try_to_release_page(struct page * page, gfp_t gfp_mask); int block_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); diff --git a/include/linux/fs.h b/include/linux/fs.h index e0b77c5af9a0..f83d997c5582 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -320,7 +320,7 @@ struct address_space_operations { /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); - int (*releasepage) (struct page *, int); + int (*releasepage) (struct page *, gfp_t); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); struct page* (*get_xip_page)(struct address_space *, sector_t, diff --git a/include/linux/jbd.h b/include/linux/jbd.h index ff853b3173c6..be197eb90077 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -69,7 +69,7 @@ extern int journal_enable_debug; #define jbd_debug(f, a...) /**/ #endif -extern void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry); +extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); #define jbd_kmalloc(size, flags) \ __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) #define jbd_rep_kmalloc(size, flags) \ @@ -890,7 +890,7 @@ extern int journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); extern int journal_invalidatepage(journal_t *, struct page *, unsigned long); -extern int journal_try_to_free_buffers(journal_t *, struct page *, int); +extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int journal_stop(handle_t *); extern int journal_flush (journal_t *); extern void journal_lock_updates (journal_t *); diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 9263d2db2d67..99e044b4efc6 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -22,7 +22,7 @@ struct mb_cache_entry { }; struct mb_cache_op { - int (*free)(struct mb_cache_entry *, int); + int (*free)(struct mb_cache_entry *, gfp_t); }; /* Functions on caches */ diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index af00b10294cd..001ab82df051 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1972,7 +1972,7 @@ extern struct address_space_operations reiserfs_address_space_operations; /* fix_nodes.c */ #ifdef CONFIG_REISERFS_CHECK -void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s); +void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s); void reiserfs_kfree(const void *vp, size_t size, struct super_block *s); #else static inline void *reiserfs_kmalloc(size_t size, int flags, -- cgit v1.2.3 From 8267e268e0914ac9371d07f711fcf20cc572993c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:20:53 -0400 Subject: [PATCH] gfp_t: block layer core Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/block/as-iosched.c | 2 +- drivers/block/cfq-iosched.c | 8 ++++---- drivers/block/deadline-iosched.c | 2 +- drivers/block/elevator.c | 2 +- drivers/block/ll_rw_blk.c | 16 ++++++++-------- include/linux/blkdev.h | 14 +++++++------- include/linux/elevator.h | 4 ++-- 7 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c index 95c0a3690b0f..1f08e14697e9 100644 --- a/drivers/block/as-iosched.c +++ b/drivers/block/as-iosched.c @@ -1807,7 +1807,7 @@ static void as_put_request(request_queue_t *q, struct request *rq) } static int as_set_request(request_queue_t *q, struct request *rq, - struct bio *bio, int gfp_mask) + struct bio *bio, gfp_t gfp_mask) { struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c index cd056e7e64ec..d3bfe8cfb039 100644 --- a/drivers/block/cfq-iosched.c +++ b/drivers/block/cfq-iosched.c @@ -1422,7 +1422,7 @@ static void cfq_exit_io_context(struct cfq_io_context *cic) } static struct cfq_io_context * -cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask) +cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) { struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); @@ -1517,7 +1517,7 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) static struct cfq_queue * cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio, - int gfp_mask) + gfp_t gfp_mask) { const int hashval = hash_long(key, CFQ_QHASH_SHIFT); struct cfq_queue *cfqq, *new_cfqq = NULL; @@ -1578,7 +1578,7 @@ out: * cfqq, so we don't need to worry about it disappearing */ static struct cfq_io_context * -cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask) +cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) { struct io_context *ioc = NULL; struct cfq_io_context *cic; @@ -2075,7 +2075,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq) */ static int cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - int gfp_mask) + gfp_t gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c index 52a3ae5289a0..753546ba2262 100644 --- a/drivers/block/deadline-iosched.c +++ b/drivers/block/deadline-iosched.c @@ -756,7 +756,7 @@ static void deadline_put_request(request_queue_t *q, struct request *rq) static int deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - int gfp_mask) + gfp_t gfp_mask) { struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq; diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 98f0126a2deb..c744d2a13062 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -487,7 +487,7 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq) } int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - int gfp_mask) + gfp_t gfp_mask) { elevator_t *e = q->elevator; diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index baedac522945..0f64ee7d8d27 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1652,13 +1652,13 @@ static int blk_init_free_list(request_queue_t *q) static int __make_request(request_queue_t *, struct bio *); -request_queue_t *blk_alloc_queue(int gfp_mask) +request_queue_t *blk_alloc_queue(gfp_t gfp_mask) { return blk_alloc_queue_node(gfp_mask, -1); } EXPORT_SYMBOL(blk_alloc_queue); -request_queue_t *blk_alloc_queue_node(int gfp_mask, int node_id) +request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { request_queue_t *q; @@ -1787,7 +1787,7 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq) } static inline struct request * -blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) +blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -1885,7 +1885,7 @@ static void freed_request(request_queue_t *q, int rw) * Returns !NULL on success, with queue_lock *not held*. */ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, - int gfp_mask) + gfp_t gfp_mask) { struct request *rq = NULL; struct request_list *rl = &q->rq; @@ -2019,7 +2019,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw, return rq; } -struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask) +struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) { struct request *rq; @@ -2251,7 +2251,7 @@ EXPORT_SYMBOL(blk_rq_unmap_user); * @gfp_mask: memory allocation flags */ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, - unsigned int len, unsigned int gfp_mask) + unsigned int len, gfp_t gfp_mask) { struct bio *bio; @@ -3393,7 +3393,7 @@ void exit_io_context(void) * but since the current task itself holds a reference, the context can be * used in general code, so long as it stays within `current` context. */ -struct io_context *current_io_context(int gfp_flags) +struct io_context *current_io_context(gfp_t gfp_flags) { struct task_struct *tsk = current; struct io_context *ret; @@ -3424,7 +3424,7 @@ EXPORT_SYMBOL(current_io_context); * * This is always called in the context of the task which submitted the I/O. */ -struct io_context *get_io_context(int gfp_flags) +struct io_context *get_io_context(gfp_t gfp_flags) { struct io_context *ret; ret = current_io_context(gfp_flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index efdc9b5bc05c..1afbdb2d752c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -96,8 +96,8 @@ struct io_context { void put_io_context(struct io_context *ioc); void exit_io_context(void); -struct io_context *current_io_context(int gfp_flags); -struct io_context *get_io_context(int gfp_flags); +struct io_context *current_io_context(gfp_t gfp_flags); +struct io_context *get_io_context(gfp_t gfp_flags); void copy_io_context(struct io_context **pdst, struct io_context **psrc); void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); @@ -354,7 +354,7 @@ struct request_queue * queue needs bounce pages for pages above this limit */ unsigned long bounce_pfn; - unsigned int bounce_gfp; + gfp_t bounce_gfp; /* * various queue flags, see QUEUE_* below @@ -550,7 +550,7 @@ extern void generic_make_request(struct bio *bio); extern void blk_put_request(struct request *); extern void blk_end_sync_rq(struct request *rq); extern void blk_attempt_remerge(request_queue_t *, struct request *); -extern struct request *blk_get_request(request_queue_t *, int, int); +extern struct request *blk_get_request(request_queue_t *, int, gfp_t); extern void blk_insert_request(request_queue_t *, struct request *, int, void *); extern void blk_requeue_request(request_queue_t *, struct request *); extern void blk_plug_device(request_queue_t *); @@ -565,7 +565,7 @@ extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); extern int blk_rq_unmap_user(struct bio *, unsigned int); -extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); +extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *, int); @@ -654,8 +654,8 @@ extern void blk_wait_queue_drained(request_queue_t *, int); extern void blk_finish_queue_drain(request_queue_t *); int blk_get_queue(request_queue_t *); -request_queue_t *blk_alloc_queue(int gfp_mask); -request_queue_t *blk_alloc_queue_node(int,int); +request_queue_t *blk_alloc_queue(gfp_t); +request_queue_t *blk_alloc_queue_node(gfp_t, int); #define blk_put_queue(q) blk_cleanup_queue((q)) /* diff --git a/include/linux/elevator.h b/include/linux/elevator.h index ea6bbc2d7407..ed93125c1db5 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -18,7 +18,7 @@ typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct re typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); -typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, int); +typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); @@ -98,7 +98,7 @@ extern int elv_register_queue(request_queue_t *q); extern void elv_unregister_queue(request_queue_t *q); extern int elv_may_queue(request_queue_t *, int, struct bio *); extern void elv_completed_request(request_queue_t *, struct request *); -extern int elv_set_request(request_queue_t *, struct request *, struct bio *, int); +extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t); extern void elv_put_request(request_queue_t *, struct request *); /* -- cgit v1.2.3 From 55016f10e31bb15b85d8c500f979dfdceb37d548 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:21:58 -0400 Subject: [PATCH] gfp_t: drivers/usb Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/usb/core/buffer.c | 2 +- drivers/usb/core/hcd.c | 2 +- drivers/usb/core/hcd.h | 8 ++++---- drivers/usb/core/message.c | 2 +- drivers/usb/core/urb.c | 4 ++-- drivers/usb/core/usb.c | 2 +- drivers/usb/gadget/dummy_hcd.c | 8 ++++---- drivers/usb/gadget/ether.c | 22 +++++++++++----------- drivers/usb/gadget/goku_udc.c | 6 +++--- drivers/usb/gadget/lh7a40x_udc.c | 12 ++++++------ drivers/usb/gadget/net2280.c | 6 +++--- drivers/usb/gadget/omap_udc.c | 6 +++--- drivers/usb/gadget/pxa2xx_udc.c | 6 +++--- drivers/usb/gadget/serial.c | 16 ++++++++-------- drivers/usb/gadget/zero.c | 8 ++++---- drivers/usb/host/ehci-hcd.c | 2 +- drivers/usb/host/ehci-mem.c | 6 +++--- drivers/usb/host/ehci-q.c | 6 +++--- drivers/usb/host/ehci-sched.c | 14 +++++++------- drivers/usb/host/isp116x-hcd.c | 2 +- drivers/usb/host/ohci-hcd.c | 2 +- drivers/usb/host/ohci-mem.c | 4 ++-- drivers/usb/host/sl811-hcd.c | 2 +- drivers/usb/host/uhci-q.c | 2 +- drivers/usb/misc/uss720.c | 6 +++--- drivers/usb/net/asix.c | 2 +- drivers/usb/net/gl620a.c | 2 +- drivers/usb/net/kaweth.c | 6 +++--- drivers/usb/net/net1080.c | 2 +- drivers/usb/net/rndis_host.c | 2 +- drivers/usb/net/usbnet.c | 2 +- drivers/usb/net/usbnet.h | 2 +- drivers/usb/net/zaurus.c | 2 +- drivers/usb/net/zd1201.c | 2 +- include/linux/usb.h | 8 ++++---- include/linux/usb_gadget.h | 12 ++++++------ sound/usb/usbmidi.c | 2 +- 37 files changed, 100 insertions(+), 100 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index fc15b4acc8af..57e800ac3cee 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -106,7 +106,7 @@ void hcd_buffer_destroy (struct usb_hcd *hcd) void *hcd_buffer_alloc ( struct usb_bus *bus, size_t size, - unsigned mem_flags, + gfp_t mem_flags, dma_addr_t *dma ) { diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 1017a97a418b..ff19d64041b5 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1112,7 +1112,7 @@ static void urb_unlink (struct urb *urb) * expects usb_submit_urb() to have sanity checked and conditioned all * inputs in the urb */ -static int hcd_submit_urb (struct urb *urb, unsigned mem_flags) +static int hcd_submit_urb (struct urb *urb, gfp_t mem_flags) { int status; struct usb_hcd *hcd = urb->dev->bus->hcpriv; diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index ac451fa7e4d2..1f1ed6211af8 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -142,12 +142,12 @@ struct hcd_timeout { /* timeouts we allocate */ struct usb_operations { int (*get_frame_number) (struct usb_device *usb_dev); - int (*submit_urb) (struct urb *urb, unsigned mem_flags); + int (*submit_urb) (struct urb *urb, gfp_t mem_flags); int (*unlink_urb) (struct urb *urb, int status); /* allocate dma-consistent buffer for URB_DMA_NOMAPPING */ void *(*buffer_alloc)(struct usb_bus *bus, size_t size, - unsigned mem_flags, + gfp_t mem_flags, dma_addr_t *dma); void (*buffer_free)(struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma); @@ -200,7 +200,7 @@ struct hc_driver { int (*urb_enqueue) (struct usb_hcd *hcd, struct usb_host_endpoint *ep, struct urb *urb, - unsigned mem_flags); + gfp_t mem_flags); int (*urb_dequeue) (struct usb_hcd *hcd, struct urb *urb); /* hw synch, freeing endpoint resources that urb_dequeue can't */ @@ -247,7 +247,7 @@ int hcd_buffer_create (struct usb_hcd *hcd); void hcd_buffer_destroy (struct usb_hcd *hcd); void *hcd_buffer_alloc (struct usb_bus *bus, size_t size, - unsigned mem_flags, dma_addr_t *dma); + gfp_t mem_flags, dma_addr_t *dma); void hcd_buffer_free (struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index f1fb67fe22a8..f9a81e84dbdf 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -321,7 +321,7 @@ int usb_sg_init ( struct scatterlist *sg, int nents, size_t length, - unsigned mem_flags + gfp_t mem_flags ) { int i; diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index c846fefb7386..b32898e0a27d 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -60,7 +60,7 @@ void usb_init_urb(struct urb *urb) * * The driver must call usb_free_urb() when it is finished with the urb. */ -struct urb *usb_alloc_urb(int iso_packets, unsigned mem_flags) +struct urb *usb_alloc_urb(int iso_packets, gfp_t mem_flags) { struct urb *urb; @@ -224,7 +224,7 @@ struct urb * usb_get_urb(struct urb *urb) * GFP_NOIO, unless b) or c) apply * */ -int usb_submit_urb(struct urb *urb, unsigned mem_flags) +int usb_submit_urb(struct urb *urb, gfp_t mem_flags) { int pipe, temp, max; struct usb_device *dev; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 7d131509e419..4c57f3f649ed 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -1147,7 +1147,7 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size, void *usb_buffer_alloc ( struct usb_device *dev, size_t size, - unsigned mem_flags, + gfp_t mem_flags, dma_addr_t *dma ) { diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c index 583db7c38cf1..8d9d8ee89554 100644 --- a/drivers/usb/gadget/dummy_hcd.c +++ b/drivers/usb/gadget/dummy_hcd.c @@ -470,7 +470,7 @@ static int dummy_disable (struct usb_ep *_ep) } static struct usb_request * -dummy_alloc_request (struct usb_ep *_ep, unsigned mem_flags) +dummy_alloc_request (struct usb_ep *_ep, gfp_t mem_flags) { struct dummy_ep *ep; struct dummy_request *req; @@ -507,7 +507,7 @@ dummy_alloc_buffer ( struct usb_ep *_ep, unsigned bytes, dma_addr_t *dma, - unsigned mem_flags + gfp_t mem_flags ) { char *retval; struct dummy_ep *ep; @@ -541,7 +541,7 @@ fifo_complete (struct usb_ep *ep, struct usb_request *req) static int dummy_queue (struct usb_ep *_ep, struct usb_request *_req, - unsigned mem_flags) + gfp_t mem_flags) { struct dummy_ep *ep; struct dummy_request *req; @@ -999,7 +999,7 @@ static int dummy_urb_enqueue ( struct usb_hcd *hcd, struct usb_host_endpoint *ep, struct urb *urb, - unsigned mem_flags + gfp_t mem_flags ) { struct dummy *dum; struct urbp *urbp; diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index 49459e33e952..f1024e804d5c 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -945,11 +945,11 @@ config_buf (enum usb_device_speed speed, /*-------------------------------------------------------------------------*/ -static void eth_start (struct eth_dev *dev, unsigned gfp_flags); -static int alloc_requests (struct eth_dev *dev, unsigned n, unsigned gfp_flags); +static void eth_start (struct eth_dev *dev, gfp_t gfp_flags); +static int alloc_requests (struct eth_dev *dev, unsigned n, gfp_t gfp_flags); static int -set_ether_config (struct eth_dev *dev, unsigned gfp_flags) +set_ether_config (struct eth_dev *dev, gfp_t gfp_flags) { int result = 0; struct usb_gadget *gadget = dev->gadget; @@ -1081,7 +1081,7 @@ static void eth_reset_config (struct eth_dev *dev) * that returns config descriptors, and altsetting code. */ static int -eth_set_config (struct eth_dev *dev, unsigned number, unsigned gfp_flags) +eth_set_config (struct eth_dev *dev, unsigned number, gfp_t gfp_flags) { int result = 0; struct usb_gadget *gadget = dev->gadget; @@ -1598,7 +1598,7 @@ static void defer_kevent (struct eth_dev *dev, int flag) static void rx_complete (struct usb_ep *ep, struct usb_request *req); static int -rx_submit (struct eth_dev *dev, struct usb_request *req, unsigned gfp_flags) +rx_submit (struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags) { struct sk_buff *skb; int retval = -ENOMEM; @@ -1724,7 +1724,7 @@ clean: } static int prealloc (struct list_head *list, struct usb_ep *ep, - unsigned n, unsigned gfp_flags) + unsigned n, gfp_t gfp_flags) { unsigned i; struct usb_request *req; @@ -1763,7 +1763,7 @@ extra: return 0; } -static int alloc_requests (struct eth_dev *dev, unsigned n, unsigned gfp_flags) +static int alloc_requests (struct eth_dev *dev, unsigned n, gfp_t gfp_flags) { int status; @@ -1779,7 +1779,7 @@ fail: return status; } -static void rx_fill (struct eth_dev *dev, unsigned gfp_flags) +static void rx_fill (struct eth_dev *dev, gfp_t gfp_flags) { struct usb_request *req; unsigned long flags; @@ -1962,7 +1962,7 @@ drop: * normally just one notification will be queued. */ -static struct usb_request *eth_req_alloc (struct usb_ep *, unsigned, unsigned); +static struct usb_request *eth_req_alloc (struct usb_ep *, unsigned, gfp_t); static void eth_req_free (struct usb_ep *ep, struct usb_request *req); static void @@ -2024,7 +2024,7 @@ static int rndis_control_ack (struct net_device *net) #endif /* RNDIS */ -static void eth_start (struct eth_dev *dev, unsigned gfp_flags) +static void eth_start (struct eth_dev *dev, gfp_t gfp_flags) { DEBUG (dev, "%s\n", __FUNCTION__); @@ -2092,7 +2092,7 @@ static int eth_stop (struct net_device *net) /*-------------------------------------------------------------------------*/ static struct usb_request * -eth_req_alloc (struct usb_ep *ep, unsigned size, unsigned gfp_flags) +eth_req_alloc (struct usb_ep *ep, unsigned size, gfp_t gfp_flags) { struct usb_request *req; diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index eaab26f4ed37..b0f3cd63e3b9 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -269,7 +269,7 @@ static int goku_ep_disable(struct usb_ep *_ep) /*-------------------------------------------------------------------------*/ static struct usb_request * -goku_alloc_request(struct usb_ep *_ep, unsigned gfp_flags) +goku_alloc_request(struct usb_ep *_ep, gfp_t gfp_flags) { struct goku_request *req; @@ -327,7 +327,7 @@ goku_free_request(struct usb_ep *_ep, struct usb_request *_req) */ static void * goku_alloc_buffer(struct usb_ep *_ep, unsigned bytes, - dma_addr_t *dma, unsigned gfp_flags) + dma_addr_t *dma, gfp_t gfp_flags) { void *retval; struct goku_ep *ep; @@ -789,7 +789,7 @@ finished: /*-------------------------------------------------------------------------*/ static int -goku_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags) +goku_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) { struct goku_request *req; struct goku_ep *ep; diff --git a/drivers/usb/gadget/lh7a40x_udc.c b/drivers/usb/gadget/lh7a40x_udc.c index 4842577789c9..012d1e5f1524 100644 --- a/drivers/usb/gadget/lh7a40x_udc.c +++ b/drivers/usb/gadget/lh7a40x_udc.c @@ -71,13 +71,13 @@ static char *state_names[] = { static int lh7a40x_ep_enable(struct usb_ep *ep, const struct usb_endpoint_descriptor *); static int lh7a40x_ep_disable(struct usb_ep *ep); -static struct usb_request *lh7a40x_alloc_request(struct usb_ep *ep, int); +static struct usb_request *lh7a40x_alloc_request(struct usb_ep *ep, gfp_t); static void lh7a40x_free_request(struct usb_ep *ep, struct usb_request *); static void *lh7a40x_alloc_buffer(struct usb_ep *ep, unsigned, dma_addr_t *, - int); + gfp_t); static void lh7a40x_free_buffer(struct usb_ep *ep, void *, dma_addr_t, unsigned); -static int lh7a40x_queue(struct usb_ep *ep, struct usb_request *, int); +static int lh7a40x_queue(struct usb_ep *ep, struct usb_request *, gfp_t); static int lh7a40x_dequeue(struct usb_ep *ep, struct usb_request *); static int lh7a40x_set_halt(struct usb_ep *ep, int); static int lh7a40x_fifo_status(struct usb_ep *ep); @@ -1106,7 +1106,7 @@ static int lh7a40x_ep_disable(struct usb_ep *_ep) } static struct usb_request *lh7a40x_alloc_request(struct usb_ep *ep, - unsigned gfp_flags) + gfp_t gfp_flags) { struct lh7a40x_request *req; @@ -1134,7 +1134,7 @@ static void lh7a40x_free_request(struct usb_ep *ep, struct usb_request *_req) } static void *lh7a40x_alloc_buffer(struct usb_ep *ep, unsigned bytes, - dma_addr_t * dma, unsigned gfp_flags) + dma_addr_t * dma, gfp_t gfp_flags) { char *retval; @@ -1158,7 +1158,7 @@ static void lh7a40x_free_buffer(struct usb_ep *ep, void *buf, dma_addr_t dma, * NOTE: Sets INDEX register */ static int lh7a40x_queue(struct usb_ep *_ep, struct usb_request *_req, - unsigned gfp_flags) + gfp_t gfp_flags) { struct lh7a40x_request *req; struct lh7a40x_ep *ep; diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index 477fab2e74d1..c32e1f7476da 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -376,7 +376,7 @@ static int net2280_disable (struct usb_ep *_ep) /*-------------------------------------------------------------------------*/ static struct usb_request * -net2280_alloc_request (struct usb_ep *_ep, unsigned gfp_flags) +net2280_alloc_request (struct usb_ep *_ep, gfp_t gfp_flags) { struct net2280_ep *ep; struct net2280_request *req; @@ -463,7 +463,7 @@ net2280_alloc_buffer ( struct usb_ep *_ep, unsigned bytes, dma_addr_t *dma, - unsigned gfp_flags + gfp_t gfp_flags ) { void *retval; @@ -897,7 +897,7 @@ done (struct net2280_ep *ep, struct net2280_request *req, int status) /*-------------------------------------------------------------------------*/ static int -net2280_queue (struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags) +net2280_queue (struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) { struct net2280_request *req; struct net2280_ep *ep; diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index ff5533e69560..287c5900fb13 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -269,7 +269,7 @@ static int omap_ep_disable(struct usb_ep *_ep) /*-------------------------------------------------------------------------*/ static struct usb_request * -omap_alloc_request(struct usb_ep *ep, unsigned gfp_flags) +omap_alloc_request(struct usb_ep *ep, gfp_t gfp_flags) { struct omap_req *req; @@ -298,7 +298,7 @@ omap_alloc_buffer( struct usb_ep *_ep, unsigned bytes, dma_addr_t *dma, - unsigned gfp_flags + gfp_t gfp_flags ) { void *retval; @@ -937,7 +937,7 @@ static void dma_channel_release(struct omap_ep *ep) /*-------------------------------------------------------------------------*/ static int -omap_ep_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags) +omap_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) { struct omap_ep *ep = container_of(_ep, struct omap_ep, ep); struct omap_req *req = container_of(_req, struct omap_req, req); diff --git a/drivers/usb/gadget/pxa2xx_udc.c b/drivers/usb/gadget/pxa2xx_udc.c index 73f8c9404156..6e545393cfff 100644 --- a/drivers/usb/gadget/pxa2xx_udc.c +++ b/drivers/usb/gadget/pxa2xx_udc.c @@ -332,7 +332,7 @@ static int pxa2xx_ep_disable (struct usb_ep *_ep) * pxa2xx_ep_alloc_request - allocate a request data structure */ static struct usb_request * -pxa2xx_ep_alloc_request (struct usb_ep *_ep, unsigned gfp_flags) +pxa2xx_ep_alloc_request (struct usb_ep *_ep, gfp_t gfp_flags) { struct pxa2xx_request *req; @@ -367,7 +367,7 @@ pxa2xx_ep_free_request (struct usb_ep *_ep, struct usb_request *_req) */ static void * pxa2xx_ep_alloc_buffer(struct usb_ep *_ep, unsigned bytes, - dma_addr_t *dma, unsigned gfp_flags) + dma_addr_t *dma, gfp_t gfp_flags) { char *retval; @@ -874,7 +874,7 @@ done: /*-------------------------------------------------------------------------*/ static int -pxa2xx_ep_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags) +pxa2xx_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) { struct pxa2xx_request *req; struct pxa2xx_ep *ep; diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c index c925d9222f53..b35ac6d334f8 100644 --- a/drivers/usb/gadget/serial.c +++ b/drivers/usb/gadget/serial.c @@ -300,18 +300,18 @@ static int gs_build_config_buf(u8 *buf, enum usb_device_speed speed, u8 type, unsigned int index, int is_otg); static struct usb_request *gs_alloc_req(struct usb_ep *ep, unsigned int len, - unsigned kmalloc_flags); + gfp_t kmalloc_flags); static void gs_free_req(struct usb_ep *ep, struct usb_request *req); static struct gs_req_entry *gs_alloc_req_entry(struct usb_ep *ep, unsigned len, - unsigned kmalloc_flags); + gfp_t kmalloc_flags); static void gs_free_req_entry(struct usb_ep *ep, struct gs_req_entry *req); -static int gs_alloc_ports(struct gs_dev *dev, unsigned kmalloc_flags); +static int gs_alloc_ports(struct gs_dev *dev, gfp_t kmalloc_flags); static void gs_free_ports(struct gs_dev *dev); /* circular buffer */ -static struct gs_buf *gs_buf_alloc(unsigned int size, unsigned kmalloc_flags); +static struct gs_buf *gs_buf_alloc(unsigned int size, gfp_t kmalloc_flags); static void gs_buf_free(struct gs_buf *gb); static void gs_buf_clear(struct gs_buf *gb); static unsigned int gs_buf_data_avail(struct gs_buf *gb); @@ -2091,7 +2091,7 @@ static int gs_build_config_buf(u8 *buf, enum usb_device_speed speed, * usb_request or NULL if there is an error. */ static struct usb_request * -gs_alloc_req(struct usb_ep *ep, unsigned int len, unsigned kmalloc_flags) +gs_alloc_req(struct usb_ep *ep, unsigned int len, gfp_t kmalloc_flags) { struct usb_request *req; @@ -2132,7 +2132,7 @@ static void gs_free_req(struct usb_ep *ep, struct usb_request *req) * endpoint, buffer len, and kmalloc flags. */ static struct gs_req_entry * -gs_alloc_req_entry(struct usb_ep *ep, unsigned len, unsigned kmalloc_flags) +gs_alloc_req_entry(struct usb_ep *ep, unsigned len, gfp_t kmalloc_flags) { struct gs_req_entry *req; @@ -2173,7 +2173,7 @@ static void gs_free_req_entry(struct usb_ep *ep, struct gs_req_entry *req) * * The device lock is normally held when calling this function. */ -static int gs_alloc_ports(struct gs_dev *dev, unsigned kmalloc_flags) +static int gs_alloc_ports(struct gs_dev *dev, gfp_t kmalloc_flags) { int i; struct gs_port *port; @@ -2255,7 +2255,7 @@ static void gs_free_ports(struct gs_dev *dev) * * Allocate a circular buffer and all associated memory. */ -static struct gs_buf *gs_buf_alloc(unsigned int size, unsigned kmalloc_flags) +static struct gs_buf *gs_buf_alloc(unsigned int size, gfp_t kmalloc_flags) { struct gs_buf *gb; diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c index 6890e773b2a2..ec9c424f1d97 100644 --- a/drivers/usb/gadget/zero.c +++ b/drivers/usb/gadget/zero.c @@ -612,7 +612,7 @@ static void source_sink_complete (struct usb_ep *ep, struct usb_request *req) } static struct usb_request * -source_sink_start_ep (struct usb_ep *ep, unsigned gfp_flags) +source_sink_start_ep (struct usb_ep *ep, gfp_t gfp_flags) { struct usb_request *req; int status; @@ -640,7 +640,7 @@ source_sink_start_ep (struct usb_ep *ep, unsigned gfp_flags) } static int -set_source_sink_config (struct zero_dev *dev, unsigned gfp_flags) +set_source_sink_config (struct zero_dev *dev, gfp_t gfp_flags) { int result = 0; struct usb_ep *ep; @@ -744,7 +744,7 @@ static void loopback_complete (struct usb_ep *ep, struct usb_request *req) } static int -set_loopback_config (struct zero_dev *dev, unsigned gfp_flags) +set_loopback_config (struct zero_dev *dev, gfp_t gfp_flags) { int result = 0; struct usb_ep *ep; @@ -845,7 +845,7 @@ static void zero_reset_config (struct zero_dev *dev) * by limiting configuration choices (like the pxa2xx). */ static int -zero_set_config (struct zero_dev *dev, unsigned number, unsigned gfp_flags) +zero_set_config (struct zero_dev *dev, unsigned number, gfp_t gfp_flags) { int result = 0; struct usb_gadget *gadget = dev->gadget; diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index b948ffd94f45..f5eb9e7b5b18 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -983,7 +983,7 @@ static int ehci_urb_enqueue ( struct usb_hcd *hcd, struct usb_host_endpoint *ep, struct urb *urb, - unsigned mem_flags + gfp_t mem_flags ) { struct ehci_hcd *ehci = hcd_to_ehci (hcd); struct list_head qtd_list; diff --git a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c index 5c38ad869485..91c2ab43cbcc 100644 --- a/drivers/usb/host/ehci-mem.c +++ b/drivers/usb/host/ehci-mem.c @@ -45,7 +45,7 @@ static inline void ehci_qtd_init (struct ehci_qtd *qtd, dma_addr_t dma) INIT_LIST_HEAD (&qtd->qtd_list); } -static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, int flags) +static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, gfp_t flags) { struct ehci_qtd *qtd; dma_addr_t dma; @@ -79,7 +79,7 @@ static void qh_destroy (struct kref *kref) dma_pool_free (ehci->qh_pool, qh, qh->qh_dma); } -static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, int flags) +static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags) { struct ehci_qh *qh; dma_addr_t dma; @@ -161,7 +161,7 @@ static void ehci_mem_cleanup (struct ehci_hcd *ehci) } /* remember to add cleanup code (above) if you add anything here */ -static int ehci_mem_init (struct ehci_hcd *ehci, int flags) +static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags) { int i; diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 940d38ca7d91..5bb872c3496d 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -477,7 +477,7 @@ qh_urb_transaction ( struct ehci_hcd *ehci, struct urb *urb, struct list_head *head, - int flags + gfp_t flags ) { struct ehci_qtd *qtd, *qtd_prev; dma_addr_t buf; @@ -629,7 +629,7 @@ static struct ehci_qh * qh_make ( struct ehci_hcd *ehci, struct urb *urb, - int flags + gfp_t flags ) { struct ehci_qh *qh = ehci_qh_alloc (ehci, flags); u32 info1 = 0, info2 = 0; @@ -906,7 +906,7 @@ submit_async ( struct usb_host_endpoint *ep, struct urb *urb, struct list_head *qtd_list, - unsigned mem_flags + gfp_t mem_flags ) { struct ehci_qtd *qtd; int epnum; diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index ccc7300baa6d..f0c8aa1ccd5d 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -589,7 +589,7 @@ static int intr_submit ( struct usb_host_endpoint *ep, struct urb *urb, struct list_head *qtd_list, - unsigned mem_flags + gfp_t mem_flags ) { unsigned epnum; unsigned long flags; @@ -634,7 +634,7 @@ done: /* ehci_iso_stream ops work with both ITD and SITD */ static struct ehci_iso_stream * -iso_stream_alloc (unsigned mem_flags) +iso_stream_alloc (gfp_t mem_flags) { struct ehci_iso_stream *stream; @@ -851,7 +851,7 @@ iso_stream_find (struct ehci_hcd *ehci, struct urb *urb) /* ehci_iso_sched ops can be ITD-only or SITD-only */ static struct ehci_iso_sched * -iso_sched_alloc (unsigned packets, unsigned mem_flags) +iso_sched_alloc (unsigned packets, gfp_t mem_flags) { struct ehci_iso_sched *iso_sched; int size = sizeof *iso_sched; @@ -924,7 +924,7 @@ itd_urb_transaction ( struct ehci_iso_stream *stream, struct ehci_hcd *ehci, struct urb *urb, - unsigned mem_flags + gfp_t mem_flags ) { struct ehci_itd *itd; @@ -1418,7 +1418,7 @@ itd_complete ( /*-------------------------------------------------------------------------*/ static int itd_submit (struct ehci_hcd *ehci, struct urb *urb, - unsigned mem_flags) + gfp_t mem_flags) { int status = -EINVAL; unsigned long flags; @@ -1529,7 +1529,7 @@ sitd_urb_transaction ( struct ehci_iso_stream *stream, struct ehci_hcd *ehci, struct urb *urb, - unsigned mem_flags + gfp_t mem_flags ) { struct ehci_sitd *sitd; @@ -1779,7 +1779,7 @@ sitd_complete ( static int sitd_submit (struct ehci_hcd *ehci, struct urb *urb, - unsigned mem_flags) + gfp_t mem_flags) { int status = -EINVAL; unsigned long flags; diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index e142056b0d2c..2548d94fcd72 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -694,7 +694,7 @@ static int balance(struct isp116x *isp116x, u16 period, u16 load) static int isp116x_urb_enqueue(struct usb_hcd *hcd, struct usb_host_endpoint *hep, struct urb *urb, - unsigned mem_flags) + gfp_t mem_flags) { struct isp116x *isp116x = hcd_to_isp116x(hcd); struct usb_device *udev = urb->dev; diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 67c1aa5eb1c1..f8da8c7af7c6 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -180,7 +180,7 @@ static int ohci_urb_enqueue ( struct usb_hcd *hcd, struct usb_host_endpoint *ep, struct urb *urb, - unsigned mem_flags + gfp_t mem_flags ) { struct ohci_hcd *ohci = hcd_to_ohci (hcd); struct ed *ed; diff --git a/drivers/usb/host/ohci-mem.c b/drivers/usb/host/ohci-mem.c index fd3c4d3714bd..9fb83dfb1eb4 100644 --- a/drivers/usb/host/ohci-mem.c +++ b/drivers/usb/host/ohci-mem.c @@ -84,7 +84,7 @@ dma_to_td (struct ohci_hcd *hc, dma_addr_t td_dma) /* TDs ... */ static struct td * -td_alloc (struct ohci_hcd *hc, unsigned mem_flags) +td_alloc (struct ohci_hcd *hc, gfp_t mem_flags) { dma_addr_t dma; struct td *td; @@ -118,7 +118,7 @@ td_free (struct ohci_hcd *hc, struct td *td) /* EDs ... */ static struct ed * -ed_alloc (struct ohci_hcd *hc, unsigned mem_flags) +ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags) { dma_addr_t dma; struct ed *ed; diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index d42a15d10a46..cad858575cea 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -818,7 +818,7 @@ static int sl811h_urb_enqueue( struct usb_hcd *hcd, struct usb_host_endpoint *hep, struct urb *urb, - unsigned mem_flags + gfp_t mem_flags ) { struct sl811 *sl811 = hcd_to_sl811(hcd); struct usb_device *udev = urb->dev; diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c index ea0d168a8c67..4e0fbe2c1a9a 100644 --- a/drivers/usb/host/uhci-q.c +++ b/drivers/usb/host/uhci-q.c @@ -1164,7 +1164,7 @@ static struct urb *uhci_find_urb_ep(struct uhci_hcd *uhci, struct urb *urb) static int uhci_urb_enqueue(struct usb_hcd *hcd, struct usb_host_endpoint *ep, - struct urb *urb, unsigned mem_flags) + struct urb *urb, gfp_t mem_flags) { int ret; struct uhci_hcd *uhci = hcd_to_uhci(hcd); diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 03fb70ef2eb3..0592cb5e6c4d 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -137,7 +137,7 @@ static void async_complete(struct urb *urb, struct pt_regs *ptregs) static struct uss720_async_request *submit_async_request(struct parport_uss720_private *priv, __u8 request, __u8 requesttype, __u16 value, __u16 index, - unsigned int mem_flags) + gfp_t mem_flags) { struct usb_device *usbdev; struct uss720_async_request *rq; @@ -204,7 +204,7 @@ static unsigned int kill_all_async_requests_priv(struct parport_uss720_private * /* --------------------------------------------------------------------- */ -static int get_1284_register(struct parport *pp, unsigned char reg, unsigned char *val, unsigned int mem_flags) +static int get_1284_register(struct parport *pp, unsigned char reg, unsigned char *val, gfp_t mem_flags) { struct parport_uss720_private *priv; struct uss720_async_request *rq; @@ -238,7 +238,7 @@ static int get_1284_register(struct parport *pp, unsigned char reg, unsigned cha return -EIO; } -static int set_1284_register(struct parport *pp, unsigned char reg, unsigned char val, unsigned int mem_flags) +static int set_1284_register(struct parport *pp, unsigned char reg, unsigned char val, gfp_t mem_flags) { struct parport_uss720_private *priv; struct uss720_async_request *rq; diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c index 861f00a43750..252a34fbb42c 100644 --- a/drivers/usb/net/asix.c +++ b/drivers/usb/net/asix.c @@ -753,7 +753,7 @@ static int ax88772_rx_fixup(struct usbnet *dev, struct sk_buff *skb) } static struct sk_buff *ax88772_tx_fixup(struct usbnet *dev, struct sk_buff *skb, - unsigned flags) + gfp_t flags) { int padlen; int headroom = skb_headroom(skb); diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c index c8763ae33c73..c0f263b202a6 100644 --- a/drivers/usb/net/gl620a.c +++ b/drivers/usb/net/gl620a.c @@ -301,7 +301,7 @@ static int genelink_rx_fixup(struct usbnet *dev, struct sk_buff *skb) } static struct sk_buff * -genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, unsigned flags) +genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int padlen; int length = skb->len; diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c index e04b0ce3611a..c82655d3d448 100644 --- a/drivers/usb/net/kaweth.c +++ b/drivers/usb/net/kaweth.c @@ -477,13 +477,13 @@ static int kaweth_reset(struct kaweth_device *kaweth) } static void kaweth_usb_receive(struct urb *, struct pt_regs *regs); -static int kaweth_resubmit_rx_urb(struct kaweth_device *, unsigned); +static int kaweth_resubmit_rx_urb(struct kaweth_device *, gfp_t); /**************************************************************** int_callback *****************************************************************/ -static void kaweth_resubmit_int_urb(struct kaweth_device *kaweth, int mf) +static void kaweth_resubmit_int_urb(struct kaweth_device *kaweth, gfp_t mf) { int status; @@ -550,7 +550,7 @@ static void kaweth_resubmit_tl(void *d) * kaweth_resubmit_rx_urb ****************************************************************/ static int kaweth_resubmit_rx_urb(struct kaweth_device *kaweth, - unsigned mem_flags) + gfp_t mem_flags) { int result; diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c index a4309c4a491b..cee55f8cf64f 100644 --- a/drivers/usb/net/net1080.c +++ b/drivers/usb/net/net1080.c @@ -500,7 +500,7 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) } static struct sk_buff * -net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, unsigned flags) +net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int padlen; struct sk_buff *skb2; diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c index 2ed2e5fb7778..b5a925dc1beb 100644 --- a/drivers/usb/net/rndis_host.c +++ b/drivers/usb/net/rndis_host.c @@ -517,7 +517,7 @@ static int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) } static struct sk_buff * -rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, unsigned flags) +rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { struct rndis_data_hdr *hdr; struct sk_buff *skb2; diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c index 6c460918d54f..fce81d738933 100644 --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c @@ -288,7 +288,7 @@ EXPORT_SYMBOL_GPL(usbnet_defer_kevent); static void rx_complete (struct urb *urb, struct pt_regs *regs); -static void rx_submit (struct usbnet *dev, struct urb *urb, unsigned flags) +static void rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) { struct sk_buff *skb; struct skb_data *entry; diff --git a/drivers/usb/net/usbnet.h b/drivers/usb/net/usbnet.h index 7aa0abd1a9bd..89fc4958eecf 100644 --- a/drivers/usb/net/usbnet.h +++ b/drivers/usb/net/usbnet.h @@ -107,7 +107,7 @@ struct driver_info { /* fixup tx packet (add framing) */ struct sk_buff *(*tx_fixup)(struct usbnet *dev, - struct sk_buff *skb, unsigned flags); + struct sk_buff *skb, gfp_t flags); /* for new devices, use the descriptor-reading code instead */ int in; /* rx endpoint */ diff --git a/drivers/usb/net/zaurus.c b/drivers/usb/net/zaurus.c index ee3b892aeabc..5d4b7d55b097 100644 --- a/drivers/usb/net/zaurus.c +++ b/drivers/usb/net/zaurus.c @@ -62,7 +62,7 @@ */ static struct sk_buff * -zaurus_tx_fixup(struct usbnet *dev, struct sk_buff *skb, unsigned flags) +zaurus_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int padlen; struct sk_buff *skb2; diff --git a/drivers/usb/net/zd1201.c b/drivers/usb/net/zd1201.c index c4e479ee926a..2f52261c7cc1 100644 --- a/drivers/usb/net/zd1201.c +++ b/drivers/usb/net/zd1201.c @@ -521,7 +521,7 @@ static int zd1201_setconfig(struct zd1201 *zd, int rid, void *buf, int len, int int reqlen; char seq=0; struct urb *urb; - unsigned int gfp_mask = wait ? GFP_NOIO : GFP_ATOMIC; + gfp_t gfp_mask = wait ? GFP_NOIO : GFP_ATOMIC; len += 4; /* first 4 are for header */ diff --git a/include/linux/usb.h b/include/linux/usb.h index 4dbe580f9335..8f731e8f2821 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -933,17 +933,17 @@ static inline void usb_fill_int_urb (struct urb *urb, } extern void usb_init_urb(struct urb *urb); -extern struct urb *usb_alloc_urb(int iso_packets, unsigned mem_flags); +extern struct urb *usb_alloc_urb(int iso_packets, gfp_t mem_flags); extern void usb_free_urb(struct urb *urb); #define usb_put_urb usb_free_urb extern struct urb *usb_get_urb(struct urb *urb); -extern int usb_submit_urb(struct urb *urb, unsigned mem_flags); +extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_unlink_urb(struct urb *urb); extern void usb_kill_urb(struct urb *urb); #define HAVE_USB_BUFFERS void *usb_buffer_alloc (struct usb_device *dev, size_t size, - unsigned mem_flags, dma_addr_t *dma); + gfp_t mem_flags, dma_addr_t *dma); void usb_buffer_free (struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); @@ -1050,7 +1050,7 @@ int usb_sg_init ( struct scatterlist *sg, int nents, size_t length, - unsigned mem_flags + gfp_t mem_flags ); void usb_sg_cancel (struct usb_sg_request *io); void usb_sg_wait (struct usb_sg_request *io); diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h index 71e608607324..ff81117eb733 100644 --- a/include/linux/usb_gadget.h +++ b/include/linux/usb_gadget.h @@ -107,18 +107,18 @@ struct usb_ep_ops { int (*disable) (struct usb_ep *ep); struct usb_request *(*alloc_request) (struct usb_ep *ep, - unsigned gfp_flags); + gfp_t gfp_flags); void (*free_request) (struct usb_ep *ep, struct usb_request *req); void *(*alloc_buffer) (struct usb_ep *ep, unsigned bytes, - dma_addr_t *dma, unsigned gfp_flags); + dma_addr_t *dma, gfp_t gfp_flags); void (*free_buffer) (struct usb_ep *ep, void *buf, dma_addr_t dma, unsigned bytes); // NOTE: on 2.6, drivers may also use dma_map() and // dma_sync_single_*() to directly manage dma overhead. int (*queue) (struct usb_ep *ep, struct usb_request *req, - unsigned gfp_flags); + gfp_t gfp_flags); int (*dequeue) (struct usb_ep *ep, struct usb_request *req); int (*set_halt) (struct usb_ep *ep, int value); @@ -214,7 +214,7 @@ usb_ep_disable (struct usb_ep *ep) * Returns the request, or null if one could not be allocated. */ static inline struct usb_request * -usb_ep_alloc_request (struct usb_ep *ep, unsigned gfp_flags) +usb_ep_alloc_request (struct usb_ep *ep, gfp_t gfp_flags) { return ep->ops->alloc_request (ep, gfp_flags); } @@ -254,7 +254,7 @@ usb_ep_free_request (struct usb_ep *ep, struct usb_request *req) */ static inline void * usb_ep_alloc_buffer (struct usb_ep *ep, unsigned len, dma_addr_t *dma, - unsigned gfp_flags) + gfp_t gfp_flags) { return ep->ops->alloc_buffer (ep, len, dma, gfp_flags); } @@ -330,7 +330,7 @@ usb_ep_free_buffer (struct usb_ep *ep, void *buf, dma_addr_t dma, unsigned len) * reported when the usb peripheral is disconnected. */ static inline int -usb_ep_queue (struct usb_ep *ep, struct usb_request *req, unsigned gfp_flags) +usb_ep_queue (struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags) { return ep->ops->queue (ep, req, gfp_flags); } diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c index e0d0365453b3..f1a2e2c2e02f 100644 --- a/sound/usb/usbmidi.c +++ b/sound/usb/usbmidi.c @@ -163,7 +163,7 @@ static const uint8_t snd_usbmidi_cin_length[] = { /* * Submits the URB, with error handling. */ -static int snd_usbmidi_submit_urb(struct urb* urb, int flags) +static int snd_usbmidi_submit_urb(struct urb* urb, gfp_t flags) { int err = usb_submit_urb(urb, flags); if (err < 0 && err != -ENODEV) -- cgit v1.2.3 From 9796fdd829da626374458e8706daedcc0e432ddd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:22:03 -0400 Subject: [PATCH] gfp_t: kernel/* Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/audit.h | 4 ++-- include/linux/suspend.h | 2 +- kernel/audit.c | 6 +++--- kernel/auditsc.c | 2 +- kernel/kexec.c | 7 +++---- kernel/power/swsusp.c | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b2a2509bd7ea..da3c01955f3d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -260,11 +260,11 @@ extern int audit_filter_user(struct netlink_skb_parms *cb, int type); #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ -extern void audit_log(struct audit_context *ctx, int gfp_mask, +extern void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...) __attribute__((format(printf,4,5))); -extern struct audit_buffer *audit_log_start(struct audit_context *ctx, int gfp_mask, int type); +extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type); extern void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) __attribute__((format(printf,2,3))); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ad15a54806d8..ba448c760168 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -71,7 +71,7 @@ void restore_processor_state(void); struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); -extern unsigned long get_usable_page(unsigned gfp_mask); +extern unsigned long get_usable_page(gfp_t gfp_mask); extern void free_eaten_memory(void); #endif /* _LINUX_SWSUSP_H */ diff --git a/kernel/audit.c b/kernel/audit.c index aefa73a8a586..0c56320d38dc 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -133,7 +133,7 @@ struct audit_buffer { struct list_head list; struct sk_buff *skb; /* formatted skb ready to send */ struct audit_context *ctx; /* NULL or associated context */ - int gfp_mask; + gfp_t gfp_mask; }; static void audit_set_pid(struct audit_buffer *ab, pid_t pid) @@ -647,7 +647,7 @@ static inline void audit_get_stamp(struct audit_context *ctx, * will be written at syscall exit. If there is no associated task, tsk * should be NULL. */ -struct audit_buffer *audit_log_start(struct audit_context *ctx, int gfp_mask, +struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { struct audit_buffer *ab = NULL; @@ -879,7 +879,7 @@ void audit_log_end(struct audit_buffer *ab) /* Log an audit record. This is a convenience function that calls * audit_log_start, audit_log_vformat, and audit_log_end. It may be * called in any context. */ -void audit_log(struct audit_context *ctx, int gfp_mask, int type, +void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...) { struct audit_buffer *ab; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 88696f639aab..d8a68509e729 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -803,7 +803,7 @@ static void audit_log_task_info(struct audit_buffer *ab) up_read(&mm->mmap_sem); } -static void audit_log_exit(struct audit_context *context, unsigned int gfp_mask) +static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) { int i; struct audit_buffer *ab; diff --git a/kernel/kexec.c b/kernel/kexec.c index cdd4dcd8fb63..36c5d9cd4cc1 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -90,7 +90,7 @@ int kexec_should_crash(struct task_struct *p) static int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end); static struct page *kimage_alloc_page(struct kimage *image, - unsigned int gfp_mask, + gfp_t gfp_mask, unsigned long dest); static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, @@ -326,8 +326,7 @@ static int kimage_is_destination_range(struct kimage *image, return 0; } -static struct page *kimage_alloc_pages(unsigned int gfp_mask, - unsigned int order) +static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) { struct page *pages; @@ -654,7 +653,7 @@ static kimage_entry_t *kimage_dst_used(struct kimage *image, } static struct page *kimage_alloc_page(struct kimage *image, - unsigned int gfp_mask, + gfp_t gfp_mask, unsigned long destination) { /* diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 2d5c45676442..10bc5ec496d7 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1095,7 +1095,7 @@ static inline void eat_page(void *page) *eaten_memory = c; } -unsigned long get_usable_page(unsigned gfp_mask) +unsigned long get_usable_page(gfp_t gfp_mask) { unsigned long m; -- cgit v1.2.3 From b4e3ca1ab1ae9ae86134126dcdc88da1caaa32ca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:22:34 -0400 Subject: [PATCH] gfp_t: remaining bits of drivers/* Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/block/loop.c | 2 +- drivers/block/rd.c | 2 +- drivers/char/n_tty.c | 2 +- drivers/ieee1394/eth1394.c | 2 +- drivers/md/bitmap.c | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/s390/net/fsm.c | 2 +- drivers/s390/net/fsm.h | 2 +- include/linux/i2o.h | 4 ++-- include/linux/loop.h | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b35e08876dd4..96c664af8d06 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -881,7 +881,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) { struct file *filp = lo->lo_backing_file; - int gfp = lo->old_gfp_mask; + gfp_t gfp = lo->old_gfp_mask; if (lo->lo_state != Lo_bound) return -ENXIO; diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 145c1fbffe01..68c60a5bcdab 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -348,7 +348,7 @@ static int rd_open(struct inode *inode, struct file *filp) struct block_device *bdev = inode->i_bdev; struct address_space *mapping; unsigned bsize; - int gfp_mask; + gfp_t gfp_mask; inode = igrab(bdev->bd_inode); rd_bdev[unit] = bdev; diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index c9bdf544ed2c..c556f4d3ccd7 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -62,7 +62,7 @@ static inline unsigned char *alloc_buf(void) { - unsigned int prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; + gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; if (PAGE_SIZE != N_TTY_BUF_SIZE) return kmalloc(N_TTY_BUF_SIZE, prio); diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c index 4802bbbb6dc9..c9e92d85c893 100644 --- a/drivers/ieee1394/eth1394.c +++ b/drivers/ieee1394/eth1394.c @@ -1630,7 +1630,7 @@ static void ether1394_complete_cb(void *__ptask) /* Transmit a packet (called by kernel) */ static int ether1394_tx (struct sk_buff *skb, struct net_device *dev) { - int kmflags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; + gfp_t kmflags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; struct eth1394hdr *eth; struct eth1394_priv *priv = netdev_priv(dev); int proto; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 2fba2bbe72d8..01654fcabc52 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -91,7 +91,7 @@ int bitmap_active(struct bitmap *bitmap) #define WRITE_POOL_SIZE 256 /* mempool for queueing pending writes on the bitmap file */ -static void *write_pool_alloc(unsigned int gfp_flags, void *data) +static void *write_pool_alloc(gfp_t gfp_flags, void *data) { return kmalloc(sizeof(struct page_list), gfp_flags); } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b6148f6f7836..28c1a628621f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -331,7 +331,7 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size, { struct bio *bio; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - int gfp_mask = GFP_NOIO | __GFP_HIGHMEM; + gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; unsigned int i; /* diff --git a/drivers/s390/net/fsm.c b/drivers/s390/net/fsm.c index fa09440d82e5..38f50b7129a2 100644 --- a/drivers/s390/net/fsm.c +++ b/drivers/s390/net/fsm.c @@ -16,7 +16,7 @@ MODULE_LICENSE("GPL"); fsm_instance * init_fsm(char *name, const char **state_names, const char **event_names, int nr_states, - int nr_events, const fsm_node *tmpl, int tmpl_len, int order) + int nr_events, const fsm_node *tmpl, int tmpl_len, gfp_t order) { int i; fsm_instance *this; diff --git a/drivers/s390/net/fsm.h b/drivers/s390/net/fsm.h index f9a011001eb6..1b8a7e7c34f3 100644 --- a/drivers/s390/net/fsm.h +++ b/drivers/s390/net/fsm.h @@ -110,7 +110,7 @@ extern fsm_instance * init_fsm(char *name, const char **state_names, const char **event_names, int nr_states, int nr_events, const fsm_node *tmpl, - int tmpl_len, int order); + int tmpl_len, gfp_t order); /** * Releases an FSM diff --git a/include/linux/i2o.h b/include/linux/i2o.h index bdc286ec947c..b4af45aad25d 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -492,7 +492,7 @@ static inline int i2o_dma_map_sg(struct i2o_controller *c, * Returns 0 on success or -ENOMEM on failure. */ static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr, - size_t len, unsigned int gfp_mask) + size_t len, gfp_t gfp_mask) { struct pci_dev *pdev = to_pci_dev(dev); int dma_64 = 0; @@ -551,7 +551,7 @@ static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr) * Returns the 0 on success or negative error code on failure. */ static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr, - size_t len, unsigned int gfp_mask) + size_t len, gfp_t gfp_mask) { i2o_dma_free(dev, addr); diff --git a/include/linux/loop.h b/include/linux/loop.h index 53fa51595443..40f63c9879d2 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -52,7 +52,7 @@ struct loop_device { unsigned lo_blocksize; void *key_data; - int old_gfp_mask; + gfp_t old_gfp_mask; spinlock_t lo_lock; struct bio *lo_bio; -- cgit v1.2.3 From 260b23674fdb570f3235ce55892246bef1c24c2a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Oct 2005 03:22:44 -0400 Subject: [PATCH] gfp_t: the rest zone handling, mapping->flags handling Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- include/linux/pagemap.h | 7 ++++--- mm/highmem.c | 14 +++++++++----- mm/page_alloc.c | 29 +++++++++++++++-------------- 4 files changed, 29 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5ed471b58f4f..7519eb4191e7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -302,7 +302,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive, void build_all_zonelists(void); void wakeup_kswapd(struct zone *zone, int order); int zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int alloc_type, int can_try_harder, int gfp_high); + int alloc_type, int can_try_harder, gfp_t gfp_high); #ifdef CONFIG_HAVE_MEMORY_PRESENT void memory_present(int nid, unsigned long start, unsigned long end); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index efbae53fb078..ba6c310a055f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -21,16 +21,17 @@ static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { - return mapping->flags & __GFP_BITS_MASK; + return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; } /* * This is non-atomic. Only to be used before the mapping is activated. * Probably needs a barrier... */ -static inline void mapping_set_gfp_mask(struct address_space *m, int mask) +static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) { - m->flags = (m->flags & ~__GFP_BITS_MASK) | mask; + m->flags = (m->flags & ~(__force unsigned long)__GFP_BITS_MASK) | + (__force unsigned long)mask; } /* diff --git a/mm/highmem.c b/mm/highmem.c index 90e1861e2da0..ce2e7e8bbfa7 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -30,11 +30,9 @@ static mempool_t *page_pool, *isa_page_pool; -static void *page_pool_alloc(gfp_t gfp_mask, void *data) +static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data) { - unsigned int gfp = gfp_mask | (unsigned int) (long) data; - - return alloc_page(gfp); + return alloc_page(gfp_mask | GFP_DMA); } static void page_pool_free(void *page, void *data) @@ -51,6 +49,12 @@ static void page_pool_free(void *page, void *data) * n means that there are (n-1) current users of it. */ #ifdef CONFIG_HIGHMEM + +static void *page_pool_alloc(gfp_t gfp_mask, void *data) +{ + return alloc_page(gfp_mask); +} + static int pkmap_count[LAST_PKMAP]; static unsigned int last_pkmap_nr; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); @@ -267,7 +271,7 @@ int init_emergency_isa_pool(void) if (isa_page_pool) return 0; - isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA); + isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL); if (!isa_page_pool) BUG(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index aa43ae3ab8c9..94c864eac9c4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -734,7 +734,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) * of the allocation. */ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int classzone_idx, int can_try_harder, int gfp_high) + int classzone_idx, int can_try_harder, gfp_t gfp_high) { /* free_pages my go negative - that's OK */ long min = mark, free_pages = z->free_pages - (1 << order) + 1; @@ -777,7 +777,7 @@ struct page * fastcall __alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist) { - const int wait = gfp_mask & __GFP_WAIT; + const gfp_t wait = gfp_mask & __GFP_WAIT; struct zone **zones, *z; struct page *page; struct reclaim_state reclaim_state; @@ -996,7 +996,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) * get_zeroed_page() returns a 32-bit address, which cannot represent * a highmem page */ - BUG_ON(gfp_mask & __GFP_HIGHMEM); + BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); page = alloc_pages(gfp_mask | __GFP_ZERO, 0); if (page) @@ -1428,6 +1428,16 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli return j; } +static inline int highest_zone(int zone_bits) +{ + int res = ZONE_NORMAL; + if (zone_bits & (__force int)__GFP_HIGHMEM) + res = ZONE_HIGHMEM; + if (zone_bits & (__force int)__GFP_DMA) + res = ZONE_DMA; + return res; +} + #ifdef CONFIG_NUMA #define MAX_NODE_LOAD (num_online_nodes()) static int __initdata node_load[MAX_NUMNODES]; @@ -1524,11 +1534,7 @@ static void __init build_zonelists(pg_data_t *pgdat) zonelist = pgdat->node_zonelists + i; for (j = 0; zonelist->zones[j] != NULL; j++); - k = ZONE_NORMAL; - if (i & __GFP_HIGHMEM) - k = ZONE_HIGHMEM; - if (i & __GFP_DMA) - k = ZONE_DMA; + k = highest_zone(i); j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); zonelist->zones[j] = NULL; @@ -1549,12 +1555,7 @@ static void __init build_zonelists(pg_data_t *pgdat) zonelist = pgdat->node_zonelists + i; j = 0; - k = ZONE_NORMAL; - if (i & __GFP_HIGHMEM) - k = ZONE_HIGHMEM; - if (i & __GFP_DMA) - k = ZONE_DMA; - + k = highest_zone(i); j = build_zonelists_node(pgdat, zonelist, j, k); /* * Now we build the zonelist so that it contains the zones -- cgit v1.2.3