| author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-12-28 19:06:28 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-12-28 19:06:28 -0800 |
| commit | 1c9a512c3de2d3cf0697ceae110595ea4de9ecbb (patch) | |
| tree | f9afca047a64b963b5b74ac61a03fdae384fca43 | |
| parent | d70e75637caeba4bfb5c35ed16379d0e4c1a2b68 (diff) | |
| parent | 7faf92a451f2dc9c086c48bbfa9f069bed748248 (diff) | |
Merge bk://kernel.bkbits.net/davem/net-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
280 files changed, 25376 insertions, 2202 deletions
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
new file mode 100644
index 000000000000..18e184b56040
--- /dev/null
+++ b/Documentation/infiniband/ipoib.txt
@@ -0,0 +1,56 @@
+IP OVER INFINIBAND
+
+  The ib_ipoib driver is an implementation of the IP over InfiniBand
+  protocol as specified by the latest Internet-Drafts issued by the
+  IETF ipoib working group.  It is a "native" implementation in the
+  sense of setting the interface type to ARPHRD_INFINIBAND and the
+  hardware address length to 20 (earlier proprietary implementations
+  masqueraded to the kernel as ethernet interfaces).
+
+Partitions and P_Keys
+
+  When the IPoIB driver is loaded, it creates one interface for each
+  port using the P_Key at index 0.  To create an interface with a
+  different P_Key, write the desired P_Key into the main interface's
+  /sys/class/net/<intf name>/create_child file.  For example:
+
+    echo 0x8001 > /sys/class/net/ib0/create_child
+
+  This will create an interface named ib0.8001 with P_Key 0x8001.  To
+  remove a subinterface, use the "delete_child" file:
+
+    echo 0x8001 > /sys/class/net/ib0/delete_child
+
+  The P_Key for any interface is given by the "pkey" file, and the
+  main interface for a subinterface is in "parent."
+
+Debugging Information
+
+  By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set
+  to 'y', tracing messages are compiled into the driver.  They are
+  turned on by setting the module parameters debug_level and
+  mcast_debug_level to 1.  These parameters can be controlled at
+  runtime through files in /sys/module/ib_ipoib/.
+
+  CONFIG_INFINIBAND_IPOIB_DEBUG also enables the "ipoib_debugfs"
+  virtual filesystem.  By mounting this filesystem, for example with
+
+    mkdir -p /ipoib_debugfs
+    mount -t ipoib_debugfs none /ipoib_debugfs
+
+  it is possible to get statistics about multicast groups from the
+  files /ipoib_debugfs/ib0_mcg and so on.
+
+  The performance impact of this option is negligible, so it
+  is safe to enable this option with debug_level set to 0 for normal
+  operation.
+
+  CONFIG_INFINIBAND_IPOIB_DEBUG_DATA enables even more debug output in
+  the data path when data_debug_level is set to 1.  However, even with
+  the output disabled, enabling this configuration option will affect
+  performance, because it adds tests to the fast path.
+
+References
+
+  IETF IP over InfiniBand (ipoib) Working Group
+    http://ietf.org/html.charters/ipoib-charter.html
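The same create_child operation can be driven programmatically: the sysfs attribute takes one text value per write().  A minimal C sketch, where the parent interface name "ib0" and root privileges are both assumptions:

	#include <stdio.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		/* parent interface name "ib0" is an assumption */
		int fd = open("/sys/class/net/ib0/create_child", O_WRONLY);
		const char *pkey = "0x8001";

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* one write() of the P_Key text, equivalent to the echo above */
		if (write(fd, pkey, strlen(pkey)) < 0)
			perror("create_child");
		close(fd);
		return 0;	/* on success, an ib0.8001 interface appears */
	}
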
diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt
new file mode 100644
index 000000000000..45e517525fb8
--- /dev/null
+++ b/Documentation/infiniband/sysfs.txt
@@ -0,0 +1,64 @@
+SYSFS FILES
+
+  For each InfiniBand device, the InfiniBand drivers create the
+  following files under /sys/class/infiniband/<device name>:
+
+    node_guid      - Node GUID
+    sys_image_guid - System image GUID
+
+  In addition, there is a "ports" subdirectory, with one subdirectory
+  for each port.  For example, if mthca0 is a 2-port HCA, there will
+  be two directories:
+
+    /sys/class/infiniband/mthca0/ports/1
+    /sys/class/infiniband/mthca0/ports/2
+
+  (A switch will only have a single "0" subdirectory for switch port
+  0; no subdirectory is created for normal switch ports)
+
+  In each port subdirectory, the following files are created:
+
+    cap_mask       - Port capability mask
+    lid            - Port LID
+    lid_mask_count - Port LID mask count
+    rate           - Port data rate (active width * active speed)
+    sm_lid         - Subnet manager LID for port's subnet
+    sm_sl          - Subnet manager SL for port's subnet
+    state          - Port state (DOWN, INIT, ARMED, ACTIVE or ACTIVE_DEFER)
+
+  There is also a "counters" subdirectory, with files
+
+    VL15_dropped
+    excessive_buffer_overrun_errors
+    link_downed
+    link_error_recovery
+    local_link_integrity_errors
+    port_rcv_constraint_errors
+    port_rcv_data
+    port_rcv_errors
+    port_rcv_packets
+    port_rcv_remote_physical_errors
+    port_rcv_switch_relay_errors
+    port_xmit_constraint_errors
+    port_xmit_data
+    port_xmit_discards
+    port_xmit_packets
+    symbol_error
+
+  Each of these files contains the corresponding value from the port's
+  Performance Management PortCounters attribute, as described in
+  section 16.1.3.5 of the InfiniBand Architecture Specification.
+
+  The "pkeys" and "gids" subdirectories contain one file for each
+  entry in the port's P_Key or GID table respectively.  For example,
+  ports/1/pkeys/10 contains the value at index 10 in port 1's P_Key
+  table.
+
+MTHCA
+
+  The Mellanox HCA driver also creates the files:
+
+    hw_rev   - Hardware revision number
+    fw_ver   - Firmware version
+    hca_type - HCA type: "MT23108", "MT25208 (MT23108 compat mode)",
+               or "MT25208"
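Reading these attributes from user space is ordinary file I/O.  A minimal sketch, where the device name mthca0, the use of port 1, and the exact text printed by the state file are assumptions:

	#include <stdio.h>

	int main(void)
	{
		char buf[64];
		FILE *f = fopen("/sys/class/infiniband/mthca0/ports/1/state", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		if (fgets(buf, sizeof buf, f))
			printf("port 1 state: %s", buf);	/* e.g. "4: ACTIVE" */
		fclose(f);
		return 0;
	}
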
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
new file mode 100644
index 000000000000..86d25ec5678b
--- /dev/null
+++ b/Documentation/infiniband/user_mad.txt
@@ -0,0 +1,81 @@
+USERSPACE MAD ACCESS
+
+Device files
+
+  Each port of each InfiniBand device has a "umad" device attached.
+  For example, a two-port HCA will have two devices, while a switch
+  will have one device (for switch port 0).
+
+Creating MAD agents
+
+  A MAD agent can be created by filling in a struct ib_user_mad_reg_req
+  and then calling the IB_USER_MAD_REGISTER_AGENT ioctl on a file
+  descriptor for the appropriate device file.  If the registration
+  request succeeds, a 32-bit id will be returned in the structure.
+  For example:
+
+	struct ib_user_mad_reg_req req = { /* ... */ };
+	ret = ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (char *) &req);
+	if (!ret)
+		my_agent = req.id;
+	else
+		perror("agent register");
+
+  Agents can be unregistered with the IB_USER_MAD_UNREGISTER_AGENT
+  ioctl.  Also, all agents registered through a file descriptor will
+  be unregistered when the descriptor is closed.
+
+Receiving MADs
+
+  MADs are received using read().  The buffer passed to read() must be
+  large enough to hold at least one struct ib_user_mad.  For example:
+
+	struct ib_user_mad mad;
+	ret = read(fd, &mad, sizeof mad);
+	if (ret != sizeof mad)
+		perror("read");
+
+  In addition to the actual MAD contents, the other struct ib_user_mad
+  fields will be filled in with information on the received MAD.  For
+  example, the remote LID will be in mad.lid.
+
+  If a send times out, a receive will be generated with mad.status set
+  to ETIMEDOUT.  Otherwise when a MAD has been successfully received,
+  mad.status will be 0.
+
+  poll()/select() may be used to wait until a MAD can be read.
+
+Sending MADs
+
+  MADs are sent using write().  The agent ID for sending should be
+  filled into the id field of the MAD, the destination LID should be
+  filled into the lid field, and so on.  For example:
+
+	struct ib_user_mad mad;
+
+	/* fill in mad.data */
+
+	mad.id  = my_agent;	/* req.id from agent registration */
+	mad.lid = my_dest;	/* in network byte order... */
+	/* etc. */
+
+	ret = write(fd, &mad, sizeof mad);
+	if (ret != sizeof mad)
+		perror("write");
+
+/dev files
+
+  To create the appropriate character device files automatically with
+  udev, a rule like
+
+    KERNEL="umad*", NAME="infiniband/%k"
+
+  can be used.  This will create a device node named
+
+    /dev/infiniband/umad0
+
+  for the first port, and so on.  The InfiniBand device and port
+  associated with this device can be determined from the files
+
+    /sys/class/infiniband_mad/umad0/ibdev
+    /sys/class/infiniband_mad/umad0/port
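The user_mad.txt fragments above combine into one end-to-end flow: open the device node, register an agent, write() a MAD, then poll() and read() the reply.  The following is only a sketch: the <ib_user_mad.h> header name, the zero-filled registration request, and the destination LID are assumptions not spelled out in the text.

	#include <stdio.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <poll.h>
	#include <arpa/inet.h>
	#include <sys/ioctl.h>
	#include <ib_user_mad.h>	/* struct ib_user_mad etc.; header name is an assumption */

	int main(void)
	{
		struct ib_user_mad_reg_req req;
		struct ib_user_mad mad;
		struct pollfd pfd;
		int fd = open("/dev/infiniband/umad0", O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}

		memset(&req, 0, sizeof req);
		/* fill in req (management class etc.) for the agent to register */
		if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (char *) &req)) {
			perror("agent register");
			return 1;
		}

		memset(&mad, 0, sizeof mad);
		/* fill in mad.data */
		mad.id  = req.id;	/* 32-bit agent id returned by the ioctl */
		mad.lid = htons(1);	/* destination LID 1 is an assumption */

		if (write(fd, &mad, sizeof mad) != sizeof mad)
			perror("write");

		pfd.fd = fd;
		pfd.events = POLLIN;
		/* mad.status is ETIMEDOUT if the send timed out, 0 on success */
		if (poll(&pfd, 1, 5000) > 0 &&
		    read(fd, &mad, sizeof mad) == sizeof mad)
			printf("reply status %d from lid 0x%04x\n",
			       (int) mad.status, (unsigned) ntohs(mad.lid));

		close(fd);
		return 0;
	}
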
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt
index bd7a19777861..b63436aeea3a 100644
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -72,6 +72,7 @@ Code	Seq#	Include File		Comments
 0x09	all	linux/md.h
 0x12	all	linux/fs.h
 		linux/blkpg.h
+0x1b	all	InfiniBand Subsystem	<http://www.openib.org/>
 0x20	all	drivers/cdrom/cm206.h
 0x22	all	scsi/sg.h
 '#'	00-3F	IEEE 1394 Subsystem	Block for the entire subsystem
diff --git a/MAINTAINERS b/MAINTAINERS
index d3dc9555cdcc..dc0d3389ad00 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1081,6 +1081,17 @@ M:	lethal@chaoticdreams.org
 L:	linux-fbdev-devel@lists.sourceforge.net
 S:	Maintained
 
+INFINIBAND SUBSYSTEM
+P:	Roland Dreier
+M:	roland@topspin.com
+P:	Sean Hefty
+M:	mshefty@ichips.intel.com
+P:	Hal Rosenstock
+M:	halr@voltaire.com
+L:	openib-general@openib.org
+W:	http://www.openib.org/
+S:	Supported
+
 INPUT (KEYBOARD, MOUSE, JOYSTICK) DRIVERS
 P:	Vojtech Pavlik
 M:	vojtech@suse.cz
@@ -1568,7 +1579,6 @@ P:	Wensong Zhang
 M:	wensong@linux-vs.org
 P:	Julian Anastasov
 M:	ja@ssi.bg
-L:	lvs-users@linuxvirtualserver.org
 S:	Maintained
 
 NFS CLIENT
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 79f88cd36a52..87b80b13dc83 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -591,6 +591,8 @@ source "sound/Kconfig"
 
 source "drivers/usb/Kconfig"
 
+source "drivers/infiniband/Kconfig"
+
 source "drivers/char/watchdog/Kconfig"
 
 source "arch/sparc64/oprofile/Kconfig"
diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 0ceffbca09d9..c5f2317481f4 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.10-rc3 -# Wed Dec 8 21:14:26 2004 +# Linux kernel version: 2.6.10 +# Mon Dec 27 22:36:56 2004 # CONFIG_64BIT=y CONFIG_MMU=y @@ -674,6 +674,7 @@ CONFIG_BRIDGE_EBT_MARK_T=m CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m +CONFIG_BRIDGE_EBT_ULOG=m CONFIG_XFRM=y CONFIG_XFRM_USER=m @@ -740,6 +741,7 @@ CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m CONFIG_CLS_U32_PERF=y CONFIG_NET_CLS_IND=y +CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y @@ -925,6 +927,7 @@ CONFIG_HAMACHI=m CONFIG_YELLOWFIN=m CONFIG_R8169=m CONFIG_R8169_NAPI=y +CONFIG_R8169_VLAN=y CONFIG_SK98LIN=m CONFIG_VIA_VELOCITY=m CONFIG_TIGON3=m @@ -936,6 +939,7 @@ CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y +CONFIG_2BUFF_MODE=y # # Token Ring devices @@ -1332,7 +1336,7 @@ CONFIG_SMB_FS=m CONFIG_CIFS=m # CONFIG_CIFS_STATS is not set # CONFIG_CIFS_XATTR is not set -CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_EXPERIMENTAL is not set CONFIG_NCP_FS=m # CONFIG_NCPFS_PACKET_SIGNING is not set # CONFIG_NCPFS_IOCTL_LOCKING is not set @@ -1459,7 +1463,7 @@ CONFIG_DVB_BUDGET_PATCH=m # CONFIG_DVB_TTUSB_BUDGET is not set CONFIG_DVB_TTUSB_DEC=m CONFIG_DVB_DIBUSB=m -CONFIG_DVB_DIBUSB_MISDESIGNED_AN2235=y +# CONFIG_DVB_DIBUSB_MISDESIGNED_DEVICES is not set CONFIG_DVB_DIBCOM_DEBUG=y CONFIG_DVB_CINERGYT2=m # CONFIG_DVB_CINERGYT2_TUNING is not set @@ -1468,6 +1472,7 @@ CONFIG_DVB_CINERGYT2=m # # Supported FlexCopII (B2C2) Adapters # CONFIG_DVB_B2C2_SKYSTAR=m +CONFIG_DVB_B2C2_USB=m # # Supported BT878 Adapters @@ -1504,6 +1509,7 @@ CONFIG_DVB_TDA1004X=m CONFIG_DVB_NXT6000=m CONFIG_DVB_MT352=m CONFIG_DVB_DIB3000MB=m +CONFIG_DVB_DIB3000MC=m # # DVB-C (cable) frontends @@ -1635,6 +1641,7 @@ CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_ROOT_HUB_TT is not set CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=m +CONFIG_USB_SL811_HCD=m # # USB Device Class drivers @@ -1806,6 +1813,15 @@ CONFIG_USB_SPEEDTOUCH=m # CONFIG_USB_GADGET is not set # +# InfiniBand support +# +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_MTHCA=m +# CONFIG_INFINIBAND_MTHCA_DEBUG is not set +CONFIG_INFINIBAND_IPOIB=m +# CONFIG_INFINIBAND_IPOIB_DEBUG is not set + +# # Watchdog Cards # CONFIG_WATCHDOG=y @@ -1891,6 +1907,10 @@ CONFIG_CRYPTO_CRC32C=m CONFIG_CRYPTO_TEST=m # +# Hardware crypto devices +# + +# # Library routines # CONFIG_CRC_CCITT=m diff --git a/arch/sparc64/solaris/ioctl.c b/arch/sparc64/solaris/ioctl.c index b0d12032b1e1..cac0a1cf0050 100644 --- a/arch/sparc64/solaris/ioctl.c +++ b/arch/sparc64/solaris/ioctl.c @@ -298,7 +298,7 @@ static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg) if (! current->files->fd[fd] || ! current->files->fd[fd]->f_dentry || ! (ino = current->files->fd[fd]->f_dentry->d_inode) || - ! ino->i_sock) { + ! S_ISSOCK(ino->i_mode)) { spin_unlock(&current->files->file_lock); return TBADF; } @@ -478,7 +478,7 @@ static inline int solaris_S(struct file *filp, unsigned int fd, unsigned int cmd struct module_info *mi; ino = filp->f_dentry->d_inode; - if (! ino->i_sock) + if (!S_ISSOCK(ino->i_mode)) return -EBADF; sock = filp->private_data; if (!
sock) { diff --git a/arch/sparc64/solaris/socksys.c b/arch/sparc64/solaris/socksys.c index e16e64d8a6f8..d7c1c76582cc 100644 --- a/arch/sparc64/solaris/socksys.c +++ b/arch/sparc64/solaris/socksys.c @@ -150,7 +150,7 @@ static unsigned int socksys_poll(struct file * filp, poll_table * wait) unsigned int mask = 0; ino=filp->f_dentry->d_inode; - if (ino && ino->i_sock) { + if (ino && S_ISSOCK(ino->i_mode)) { struct sol_socket_struct *sock; sock = (struct sol_socket_struct*)filp->private_data; if (sock && sock->pfirst) { diff --git a/arch/sparc64/solaris/timod.c b/arch/sparc64/solaris/timod.c index 24de5062079f..79d15434d975 100644 --- a/arch/sparc64/solaris/timod.c +++ b/arch/sparc64/solaris/timod.c @@ -853,9 +853,7 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3) if(!filp) goto out; ino = filp->f_dentry->d_inode; - if (!ino) goto out; - - if (!ino->i_sock) + if (!ino || !S_ISSOCK(ino->i_mode)) goto out; ctlptr = (struct strbuf __user *)A(arg1); @@ -923,7 +921,7 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3) ino = filp->f_dentry->d_inode; if (!ino) goto out; - if (!ino->i_sock && + if (!S_ISSOCK(ino->i_mode) && (imajor(ino) != 30 || iminor(ino) != 1)) goto out; diff --git a/drivers/Kconfig b/drivers/Kconfig index 42e7ef5b241e..ed41d9036bfc 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -56,4 +56,6 @@ source "drivers/usb/Kconfig" source "drivers/mmc/Kconfig" +source "drivers/infiniband/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index fe3956d17e17..3010e5a54108 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -59,5 +59,6 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ +obj-$(CONFIG_INFINIBAND) += infiniband/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 8a8521cda4e3..924abd2654a0 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -1692,7 +1692,7 @@ static unsigned int command_timeouts [] = { }; -unsigned int command_successes [] = { +static unsigned int command_successes [] = { [host_memory_test] = COMMAND_PASSED_TEST, [read_adapter_memory] = COMMAND_READ_DATA_OK, [write_adapter_memory] = COMMAND_WRITE_DATA_OK, @@ -2088,7 +2088,7 @@ static void __init amb_ucode_version (amb_dev * dev) { } // swap bits within byte to get Ethernet ordering -u8 bit_swap (u8 byte) +static u8 bit_swap (u8 byte) { const u8 swap[] = { 0x0, 0x8, 0x4, 0xc, diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index 381e9c18a4fb..436b83a1b618 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -396,7 +396,7 @@ static int atmtcp_create(int itf,int persist,struct atm_dev **result) } -int atmtcp_attach(struct atm_vcc *vcc,int itf) +static int atmtcp_attach(struct atm_vcc *vcc,int itf) { struct atm_dev *dev; @@ -427,13 +427,13 @@ int atmtcp_attach(struct atm_vcc *vcc,int itf) } -int atmtcp_create_persistent(int itf) +static int atmtcp_create_persistent(int itf) { return atmtcp_create(itf,1,NULL); } -int atmtcp_remove_persistent(int itf) +static int atmtcp_remove_persistent(int itf) { struct atm_dev *dev; struct atmtcp_dev_data *dev_data; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index d6f161d166d5..b5cd8619205e 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -82,14 +82,14 @@ static int num=0x5a; * would be interpreted. 
-- REW */ #define NP FS_NR_FREE_POOLS -int rx_buf_sizes[NP] = {128, 256, 512, 1024, 2048, 4096, 16384, 65520}; +static int rx_buf_sizes[NP] = {128, 256, 512, 1024, 2048, 4096, 16384, 65520}; /* log2: 7 8 9 10 11 12 14 16 */ #if 0 -int rx_pool_sizes[NP] = {1024, 1024, 512, 256, 128, 64, 32, 32}; +static int rx_pool_sizes[NP] = {1024, 1024, 512, 256, 128, 64, 32, 32}; #else /* debug */ -int rx_pool_sizes[NP] = {128, 128, 128, 64, 64, 64, 32, 32}; +static int rx_pool_sizes[NP] = {128, 128, 128, 64, 64, 64, 32, 32}; #endif /* log2: 10 10 9 8 7 6 5 5 */ /* sumlog2: 17 18 18 18 18 18 19 21 */ @@ -250,7 +250,7 @@ struct reginit_item { }; -struct reginit_item PHY_NTC_INIT[] __devinitdata = { +static struct reginit_item PHY_NTC_INIT[] __devinitdata = { { PHY_CLEARALL, 0x40 }, { 0x12, 0x0001 }, { 0x13, 0x7605 }, @@ -334,7 +334,7 @@ module_param(fs_keystream, int, 0); #define func_exit() fs_dprintk (FS_DEBUG_FLOW, "fs: exit %s\n", __FUNCTION__) -struct fs_dev *fs_boards = NULL; +static struct fs_dev *fs_boards = NULL; #ifdef DEBUG @@ -1921,7 +1921,7 @@ static int __devinit firestream_init_one (struct pci_dev *pci_dev, return -ENODEV; } -void __devexit firestream_remove_one (struct pci_dev *pdev) +static void __devexit firestream_remove_one (struct pci_dev *pdev) { int i; struct fs_dev *dev, *nxtdev; diff --git a/drivers/atm/he.c b/drivers/atm/he.c index ccd54a85c5a4..7221439b4937 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -147,7 +147,7 @@ static u8 read_prom_byte(struct he_dev *he_dev, int addr); /* globals */ -struct he_dev *he_devs = NULL; +static struct he_dev *he_devs = NULL; static int disable64 = 0; static short nvpibits = -1; static short nvcibits = -1; @@ -155,6 +155,48 @@ static short rx_skb_reserve = 16; static int irq_coalesce = 1; static int sdh = 0; +/* Read from EEPROM = 0000 0011b */ +static unsigned int readtab[] = { + CS_HIGH | CLK_HIGH, + CS_LOW | CLK_LOW, + CLK_HIGH, /* 0 */ + CLK_LOW, + CLK_HIGH, /* 0 */ + CLK_LOW, + CLK_HIGH, /* 0 */ + CLK_LOW, + CLK_HIGH, /* 0 */ + CLK_LOW, + CLK_HIGH, /* 0 */ + CLK_LOW, + CLK_HIGH, /* 0 */ + CLK_LOW | SI_HIGH, + CLK_HIGH | SI_HIGH, /* 1 */ + CLK_LOW | SI_HIGH, + CLK_HIGH | SI_HIGH /* 1 */ +}; + +/* Clock to read from/write to the EEPROM */ +static unsigned int clocktab[] = { + CLK_LOW, + CLK_HIGH, + CLK_LOW, + CLK_HIGH, + CLK_LOW, + CLK_HIGH, + CLK_LOW, + CLK_HIGH, + CLK_LOW, + CLK_HIGH, + CLK_LOW, + CLK_HIGH, + CLK_LOW, + CLK_HIGH, + CLK_LOW, + CLK_HIGH, + CLK_LOW +}; + static struct atmdev_ops he_ops = { .open = he_open, diff --git a/drivers/atm/he.h b/drivers/atm/he.h index 7075ef4aa916..1a903859343a 100644 --- a/drivers/atm/he.h +++ b/drivers/atm/he.h @@ -892,47 +892,4 @@ struct he_vcc #define SI_HIGH ID_DIN /* HOST_CNTL_ID_PROM_DATA_IN */ #define EEPROM_DELAY 400 /* microseconds */ -/* Read from EEPROM = 0000 0011b */ -unsigned int readtab[] = { - CS_HIGH | CLK_HIGH, - CS_LOW | CLK_LOW, - CLK_HIGH, /* 0 */ - CLK_LOW, - CLK_HIGH, /* 0 */ - CLK_LOW, - CLK_HIGH, /* 0 */ - CLK_LOW, - CLK_HIGH, /* 0 */ - CLK_LOW, - CLK_HIGH, /* 0 */ - CLK_LOW, - CLK_HIGH, /* 0 */ - CLK_LOW | SI_HIGH, - CLK_HIGH | SI_HIGH, /* 1 */ - CLK_LOW | SI_HIGH, - CLK_HIGH | SI_HIGH /* 1 */ -}; - -/* Clock to read from/write to the EEPROM */ -unsigned int clocktab[] = { - CLK_LOW, - CLK_HIGH, - CLK_LOW, - CLK_HIGH, - CLK_LOW, - CLK_HIGH, - CLK_LOW, - CLK_HIGH, - CLK_LOW, - CLK_HIGH, - CLK_LOW, - CLK_HIGH, - CLK_LOW, - CLK_HIGH, - CLK_LOW, - CLK_HIGH, - CLK_LOW -}; - - #endif /* _HE_H_ */ diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c index 
0747dbadbcf1..9d36cfc175e9 100644 --- a/drivers/atm/idt77105.c +++ b/drivers/atm/idt77105.c @@ -323,7 +323,7 @@ static int idt77105_start(struct atm_dev *dev) } -int idt77105_stop(struct atm_dev *dev) +static int idt77105_stop(struct atm_dev *dev) { struct idt77105_priv *walk, *prev; diff --git a/drivers/atm/idt77105.h b/drivers/atm/idt77105.h index bd542f364ad1..8ba8218aaefe 100644 --- a/drivers/atm/idt77105.h +++ b/drivers/atm/idt77105.h @@ -77,7 +77,6 @@ #ifdef __KERNEL__ int idt77105_init(struct atm_dev *dev) __init; -int idt77105_stop(struct atm_dev *dev); #endif /* diff --git a/drivers/atm/idt77252.h b/drivers/atm/idt77252.h index cfdd15e7d740..544b39738291 100644 --- a/drivers/atm/idt77252.h +++ b/drivers/atm/idt77252.h @@ -275,7 +275,7 @@ struct rsq_info { struct rsq_entry *next; struct rsq_entry *last; dma_addr_t paddr; -} rsq_info; +}; /*****************************************************************************/ diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 73dd5cc38116..77d48f353623 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -72,13 +72,13 @@ struct suni_priv { #define PRIV(dev) ((struct suni_priv *) dev->phy_data) static unsigned char ia_phy_get(struct atm_dev *dev, unsigned long addr); +static void desc_dbg(IADEV *iadev); static IADEV *ia_dev[8]; static struct atm_dev *_ia_dev[8]; static int iadev_count; static void ia_led_timer(unsigned long arg); static struct timer_list ia_timer = TIMER_INITIALIZER(ia_led_timer, 0, 0); -struct atm_vcc *vcc_close_que[100]; static int IA_TX_BUF = DFL_TX_BUFFERS, IA_TX_BUF_SZ = DFL_TX_BUF_SZ; static int IA_RX_BUF = DFL_RX_BUFFERS, IA_RX_BUF_SZ = DFL_RX_BUF_SZ; static uint IADebugFlag = /* IF_IADBG_ERR | IF_IADBG_CBR| IF_IADBG_INIT_ADAPTER @@ -147,7 +147,6 @@ static void ia_hack_tcq(IADEV *dev) { u_short desc1; u_short tcq_wr; struct ia_vcc *iavcc_r = NULL; - extern void desc_dbg(IADEV *iadev); tcq_wr = readl(dev->seg_reg+TCQ_WR_PTR) & 0xffff; while (dev->host_tcq_wr != tcq_wr) { @@ -187,7 +186,6 @@ static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) { unsigned long delta; static unsigned long timer = 0; int ltimeout; - extern void desc_dbg(IADEV *iadev); ia_hack_tcq (dev); if(((jiffies - timer)>50)||((dev->ffL.tcq_rd==dev->host_tcq_wr))){ @@ -644,7 +642,7 @@ static int ia_que_tx (IADEV *iadev) { return 0; } -void ia_tx_poll (IADEV *iadev) { +static void ia_tx_poll (IADEV *iadev) { struct atm_vcc *vcc = NULL; struct sk_buff *skb = NULL, *skb1 = NULL; struct ia_vcc *iavcc; @@ -861,7 +859,7 @@ static void IaFrontEndIntr(IADEV *iadev) { return; } -void ia_mb25_init (IADEV *iadev) +static void ia_mb25_init (IADEV *iadev) { volatile ia_mb25_t *mb25 = (ia_mb25_t*)iadev->phy; #if 0 @@ -876,7 +874,7 @@ void ia_mb25_init (IADEV *iadev) return; } -void ia_suni_pm7345_init (IADEV *iadev) +static void ia_suni_pm7345_init (IADEV *iadev) { volatile suni_pm7345_t *suni_pm7345 = (suni_pm7345_t *)iadev->phy; if (iadev->phy_type & FE_DS3_PHY) @@ -959,9 +957,8 @@ void ia_suni_pm7345_init (IADEV *iadev) /***************************** IA_LIB END *****************************/ -/* pwang_test debug utility */ -int tcnter = 0, rcnter = 0; -void xdump( u_char* cp, int length, char* prefix ) +static int tcnter = 0; +static void xdump( u_char* cp, int length, char* prefix ) { int col, count; u_char prntBuf[120]; @@ -1008,7 +1005,7 @@ static struct atm_dev *ia_boards = NULL; /*-- some utilities and memory allocation stuff will come here -------------*/ -void desc_dbg(IADEV *iadev) { +static void desc_dbg(IADEV *iadev) { u_short 
tcq_wr_ptr, tcq_st_ptr, tcq_ed_ptr; u32 i; diff --git a/drivers/atm/iphase.h b/drivers/atm/iphase.h index dc6ff869173d..b8d0bd4d6c30 100644 --- a/drivers/atm/iphase.h +++ b/drivers/atm/iphase.h @@ -1126,8 +1126,6 @@ typedef struct { #define FE_DS3_PHY 0x0080 /* DS3 */ #define FE_E3_PHY 0x0090 /* E3 */ -extern void ia_mb25_init (IADEV *); - /*********************** SUNI_PM7345 PHY DEFINE HERE *********************/ typedef struct _suni_pm7345_t { @@ -1326,8 +1324,6 @@ typedef struct _suni_pm7345_t #define SUNI_DS3_FOVRI 0x02 /* FIFO overrun */ #define SUNI_DS3_FUDRI 0x01 /* FIFO underrun */ -extern void ia_suni_pm7345_init (IADEV *iadev); - ///////////////////SUNI_PM7345 PHY DEFINE END ///////////////////////////// /* ia_eeprom define*/ diff --git a/drivers/atm/nicstarmac.c b/drivers/atm/nicstarmac.c index 7a842ef71d92..c9cb962bb300 100644 --- a/drivers/atm/nicstarmac.c +++ b/drivers/atm/nicstarmac.c @@ -35,6 +35,7 @@ #define SI_LOW 0x0000 /* Serial input data low */ /* Read Status Register = 0000 0101b */ +#if 0 static u_int32_t rdsrtab[] = { CS_HIGH | CLK_HIGH, @@ -55,6 +56,7 @@ static u_int32_t rdsrtab[] = CLK_LOW | SI_HIGH, CLK_HIGH | SI_HIGH /* 1 */ }; +#endif /* 0 */ /* Read from EEPROM = 0000 0011b */ @@ -117,7 +119,7 @@ static u_int32_t clocktab[] = * eeprom, then pull the result from bit 16 of the NicSTaR's General Purpose * register. */ - +#if 0 u_int32_t nicstar_read_eprom_status( virt_addr_t base ) { @@ -153,6 +155,7 @@ nicstar_read_eprom_status( virt_addr_t base ) osp_MicroDelay( CYCLE_DELAY ); return rbyte; } +#endif /* 0 */ /* diff --git a/drivers/atm/nicstarmac.h b/drivers/atm/nicstarmac.h index 80beec52f1e4..9c95dfe95cc5 100644 --- a/drivers/atm/nicstarmac.h +++ b/drivers/atm/nicstarmac.h @@ -9,6 +9,5 @@ typedef void __iomem *virt_addr_t; -u_int32_t nicstar_read_eprom_status( virt_addr_t base ); void nicstar_init_eprom( virt_addr_t base ); void nicstar_read_eprom( virt_addr_t, u_int8_t, u_int8_t *, u_int32_t); diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig new file mode 100644 index 000000000000..3cc3ff0cccb1 --- /dev/null +++ b/drivers/infiniband/Kconfig @@ -0,0 +1,14 @@ +menu "InfiniBand support" + +config INFINIBAND + tristate "InfiniBand support" + ---help--- + Core support for InfiniBand (IB). Make sure to also select + any protocols you wish to use as well as drivers for your + InfiniBand hardware. + +source "drivers/infiniband/hw/mthca/Kconfig" + +source "drivers/infiniband/ulp/ipoib/Kconfig" + +endmenu diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile new file mode 100644 index 000000000000..d256cf798218 --- /dev/null +++ b/drivers/infiniband/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_INFINIBAND) += core/ +obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ +obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile new file mode 100644 index 000000000000..d2dbfb52c0a3 --- /dev/null +++ b/drivers/infiniband/core/Makefile @@ -0,0 +1,12 @@ +EXTRA_CFLAGS += -Idrivers/infiniband/include + +obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o + +ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ + device.o fmr_pool.o cache.o + +ib_mad-y := mad.o smi.o agent.o + +ib_sa-y := sa_query.o + +ib_umad-y := user_mad.o diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c new file mode 100644 index 000000000000..49f4f9f92506 --- /dev/null +++ b/drivers/infiniband/core/agent.c @@ -0,0 +1,399 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. 
All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $ + */ + +#include <linux/dma-mapping.h> + +#include <asm/bug.h> + +#include <ib_smi.h> + +#include "smi.h" +#include "agent_priv.h" +#include "mad_priv.h" + + +spinlock_t ib_agent_port_list_lock; +static LIST_HEAD(ib_agent_port_list); + +extern kmem_cache_t *ib_mad_cache; + + +/* + * Caller must hold ib_agent_port_list_lock + */ +static inline struct ib_agent_port_private * +__ib_get_agent_port(struct ib_device *device, int port_num, + struct ib_mad_agent *mad_agent) +{ + struct ib_agent_port_private *entry; + + BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */ + + if (device) { + list_for_each_entry(entry, &ib_agent_port_list, port_list) { + if (entry->dr_smp_agent->device == device && + entry->port_num == port_num) + return entry; + } + } else { + list_for_each_entry(entry, &ib_agent_port_list, port_list) { + if ((entry->dr_smp_agent == mad_agent) || + (entry->lr_smp_agent == mad_agent) || + (entry->perf_mgmt_agent == mad_agent)) + return entry; + } + } + return NULL; +} + +static inline struct ib_agent_port_private * +ib_get_agent_port(struct ib_device *device, int port_num, + struct ib_mad_agent *mad_agent) +{ + struct ib_agent_port_private *entry; + unsigned long flags; + + spin_lock_irqsave(&ib_agent_port_list_lock, flags); + entry = __ib_get_agent_port(device, port_num, mad_agent); + spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); + + return entry; +} + +int smi_check_local_dr_smp(struct ib_smp *smp, + struct ib_device *device, + int port_num) +{ + struct ib_agent_port_private *port_priv; + + if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + return 1; + port_priv = ib_get_agent_port(device, port_num, NULL); + if (!port_priv) { + printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d " + "not open\n", + device->name, port_num); + return 1; + } + + return smi_check_local_smp(port_priv->dr_smp_agent, smp); +} + 
+static int agent_mad_send(struct ib_mad_agent *mad_agent, + struct ib_agent_port_private *port_priv, + struct ib_mad_private *mad_priv, + struct ib_grh *grh, + struct ib_wc *wc) +{ + struct ib_agent_send_wr *agent_send_wr; + struct ib_sge gather_list; + struct ib_send_wr send_wr; + struct ib_send_wr *bad_send_wr; + struct ib_ah_attr ah_attr; + unsigned long flags; + int ret = 1; + + agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL); + if (!agent_send_wr) + goto out; + agent_send_wr->mad = mad_priv; + + /* PCI mapping */ + gather_list.addr = dma_map_single(mad_agent->device->dma_device, + &mad_priv->mad, + sizeof(mad_priv->mad), + DMA_TO_DEVICE); + gather_list.length = sizeof(mad_priv->mad); + gather_list.lkey = (*port_priv->mr).lkey; + + send_wr.next = NULL; + send_wr.opcode = IB_WR_SEND; + send_wr.sg_list = &gather_list; + send_wr.num_sge = 1; + send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */ + send_wr.wr.ud.timeout_ms = 0; + send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + + ah_attr.dlid = wc->slid; + ah_attr.port_num = mad_agent->port_num; + ah_attr.src_path_bits = wc->dlid_path_bits; + ah_attr.sl = wc->sl; + ah_attr.static_rate = 0; + ah_attr.ah_flags = 0; /* No GRH */ + if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { + if (wc->wc_flags & IB_WC_GRH) { + ah_attr.ah_flags = IB_AH_GRH; + /* Should sgid be looked up ? */ + ah_attr.grh.sgid_index = 0; + ah_attr.grh.hop_limit = grh->hop_limit; + ah_attr.grh.flow_label = be32_to_cpup( + &grh->version_tclass_flow) & 0xfffff; + ah_attr.grh.traffic_class = (be32_to_cpup( + &grh->version_tclass_flow) >> 20) & 0xff; + memcpy(ah_attr.grh.dgid.raw, + grh->sgid.raw, + sizeof(ah_attr.grh.dgid)); + } + } + + agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr); + if (IS_ERR(agent_send_wr->ah)) { + printk(KERN_ERR SPFX "No memory for address handle\n"); + kfree(agent_send_wr); + goto out; + } + + send_wr.wr.ud.ah = agent_send_wr->ah; + if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { + send_wr.wr.ud.pkey_index = wc->pkey_index; + send_wr.wr.ud.remote_qkey = IB_QP1_QKEY; + } else { /* for SMPs */ + send_wr.wr.ud.pkey_index = 0; + send_wr.wr.ud.remote_qkey = 0; + } + send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr; + send_wr.wr_id = (unsigned long)agent_send_wr; + + pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr); + + /* Send */ + spin_lock_irqsave(&port_priv->send_list_lock, flags); + if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) { + spin_unlock_irqrestore(&port_priv->send_list_lock, flags); + dma_unmap_single(mad_agent->device->dma_device, + pci_unmap_addr(agent_send_wr, mapping), + sizeof(mad_priv->mad), + DMA_TO_DEVICE); + ib_destroy_ah(agent_send_wr->ah); + kfree(agent_send_wr); + } else { + list_add_tail(&agent_send_wr->send_list, + &port_priv->send_posted_list); + spin_unlock_irqrestore(&port_priv->send_list_lock, flags); + ret = 0; + } + +out: + return ret; +} + +int agent_send(struct ib_mad_private *mad, + struct ib_grh *grh, + struct ib_wc *wc, + struct ib_device *device, + int port_num) +{ + struct ib_agent_port_private *port_priv; + struct ib_mad_agent *mad_agent; + + port_priv = ib_get_agent_port(device, port_num, NULL); + if (!port_priv) { + printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n", + device->name, port_num); + return 1; + } + + /* Get mad agent based on mgmt_class in MAD */ + switch (mad->mad.mad.mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + mad_agent = port_priv->dr_smp_agent; + break; + case 
IB_MGMT_CLASS_SUBN_LID_ROUTED: + mad_agent = port_priv->lr_smp_agent; + break; + case IB_MGMT_CLASS_PERF_MGMT: + mad_agent = port_priv->perf_mgmt_agent; + break; + default: + return 1; + } + + return agent_mad_send(mad_agent, port_priv, mad, grh, wc); +} + +static void agent_send_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_agent_port_private *port_priv; + struct ib_agent_send_wr *agent_send_wr; + unsigned long flags; + + /* Find matching MAD agent */ + port_priv = ib_get_agent_port(NULL, 0, mad_agent); + if (!port_priv) { + printk(KERN_ERR SPFX "agent_send_handler: no matching MAD " + "agent %p\n", mad_agent); + return; + } + + agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id; + spin_lock_irqsave(&port_priv->send_list_lock, flags); + /* Remove completed send from posted send MAD list */ + list_del(&agent_send_wr->send_list); + spin_unlock_irqrestore(&port_priv->send_list_lock, flags); + + /* Unmap PCI */ + dma_unmap_single(mad_agent->device->dma_device, + pci_unmap_addr(agent_send_wr, mapping), + sizeof(agent_send_wr->mad->mad), + DMA_TO_DEVICE); + + ib_destroy_ah(agent_send_wr->ah); + + /* Release allocated memory */ + kmem_cache_free(ib_mad_cache, agent_send_wr->mad); + kfree(agent_send_wr); +} + +int ib_agent_port_open(struct ib_device *device, int port_num) +{ + int ret; + struct ib_agent_port_private *port_priv; + struct ib_mad_reg_req reg_req; + unsigned long flags; + + /* First, check if port already open for SMI */ + port_priv = ib_get_agent_port(device, port_num, NULL); + if (port_priv) { + printk(KERN_DEBUG SPFX "%s port %d already open\n", + device->name, port_num); + return 0; + } + + /* Create new device info */ + port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); + if (!port_priv) { + printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n"); + ret = -ENOMEM; + goto error1; + } + + memset(port_priv, 0, sizeof *port_priv); + port_priv->port_num = port_num; + spin_lock_init(&port_priv->send_list_lock); + INIT_LIST_HEAD(&port_priv->send_posted_list); + + /* Obtain MAD agent for directed route SM class */ + reg_req.mgmt_class = IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE; + reg_req.mgmt_class_version = 1; + + port_priv->dr_smp_agent = ib_register_mad_agent(device, port_num, + IB_QPT_SMI, + NULL, 0, + &agent_send_handler, + NULL, NULL); + + if (IS_ERR(port_priv->dr_smp_agent)) { + ret = PTR_ERR(port_priv->dr_smp_agent); + goto error2; + } + + /* Obtain MAD agent for LID routed SM class */ + reg_req.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + port_priv->lr_smp_agent = ib_register_mad_agent(device, port_num, + IB_QPT_SMI, + NULL, 0, + &agent_send_handler, + NULL, NULL); + if (IS_ERR(port_priv->lr_smp_agent)) { + ret = PTR_ERR(port_priv->lr_smp_agent); + goto error3; + } + + /* Obtain MAD agent for PerfMgmt class */ + reg_req.mgmt_class = IB_MGMT_CLASS_PERF_MGMT; + port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num, + IB_QPT_GSI, + NULL, 0, + &agent_send_handler, + NULL, NULL); + if (IS_ERR(port_priv->perf_mgmt_agent)) { + ret = PTR_ERR(port_priv->perf_mgmt_agent); + goto error4; + } + + port_priv->mr = ib_get_dma_mr(port_priv->dr_smp_agent->qp->pd, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(port_priv->mr)) { + printk(KERN_ERR SPFX "Couldn't get DMA MR\n"); + ret = PTR_ERR(port_priv->mr); + goto error5; + } + + spin_lock_irqsave(&ib_agent_port_list_lock, flags); + list_add_tail(&port_priv->port_list, &ib_agent_port_list); + spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); + + return 0; + 
+error5: + ib_unregister_mad_agent(port_priv->perf_mgmt_agent); +error4: + ib_unregister_mad_agent(port_priv->lr_smp_agent); +error3: + ib_unregister_mad_agent(port_priv->dr_smp_agent); +error2: + kfree(port_priv); +error1: + return ret; +} + +int ib_agent_port_close(struct ib_device *device, int port_num) +{ + struct ib_agent_port_private *port_priv; + unsigned long flags; + + spin_lock_irqsave(&ib_agent_port_list_lock, flags); + port_priv = __ib_get_agent_port(device, port_num, NULL); + if (port_priv == NULL) { + spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); + printk(KERN_ERR SPFX "Port %d not found\n", port_num); + return -ENODEV; + } + list_del(&port_priv->port_list); + spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); + + ib_dereg_mr(port_priv->mr); + + ib_unregister_mad_agent(port_priv->perf_mgmt_agent); + ib_unregister_mad_agent(port_priv->lr_smp_agent); + ib_unregister_mad_agent(port_priv->dr_smp_agent); + kfree(port_priv); + + return 0; +} diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h new file mode 100644 index 000000000000..d9426842254a --- /dev/null +++ b/drivers/infiniband/core/agent.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: agent.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#ifndef __AGENT_H_ +#define __AGENT_H_ + +extern spinlock_t ib_agent_port_list_lock; + +extern int ib_agent_port_open(struct ib_device *device, + int port_num); + +extern int ib_agent_port_close(struct ib_device *device, int port_num); + +extern int agent_send(struct ib_mad_private *mad, + struct ib_grh *grh, + struct ib_wc *wc, + struct ib_device *device, + int port_num); + +#endif /* __AGENT_H_ */ diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h new file mode 100644 index 000000000000..3b63338f12af --- /dev/null +++ b/drivers/infiniband/core/agent_priv.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: agent_priv.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#ifndef __IB_AGENT_PRIV_H__ +#define __IB_AGENT_PRIV_H__ + +#include <linux/pci.h> + +#define SPFX "ib_agent: " + +struct ib_agent_send_wr { + struct list_head send_list; + struct ib_ah *ah; + struct ib_mad_private *mad; + DECLARE_PCI_UNMAP_ADDR(mapping) +}; + +struct ib_agent_port_private { + struct list_head port_list; + struct list_head send_posted_list; + spinlock_t send_list_lock; + int port_num; + struct ib_mad_agent *dr_smp_agent; /* DR SM class */ + struct ib_mad_agent *lr_smp_agent; /* LR SM class */ + struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */ + struct ib_mr *mr; +}; + +#endif /* __IB_AGENT_PRIV_H__ */ diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c new file mode 100644 index 000000000000..c57ff7d46ce4 --- /dev/null +++ b/drivers/infiniband/core/cache.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/slab.h> + +#include "core_priv.h" + +struct ib_pkey_cache { + int table_len; + u16 table[0]; +}; + +struct ib_gid_cache { + int table_len; + union ib_gid table[0]; +}; + +struct ib_update_work { + struct work_struct work; + struct ib_device *device; + u8 port_num; +}; + +static inline int start_port(struct ib_device *device) +{ + return device->node_type == IB_NODE_SWITCH ? 0 : 1; +} + +static inline int end_port(struct ib_device *device) +{ + return device->node_type == IB_NODE_SWITCH ? 
0 : device->phys_port_cnt; +} + +int ib_cached_gid_get(struct ib_device *device, + u8 port, + int index, + union ib_gid *gid) +{ + struct ib_gid_cache *cache; + unsigned long flags; + int ret = 0; + + if (port < start_port(device) || port > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, flags); + + cache = device->cache.gid_cache[port - start_port(device)]; + + if (index < 0 || index >= cache->table_len) + ret = -EINVAL; + else + *gid = cache->table[index]; + + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_cached_gid_get); + +int ib_cached_pkey_get(struct ib_device *device, + u8 port, + int index, + u16 *pkey) +{ + struct ib_pkey_cache *cache; + unsigned long flags; + int ret = 0; + + if (port < start_port(device) || port > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, flags); + + cache = device->cache.pkey_cache[port - start_port(device)]; + + if (index < 0 || index >= cache->table_len) + ret = -EINVAL; + else + *pkey = cache->table[index]; + + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_cached_pkey_get); + +int ib_cached_pkey_find(struct ib_device *device, + u8 port, + u16 pkey, + u16 *index) +{ + struct ib_pkey_cache *cache; + unsigned long flags; + int i; + int ret = -ENOENT; + + if (port < start_port(device) || port > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, flags); + + cache = device->cache.pkey_cache[port - start_port(device)]; + + *index = -1; + + for (i = 0; i < cache->table_len; ++i) + if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { + *index = i; + ret = 0; + break; + } + + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_cached_pkey_find); + +static void ib_cache_update(struct ib_device *device, + u8 port) +{ + struct ib_port_attr *tprops = NULL; + struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; + struct ib_gid_cache *gid_cache = NULL, *old_gid_cache; + int i; + int ret; + + tprops = kmalloc(sizeof *tprops, GFP_KERNEL); + if (!tprops) + return; + + ret = ib_query_port(device, port, tprops); + if (ret) { + printk(KERN_WARNING "ib_query_port failed (%d) for %s\n", + ret, device->name); + goto err; + } + + pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len * + sizeof *pkey_cache->table, GFP_KERNEL); + if (!pkey_cache) + goto err; + + pkey_cache->table_len = tprops->pkey_tbl_len; + + gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len * + sizeof *gid_cache->table, GFP_KERNEL); + if (!gid_cache) + goto err; + + gid_cache->table_len = tprops->gid_tbl_len; + + for (i = 0; i < pkey_cache->table_len; ++i) { + ret = ib_query_pkey(device, port, i, pkey_cache->table + i); + if (ret) { + printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n", + ret, device->name, i); + goto err; + } + } + + for (i = 0; i < gid_cache->table_len; ++i) { + ret = ib_query_gid(device, port, i, gid_cache->table + i); + if (ret) { + printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", + ret, device->name, i); + goto err; + } + } + + write_lock_irq(&device->cache.lock); + + old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; + old_gid_cache = device->cache.gid_cache [port - start_port(device)]; + + device->cache.pkey_cache[port - start_port(device)] = pkey_cache; + device->cache.gid_cache [port - start_port(device)] = gid_cache; + + write_unlock_irq(&device->cache.lock); + + kfree(old_pkey_cache); + 
kfree(old_gid_cache); + kfree(tprops); + return; + +err: + kfree(pkey_cache); + kfree(gid_cache); + kfree(tprops); +} + +static void ib_cache_task(void *work_ptr) +{ + struct ib_update_work *work = work_ptr; + + ib_cache_update(work->device, work->port_num); + kfree(work); +} + +static void ib_cache_event(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct ib_update_work *work; + + if (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_SM_CHANGE) { + work = kmalloc(sizeof *work, GFP_ATOMIC); + if (work) { + INIT_WORK(&work->work, ib_cache_task, work); + work->device = event->device; + work->port_num = event->element.port_num; + schedule_work(&work->work); + } + } +} + +void ib_cache_setup_one(struct ib_device *device) +{ + int p; + + rwlock_init(&device->cache.lock); + + device->cache.pkey_cache = + kmalloc(sizeof *device->cache.pkey_cache * + (end_port(device) - start_port(device) + 1), GFP_KERNEL); + device->cache.gid_cache = + kmalloc(sizeof *device->cache.pkey_cache * + (end_port(device) - start_port(device) + 1), GFP_KERNEL); + + if (!device->cache.pkey_cache || !device->cache.gid_cache) { + printk(KERN_WARNING "Couldn't allocate cache " + "for %s\n", device->name); + goto err; + } + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + device->cache.pkey_cache[p] = NULL; + device->cache.gid_cache [p] = NULL; + ib_cache_update(device, p + start_port(device)); + } + + INIT_IB_EVENT_HANDLER(&device->cache.event_handler, + device, ib_cache_event); + if (ib_register_event_handler(&device->cache.event_handler)) + goto err_cache; + + return; + +err_cache: + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + kfree(device->cache.pkey_cache[p]); + kfree(device->cache.gid_cache[p]); + } + +err: + kfree(device->cache.pkey_cache); + kfree(device->cache.gid_cache); +} + +void ib_cache_cleanup_one(struct ib_device *device) +{ + int p; + + ib_unregister_event_handler(&device->cache.event_handler); + flush_scheduled_work(); + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + kfree(device->cache.pkey_cache[p]); + kfree(device->cache.gid_cache[p]); + } + + kfree(device->cache.pkey_cache); + kfree(device->cache.gid_cache); +} + +struct ib_client cache_client = { + .name = "cache", + .add = ib_cache_setup_one, + .remove = ib_cache_cleanup_one +}; + +int __init ib_cache_setup(void) +{ + return ib_register_client(&cache_client); +} + +void __exit ib_cache_cleanup(void) +{ + ib_unregister_client(&cache_client); +} diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h new file mode 100644 index 000000000000..797049626ff6 --- /dev/null +++ b/drivers/infiniband/core/core_priv.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: core_priv.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef _CORE_PRIV_H +#define _CORE_PRIV_H + +#include <linux/list.h> +#include <linux/spinlock.h> + +#include <ib_verbs.h> + +int ib_device_register_sysfs(struct ib_device *device); +void ib_device_unregister_sysfs(struct ib_device *device); + +int ib_sysfs_setup(void); +void ib_sysfs_cleanup(void); + +int ib_cache_setup(void); +void ib_cache_cleanup(void); + +#endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c new file mode 100644 index 000000000000..9197e92d708a --- /dev/null +++ b/drivers/infiniband/core/device.c @@ -0,0 +1,614 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: device.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/init.h> + +#include <asm/semaphore.h> + +#include "core_priv.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("core kernel InfiniBand API"); +MODULE_LICENSE("Dual BSD/GPL"); + +struct ib_client_data { + struct list_head list; + struct ib_client *client; + void * data; +}; + +static LIST_HEAD(device_list); +static LIST_HEAD(client_list); + +/* + * device_sem protects access to both device_list and client_list. 
+ * There's no real point to using multiple locks or something fancier + * like an rwsem: we always access both lists, and we're always + * modifying one list or the other list. In any case this is not a + * hot path so there's no point in trying to optimize. + */ +static DECLARE_MUTEX(device_sem); + +static int ib_device_check_mandatory(struct ib_device *device) +{ +#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } + static const struct { + size_t offset; + char *name; + } mandatory_table[] = { + IB_MANDATORY_FUNC(query_device), + IB_MANDATORY_FUNC(query_port), + IB_MANDATORY_FUNC(query_pkey), + IB_MANDATORY_FUNC(query_gid), + IB_MANDATORY_FUNC(alloc_pd), + IB_MANDATORY_FUNC(dealloc_pd), + IB_MANDATORY_FUNC(create_ah), + IB_MANDATORY_FUNC(destroy_ah), + IB_MANDATORY_FUNC(create_qp), + IB_MANDATORY_FUNC(modify_qp), + IB_MANDATORY_FUNC(destroy_qp), + IB_MANDATORY_FUNC(post_send), + IB_MANDATORY_FUNC(post_recv), + IB_MANDATORY_FUNC(create_cq), + IB_MANDATORY_FUNC(destroy_cq), + IB_MANDATORY_FUNC(poll_cq), + IB_MANDATORY_FUNC(req_notify_cq), + IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(dereg_mr) + }; + int i; + + for (i = 0; i < sizeof mandatory_table / sizeof mandatory_table[0]; ++i) { + if (!*(void **) ((void *) device + mandatory_table[i].offset)) { + printk(KERN_WARNING "Device %s is missing mandatory function %s\n", + device->name, mandatory_table[i].name); + return -EINVAL; + } + } + + return 0; +} + +static struct ib_device *__ib_device_get_by_name(const char *name) +{ + struct ib_device *device; + + list_for_each_entry(device, &device_list, core_list) + if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) + return device; + + return NULL; +} + + +static int alloc_name(char *name) +{ + long *inuse; + char buf[IB_DEVICE_NAME_MAX]; + struct ib_device *device; + int i; + + inuse = (long *) get_zeroed_page(GFP_KERNEL); + if (!inuse) + return -ENOMEM; + + list_for_each_entry(device, &device_list, core_list) { + if (!sscanf(device->name, name, &i)) + continue; + if (i < 0 || i >= PAGE_SIZE * 8) + continue; + snprintf(buf, sizeof buf, name, i); + if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) + set_bit(i, inuse); + } + + i = find_first_zero_bit(inuse, PAGE_SIZE * 8); + free_page((unsigned long) inuse); + snprintf(buf, sizeof buf, name, i); + + if (__ib_device_get_by_name(buf)) + return -ENFILE; + + strlcpy(name, buf, IB_DEVICE_NAME_MAX); + return 0; +} + +/** + * ib_alloc_device - allocate an IB device struct + * @size:size of structure to allocate + * + * Low-level drivers should use ib_alloc_device() to allocate &struct + * ib_device. @size is the size of the structure to be allocated, + * including any private data used by the low-level driver. + * ib_dealloc_device() must be used to free structures allocated with + * ib_alloc_device(). + */ +struct ib_device *ib_alloc_device(size_t size) +{ + void *dev; + + BUG_ON(size < sizeof (struct ib_device)); + + dev = kmalloc(size, GFP_KERNEL); + if (!dev) + return NULL; + + memset(dev, 0, size); + + return dev; +} +EXPORT_SYMBOL(ib_alloc_device); + +/** + * ib_dealloc_device - free an IB device struct + * @device:structure to free + * + * Free a structure allocated with ib_alloc_device(). 
+ */ +void ib_dealloc_device(struct ib_device *device) +{ + if (device->reg_state == IB_DEV_UNINITIALIZED) { + kfree(device); + return; + } + + BUG_ON(device->reg_state != IB_DEV_UNREGISTERED); + + ib_device_unregister_sysfs(device); +} +EXPORT_SYMBOL(ib_dealloc_device); + +static int add_client_context(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + unsigned long flags; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) { + printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n", + device->name, client->name); + return -ENOMEM; + } + + context->client = client; + context->data = NULL; + + spin_lock_irqsave(&device->client_data_lock, flags); + list_add(&context->list, &device->client_data_list); + spin_unlock_irqrestore(&device->client_data_lock, flags); + + return 0; +} + +/** + * ib_register_device - Register an IB device with IB core + * @device:Device to register + * + * Low-level drivers use ib_register_device() to register their + * devices with the IB core. All registered clients will receive a + * callback for each device that is added. @device must be allocated + * with ib_alloc_device(). + */ +int ib_register_device(struct ib_device *device) +{ + int ret; + + down(&device_sem); + + if (strchr(device->name, '%')) { + ret = alloc_name(device->name); + if (ret) + goto out; + } + + if (ib_device_check_mandatory(device)) { + ret = -EINVAL; + goto out; + } + + INIT_LIST_HEAD(&device->event_handler_list); + INIT_LIST_HEAD(&device->client_data_list); + spin_lock_init(&device->event_handler_lock); + spin_lock_init(&device->client_data_lock); + + ret = ib_device_register_sysfs(device); + if (ret) { + printk(KERN_WARNING "Couldn't register device %s with driver model\n", + device->name); + goto out; + } + + list_add_tail(&device->core_list, &device_list); + + device->reg_state = IB_DEV_REGISTERED; + + { + struct ib_client *client; + + list_for_each_entry(client, &client_list, list) + if (client->add && !add_client_context(device, client)) + client->add(device); + } + + out: + up(&device_sem); + return ret; +} +EXPORT_SYMBOL(ib_register_device); + +/** + * ib_unregister_device - Unregister an IB device + * @device:Device to unregister + * + * Unregister an IB device. All clients will receive a remove callback. + */ +void ib_unregister_device(struct ib_device *device) +{ + struct ib_client *client; + struct ib_client_data *context, *tmp; + unsigned long flags; + + down(&device_sem); + + list_for_each_entry_reverse(client, &client_list, list) + if (client->remove) + client->remove(device); + + list_del(&device->core_list); + + up(&device_sem); + + spin_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + kfree(context); + spin_unlock_irqrestore(&device->client_data_lock, flags); + + device->reg_state = IB_DEV_UNREGISTERED; +} +EXPORT_SYMBOL(ib_unregister_device); + +/** + * ib_register_client - Register an IB client + * @client:Client to register + * + * Upper level users of the IB drivers can use ib_register_client() to + * register callbacks for IB device addition and removal. When an IB + * device is added, each registered client's add method will be called + * (in the order the clients were registered), and when a device is + * removed, each client's remove method will be called (in the reverse + * order that clients were registered). 
In addition, when
+ * ib_register_client() is called, the client will receive an add
+ * callback for all devices already registered.
+ */
+int ib_register_client(struct ib_client *client)
+{
+	struct ib_device *device;
+
+	down(&device_sem);
+
+	list_add_tail(&client->list, &client_list);
+	list_for_each_entry(device, &device_list, core_list)
+		if (client->add && !add_client_context(device, client))
+			client->add(device);
+
+	up(&device_sem);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_register_client);
+
+/**
+ * ib_unregister_client - Unregister an IB client
+ * @client:Client to unregister
+ *
+ * Upper level users use ib_unregister_client() to remove their client
+ * registration.  When ib_unregister_client() is called, the client
+ * will receive a remove callback for each IB device still registered.
+ */
+void ib_unregister_client(struct ib_client *client)
+{
+	struct ib_client_data *context, *tmp;
+	struct ib_device *device;
+	unsigned long flags;
+
+	down(&device_sem);
+
+	list_for_each_entry(device, &device_list, core_list) {
+		if (client->remove)
+			client->remove(device);
+
+		spin_lock_irqsave(&device->client_data_lock, flags);
+		list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+			if (context->client == client) {
+				list_del(&context->list);
+				kfree(context);
+			}
+		spin_unlock_irqrestore(&device->client_data_lock, flags);
+	}
+	list_del(&client->list);
+
+	up(&device_sem);
+}
+EXPORT_SYMBOL(ib_unregister_client);
+
+/**
+ * ib_get_client_data - Get IB client context
+ * @device:Device to get context for
+ * @client:Client to get context for
+ *
+ * ib_get_client_data() returns client context set with
+ * ib_set_client_data().
+ */
+void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
+{
+	struct ib_client_data *context;
+	void *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device->client_data_lock, flags);
+	list_for_each_entry(context, &device->client_data_list, list)
+		if (context->client == client) {
+			ret = context->data;
+			break;
+		}
+	spin_unlock_irqrestore(&device->client_data_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ib_get_client_data);
+
+/**
+ * ib_set_client_data - Set IB client context
+ * @device:Device to set context for
+ * @client:Client to set context for
+ * @data:Context to set
+ *
+ * ib_set_client_data() sets client context that can be retrieved with
+ * ib_get_client_data().
+ */
+void ib_set_client_data(struct ib_device *device, struct ib_client *client,
+			void *data)
+{
+	struct ib_client_data *context;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device->client_data_lock, flags);
+	list_for_each_entry(context, &device->client_data_list, list)
+		if (context->client == client) {
+			context->data = data;
+			goto out;
+		}
+
+	printk(KERN_WARNING "No client context found for %s/%s\n",
+	       device->name, client->name);
+
+out:
+	spin_unlock_irqrestore(&device->client_data_lock, flags);
+}
+EXPORT_SYMBOL(ib_set_client_data);
+
+/**
+ * ib_register_event_handler - Register an IB event handler
+ * @event_handler:Handler to register
+ *
+ * ib_register_event_handler() registers an event handler that will be
+ * called back when asynchronous IB events occur (as defined in
+ * chapter 11 of the InfiniBand Architecture Specification).  This
+ * callback may occur in interrupt context.
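+ *
+ * A registration sketch (assuming the INIT_IB_EVENT_HANDLER() helper
+ * from <ib_verbs.h>; "my_event_handler" is a hypothetical client
+ * function):
+ *
+ *	static void my_event_handler(struct ib_event_handler *handler,
+ *				     struct ib_event *event)
+ *	{
+ *		...may run in interrupt context, so must not sleep...
+ *	}
+ *
+ *	struct ib_event_handler eh;
+ *
+ *	INIT_IB_EVENT_HANDLER(&eh, device, my_event_handler);
+ *	ib_register_event_handler(&eh);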
+ */ +int ib_register_event_handler (struct ib_event_handler *event_handler) +{ + unsigned long flags; + + spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + list_add_tail(&event_handler->list, + &event_handler->device->event_handler_list); + spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + + return 0; +} +EXPORT_SYMBOL(ib_register_event_handler); + +/** + * ib_unregister_event_handler - Unregister an event handler + * @event_handler:Handler to unregister + * + * Unregister an event handler registered with + * ib_register_event_handler(). + */ +int ib_unregister_event_handler(struct ib_event_handler *event_handler) +{ + unsigned long flags; + + spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + list_del(&event_handler->list); + spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + + return 0; +} +EXPORT_SYMBOL(ib_unregister_event_handler); + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. + */ +void ib_dispatch_event(struct ib_event *event) +{ + unsigned long flags; + struct ib_event_handler *handler; + + spin_lock_irqsave(&event->device->event_handler_lock, flags); + + list_for_each_entry(handler, &event->device->event_handler_list, list) + handler->handler(handler, event); + + spin_unlock_irqrestore(&event->device->event_handler_lock, flags); +} +EXPORT_SYMBOL(ib_dispatch_event); + +/** + * ib_query_device - Query IB device attributes + * @device:Device to query + * @device_attr:Device attributes + * + * ib_query_device() returns the attributes of a device through the + * @device_attr pointer. + */ +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr) +{ + return device->query_device(device, device_attr); +} +EXPORT_SYMBOL(ib_query_device); + +/** + * ib_query_port - Query IB port attributes + * @device:Device to query + * @port_num:Port number to query + * @port_attr:Port attributes + * + * ib_query_port() returns the attributes of a port through the + * @port_attr pointer. + */ +int ib_query_port(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr) +{ + return device->query_port(device, port_num, port_attr); +} +EXPORT_SYMBOL(ib_query_port); + +/** + * ib_query_gid - Get GID table entry + * @device:Device to query + * @port_num:Port number to query + * @index:GID table index to query + * @gid:Returned GID + * + * ib_query_gid() fetches the specified GID table entry. + */ +int ib_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid) +{ + return device->query_gid(device, port_num, index, gid); +} +EXPORT_SYMBOL(ib_query_gid); + +/** + * ib_query_pkey - Get P_Key table entry + * @device:Device to query + * @port_num:Port number to query + * @index:P_Key table index to query + * @pkey:Returned P_Key + * + * ib_query_pkey() fetches the specified P_Key table entry. 
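+ *
+ * For example, reading the P_Key at index 0 of port 1 (illustrative
+ * only):
+ *
+ *	u16 pkey;
+ *
+ *	if (!ib_query_pkey(device, 1, 0, &pkey))
+ *		printk(KERN_INFO "P_Key[0] = 0x%04x\n", pkey);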
+ */ +int ib_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey) +{ + return device->query_pkey(device, port_num, index, pkey); +} +EXPORT_SYMBOL(ib_query_pkey); + +/** + * ib_modify_device - Change IB device attributes + * @device:Device to modify + * @device_modify_mask:Mask of attributes to change + * @device_modify:New attribute values + * + * ib_modify_device() changes a device's attributes as specified by + * the @device_modify_mask and @device_modify structure. + */ +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify) +{ + return device->modify_device(device, device_modify_mask, + device_modify); +} +EXPORT_SYMBOL(ib_modify_device); + +/** + * ib_modify_port - Modifies the attributes for the specified port. + * @device: The device to modify. + * @port_num: The number of the port to modify. + * @port_modify_mask: Mask used to specify which attributes of the port + * to change. + * @port_modify: New attribute values for the port. + * + * ib_modify_port() changes a port's attributes as specified by the + * @port_modify_mask and @port_modify structure. + */ +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify) +{ + return device->modify_port(device, port_num, port_modify_mask, + port_modify); +} +EXPORT_SYMBOL(ib_modify_port); + +static int __init ib_core_init(void) +{ + int ret; + + ret = ib_sysfs_setup(); + if (ret) + printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); + + ret = ib_cache_setup(); + if (ret) { + printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); + ib_sysfs_cleanup(); + } + + return ret; +} + +static void __exit ib_core_cleanup(void) +{ + ib_cache_cleanup(); + ib_sysfs_cleanup(); +} + +module_init(ib_core_init); +module_exit(ib_core_cleanup); diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c new file mode 100644 index 000000000000..2e9469f18926 --- /dev/null +++ b/drivers/infiniband/core/fmr_pool.c @@ -0,0 +1,507 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: fmr_pool.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/jhash.h> +#include <linux/kthread.h> + +#include <ib_fmr_pool.h> + +#include "core_priv.h" + +enum { + IB_FMR_MAX_REMAPS = 32, + + IB_FMR_HASH_BITS = 8, + IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS, + IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1 +}; + +/* + * If an FMR is not in use, then the list member will point to either + * its pool's free_list (if the FMR can be mapped again; that is, + * remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the + * FMR needs to be unmapped before being remapped). In either of + * these cases it is a bug if the ref_count is not 0. In other words, + * if ref_count is > 0, then the list member must not be linked into + * either free_list or dirty_list. + * + * The cache_node member is used to link the FMR into a cache bucket + * (if caching is enabled). This is independent of the reference + * count of the FMR. When a valid FMR is released, its ref_count is + * decremented, and if ref_count reaches 0, the FMR is placed in + * either free_list or dirty_list as appropriate. However, it is not + * removed from the cache and may be "revived" if a call to + * ib_fmr_register_physical() occurs before the FMR is remapped. In + * this case we just increment the ref_count and remove the FMR from + * free_list/dirty_list. + * + * Before we remap an FMR from free_list, we remove it from the cache + * (to prevent another user from obtaining a stale FMR). When an FMR + * is released, we add it to the tail of the free list, so that our + * cache eviction policy is "least recently used." + * + * All manipulation of ref_count, list and cache_node is protected by + * pool_lock to maintain consistency. 
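+ *
+ * To summarize the states informally:
+ *
+ *	ref_count > 0:
+ *		mapped and in use; on neither free_list nor dirty_list
+ *		(but possibly still in the cache)
+ *	ref_count == 0, remap_count < IB_FMR_MAX_REMAPS:
+ *		on free_list, eligible for reuse or cache revival
+ *	ref_count == 0, remap_count == IB_FMR_MAX_REMAPS:
+ *		on dirty_list, waiting to be unmapped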
+ */
+
+struct ib_fmr_pool {
+	spinlock_t        pool_lock;
+
+	int               pool_size;
+	int               max_pages;
+	int               dirty_watermark;
+	int               dirty_len;
+	struct list_head  free_list;
+	struct list_head  dirty_list;
+	struct hlist_head *cache_bucket;
+
+	void              (*flush_function)(struct ib_fmr_pool *pool,
+					    void *arg);
+	void              *flush_arg;
+
+	struct task_struct *thread;
+
+	atomic_t          req_ser;
+	atomic_t          flush_ser;
+
+	wait_queue_head_t force_wait;
+};
+
+static inline u32 ib_fmr_hash(u64 first_page)
+{
+	return jhash_2words((u32) first_page,
+			    (u32) (first_page >> 32),
+			    0);
+}
+
+/* Caller must hold pool_lock */
+static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
+						      u64 *page_list,
+						      int page_list_len,
+						      u64 io_virtual_address)
+{
+	struct hlist_head *bucket;
+	struct ib_pool_fmr *fmr;
+	struct hlist_node *pos;
+
+	if (!pool->cache_bucket)
+		return NULL;
+
+	bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
+
+	hlist_for_each_entry(fmr, pos, bucket, cache_node)
+		if (io_virtual_address == fmr->io_virtual_address &&
+		    page_list_len == fmr->page_list_len &&
+		    !memcmp(page_list, fmr->page_list,
+			    page_list_len * sizeof *page_list))
+			return fmr;
+
+	return NULL;
+}
+
+static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
+{
+	int ret;
+	struct ib_pool_fmr *fmr;
+	LIST_HEAD(unmap_list);
+	LIST_HEAD(fmr_list);
+
+	spin_lock_irq(&pool->pool_lock);
+
+	list_for_each_entry(fmr, &pool->dirty_list, list) {
+		hlist_del_init(&fmr->cache_node);
+		fmr->remap_count = 0;
+		list_add_tail(&fmr->fmr->list, &fmr_list);
+
+#ifdef DEBUG
+		if (fmr->ref_count != 0) {
+			printk(KERN_WARNING "Unmapping FMR %p with ref count %d\n",
+			       fmr, fmr->ref_count);
+		}
+#endif
+	}
+
+	list_splice(&pool->dirty_list, &unmap_list);
+	INIT_LIST_HEAD(&pool->dirty_list);
+	pool->dirty_len = 0;
+
+	spin_unlock_irq(&pool->pool_lock);
+
+	if (list_empty(&unmap_list)) {
+		return;
+	}
+
+	ret = ib_unmap_fmr(&fmr_list);
+	if (ret)
+		printk(KERN_WARNING "ib_unmap_fmr returned %d\n", ret);
+
+	spin_lock_irq(&pool->pool_lock);
+	list_splice(&unmap_list, &pool->free_list);
+	spin_unlock_irq(&pool->pool_lock);
+}
+
+static int ib_fmr_cleanup_thread(void *pool_ptr)
+{
+	struct ib_fmr_pool *pool = pool_ptr;
+
+	do {
+		if (pool->dirty_len >= pool->dirty_watermark ||
+		    atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
+			ib_fmr_batch_release(pool);
+
+			atomic_inc(&pool->flush_ser);
+			wake_up_interruptible(&pool->force_wait);
+
+			if (pool->flush_function)
+				pool->flush_function(pool, pool->flush_arg);
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (pool->dirty_len < pool->dirty_watermark &&
+		    atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
+		    !kthread_should_stop())
+			schedule();
+		__set_current_state(TASK_RUNNING);
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+/**
+ * ib_create_fmr_pool - Create an FMR pool
+ * @pd:Protection domain for FMRs
+ * @params:FMR pool parameters
+ *
+ * Create a pool of FMRs.  Return value is pointer to new pool or
+ * error code if creation failed.
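+ *
+ * A creation sketch (the parameter values are illustrative only):
+ *
+ *	struct ib_fmr_pool_param params = {
+ *		.max_pages_per_fmr = 64,
+ *		.access            = IB_ACCESS_LOCAL_WRITE,
+ *		.pool_size         = 32,
+ *		.dirty_watermark   = 16,
+ *		.cache             = 1,
+ *	};
+ *	struct ib_fmr_pool *pool;
+ *
+ *	pool = ib_create_fmr_pool(pd, &params);
+ *	if (IS_ERR(pool))
+ *		return PTR_ERR(pool);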
+ */ +struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, + struct ib_fmr_pool_param *params) +{ + struct ib_device *device; + struct ib_fmr_pool *pool; + int i; + int ret; + + if (!params) + return ERR_PTR(-EINVAL); + + device = pd->device; + if (!device->alloc_fmr || !device->dealloc_fmr || + !device->map_phys_fmr || !device->unmap_fmr) { + printk(KERN_WARNING "Device %s does not support fast memory regions", + device->name); + return ERR_PTR(-ENOSYS); + } + + pool = kmalloc(sizeof *pool, GFP_KERNEL); + if (!pool) { + printk(KERN_WARNING "couldn't allocate pool struct"); + return ERR_PTR(-ENOMEM); + } + + pool->cache_bucket = NULL; + + pool->flush_function = params->flush_function; + pool->flush_arg = params->flush_arg; + + INIT_LIST_HEAD(&pool->free_list); + INIT_LIST_HEAD(&pool->dirty_list); + + if (params->cache) { + pool->cache_bucket = + kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, + GFP_KERNEL); + if (!pool->cache_bucket) { + printk(KERN_WARNING "Failed to allocate cache in pool"); + ret = -ENOMEM; + goto out_free_pool; + } + + for (i = 0; i < IB_FMR_HASH_SIZE; ++i) + INIT_HLIST_HEAD(pool->cache_bucket + i); + } + + pool->pool_size = 0; + pool->max_pages = params->max_pages_per_fmr; + pool->dirty_watermark = params->dirty_watermark; + pool->dirty_len = 0; + spin_lock_init(&pool->pool_lock); + atomic_set(&pool->req_ser, 0); + atomic_set(&pool->flush_ser, 0); + init_waitqueue_head(&pool->force_wait); + + pool->thread = kthread_create(ib_fmr_cleanup_thread, + pool, + "ib_fmr(%s)", + device->name); + if (IS_ERR(pool->thread)) { + printk(KERN_WARNING "couldn't start cleanup thread"); + ret = PTR_ERR(pool->thread); + goto out_free_pool; + } + + { + struct ib_pool_fmr *fmr; + struct ib_fmr_attr attr = { + .max_pages = params->max_pages_per_fmr, + .max_maps = IB_FMR_MAX_REMAPS, + .page_size = PAGE_SHIFT + }; + + for (i = 0; i < params->pool_size; ++i) { + fmr = kmalloc(sizeof *fmr + params->max_pages_per_fmr * sizeof (u64), + GFP_KERNEL); + if (!fmr) { + printk(KERN_WARNING "failed to allocate fmr struct " + "for FMR %d", i); + goto out_fail; + } + + fmr->pool = pool; + fmr->remap_count = 0; + fmr->ref_count = 0; + INIT_HLIST_NODE(&fmr->cache_node); + + fmr->fmr = ib_alloc_fmr(pd, params->access, &attr); + if (IS_ERR(fmr->fmr)) { + printk(KERN_WARNING "fmr_create failed for FMR %d", i); + kfree(fmr); + goto out_fail; + } + + list_add_tail(&fmr->list, &pool->free_list); + ++pool->pool_size; + } + } + + return pool; + + out_free_pool: + kfree(pool->cache_bucket); + kfree(pool); + + return ERR_PTR(ret); + + out_fail: + ib_destroy_fmr_pool(pool); + + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL(ib_create_fmr_pool); + +/** + * ib_destroy_fmr_pool - Free FMR pool + * @pool:FMR pool to free + * + * Destroy an FMR pool and free all associated resources. + */ +int ib_destroy_fmr_pool(struct ib_fmr_pool *pool) +{ + struct ib_pool_fmr *fmr; + struct ib_pool_fmr *tmp; + int i; + + kthread_stop(pool->thread); + ib_fmr_batch_release(pool); + + i = 0; + list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { + ib_dealloc_fmr(fmr->fmr); + list_del(&fmr->list); + kfree(fmr); + ++i; + } + + if (i < pool->pool_size) + printk(KERN_WARNING "pool still has %d regions registered", + pool->pool_size - i); + + kfree(pool->cache_bucket); + kfree(pool); + + return 0; +} +EXPORT_SYMBOL(ib_destroy_fmr_pool); + +/** + * ib_flush_fmr_pool - Invalidate all unmapped FMRs + * @pool:FMR pool to flush + * + * Ensure that all unmapped FMRs are fully invalidated. 
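+ *
+ * For example, a consumer that must guarantee no stale mapping can
+ * still be used might do (illustrative):
+ *
+ *	if (ib_flush_fmr_pool(pool))
+ *		printk(KERN_WARNING "FMR pool flush interrupted\n");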
+ */ +int ib_flush_fmr_pool(struct ib_fmr_pool *pool) +{ + int serial; + + atomic_inc(&pool->req_ser); + /* + * It's OK if someone else bumps req_ser again here -- we'll + * just wait a little longer. + */ + serial = atomic_read(&pool->req_ser); + + wake_up_process(pool->thread); + + if (wait_event_interruptible(pool->force_wait, + atomic_read(&pool->flush_ser) - + atomic_read(&pool->req_ser) >= 0)) + return -EINTR; + + return 0; +} +EXPORT_SYMBOL(ib_flush_fmr_pool); + +/** + * ib_fmr_pool_map_phys - + * @pool:FMR pool to allocate FMR from + * @page_list:List of pages to map + * @list_len:Number of pages in @page_list + * @io_virtual_address:I/O virtual address for new FMR + * + * Map an FMR from an FMR pool. + */ +struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, + u64 *page_list, + int list_len, + u64 *io_virtual_address) +{ + struct ib_fmr_pool *pool = pool_handle; + struct ib_pool_fmr *fmr; + unsigned long flags; + int result; + + if (list_len < 1 || list_len > pool->max_pages) + return ERR_PTR(-EINVAL); + + spin_lock_irqsave(&pool->pool_lock, flags); + fmr = ib_fmr_cache_lookup(pool, + page_list, + list_len, + *io_virtual_address); + if (fmr) { + /* found in cache */ + ++fmr->ref_count; + if (fmr->ref_count == 1) { + list_del(&fmr->list); + } + + spin_unlock_irqrestore(&pool->pool_lock, flags); + + return fmr; + } + + if (list_empty(&pool->free_list)) { + spin_unlock_irqrestore(&pool->pool_lock, flags); + return ERR_PTR(-EAGAIN); + } + + fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list); + list_del(&fmr->list); + hlist_del_init(&fmr->cache_node); + spin_unlock_irqrestore(&pool->pool_lock, flags); + + result = ib_map_phys_fmr(fmr->fmr, page_list, list_len, + *io_virtual_address); + + if (result) { + spin_lock_irqsave(&pool->pool_lock, flags); + list_add(&fmr->list, &pool->free_list); + spin_unlock_irqrestore(&pool->pool_lock, flags); + + printk(KERN_WARNING "fmr_map returns %d", + result); + + return ERR_PTR(result); + } + + ++fmr->remap_count; + fmr->ref_count = 1; + + if (pool->cache_bucket) { + fmr->io_virtual_address = *io_virtual_address; + fmr->page_list_len = list_len; + memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list)); + + spin_lock_irqsave(&pool->pool_lock, flags); + hlist_add_head(&fmr->cache_node, + pool->cache_bucket + ib_fmr_hash(fmr->page_list[0])); + spin_unlock_irqrestore(&pool->pool_lock, flags); + } + + return fmr; +} +EXPORT_SYMBOL(ib_fmr_pool_map_phys); + +/** + * ib_fmr_pool_unmap - Unmap FMR + * @fmr:FMR to unmap + * + * Unmap an FMR. The FMR mapping may remain valid until the FMR is + * reused (or until ib_flush_fmr_pool() is called). 
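+ *
+ * A typical map/use/unmap cycle (illustrative; page_list, npages and
+ * the requested io_addr are caller-supplied):
+ *
+ *	struct ib_pool_fmr *fmr;
+ *
+ *	fmr = ib_fmr_pool_map_phys(pool, page_list, npages, &io_addr);
+ *	if (IS_ERR(fmr))
+ *		return PTR_ERR(fmr);
+ *	...post work requests using the mapping...
+ *	ib_fmr_pool_unmap(fmr);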
+ */ +int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) +{ + struct ib_fmr_pool *pool; + unsigned long flags; + + pool = fmr->pool; + + spin_lock_irqsave(&pool->pool_lock, flags); + + --fmr->ref_count; + if (!fmr->ref_count) { + if (fmr->remap_count < IB_FMR_MAX_REMAPS) { + list_add_tail(&fmr->list, &pool->free_list); + } else { + list_add_tail(&fmr->list, &pool->dirty_list); + ++pool->dirty_len; + wake_up_process(pool->thread); + } + } + +#ifdef DEBUG + if (fmr->ref_count < 0) + printk(KERN_WARNING "FMR %p has ref count %d < 0", + fmr, fmr->ref_count); +#endif + + spin_unlock_irqrestore(&pool->pool_lock, flags); + + return 0; +} +EXPORT_SYMBOL(ib_fmr_pool_unmap); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c new file mode 100644 index 000000000000..35872866fda2 --- /dev/null +++ b/drivers/infiniband/core/mad.c @@ -0,0 +1,2632 @@ +/* + * Copyright (c) 2004, Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mad.c 1389 2004-12-27 22:56:47Z roland $ + */ + +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> + +#include <ib_mad.h> + +#include "mad_priv.h" +#include "smi.h" +#include "agent.h" + + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("kernel IB MAD API"); +MODULE_AUTHOR("Hal Rosenstock"); +MODULE_AUTHOR("Sean Hefty"); + + +kmem_cache_t *ib_mad_cache; +static struct list_head ib_mad_port_list; +static u32 ib_mad_client_id = 0; + +/* Port list lock */ +static spinlock_t ib_mad_port_list_lock; + + +/* Forward declarations */ +static int method_in_use(struct ib_mad_mgmt_method_table **method, + struct ib_mad_reg_req *mad_reg_req); +static void remove_mad_reg_req(struct ib_mad_agent_private *priv); +static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, + struct ib_mad_private *mad); +static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); +static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc); +static void timeout_sends(void *data); +static void local_completions(void *data); +static int solicited_mad(struct ib_mad *mad); +static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv, + u8 mgmt_class); +static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv); + +/* + * Returns a ib_mad_port_private structure or NULL for a device/port + * Assumes ib_mad_port_list_lock is being held + */ +static inline struct ib_mad_port_private * +__ib_get_mad_port(struct ib_device *device, int port_num) +{ + struct ib_mad_port_private *entry; + + list_for_each_entry(entry, &ib_mad_port_list, port_list) { + if (entry->device == device && entry->port_num == port_num) + return entry; + } + return NULL; +} + +/* + * Wrapper function to return a ib_mad_port_private structure or NULL + * for a device/port + */ +static inline struct ib_mad_port_private * +ib_get_mad_port(struct ib_device *device, int port_num) +{ + struct ib_mad_port_private *entry; + unsigned long flags; + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + entry = __ib_get_mad_port(device, port_num); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + return entry; +} + +static inline u8 convert_mgmt_class(u8 mgmt_class) +{ + /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */ + return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ? 
+ 0 : mgmt_class; +} + +static int get_spl_qp_index(enum ib_qp_type qp_type) +{ + switch (qp_type) + { + case IB_QPT_SMI: + return 0; + case IB_QPT_GSI: + return 1; + default: + return -1; + } +} + +static int vendor_class_index(u8 mgmt_class) +{ + return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START; +} + +static int is_vendor_class(u8 mgmt_class) +{ + if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) || + (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return 0; + return 1; +} + +static int is_vendor_oui(char *oui) +{ + if (oui[0] || oui[1] || oui[2]) + return 1; + return 0; +} + +static int is_vendor_method_in_use( + struct ib_mad_mgmt_vendor_class *vendor_class, + struct ib_mad_reg_req *mad_reg_req) +{ + struct ib_mad_mgmt_method_table *method; + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) { + if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) { + method = vendor_class->method_table[i]; + if (method) { + if (method_in_use(&method, mad_reg_req)) + return 1; + else + break; + } + } + } + return 0; +} + +/* + * ib_register_mad_agent - Register to send/receive MADs + */ +struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + struct ib_mad_reg_req *mad_reg_req, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_agent *ret = ERR_PTR(-EINVAL); + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_reg_req *reg_req = NULL; + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + struct ib_mad_mgmt_method_table *method; + int ret2, qpn; + unsigned long flags; + u8 mgmt_class, vclass; + + /* Validate parameters */ + qpn = get_spl_qp_index(qp_type); + if (qpn == -1) + goto error1; + + if (rmpp_version) + goto error1; /* XXX: until RMPP implemented */ + + /* Validate MAD registration request if supplied */ + if (mad_reg_req) { + if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) + goto error1; + if (!recv_handler) + goto error1; + if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { + /* + * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only + * one in this range currently allowed + */ + if (mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + goto error1; + } else if (mad_reg_req->mgmt_class == 0) { + /* + * Class 0 is reserved in IBA and is used for + * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE + */ + goto error1; + } else if (is_vendor_class(mad_reg_req->mgmt_class)) { + /* + * If class is in "new" vendor range, + * ensure supplied OUI is not zero + */ + if (!is_vendor_oui(mad_reg_req->oui)) + goto error1; + } + /* Make sure class supplied is consistent with QP type */ + if (qp_type == IB_QPT_SMI) { + if ((mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_LID_ROUTED) && + (mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) + goto error1; + } else { + if ((mad_reg_req->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_reg_req->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) + goto error1; + } + } else { + /* No registration request supplied */ + if (!send_handler) + goto error1; + } + + /* Validate device and port */ + port_priv = ib_get_mad_port(device, port_num); + if (!port_priv) { + ret = ERR_PTR(-ENODEV); + goto error1; + } + + /* Allocate structures */ + mad_agent_priv = kmalloc(sizeof *mad_agent_priv, GFP_KERNEL); + if (!mad_agent_priv) { + ret = ERR_PTR(-ENOMEM); + goto 
error1; + } + + if (mad_reg_req) { + reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL); + if (!reg_req) { + ret = ERR_PTR(-ENOMEM); + goto error2; + } + /* Make a copy of the MAD registration request */ + memcpy(reg_req, mad_reg_req, sizeof *reg_req); + } + + /* Now, fill in the various structures */ + memset(mad_agent_priv, 0, sizeof *mad_agent_priv); + mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; + mad_agent_priv->reg_req = reg_req; + mad_agent_priv->rmpp_version = rmpp_version; + mad_agent_priv->agent.device = device; + mad_agent_priv->agent.recv_handler = recv_handler; + mad_agent_priv->agent.send_handler = send_handler; + mad_agent_priv->agent.context = context; + mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; + mad_agent_priv->agent.port_num = port_num; + + spin_lock_irqsave(&port_priv->reg_lock, flags); + mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; + + /* + * Make sure MAD registration (if supplied) + * is non overlapping with any existing ones + */ + if (mad_reg_req) { + mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); + if (!is_vendor_class(mgmt_class)) { + class = port_priv->version[mad_reg_req-> + mgmt_class_version].class; + if (class) { + method = class->method_table[mgmt_class]; + if (method) { + if (method_in_use(&method, + mad_reg_req)) + goto error3; + } + } + ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, + mgmt_class); + } else { + /* "New" vendor class range */ + vendor = port_priv->version[mad_reg_req-> + mgmt_class_version].vendor; + if (vendor) { + vclass = vendor_class_index(mgmt_class); + vendor_class = vendor->vendor_class[vclass]; + if (vendor_class) { + if (is_vendor_method_in_use( + vendor_class, + mad_reg_req)) + goto error3; + } + } + ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); + } + if (ret2) { + ret = ERR_PTR(ret2); + goto error3; + } + } + + /* Add mad agent into port's agent list */ + list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); + spin_unlock_irqrestore(&port_priv->reg_lock, flags); + + spin_lock_init(&mad_agent_priv->lock); + INIT_LIST_HEAD(&mad_agent_priv->send_list); + INIT_LIST_HEAD(&mad_agent_priv->wait_list); + INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv); + INIT_LIST_HEAD(&mad_agent_priv->local_list); + INIT_WORK(&mad_agent_priv->local_work, local_completions, + mad_agent_priv); + atomic_set(&mad_agent_priv->refcount, 1); + init_waitqueue_head(&mad_agent_priv->wait); + + return &mad_agent_priv->agent; + +error3: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); + kfree(reg_req); +error2: + kfree(mad_agent_priv); +error1: + return ret; +} +EXPORT_SYMBOL(ib_register_mad_agent); + +static inline int is_snooping_sends(int mad_snoop_flags) +{ + return (mad_snoop_flags & + (/*IB_MAD_SNOOP_POSTED_SENDS | + IB_MAD_SNOOP_RMPP_SENDS |*/ + IB_MAD_SNOOP_SEND_COMPLETIONS /*| + IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/)); +} + +static inline int is_snooping_recvs(int mad_snoop_flags) +{ + return (mad_snoop_flags & + (IB_MAD_SNOOP_RECVS /*| + IB_MAD_SNOOP_RMPP_RECVS*/)); +} + +static int register_snoop_agent(struct ib_mad_qp_info *qp_info, + struct ib_mad_snoop_private *mad_snoop_priv) +{ + struct ib_mad_snoop_private **new_snoop_table; + unsigned long flags; + int i; + + spin_lock_irqsave(&qp_info->snoop_lock, flags); + /* Check for empty slot in array. */ + for (i = 0; i < qp_info->snoop_table_size; i++) + if (!qp_info->snoop_table[i]) + break; + + if (i == qp_info->snoop_table_size) { + /* Grow table. 
*/
+		new_snoop_table = kmalloc(sizeof mad_snoop_priv *
+					  (qp_info->snoop_table_size + 1),
+					  GFP_ATOMIC);
+		if (!new_snoop_table) {
+			i = -ENOMEM;
+			goto out;
+		}
+		if (qp_info->snoop_table) {
+			memcpy(new_snoop_table, qp_info->snoop_table,
+			       sizeof mad_snoop_priv *
+			       qp_info->snoop_table_size);
+			kfree(qp_info->snoop_table);
+		}
+		qp_info->snoop_table = new_snoop_table;
+		qp_info->snoop_table_size++;
+	}
+	qp_info->snoop_table[i] = mad_snoop_priv;
+	atomic_inc(&qp_info->snoop_count);
+out:
+	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+	return i;
+}
+
+struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
+					   u8 port_num,
+					   enum ib_qp_type qp_type,
+					   int mad_snoop_flags,
+					   ib_mad_snoop_handler snoop_handler,
+					   ib_mad_recv_handler recv_handler,
+					   void *context)
+{
+	struct ib_mad_port_private *port_priv;
+	struct ib_mad_agent *ret;
+	struct ib_mad_snoop_private *mad_snoop_priv;
+	int qpn;
+
+	/* Validate parameters */
+	if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
+	    (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
+		ret = ERR_PTR(-EINVAL);
+		goto error1;
+	}
+	qpn = get_spl_qp_index(qp_type);
+	if (qpn == -1) {
+		ret = ERR_PTR(-EINVAL);
+		goto error1;
+	}
+	port_priv = ib_get_mad_port(device, port_num);
+	if (!port_priv) {
+		ret = ERR_PTR(-ENODEV);
+		goto error1;
+	}
+	/* Allocate structures */
+	mad_snoop_priv = kmalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
+	if (!mad_snoop_priv) {
+		ret = ERR_PTR(-ENOMEM);
+		goto error1;
+	}
+
+	/* Now, fill in the various structures */
+	memset(mad_snoop_priv, 0, sizeof *mad_snoop_priv);
+	mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
+	mad_snoop_priv->agent.device = device;
+	mad_snoop_priv->agent.recv_handler = recv_handler;
+	mad_snoop_priv->agent.snoop_handler = snoop_handler;
+	mad_snoop_priv->agent.context = context;
+	mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
+	mad_snoop_priv->agent.port_num = port_num;
+	mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
+	init_waitqueue_head(&mad_snoop_priv->wait);
+	mad_snoop_priv->snoop_index = register_snoop_agent(
+						&port_priv->qp_info[qpn],
+						mad_snoop_priv);
+	if (mad_snoop_priv->snoop_index < 0) {
+		ret = ERR_PTR(mad_snoop_priv->snoop_index);
+		goto error2;
+	}
+
+	atomic_set(&mad_snoop_priv->refcount, 1);
+	return &mad_snoop_priv->agent;
+
+error2:
+	kfree(mad_snoop_priv);
+error1:
+	return ret;
+}
+EXPORT_SYMBOL(ib_register_mad_snoop);
+
+static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
+{
+	struct ib_mad_port_private *port_priv;
+	unsigned long flags;
+
+	/* Note that we could still be handling received MADs */
+
+	/*
+	 * Canceling all sends results in dropping received response
+	 * MADs, preventing us from queuing additional work
+	 */
+	cancel_mads(mad_agent_priv);
+
+	port_priv = mad_agent_priv->qp_info->port_priv;
+	cancel_delayed_work(&mad_agent_priv->timed_work);
+	flush_workqueue(port_priv->wq);
+
+	spin_lock_irqsave(&port_priv->reg_lock, flags);
+	remove_mad_reg_req(mad_agent_priv);
+	list_del(&mad_agent_priv->agent_list);
+	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+
+	/* XXX: Cleanup pending RMPP receives for this agent */
+
+	atomic_dec(&mad_agent_priv->refcount);
+	wait_event(mad_agent_priv->wait,
+		   !atomic_read(&mad_agent_priv->refcount));
+
+	if (mad_agent_priv->reg_req)
+		kfree(mad_agent_priv->reg_req);
+	kfree(mad_agent_priv);
+}
+
+static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
+{
+	struct ib_mad_qp_info *qp_info;
+	unsigned long flags;
+
+	qp_info =
mad_snoop_priv->qp_info; + spin_lock_irqsave(&qp_info->snoop_lock, flags); + qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL; + atomic_dec(&qp_info->snoop_count); + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); + + atomic_dec(&mad_snoop_priv->refcount); + wait_event(mad_snoop_priv->wait, + !atomic_read(&mad_snoop_priv->refcount)); + + kfree(mad_snoop_priv); +} + +/* + * ib_unregister_mad_agent - Unregisters a client from using MAD services + */ +int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_snoop_private *mad_snoop_priv; + + /* If the TID is zero, the agent can only snoop. */ + if (mad_agent->hi_tid) { + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); + unregister_mad_agent(mad_agent_priv); + } else { + mad_snoop_priv = container_of(mad_agent, + struct ib_mad_snoop_private, + agent); + unregister_mad_snoop(mad_snoop_priv); + } + return 0; +} +EXPORT_SYMBOL(ib_unregister_mad_agent); + +static void dequeue_mad(struct ib_mad_list_head *mad_list) +{ + struct ib_mad_queue *mad_queue; + unsigned long flags; + + BUG_ON(!mad_list->mad_queue); + mad_queue = mad_list->mad_queue; + spin_lock_irqsave(&mad_queue->lock, flags); + list_del(&mad_list->list); + mad_queue->count--; + spin_unlock_irqrestore(&mad_queue->lock, flags); +} + +static void snoop_send(struct ib_mad_qp_info *qp_info, + struct ib_send_wr *send_wr, + struct ib_mad_send_wc *mad_send_wc, + int mad_snoop_flags) +{ + struct ib_mad_snoop_private *mad_snoop_priv; + unsigned long flags; + int i; + + spin_lock_irqsave(&qp_info->snoop_lock, flags); + for (i = 0; i < qp_info->snoop_table_size; i++) { + mad_snoop_priv = qp_info->snoop_table[i]; + if (!mad_snoop_priv || + !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) + continue; + + atomic_inc(&mad_snoop_priv->refcount); + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); + mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, + send_wr, mad_send_wc); + if (atomic_dec_and_test(&mad_snoop_priv->refcount)) + wake_up(&mad_snoop_priv->wait); + spin_lock_irqsave(&qp_info->snoop_lock, flags); + } + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); +} + +static void snoop_recv(struct ib_mad_qp_info *qp_info, + struct ib_mad_recv_wc *mad_recv_wc, + int mad_snoop_flags) +{ + struct ib_mad_snoop_private *mad_snoop_priv; + unsigned long flags; + int i; + + spin_lock_irqsave(&qp_info->snoop_lock, flags); + for (i = 0; i < qp_info->snoop_table_size; i++) { + mad_snoop_priv = qp_info->snoop_table[i]; + if (!mad_snoop_priv || + !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) + continue; + + atomic_inc(&mad_snoop_priv->refcount); + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); + mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, + mad_recv_wc); + if (atomic_dec_and_test(&mad_snoop_priv->refcount)) + wake_up(&mad_snoop_priv->wait); + spin_lock_irqsave(&qp_info->snoop_lock, flags); + } + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); +} + +/* + * Return 0 if SMP is to be sent + * Return 1 if SMP was consumed locally (whether or not solicited) + * Return < 0 if error + */ +static int handle_outgoing_smp(struct ib_mad_agent_private *mad_agent_priv, + struct ib_smp *smp, + struct ib_send_wr *send_wr) +{ + int ret, alloc_flags; + unsigned long flags; + struct ib_mad_local_private *local; + struct ib_mad_private *mad_priv; + struct ib_device *device = mad_agent_priv->agent.device; + u8 port_num = mad_agent_priv->agent.port_num; + + if 
(!smi_handle_dr_smp_send(smp, device->node_type, port_num)) { + ret = -EINVAL; + printk(KERN_ERR PFX "Invalid directed route\n"); + goto out; + } + /* Check to post send on QP or process locally */ + ret = smi_check_local_dr_smp(smp, device, port_num); + if (!ret || !device->process_mad) + goto out; + + if (in_atomic() || irqs_disabled()) + alloc_flags = GFP_ATOMIC; + else + alloc_flags = GFP_KERNEL; + local = kmalloc(sizeof *local, alloc_flags); + if (!local) { + ret = -ENOMEM; + printk(KERN_ERR PFX "No memory for ib_mad_local_private\n"); + goto out; + } + local->mad_priv = NULL; + mad_priv = kmem_cache_alloc(ib_mad_cache, alloc_flags); + if (!mad_priv) { + ret = -ENOMEM; + printk(KERN_ERR PFX "No memory for local response MAD\n"); + kfree(local); + goto out; + } + ret = device->process_mad(device, 0, port_num, smp->dr_slid, + (struct ib_mad *)smp, + (struct ib_mad *)&mad_priv->mad); + switch (ret) + { + case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: + /* + * See if response is solicited and + * there is a recv handler + */ + if (solicited_mad(&mad_priv->mad.mad) && + mad_agent_priv->agent.recv_handler) + local->mad_priv = mad_priv; + else + kmem_cache_free(ib_mad_cache, mad_priv); + break; + case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: + kmem_cache_free(ib_mad_cache, mad_priv); + break; + case IB_MAD_RESULT_SUCCESS: + kmem_cache_free(ib_mad_cache, mad_priv); + kfree(local); + ret = 0; + goto out; + default: + kmem_cache_free(ib_mad_cache, mad_priv); + kfree(local); + ret = -EINVAL; + goto out; + } + + local->send_wr = *send_wr; + local->send_wr.sg_list = local->sg_list; + memcpy(local->sg_list, send_wr->sg_list, + sizeof *send_wr->sg_list * send_wr->num_sge); + local->send_wr.next = NULL; + local->tid = send_wr->wr.ud.mad_hdr->tid; + local->wr_id = send_wr->wr_id; + /* Reference MAD agent until local completion handled */ + atomic_inc(&mad_agent_priv->refcount); + /* Queue local completion to local list */ + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_add_tail(&local->completion_list, &mad_agent_priv->local_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + queue_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->local_work); + ret = 1; +out: + return ret; +} + +static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_qp_info *qp_info; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + /* Replace user's WR ID with our own to find WR upon completion */ + qp_info = mad_agent_priv->qp_info; + mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id; + mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; + mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; + + spin_lock_irqsave(&qp_info->send_queue.lock, flags); + if (qp_info->send_queue.count++ < qp_info->send_queue.max_active) { + list_add_tail(&mad_send_wr->mad_list.list, + &qp_info->send_queue.list); + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); + ret = ib_post_send(mad_agent_priv->agent.qp, + &mad_send_wr->send_wr, &bad_send_wr); + if (ret) { + printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); + dequeue_mad(&mad_send_wr->mad_list); + } + } else { + list_add_tail(&mad_send_wr->mad_list.list, + &qp_info->overflow_list); + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); + ret = 0; + } + return ret; +} + +/* + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client + */ +int ib_post_send_mad(struct 
ib_mad_agent *mad_agent, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + int ret = -EINVAL; + struct ib_mad_agent_private *mad_agent_priv; + + /* Validate supplied parameters */ + if (!bad_send_wr) + goto error1; + + if (!mad_agent || !send_wr) + goto error2; + + if (!mad_agent->send_handler) + goto error2; + + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); + + /* Walk list of send WRs and post each on send list */ + while (send_wr) { + unsigned long flags; + struct ib_send_wr *next_send_wr; + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_smp *smp; + + /* Validate more parameters */ + if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG) + goto error2; + + if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler) + goto error2; + + if (!send_wr->wr.ud.mad_hdr) { + printk(KERN_ERR PFX "MAD header must be supplied " + "in WR %p\n", send_wr); + goto error2; + } + + /* + * Save pointer to next work request to post in case the + * current one completes, and the user modifies the work + * request associated with the completion + */ + next_send_wr = (struct ib_send_wr *)send_wr->next; + + smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + ret = handle_outgoing_smp(mad_agent_priv, smp, send_wr); + if (ret < 0) /* error */ + goto error2; + else if (ret == 1) /* locally consumed */ + goto next; + } + + /* Allocate MAD send WR tracking structure */ + mad_send_wr = kmalloc(sizeof *mad_send_wr, + (in_atomic() || irqs_disabled()) ? + GFP_ATOMIC : GFP_KERNEL); + if (!mad_send_wr) { + printk(KERN_ERR PFX "No memory for " + "ib_mad_send_wr_private\n"); + ret = -ENOMEM; + goto error2; + } + + mad_send_wr->send_wr = *send_wr; + mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; + memcpy(mad_send_wr->sg_list, send_wr->sg_list, + sizeof *send_wr->sg_list * send_wr->num_sge); + mad_send_wr->send_wr.next = NULL; + mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; + mad_send_wr->agent = mad_agent; + /* Timeout will be updated after send completes */ + mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. 
+ ud.timeout_ms); + mad_send_wr->retry = 0; + /* One reference for each work request to QP + response */ + mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); + mad_send_wr->status = IB_WC_SUCCESS; + + /* Reference MAD agent until send completes */ + atomic_inc(&mad_agent_priv->refcount); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_add_tail(&mad_send_wr->agent_list, + &mad_agent_priv->send_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + ret = ib_send_mad(mad_agent_priv, mad_send_wr); + if (ret) { + /* Fail send request */ + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_del(&mad_send_wr->agent_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + atomic_dec(&mad_agent_priv->refcount); + goto error2; + } +next: + send_wr = next_send_wr; + } + return 0; + +error2: + *bad_send_wr = send_wr; +error1: + return ret; +} +EXPORT_SYMBOL(ib_post_send_mad); + +/* + * ib_free_recv_mad - Returns data buffers used to receive + * a MAD to the access layer + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_recv_buf *entry; + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *priv; + + mad_priv_hdr = container_of(mad_recv_wc, + struct ib_mad_private_header, + recv_wc); + priv = container_of(mad_priv_hdr, struct ib_mad_private, header); + + /* + * Walk receive buffer list associated with this WC + * No need to remove them from list of receive buffers + */ + list_for_each_entry(entry, &mad_recv_wc->recv_buf.list, list) { + /* Free previous receive buffer */ + kmem_cache_free(ib_mad_cache, priv); + mad_priv_hdr = container_of(mad_recv_wc, + struct ib_mad_private_header, + recv_wc); + priv = container_of(mad_priv_hdr, struct ib_mad_private, + header); + } + + /* Free last buffer */ + kmem_cache_free(ib_mad_cache, priv); +} +EXPORT_SYMBOL(ib_free_recv_mad); + +void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, + void *buf) +{ + printk(KERN_ERR PFX "ib_coalesce_recv_mad() not implemented yet\n"); +} +EXPORT_SYMBOL(ib_coalesce_recv_mad); + +struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context) +{ + return ERR_PTR(-EINVAL); /* XXX: for now */ +} +EXPORT_SYMBOL(ib_redirect_mad_qp); + +int ib_process_mad_wc(struct ib_mad_agent *mad_agent, + struct ib_wc *wc) +{ + printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n"); + return 0; +} +EXPORT_SYMBOL(ib_process_mad_wc); + +static int method_in_use(struct ib_mad_mgmt_method_table **method, + struct ib_mad_reg_req *mad_reg_req) +{ + int i; + + for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS); + i < IB_MGMT_MAX_METHODS; + i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, + 1+i)) { + if ((*method)->agent[i]) { + printk(KERN_ERR PFX "Method %d already in use\n", i); + return -EINVAL; + } + } + return 0; +} + +static int allocate_method_table(struct ib_mad_mgmt_method_table **method) +{ + /* Allocate management method table */ + *method = kmalloc(sizeof **method, GFP_ATOMIC); + if (!*method) { + printk(KERN_ERR PFX "No memory for " + "ib_mad_mgmt_method_table\n"); + return -ENOMEM; + } + /* Clear management method table */ + memset(*method, 0, sizeof **method); + + return 0; +} + +/* + * Check to see if there are any methods still in use + */ +static int check_method_table(struct ib_mad_mgmt_method_table *method) +{ + int i; + + for (i = 0; i < IB_MGMT_MAX_METHODS; i++) + if (method->agent[i]) + return 
1; + return 0; +} + +/* + * Check to see if there are any method tables for this class still in use + */ +static int check_class_table(struct ib_mad_mgmt_class_table *class) +{ + int i; + + for (i = 0; i < MAX_MGMT_CLASS; i++) + if (class->method_table[i]) + return 1; + return 0; +} + +static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) +{ + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) + if (vendor_class->method_table[i]) + return 1; + return 0; +} + +static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, + char *oui) +{ + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) + /* Is there matching OUI for this vendor class ? */ + if (!memcmp(vendor_class->oui[i], oui, 3)) + return i; + + return -1; +} + +static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor) +{ + int i; + + for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++) + if (vendor->vendor_class[i]) + return 1; + + return 0; +} + +static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method, + struct ib_mad_agent_private *agent) +{ + int i; + + /* Remove any methods for this mad agent */ + for (i = 0; i < IB_MGMT_MAX_METHODS; i++) { + if (method->agent[i] == agent) { + method->agent[i] = NULL; + } + } +} + +static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv, + u8 mgmt_class) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_class_table **class; + struct ib_mad_mgmt_method_table **method; + int i, ret; + + port_priv = agent_priv->qp_info->port_priv; + class = &port_priv->version[mad_reg_req->mgmt_class_version].class; + if (!*class) { + /* Allocate management class table for "new" class version */ + *class = kmalloc(sizeof **class, GFP_ATOMIC); + if (!*class) { + printk(KERN_ERR PFX "No memory for " + "ib_mad_mgmt_class_table\n"); + ret = -ENOMEM; + goto error1; + } + /* Clear management class table */ + memset(*class, 0, sizeof(**class)); + /* Allocate method table for this management class */ + method = &(*class)->method_table[mgmt_class]; + if ((ret = allocate_method_table(method))) + goto error2; + } else { + method = &(*class)->method_table[mgmt_class]; + if (!*method) { + /* Allocate method table for this management class */ + if ((ret = allocate_method_table(method))) + goto error1; + } + } + + /* Now, make sure methods are not already in use */ + if (method_in_use(method, mad_reg_req)) + goto error3; + + /* Finally, add in methods being registered */ + for (i = find_first_bit(mad_reg_req->method_mask, + IB_MGMT_MAX_METHODS); + i < IB_MGMT_MAX_METHODS; + i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, + 1+i)) { + (*method)->agent[i] = agent_priv; + } + return 0; + +error3: + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(*method, agent_priv); + /* Now, check to see if there are any methods in use */ + if (!check_method_table(*method)) { + /* If not, release management method table */ + kfree(*method); + *method = NULL; + } + ret = -EINVAL; + goto error1; +error2: + kfree(*class); + *class = NULL; +error1: + return ret; +} + +static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_vendor_class_table **vendor_table; + struct ib_mad_mgmt_vendor_class_table *vendor = NULL; + struct ib_mad_mgmt_vendor_class *vendor_class = NULL; + struct ib_mad_mgmt_method_table **method; + int i, ret = -ENOMEM; + u8 vclass; + + /* "New" vendor (with OUI) 
class */ + vclass = vendor_class_index(mad_reg_req->mgmt_class); + port_priv = agent_priv->qp_info->port_priv; + vendor_table = &port_priv->version[ + mad_reg_req->mgmt_class_version].vendor; + if (!*vendor_table) { + /* Allocate mgmt vendor class table for "new" class version */ + vendor = kmalloc(sizeof *vendor, GFP_ATOMIC); + if (!vendor) { + printk(KERN_ERR PFX "No memory for " + "ib_mad_mgmt_vendor_class_table\n"); + goto error1; + } + /* Clear management vendor class table */ + memset(vendor, 0, sizeof(*vendor)); + *vendor_table = vendor; + } + if (!(*vendor_table)->vendor_class[vclass]) { + /* Allocate table for this management vendor class */ + vendor_class = kmalloc(sizeof *vendor_class, GFP_ATOMIC); + if (!vendor_class) { + printk(KERN_ERR PFX "No memory for " + "ib_mad_mgmt_vendor_class\n"); + goto error2; + } + memset(vendor_class, 0, sizeof(*vendor_class)); + (*vendor_table)->vendor_class[vclass] = vendor_class; + } + for (i = 0; i < MAX_MGMT_OUI; i++) { + /* Is there matching OUI for this vendor class ? */ + if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i], + mad_reg_req->oui, 3)) { + method = &(*vendor_table)->vendor_class[ + vclass]->method_table[i]; + BUG_ON(!*method); + goto check_in_use; + } + } + for (i = 0; i < MAX_MGMT_OUI; i++) { + /* OUI slot available ? */ + if (!is_vendor_oui((*vendor_table)->vendor_class[ + vclass]->oui[i])) { + method = &(*vendor_table)->vendor_class[ + vclass]->method_table[i]; + BUG_ON(*method); + /* Allocate method table for this OUI */ + if ((ret = allocate_method_table(method))) + goto error3; + memcpy((*vendor_table)->vendor_class[vclass]->oui[i], + mad_reg_req->oui, 3); + goto check_in_use; + } + } + printk(KERN_ERR PFX "All OUI slots in use\n"); + goto error3; + +check_in_use: + /* Now, make sure methods are not already in use */ + if (method_in_use(method, mad_reg_req)) + goto error4; + + /* Finally, add in methods being registered */ + for (i = find_first_bit(mad_reg_req->method_mask, + IB_MGMT_MAX_METHODS); + i < IB_MGMT_MAX_METHODS; + i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, + 1+i)) { + (*method)->agent[i] = agent_priv; + } + return 0; + +error4: + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(*method, agent_priv); + /* Now, check to see if there are any methods in use */ + if (!check_method_table(*method)) { + /* If not, release management method table */ + kfree(*method); + *method = NULL; + } + ret = -EINVAL; +error3: + if (vendor_class) { + (*vendor_table)->vendor_class[vclass] = NULL; + kfree(vendor_class); + } +error2: + if (vendor) { + *vendor_table = NULL; + kfree(vendor); + } +error1: + return ret; +} + +static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_method_table *method; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + int index; + u8 mgmt_class; + + /* + * Was MAD registration request supplied + * with original registration ? 
+ */ + if (!agent_priv->reg_req) { + goto out; + } + + port_priv = agent_priv->qp_info->port_priv; + class = port_priv->version[ + agent_priv->reg_req->mgmt_class_version].class; + if (!class) + goto vendor_check; + + mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class); + method = class->method_table[mgmt_class]; + if (method) { + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(method, agent_priv); + /* Now, check to see if there are any methods still in use */ + if (!check_method_table(method)) { + /* If not, release management method table */ + kfree(method); + class->method_table[mgmt_class] = NULL; + /* Any management classes left ? */ + if (!check_class_table(class)) { + /* If not, release management class table */ + kfree(class); + port_priv->version[ + agent_priv->reg_req-> + mgmt_class_version].class = NULL; + } + } + } + +vendor_check: + vendor = port_priv->version[ + agent_priv->reg_req->mgmt_class_version].vendor; + if (!vendor) + goto out; + + mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class); + vendor_class = vendor->vendor_class[mgmt_class]; + if (vendor_class) { + index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui); + if (index == -1) + goto out; + method = vendor_class->method_table[index]; + if (method) { + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(method, agent_priv); + /* + * Now, check to see if there are + * any methods still in use + */ + if (!check_method_table(method)) { + /* If not, release management method table */ + kfree(method); + vendor_class->method_table[index] = NULL; + memset(vendor_class->oui[index], 0, 3); + /* Any OUIs left ? */ + if (!check_vendor_class(vendor_class)) { + /* If not, release vendor class table */ + kfree(vendor_class); + vendor->vendor_class[mgmt_class] = NULL; + /* Any other vendor classes left ? */ + if (!check_vendor_table(vendor)) { + kfree(vendor); + port_priv->version[ + agent_priv->reg_req-> + mgmt_class_version]. + vendor = NULL; + } + } + } + } + } + +out: + return; +} + +static int response_mad(struct ib_mad *mad) +{ + /* Trap represses are responses although response bit is reset */ + return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || + (mad->mad_hdr.method & IB_MGMT_METHOD_RESP)); +} + +static int solicited_mad(struct ib_mad *mad) +{ + /* CM MADs are never solicited */ + if (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CM) { + return 0; + } + + /* XXX: Determine whether MAD is using RMPP */ + + /* Not using RMPP */ + /* Is this MAD a response to a previous MAD ? */ + return response_mad(mad); +} + +static struct ib_mad_agent_private * +find_mad_agent(struct ib_mad_port_private *port_priv, + struct ib_mad *mad, + int solicited) +{ + struct ib_mad_agent_private *mad_agent = NULL; + unsigned long flags; + + spin_lock_irqsave(&port_priv->reg_lock, flags); + + /* + * Whether MAD was solicited determines type of routing to + * MAD client. + */ + if (solicited) { + u32 hi_tid; + struct ib_mad_agent_private *entry; + + /* + * Routing is based on high 32 bits of transaction ID + * of MAD. 
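+	 * The sender places its agent's hi_tid in those bits when building
+	 * the TID (as init_mad() in sa_query.c below does), so the response
+	 * is routed back to the agent that issued the request.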
+ */ + hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; + list_for_each_entry(entry, &port_priv->agent_list, + agent_list) { + if (entry->agent.hi_tid == hi_tid) { + mad_agent = entry; + break; + } + } + } else { + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_method_table *method; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + struct ib_vendor_mad *vendor_mad; + int index; + + /* + * Routing is based on version, class, and method + * For "newer" vendor MADs, also based on OUI + */ + if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION) + goto out; + if (!is_vendor_class(mad->mad_hdr.mgmt_class)) { + class = port_priv->version[ + mad->mad_hdr.class_version].class; + if (!class) + goto out; + method = class->method_table[convert_mgmt_class( + mad->mad_hdr.mgmt_class)]; + if (method) + mad_agent = method->agent[mad->mad_hdr.method & + ~IB_MGMT_METHOD_RESP]; + } else { + vendor = port_priv->version[ + mad->mad_hdr.class_version].vendor; + if (!vendor) + goto out; + vendor_class = vendor->vendor_class[vendor_class_index( + mad->mad_hdr.mgmt_class)]; + if (!vendor_class) + goto out; + /* Find matching OUI */ + vendor_mad = (struct ib_vendor_mad *)mad; + index = find_vendor_oui(vendor_class, vendor_mad->oui); + if (index == -1) + goto out; + method = vendor_class->method_table[index]; + if (method) { + mad_agent = method->agent[mad->mad_hdr.method & + ~IB_MGMT_METHOD_RESP]; + } + } + } + + if (mad_agent) { + if (mad_agent->agent.recv_handler) + atomic_inc(&mad_agent->refcount); + else { + printk(KERN_NOTICE PFX "No receive handler for client " + "%p on port %d\n", + &mad_agent->agent, port_priv->port_num); + mad_agent = NULL; + } + } +out: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); + + return mad_agent; +} + +static int validate_mad(struct ib_mad *mad, u32 qp_num) +{ + int valid = 0; + + /* Make sure MAD base version is understood */ + if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) { + printk(KERN_ERR PFX "MAD received with unsupported base " + "version %d\n", mad->mad_hdr.base_version); + goto out; + } + + /* Filter SMI packets sent to other than QP0 */ + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + if (qp_num == 0) + valid = 1; + } else { + /* Filter GSI packets sent to QP0 */ + if (qp_num != 0) + valid = 1; + } + +out: + return valid; +} + +/* + * Return start of fully reassembled MAD, or NULL, if MAD isn't assembled yet + */ +static struct ib_mad_private * +reassemble_recv(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_private *recv) +{ + /* Until we have RMPP, all receives are reassembled!... */ + INIT_LIST_HEAD(&recv->header.recv_wc.recv_buf.list); + return recv; +} + +static struct ib_mad_send_wr_private* +find_send_req(struct ib_mad_agent_private *mad_agent_priv, + u64 tid) +{ + struct ib_mad_send_wr_private *mad_send_wr; + + list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, + agent_list) { + if (mad_send_wr->tid == tid) + return mad_send_wr; + } + + /* + * It's possible to receive the response before we've + * been notified that the send has completed + */ + list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, + agent_list) { + if (mad_send_wr->tid == tid && mad_send_wr->timeout) { + /* Verify request has not been canceled */ + return (mad_send_wr->status == IB_WC_SUCCESS) ? 
+ mad_send_wr : NULL; + } + } + return NULL; +} + +static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_private *recv, + int solicited) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags; + + /* Fully reassemble receive before processing */ + recv = reassemble_recv(mad_agent_priv, recv); + if (!recv) { + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); + return; + } + + /* Complete corresponding request */ + if (solicited) { + spin_lock_irqsave(&mad_agent_priv->lock, flags); + mad_send_wr = find_send_req(mad_agent_priv, + recv->mad.mad.mad_hdr.tid); + if (!mad_send_wr) { + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + ib_free_recv_mad(&recv->header.recv_wc); + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); + return; + } + /* Timeout = 0 means that we won't wait for a response */ + mad_send_wr->timeout = 0; + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + /* Defined behavior is to complete response before request */ + recv->header.recv_wc.wc->wr_id = mad_send_wr->wr_id; + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, + &recv->header.recv_wc); + atomic_dec(&mad_agent_priv->refcount); + + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.wr_id = mad_send_wr->wr_id; + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + } else { + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, + &recv->header.recv_wc); + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); + } +} + +static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, + struct ib_wc *wc) +{ + struct ib_mad_qp_info *qp_info; + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *recv, *response; + struct ib_mad_list_head *mad_list; + struct ib_mad_agent_private *mad_agent; + int solicited; + + response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + if (!response) + printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory " + "for response buffer\n"); + + mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; + qp_info = mad_list->mad_queue->qp_info; + dequeue_mad(mad_list); + + mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, + mad_list); + recv = container_of(mad_priv_hdr, struct ib_mad_private, header); + dma_unmap_single(port_priv->device->dma_device, + pci_unmap_addr(&recv->header, mapping), + sizeof(struct ib_mad_private) - + sizeof(struct ib_mad_private_header), + DMA_FROM_DEVICE); + + /* Setup MAD receive work completion from "normal" work completion */ + recv->header.recv_wc.wc = wc; + recv->header.recv_wc.mad_len = sizeof(struct ib_mad); + recv->header.recv_wc.recv_buf.mad = &recv->mad.mad; + recv->header.recv_wc.recv_buf.grh = &recv->grh; + + if (atomic_read(&qp_info->snoop_count)) + snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); + + /* Validate MAD */ + if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) + goto out; + + if (recv->mad.mad.mad_hdr.mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (!smi_handle_dr_smp_recv(&recv->mad.smp, + port_priv->device->node_type, + port_priv->port_num, + port_priv->device->phys_port_cnt)) + goto out; + if (!smi_check_forward_dr_smp(&recv->mad.smp)) + goto local; + if (!smi_handle_dr_smp_send(&recv->mad.smp, + port_priv->device->node_type, + port_priv->port_num)) + goto out; + if (!smi_check_local_dr_smp(&recv->mad.smp, 
+ port_priv->device, + port_priv->port_num)) + goto out; + } + +local: + /* Give driver "right of first refusal" on incoming MAD */ + if (port_priv->device->process_mad) { + int ret; + + if (!response) { + printk(KERN_ERR PFX "No memory for response MAD\n"); + /* + * Is it better to assume that + * it wouldn't be processed ? + */ + goto out; + } + + ret = port_priv->device->process_mad(port_priv->device, 0, + port_priv->port_num, + wc->slid, + &recv->mad.mad, + &response->mad.mad); + if (ret & IB_MAD_RESULT_SUCCESS) { + if (ret & IB_MAD_RESULT_CONSUMED) + goto out; + if (ret & IB_MAD_RESULT_REPLY) { + /* Send response */ + if (!agent_send(response, &recv->grh, wc, + port_priv->device, + port_priv->port_num)) + response = NULL; + goto out; + } + } + } + + /* Determine corresponding MAD agent for incoming receive MAD */ + solicited = solicited_mad(&recv->mad.mad); + mad_agent = find_mad_agent(port_priv, &recv->mad.mad, solicited); + if (mad_agent) { + ib_mad_complete_recv(mad_agent, recv, solicited); + /* + * recv is freed up in error cases in ib_mad_complete_recv + * or via recv_handler in ib_mad_complete_recv() + */ + recv = NULL; + } + +out: + /* Post another receive request for this QP */ + if (response) { + ib_mad_post_receive_mads(qp_info, response); + if (recv) + kmem_cache_free(ib_mad_cache, recv); + } else + ib_mad_post_receive_mads(qp_info, recv); +} + +static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long delay; + + if (list_empty(&mad_agent_priv->wait_list)) { + cancel_delayed_work(&mad_agent_priv->timed_work); + } else { + mad_send_wr = list_entry(mad_agent_priv->wait_list.next, + struct ib_mad_send_wr_private, + agent_list); + + if (time_after(mad_agent_priv->timeout, + mad_send_wr->timeout)) { + mad_agent_priv->timeout = mad_send_wr->timeout; + cancel_delayed_work(&mad_agent_priv->timed_work); + delay = mad_send_wr->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + queue_delayed_work(mad_agent_priv->qp_info-> + port_priv->wq, + &mad_agent_priv->timed_work, delay); + } + } +} + +static void wait_for_response(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_wr_private *mad_send_wr ) +{ + struct ib_mad_send_wr_private *temp_mad_send_wr; + struct list_head *list_item; + unsigned long delay; + + list_del(&mad_send_wr->agent_list); + + delay = mad_send_wr->timeout; + mad_send_wr->timeout += jiffies; + + list_for_each_prev(list_item, &mad_agent_priv->wait_list) { + temp_mad_send_wr = list_entry(list_item, + struct ib_mad_send_wr_private, + agent_list); + if (time_after(mad_send_wr->timeout, + temp_mad_send_wr->timeout)) + break; + } + list_add(&mad_send_wr->agent_list, list_item); + + /* Reschedule a work item if we have a shorter timeout */ + if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) { + cancel_delayed_work(&mad_agent_priv->timed_work); + queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); + } +} + +/* + * Process a send work completion + */ +static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_mad_agent_private *mad_agent_priv; + unsigned long flags; + + mad_agent_priv = container_of(mad_send_wr->agent, + struct ib_mad_agent_private, agent); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + if (mad_send_wc->status != IB_WC_SUCCESS && + mad_send_wr->status == IB_WC_SUCCESS) { + mad_send_wr->status = mad_send_wc->status; + 
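+		/* No response will arrive for a failed send, so drop the
+		 * extra reference that was held for the response timeout */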
mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + } + + if (--mad_send_wr->refcount > 0) { + if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && + mad_send_wr->status == IB_WC_SUCCESS) { + wait_for_response(mad_agent_priv, mad_send_wr); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return; + } + + /* Remove send from MAD agent and notify client of completion */ + list_del(&mad_send_wr->agent_list); + adjust_timeout(mad_agent_priv); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + if (mad_send_wr->status != IB_WC_SUCCESS ) + mad_send_wc->status = mad_send_wr->status; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + mad_send_wc); + + /* Release reference on agent taken when sending */ + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); + + kfree(mad_send_wr); +} + +static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, + struct ib_wc *wc) +{ + struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; + struct ib_mad_list_head *mad_list; + struct ib_mad_qp_info *qp_info; + struct ib_mad_queue *send_queue; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; + mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, + mad_list); + send_queue = mad_list->mad_queue; + qp_info = send_queue->qp_info; + +retry: + queued_send_wr = NULL; + spin_lock_irqsave(&send_queue->lock, flags); + list_del(&mad_list->list); + + /* Move queued send to the send queue */ + if (send_queue->count-- > send_queue->max_active) { + mad_list = container_of(qp_info->overflow_list.next, + struct ib_mad_list_head, list); + queued_send_wr = container_of(mad_list, + struct ib_mad_send_wr_private, + mad_list); + list_del(&mad_list->list); + list_add_tail(&mad_list->list, &send_queue->list); + } + spin_unlock_irqrestore(&send_queue->lock, flags); + + /* Restore client wr_id in WC and complete send */ + wc->wr_id = mad_send_wr->wr_id; + if (atomic_read(&qp_info->snoop_count)) + snoop_send(qp_info, &mad_send_wr->send_wr, + (struct ib_mad_send_wc *)wc, + IB_MAD_SNOOP_SEND_COMPLETIONS); + ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc); + + if (queued_send_wr) { + ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, + &bad_send_wr); + if (ret) { + printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); + mad_send_wr = queued_send_wr; + wc->status = IB_WC_LOC_QP_OP_ERR; + goto retry; + } + } +} + +static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_list_head *mad_list; + unsigned long flags; + + spin_lock_irqsave(&qp_info->send_queue.lock, flags); + list_for_each_entry(mad_list, &qp_info->send_queue.list, list) { + mad_send_wr = container_of(mad_list, + struct ib_mad_send_wr_private, + mad_list); + mad_send_wr->retry = 1; + } + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); +} + +static void mad_error_handler(struct ib_mad_port_private *port_priv, + struct ib_wc *wc) +{ + struct ib_mad_list_head *mad_list; + struct ib_mad_qp_info *qp_info; + struct ib_mad_send_wr_private *mad_send_wr; + int ret; + + /* Determine if failure was a send or receive */ + mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; + qp_info = mad_list->mad_queue->qp_info; + if (mad_list->mad_queue == &qp_info->recv_queue) + /* + * Receive errors indicate that the QP has entered the error + * state - error handling/shutdown code will cleanup 
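+	 * the posted receive buffers (see cleanup_recv_queue())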
+ */ + return; + + /* + * Send errors will transition the QP to SQE - move + * QP to RTS and repost flushed work requests + */ + mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, + mad_list); + if (wc->status == IB_WC_WR_FLUSH_ERR) { + if (mad_send_wr->retry) { + /* Repost send */ + struct ib_send_wr *bad_send_wr; + + mad_send_wr->retry = 0; + ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr, + &bad_send_wr); + if (ret) + ib_mad_send_done_handler(port_priv, wc); + } else + ib_mad_send_done_handler(port_priv, wc); + } else { + struct ib_qp_attr *attr; + + /* Transition QP to RTS and fail offending send */ + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (attr) { + attr->qp_state = IB_QPS_RTS; + attr->cur_qp_state = IB_QPS_SQE; + ret = ib_modify_qp(qp_info->qp, attr, + IB_QP_STATE | IB_QP_CUR_STATE); + kfree(attr); + if (ret) + printk(KERN_ERR PFX "mad_error_handler - " + "ib_modify_qp to RTS : %d\n", ret); + else + mark_sends_for_retry(qp_info); + } + ib_mad_send_done_handler(port_priv, wc); + } +} + +/* + * IB MAD completion callback + */ +static void ib_mad_completion_handler(void *data) +{ + struct ib_mad_port_private *port_priv; + struct ib_wc wc; + + port_priv = (struct ib_mad_port_private *)data; + ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); + + while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) { + if (wc.status == IB_WC_SUCCESS) { + switch (wc.opcode) { + case IB_WC_SEND: + ib_mad_send_done_handler(port_priv, &wc); + break; + case IB_WC_RECV: + ib_mad_recv_done_handler(port_priv, &wc); + break; + default: + BUG_ON(1); + break; + } + } else + mad_error_handler(port_priv, &wc); + } +} + +static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) +{ + unsigned long flags; + struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + struct list_head cancel_list; + + INIT_LIST_HEAD(&cancel_list); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, + &mad_agent_priv->send_list, agent_list) { + if (mad_send_wr->status == IB_WC_SUCCESS) { + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + } + } + + /* Empty wait list to prevent receives from finding a request */ + list_splice_init(&mad_agent_priv->wait_list, &cancel_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + /* Report all cancelled requests */ + mad_send_wc.status = IB_WC_WR_FLUSH_ERR; + mad_send_wc.vendor_err = 0; + + list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, + &cancel_list, agent_list) { + mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + + list_del(&mad_send_wr->agent_list); + kfree(mad_send_wr); + atomic_dec(&mad_agent_priv->refcount); + } +} + +static struct ib_mad_send_wr_private* +find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, + u64 wr_id) +{ + struct ib_mad_send_wr_private *mad_send_wr; + + list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, + agent_list) { + if (mad_send_wr->wr_id == wr_id) + return mad_send_wr; + } + + list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, + agent_list) { + if (mad_send_wr->wr_id == wr_id) + return mad_send_wr; + } + return NULL; +} + +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + u64 wr_id) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags; + + mad_agent_priv = 
container_of(mad_agent, struct ib_mad_agent_private, + agent); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); + if (!mad_send_wr) { + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + goto out; + } + + if (mad_send_wr->status == IB_WC_SUCCESS) + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + + if (mad_send_wr->refcount != 0) { + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + goto out; + } + + list_del(&mad_send_wr->agent_list); + adjust_timeout(mad_agent_priv); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + mad_send_wc.status = IB_WC_WR_FLUSH_ERR; + mad_send_wc.vendor_err = 0; + mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + + kfree(mad_send_wr); + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); + +out: + return; +} +EXPORT_SYMBOL(ib_cancel_mad); + +static void local_completions(void *data) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_local_private *local; + unsigned long flags; + struct ib_wc wc; + struct ib_mad_send_wc mad_send_wc; + + mad_agent_priv = (struct ib_mad_agent_private *)data; + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + while (!list_empty(&mad_agent_priv->local_list)) { + local = list_entry(mad_agent_priv->local_list.next, + struct ib_mad_local_private, + completion_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (local->mad_priv) { + /* + * Defined behavior is to complete response + * before request + */ + wc.wr_id = local->wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.byte_len = sizeof(struct ib_mad); + wc.src_qp = IB_QP0; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = IB_LID_PERMISSIVE; + wc.sl = 0; + wc.dlid_path_bits = 0; + local->mad_priv->header.recv_wc.wc = &wc; + local->mad_priv->header.recv_wc.mad_len = + sizeof(struct ib_mad); + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.recv_buf.list); + local->mad_priv->header.recv_wc.recv_buf.grh = NULL; + local->mad_priv->header.recv_wc.recv_buf.mad = + &local->mad_priv->mad.mad; + if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) + snoop_recv(mad_agent_priv->qp_info, + &local->mad_priv->header.recv_wc, + IB_MAD_SNOOP_RECVS); + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, + &local->mad_priv->header.recv_wc); + } + + /* Complete send */ + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.wr_id = local->wr_id; + if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) + snoop_send(mad_agent_priv->qp_info, &local->send_wr, + &mad_send_wc, + IB_MAD_SNOOP_SEND_COMPLETIONS); + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_del(&local->completion_list); + atomic_dec(&mad_agent_priv->refcount); + kfree(local); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + +static void timeout_sends(void *data) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags, delay; + + mad_agent_priv = (struct ib_mad_agent_private *)data; + + mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; + mad_send_wc.vendor_err = 0; + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + while (!list_empty(&mad_agent_priv->wait_list)) { + mad_send_wr = 
list_entry(mad_agent_priv->wait_list.next, + struct ib_mad_send_wr_private, + agent_list); + + if (time_after(mad_send_wr->timeout, jiffies)) { + delay = mad_send_wr->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + queue_delayed_work(mad_agent_priv->qp_info-> + port_priv->wq, + &mad_agent_priv->timed_work, delay); + break; + } + + list_del(&mad_send_wr->agent_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + + kfree(mad_send_wr); + atomic_dec(&mad_agent_priv->refcount); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + +static void ib_mad_thread_completion_handler(struct ib_cq *cq) +{ + struct ib_mad_port_private *port_priv = cq->cq_context; + + queue_work(port_priv->wq, &port_priv->work); +} + +/* + * Allocate receive MADs and post receive WRs for them + */ +static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, + struct ib_mad_private *mad) +{ + unsigned long flags; + int post, ret; + struct ib_mad_private *mad_priv; + struct ib_sge sg_list; + struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_mad_queue *recv_queue = &qp_info->recv_queue; + + /* Initialize common scatter list fields */ + sg_list.length = sizeof *mad_priv - sizeof mad_priv->header; + sg_list.lkey = (*qp_info->port_priv->mr).lkey; + + /* Initialize common receive WR fields */ + recv_wr.next = NULL; + recv_wr.sg_list = &sg_list; + recv_wr.num_sge = 1; + recv_wr.recv_flags = IB_RECV_SIGNALED; + + do { + /* Allocate and map receive buffer */ + if (mad) { + mad_priv = mad; + mad = NULL; + } else { + mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + if (!mad_priv) { + printk(KERN_ERR PFX "No memory for receive buffer\n"); + ret = -ENOMEM; + break; + } + } + sg_list.addr = dma_map_single(qp_info->port_priv-> + device->dma_device, + &mad_priv->grh, + sizeof *mad_priv - + sizeof mad_priv->header, + DMA_FROM_DEVICE); + pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr); + recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; + mad_priv->header.mad_list.mad_queue = recv_queue; + + /* Post receive WR */ + spin_lock_irqsave(&recv_queue->lock, flags); + post = (++recv_queue->count < recv_queue->max_active); + list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); + spin_unlock_irqrestore(&recv_queue->lock, flags); + ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); + if (ret) { + spin_lock_irqsave(&recv_queue->lock, flags); + list_del(&mad_priv->header.mad_list.list); + recv_queue->count--; + spin_unlock_irqrestore(&recv_queue->lock, flags); + dma_unmap_single(qp_info->port_priv->device->dma_device, + pci_unmap_addr(&mad_priv->header, + mapping), + sizeof *mad_priv - + sizeof mad_priv->header, + DMA_FROM_DEVICE); + kmem_cache_free(ib_mad_cache, mad_priv); + printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); + break; + } + } while (post); + + return ret; +} + +/* + * Return all the posted receive MADs + */ +static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) +{ + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *recv; + struct ib_mad_list_head *mad_list; + + while (!list_empty(&qp_info->recv_queue.list)) { + + mad_list = list_entry(qp_info->recv_queue.list.next, + struct ib_mad_list_head, list); + mad_priv_hdr = container_of(mad_list, + struct ib_mad_private_header, + mad_list); + recv = container_of(mad_priv_hdr, struct ib_mad_private, + 
header); + + /* Remove from posted receive MAD list */ + list_del(&mad_list->list); + + /* Undo PCI mapping */ + dma_unmap_single(qp_info->port_priv->device->dma_device, + pci_unmap_addr(&recv->header, mapping), + sizeof(struct ib_mad_private) - + sizeof(struct ib_mad_private_header), + DMA_FROM_DEVICE); + kmem_cache_free(ib_mad_cache, recv); + } + + qp_info->recv_queue.count = 0; +} + +/* + * Start the port + */ +static int ib_mad_port_start(struct ib_mad_port_private *port_priv) +{ + int ret, i; + struct ib_qp_attr *attr; + struct ib_qp *qp; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) { + printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); + return -ENOMEM; + } + + for (i = 0; i < IB_MAD_QPS_CORE; i++) { + qp = port_priv->qp_info[i].qp; + /* + * PKey index for QP1 is irrelevant but + * one is needed for the Reset to Init transition + */ + attr->qp_state = IB_QPS_INIT; + attr->pkey_index = 0; + attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY; + ret = ib_modify_qp(qp, attr, IB_QP_STATE | + IB_QP_PKEY_INDEX | IB_QP_QKEY); + if (ret) { + printk(KERN_ERR PFX "Couldn't change QP%d state to " + "INIT: %d\n", i, ret); + goto out; + } + + attr->qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, attr, IB_QP_STATE); + if (ret) { + printk(KERN_ERR PFX "Couldn't change QP%d state to " + "RTR: %d\n", i, ret); + goto out; + } + + attr->qp_state = IB_QPS_RTS; + attr->sq_psn = IB_MAD_SEND_Q_PSN; + ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + printk(KERN_ERR PFX "Couldn't change QP%d state to " + "RTS: %d\n", i, ret); + goto out; + } + } + + ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); + if (ret) { + printk(KERN_ERR PFX "Failed to request completion " + "notification: %d\n", ret); + goto out; + } + + for (i = 0; i < IB_MAD_QPS_CORE; i++) { + ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); + if (ret) { + printk(KERN_ERR PFX "Couldn't post receive WRs\n"); + goto out; + } + } +out: + kfree(attr); + return ret; +} + +static void qp_event_handler(struct ib_event *event, void *qp_context) +{ + struct ib_mad_qp_info *qp_info = qp_context; + + /* It's worse than that! He's dead, Jim! 
*/ + printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n", + event->event, qp_info->qp->qp_num); +} + +static void init_mad_queue(struct ib_mad_qp_info *qp_info, + struct ib_mad_queue *mad_queue) +{ + mad_queue->qp_info = qp_info; + mad_queue->count = 0; + spin_lock_init(&mad_queue->lock); + INIT_LIST_HEAD(&mad_queue->list); +} + +static void init_mad_qp(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info) +{ + qp_info->port_priv = port_priv; + init_mad_queue(qp_info, &qp_info->send_queue); + init_mad_queue(qp_info, &qp_info->recv_queue); + INIT_LIST_HEAD(&qp_info->overflow_list); + spin_lock_init(&qp_info->snoop_lock); + qp_info->snoop_table = NULL; + qp_info->snoop_table_size = 0; + atomic_set(&qp_info->snoop_count, 0); +} + +static int create_mad_qp(struct ib_mad_qp_info *qp_info, + enum ib_qp_type qp_type) +{ + struct ib_qp_init_attr qp_init_attr; + int ret; + + memset(&qp_init_attr, 0, sizeof qp_init_attr); + qp_init_attr.send_cq = qp_info->port_priv->cq; + qp_init_attr.recv_cq = qp_info->port_priv->cq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.rq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE; + qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE; + qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; + qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; + qp_init_attr.qp_type = qp_type; + qp_init_attr.port_num = qp_info->port_priv->port_num; + qp_init_attr.qp_context = qp_info; + qp_init_attr.event_handler = qp_event_handler; + qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); + if (IS_ERR(qp_info->qp)) { + printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n", + get_spl_qp_index(qp_type)); + ret = PTR_ERR(qp_info->qp); + goto error; + } + /* Use minimum queue sizes unless the CQ is resized */ + qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE; + qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE; + return 0; + +error: + return ret; +} + +static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) +{ + ib_destroy_qp(qp_info->qp); + if (qp_info->snoop_table) + kfree(qp_info->snoop_table); +} + +/* + * Open the port + * Create the QP, PD, MR, and CQ if needed + */ +static int ib_mad_port_open(struct ib_device *device, + int port_num) +{ + int ret, cq_size; + struct ib_mad_port_private *port_priv; + unsigned long flags; + char name[sizeof "ib_mad123"]; + + /* First, check if port already open at MAD layer */ + port_priv = ib_get_mad_port(device, port_num); + if (port_priv) { + printk(KERN_DEBUG PFX "%s port %d already open\n", + device->name, port_num); + return 0; + } + + /* Create new device info */ + port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); + if (!port_priv) { + printk(KERN_ERR PFX "No memory for ib_mad_port_private\n"); + return -ENOMEM; + } + memset(port_priv, 0, sizeof *port_priv); + port_priv->device = device; + port_priv->port_num = port_num; + spin_lock_init(&port_priv->reg_lock); + INIT_LIST_HEAD(&port_priv->agent_list); + init_mad_qp(port_priv, &port_priv->qp_info[0]); + init_mad_qp(port_priv, &port_priv->qp_info[1]); + + cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2; + port_priv->cq = ib_create_cq(port_priv->device, + (ib_comp_handler) + ib_mad_thread_completion_handler, + NULL, port_priv, cq_size); + if (IS_ERR(port_priv->cq)) { + printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n"); + ret = PTR_ERR(port_priv->cq); + goto error3; + } + + port_priv->pd = ib_alloc_pd(device); + if (IS_ERR(port_priv->pd)) { + printk(KERN_ERR PFX "Couldn't create ib_mad 
PD\n"); + ret = PTR_ERR(port_priv->pd); + goto error4; + } + + port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(port_priv->mr)) { + printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n"); + ret = PTR_ERR(port_priv->mr); + goto error5; + } + + ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); + if (ret) + goto error6; + ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); + if (ret) + goto error7; + + snprintf(name, sizeof name, "ib_mad%d", port_num); + port_priv->wq = create_singlethread_workqueue(name); + if (!port_priv->wq) { + ret = -ENOMEM; + goto error8; + } + INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv); + + ret = ib_mad_port_start(port_priv); + if (ret) { + printk(KERN_ERR PFX "Couldn't start port\n"); + goto error9; + } + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_add_tail(&port_priv->port_list, &ib_mad_port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + return 0; + +error9: + destroy_workqueue(port_priv->wq); +error8: + destroy_mad_qp(&port_priv->qp_info[1]); +error7: + destroy_mad_qp(&port_priv->qp_info[0]); +error6: + ib_dereg_mr(port_priv->mr); +error5: + ib_dealloc_pd(port_priv->pd); +error4: + ib_destroy_cq(port_priv->cq); + cleanup_recv_queue(&port_priv->qp_info[1]); + cleanup_recv_queue(&port_priv->qp_info[0]); +error3: + kfree(port_priv); + + return ret; +} + +/* + * Close the port + * If there are no classes using the port, free the port + * resources (CQ, MR, PD, QP) and remove the port's info structure + */ +static int ib_mad_port_close(struct ib_device *device, int port_num) +{ + struct ib_mad_port_private *port_priv; + unsigned long flags; + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + port_priv = __ib_get_mad_port(device, port_num); + if (port_priv == NULL) { + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + printk(KERN_ERR PFX "Port %d not found\n", port_num); + return -ENODEV; + } + list_del(&port_priv->port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + /* Stop processing completions. 
*/ + flush_workqueue(port_priv->wq); + destroy_workqueue(port_priv->wq); + destroy_mad_qp(&port_priv->qp_info[1]); + destroy_mad_qp(&port_priv->qp_info[0]); + ib_dereg_mr(port_priv->mr); + ib_dealloc_pd(port_priv->pd); + ib_destroy_cq(port_priv->cq); + cleanup_recv_queue(&port_priv->qp_info[1]); + cleanup_recv_queue(&port_priv->qp_info[0]); + /* XXX: Handle deallocation of MAD registration tables */ + + kfree(port_priv); + + return 0; +} + +static void ib_mad_init_device(struct ib_device *device) +{ + int ret, num_ports, cur_port, i, ret2; + + if (device->node_type == IB_NODE_SWITCH) { + num_ports = 1; + cur_port = 0; + } else { + num_ports = device->phys_port_cnt; + cur_port = 1; + } + for (i = 0; i < num_ports; i++, cur_port++) { + ret = ib_mad_port_open(device, cur_port); + if (ret) { + printk(KERN_ERR PFX "Couldn't open %s port %d\n", + device->name, cur_port); + goto error_device_open; + } + ret = ib_agent_port_open(device, cur_port); + if (ret) { + printk(KERN_ERR PFX "Couldn't open %s port %d " + "for agents\n", + device->name, cur_port); + goto error_device_open; + } + } + + goto error_device_query; + +error_device_open: + while (i > 0) { + cur_port--; + ret2 = ib_agent_port_close(device, cur_port); + if (ret2) { + printk(KERN_ERR PFX "Couldn't close %s port %d " + "for agents\n", + device->name, cur_port); + } + ret2 = ib_mad_port_close(device, cur_port); + if (ret2) { + printk(KERN_ERR PFX "Couldn't close %s port %d\n", + device->name, cur_port); + } + i--; + } + +error_device_query: + return; +} + +static void ib_mad_remove_device(struct ib_device *device) +{ + int ret = 0, i, num_ports, cur_port, ret2; + + if (device->node_type == IB_NODE_SWITCH) { + num_ports = 1; + cur_port = 0; + } else { + num_ports = device->phys_port_cnt; + cur_port = 1; + } + for (i = 0; i < num_ports; i++, cur_port++) { + ret2 = ib_agent_port_close(device, cur_port); + if (ret2) { + printk(KERN_ERR PFX "Couldn't close %s port %d " + "for agents\n", + device->name, cur_port); + if (!ret) + ret = ret2; + } + ret2 = ib_mad_port_close(device, cur_port); + if (ret2) { + printk(KERN_ERR PFX "Couldn't close %s port %d\n", + device->name, cur_port); + if (!ret) + ret = ret2; + } + } +} + +static struct ib_client mad_client = { + .name = "mad", + .add = ib_mad_init_device, + .remove = ib_mad_remove_device +}; + +static int __init ib_mad_init_module(void) +{ + int ret; + + spin_lock_init(&ib_mad_port_list_lock); + spin_lock_init(&ib_agent_port_list_lock); + + ib_mad_cache = kmem_cache_create("ib_mad", + sizeof(struct ib_mad_private), + 0, + SLAB_HWCACHE_ALIGN, + NULL, + NULL); + if (!ib_mad_cache) { + printk(KERN_ERR PFX "Couldn't create ib_mad cache\n"); + ret = -ENOMEM; + goto error1; + } + + INIT_LIST_HEAD(&ib_mad_port_list); + + if (ib_register_client(&mad_client)) { + printk(KERN_ERR PFX "Couldn't register ib_mad client\n"); + ret = -EINVAL; + goto error2; + } + + return 0; + +error2: + kmem_cache_destroy(ib_mad_cache); +error1: + return ret; +} + +static void __exit ib_mad_cleanup_module(void) +{ + ib_unregister_client(&mad_client); + + if (kmem_cache_destroy(ib_mad_cache)) { + printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n"); + } +} + +module_init(ib_mad_init_module); +module_exit(ib_mad_cleanup_module); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h new file mode 100644 index 000000000000..5b7d5ff0e836 --- /dev/null +++ b/drivers/infiniband/core/mad_priv.h @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2004, Voltaire, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mad_priv.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#ifndef __IB_MAD_PRIV_H__ +#define __IB_MAD_PRIV_H__ + +#include <linux/pci.h> +#include <linux/kthread.h> +#include <linux/workqueue.h> +#include <ib_mad.h> +#include <ib_smi.h> + + +#define PFX "ib_mad: " + +#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */ + +/* QP and CQ parameters */ +#define IB_MAD_QP_SEND_SIZE 128 +#define IB_MAD_QP_RECV_SIZE 512 +#define IB_MAD_SEND_REQ_MAX_SG 2 +#define IB_MAD_RECV_REQ_MAX_SG 1 + +#define IB_MAD_SEND_Q_PSN 0 + +/* Registration table sizes */ +#define MAX_MGMT_CLASS 80 +#define MAX_MGMT_VERSION 8 +#define MAX_MGMT_OUI 8 +#define MAX_MGMT_VENDOR_RANGE2 IB_MGMT_CLASS_VENDOR_RANGE2_END - \ + IB_MGMT_CLASS_VENDOR_RANGE2_START + 1 + +struct ib_mad_list_head { + struct list_head list; + struct ib_mad_queue *mad_queue; +}; + +struct ib_mad_private_header { + struct ib_mad_list_head mad_list; + struct ib_mad_recv_wc recv_wc; + DECLARE_PCI_UNMAP_ADDR(mapping) +} __attribute__ ((packed)); + +struct ib_mad_private { + struct ib_mad_private_header header; + struct ib_grh grh; + union { + struct ib_mad mad; + struct ib_rmpp_mad rmpp_mad; + struct ib_smp smp; + } mad; +} __attribute__ ((packed)); + +struct ib_mad_agent_private { + struct list_head agent_list; + struct ib_mad_agent agent; + struct ib_mad_reg_req *reg_req; + struct ib_mad_qp_info *qp_info; + + spinlock_t lock; + struct list_head send_list; + struct list_head wait_list; + struct work_struct timed_work; + unsigned long timeout; + struct list_head local_list; + struct work_struct local_work; + + atomic_t refcount; + wait_queue_head_t wait; + u8 rmpp_version; +}; + +struct ib_mad_snoop_private { + struct ib_mad_agent agent; + struct ib_mad_qp_info *qp_info; + int snoop_index; + int mad_snoop_flags; + atomic_t refcount; + wait_queue_head_t wait; +}; + +struct ib_mad_send_wr_private { + struct ib_mad_list_head mad_list; + struct list_head agent_list; + struct ib_mad_agent *agent; + struct ib_send_wr send_wr; + struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; + u64 wr_id; /* client WR ID */ + u64 tid; + unsigned long timeout; + int retry; + int refcount; + enum ib_wc_status status; +}; + +struct ib_mad_local_private { + 
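+	/* link in the owning agent's local_list, drained by local_completions() */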
struct list_head completion_list; + struct ib_mad_private *mad_priv; + struct ib_send_wr send_wr; + struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; + u64 wr_id; /* client WR ID */ + u64 tid; +}; + +struct ib_mad_mgmt_method_table { + struct ib_mad_agent_private *agent[IB_MGMT_MAX_METHODS]; +}; + +struct ib_mad_mgmt_class_table { + struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS]; +}; + +struct ib_mad_mgmt_vendor_class { + u8 oui[MAX_MGMT_OUI][3]; + struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_OUI]; +}; + +struct ib_mad_mgmt_vendor_class_table { + struct ib_mad_mgmt_vendor_class *vendor_class[MAX_MGMT_VENDOR_RANGE2]; +}; + +struct ib_mad_mgmt_version_table { + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_vendor_class_table *vendor; +}; + +struct ib_mad_queue { + spinlock_t lock; + struct list_head list; + int count; + int max_active; + struct ib_mad_qp_info *qp_info; +}; + +struct ib_mad_qp_info { + struct ib_mad_port_private *port_priv; + struct ib_qp *qp; + struct ib_mad_queue send_queue; + struct ib_mad_queue recv_queue; + struct list_head overflow_list; + spinlock_t snoop_lock; + struct ib_mad_snoop_private **snoop_table; + int snoop_table_size; + atomic_t snoop_count; +}; + +struct ib_mad_port_private { + struct list_head port_list; + struct ib_device *device; + int port_num; + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_mr *mr; + + spinlock_t reg_lock; + struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; + struct list_head agent_list; + struct workqueue_struct *wq; + struct work_struct work; + struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; +}; + +#endif /* __IB_MAD_PRIV_H__ */ diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c new file mode 100644 index 000000000000..5f15feffeae2 --- /dev/null +++ b/drivers/infiniband/core/packer.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <ib_pack.h>
+
+static u64 value_read(int offset, int size, void *structure)
+{
+	switch (size) {
+	case 1: return                *(u8  *) (structure + offset);
+	case 2: return be16_to_cpup((__be16 *) (structure + offset));
+	case 4: return be32_to_cpup((__be32 *) (structure + offset));
+	case 8: return be64_to_cpup((__be64 *) (structure + offset));
+	default:
+		printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+		return 0;
+	}
+}
+
+/**
+ * ib_pack - Pack a structure into a buffer
+ * @desc:Array of structure field descriptions
+ * @desc_len:Number of entries in @desc
+ * @structure:Structure to pack from
+ * @buf:Buffer to pack into
+ *
+ * ib_pack() packs a list of structure fields into a buffer,
+ * controlled by the array of fields in @desc.
+ */
+void ib_pack(const struct ib_field  *desc,
+	     int                     desc_len,
+	     void                   *structure,
+	     void                   *buf)
+{
+	int i;
+
+	for (i = 0; i < desc_len; ++i) {
+		if (desc[i].size_bits <= 32) {
+			int shift;
+			u32 val;
+			__be32 mask;
+			__be32 *addr;
+
+			shift = 32 - desc[i].offset_bits - desc[i].size_bits;
+			if (desc[i].struct_size_bytes)
+				val = value_read(desc[i].struct_offset_bytes,
+						 desc[i].struct_size_bytes,
+						 structure) << shift;
+			else
+				val = 0;
+
+			mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift);
+			addr = (__be32 *) buf + desc[i].offset_words;
+			*addr = (*addr & ~mask) | (cpu_to_be32(val) & mask);
+		} else if (desc[i].size_bits <= 64) {
+			int shift;
+			u64 val;
+			__be64 mask;
+			__be64 *addr;
+
+			shift = 64 - desc[i].offset_bits - desc[i].size_bits;
+			if (desc[i].struct_size_bytes)
+				val = value_read(desc[i].struct_offset_bytes,
+						 desc[i].struct_size_bytes,
+						 structure) << shift;
+			else
+				val = 0;
+
+			/* ~0ull >> (64 - n) avoids the undefined 1ull << 64 when n == 64 */
+			mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
+			addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
+			*addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
+		} else {
+			if (desc[i].offset_bits % 8 ||
+			    desc[i].size_bits   % 8) {
+				printk(KERN_WARNING "Structure field %s of size %d "
+				       "bits is not byte-aligned\n",
+				       desc[i].field_name, desc[i].size_bits);
+			}
+
+			if (desc[i].struct_size_bytes)
+				memcpy(buf + desc[i].offset_words * 4 +
+				       desc[i].offset_bits / 8,
+				       structure + desc[i].struct_offset_bytes,
+				       desc[i].size_bits / 8);
+			else
+				memset(buf + desc[i].offset_words * 4 +
+				       desc[i].offset_bits / 8,
+				       0,
+				       desc[i].size_bits / 8);
+		}
+	}
+}
+EXPORT_SYMBOL(ib_pack);
+
+static void value_write(int offset, int size, u64 val, void *structure)
+{
+	switch (size * 8) {
+	case 8:  *(    u8 *) (structure + offset) = val; break;
+	case 16: *(__be16 *) (structure + offset) = cpu_to_be16(val); break;
+	case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
+	case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
+	default:
+		printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+	}
+}
+
+/**
+ * ib_unpack - Unpack a buffer into a structure
+ * @desc:Array of structure field descriptions
+ * @desc_len:Number of entries in @desc
+ * @buf:Buffer to unpack from
+ * @structure:Structure to unpack into
+ *
+ * ib_unpack() unpacks a list of structure fields from a buffer,
+ * controlled by the array of fields in @desc.
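+ *
+ * Fields whose struct_size_bytes is zero are reserved and are skipped
+ * when unpacking.  A typical use unpacks a received MAD payload into a
+ * host structure, as ib_sa_path_rec_callback() in sa_query.c below does:
+ *
+ *	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ *		  mad->data, &rec);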
+ */
+void ib_unpack(const struct ib_field  *desc,
+	       int                     desc_len,
+	       void                   *buf,
+	       void                   *structure)
+{
+	int i;
+
+	for (i = 0; i < desc_len; ++i) {
+		if (!desc[i].struct_size_bytes)
+			continue;
+
+		if (desc[i].size_bits <= 32) {
+			int shift;
+			u32 val;
+			u32 mask;
+			__be32 *addr;
+
+			shift = 32 - desc[i].offset_bits - desc[i].size_bits;
+			mask = ((1ull << desc[i].size_bits) - 1) << shift;
+			addr = (__be32 *) buf + desc[i].offset_words;
+			val = (be32_to_cpup(addr) & mask) >> shift;
+			value_write(desc[i].struct_offset_bytes,
+				    desc[i].struct_size_bytes,
+				    val,
+				    structure);
+		} else if (desc[i].size_bits <= 64) {
+			int shift;
+			u64 val;
+			u64 mask;
+			__be64 *addr;
+
+			shift = 64 - desc[i].offset_bits - desc[i].size_bits;
+			/* offset_words counts 32-bit words, matching ib_pack();
+			 * ~0ull >> (64 - n) avoids the undefined 1ull << 64 */
+			mask = (~0ull >> (64 - desc[i].size_bits)) << shift;
+			addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
+			val = (be64_to_cpup(addr) & mask) >> shift;
+			value_write(desc[i].struct_offset_bytes,
+				    desc[i].struct_size_bytes,
+				    val,
+				    structure);
+		} else {
+			if (desc[i].offset_bits % 8 ||
+			    desc[i].size_bits   % 8) {
+				printk(KERN_WARNING "Structure field %s of size %d "
+				       "bits is not byte-aligned\n",
+				       desc[i].field_name, desc[i].size_bits);
+			}
+
+			memcpy(structure + desc[i].struct_offset_bytes,
+			       buf + desc[i].offset_words * 4 +
+			       desc[i].offset_bits / 8,
+			       desc[i].size_bits / 8);
+		}
+	}
+}
+EXPORT_SYMBOL(ib_unpack);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
new file mode 100644
index 000000000000..d4233ee61c35
--- /dev/null
+++ b/drivers/infiniband/core/sa_query.c
@@ -0,0 +1,866 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * $Id: sa_query.c 1389 2004-12-27 22:56:47Z roland $ + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/err.h> +#include <linux/random.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/kref.h> +#include <linux/idr.h> + +#include <ib_pack.h> +#include <ib_sa.h> + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand subnet administration query support"); +MODULE_LICENSE("Dual BSD/GPL"); + +/* + * These two structures must be packed because they have 64-bit fields + * that are only 32-bit aligned. 64-bit architectures will lay them + * out wrong otherwise. (And unfortunately they are sent on the wire + * so we can't change the layout) + */ +struct ib_sa_hdr { + u64 sm_key; + u16 attr_offset; + u16 reserved; + ib_sa_comp_mask comp_mask; +} __attribute__ ((packed)); + +struct ib_sa_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + u8 data[200]; +} __attribute__ ((packed)); + +struct ib_sa_sm_ah { + struct ib_ah *ah; + struct kref ref; +}; + +struct ib_sa_port { + struct ib_mad_agent *agent; + struct ib_mr *mr; + struct ib_sa_sm_ah *sm_ah; + struct work_struct update_task; + spinlock_t ah_lock; + u8 port_num; +}; + +struct ib_sa_device { + int start_port, end_port; + struct ib_event_handler event_handler; + struct ib_sa_port port[0]; +}; + +struct ib_sa_query { + void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); + void (*release)(struct ib_sa_query *); + struct ib_sa_port *port; + struct ib_sa_mad *mad; + struct ib_sa_sm_ah *sm_ah; + DECLARE_PCI_UNMAP_ADDR(mapping) + int id; +}; + +struct ib_sa_path_query { + void (*callback)(int, struct ib_sa_path_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + +struct ib_sa_mcmember_query { + void (*callback)(int, struct ib_sa_mcmember_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + +static void ib_sa_add_one(struct ib_device *device); +static void ib_sa_remove_one(struct ib_device *device); + +static struct ib_client sa_client = { + .name = "sa", + .add = ib_sa_add_one, + .remove = ib_sa_remove_one +}; + +static spinlock_t idr_lock; +static DEFINE_IDR(query_idr); + +static spinlock_t tid_lock; +static u32 tid; + +enum { + IB_SA_ATTR_CLASS_PORTINFO = 0x01, + IB_SA_ATTR_NOTICE = 0x02, + IB_SA_ATTR_INFORM_INFO = 0x03, + IB_SA_ATTR_NODE_REC = 0x11, + IB_SA_ATTR_PORT_INFO_REC = 0x12, + IB_SA_ATTR_SL2VL_REC = 0x13, + IB_SA_ATTR_SWITCH_REC = 0x14, + IB_SA_ATTR_LINEAR_FDB_REC = 0x15, + IB_SA_ATTR_RANDOM_FDB_REC = 0x16, + IB_SA_ATTR_MCAST_FDB_REC = 0x17, + IB_SA_ATTR_SM_INFO_REC = 0x18, + IB_SA_ATTR_LINK_REC = 0x20, + IB_SA_ATTR_GUID_INFO_REC = 0x30, + IB_SA_ATTR_SERVICE_REC = 0x31, + IB_SA_ATTR_PARTITION_REC = 0x33, + IB_SA_ATTR_RANGE_REC = 0x34, + IB_SA_ATTR_PATH_REC = 0x35, + IB_SA_ATTR_VL_ARB_REC = 0x36, + IB_SA_ATTR_MC_GROUP_REC = 0x37, + IB_SA_ATTR_MC_MEMBER_REC = 0x38, + IB_SA_ATTR_TRACE_REC = 0x39, + IB_SA_ATTR_MULTI_PATH_REC = 0x3a, + IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b +}; + +#define PATH_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \ + .field_name = "sa_path_rec:" #field + +static const struct ib_field path_rec_table[] = { + { RESERVED, + .offset_words = 0, + .offset_bits = 0, + .size_bits = 32 }, + { RESERVED, + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { PATH_REC_FIELD(dgid), + .offset_words = 2, + 
.offset_bits = 0, + .size_bits = 128 }, + { PATH_REC_FIELD(sgid), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 128 }, + { PATH_REC_FIELD(dlid), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 16 }, + { PATH_REC_FIELD(slid), + .offset_words = 10, + .offset_bits = 16, + .size_bits = 16 }, + { PATH_REC_FIELD(raw_traffic), + .offset_words = 11, + .offset_bits = 0, + .size_bits = 1 }, + { RESERVED, + .offset_words = 11, + .offset_bits = 1, + .size_bits = 3 }, + { PATH_REC_FIELD(flow_label), + .offset_words = 11, + .offset_bits = 4, + .size_bits = 20 }, + { PATH_REC_FIELD(hop_limit), + .offset_words = 11, + .offset_bits = 24, + .size_bits = 8 }, + { PATH_REC_FIELD(traffic_class), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 8 }, + { PATH_REC_FIELD(reversible), + .offset_words = 12, + .offset_bits = 8, + .size_bits = 1 }, + { PATH_REC_FIELD(numb_path), + .offset_words = 12, + .offset_bits = 9, + .size_bits = 7 }, + { PATH_REC_FIELD(pkey), + .offset_words = 12, + .offset_bits = 16, + .size_bits = 16 }, + { RESERVED, + .offset_words = 13, + .offset_bits = 0, + .size_bits = 12 }, + { PATH_REC_FIELD(sl), + .offset_words = 13, + .offset_bits = 12, + .size_bits = 4 }, + { PATH_REC_FIELD(mtu_selector), + .offset_words = 13, + .offset_bits = 16, + .size_bits = 2 }, + { PATH_REC_FIELD(mtu), + .offset_words = 13, + .offset_bits = 18, + .size_bits = 6 }, + { PATH_REC_FIELD(rate_selector), + .offset_words = 13, + .offset_bits = 24, + .size_bits = 2 }, + { PATH_REC_FIELD(rate), + .offset_words = 13, + .offset_bits = 26, + .size_bits = 6 }, + { PATH_REC_FIELD(packet_life_time_selector), + .offset_words = 14, + .offset_bits = 0, + .size_bits = 2 }, + { PATH_REC_FIELD(packet_life_time), + .offset_words = 14, + .offset_bits = 2, + .size_bits = 6 }, + { PATH_REC_FIELD(preference), + .offset_words = 14, + .offset_bits = 8, + .size_bits = 8 }, + { RESERVED, + .offset_words = 14, + .offset_bits = 16, + .size_bits = 48 }, +}; + +#define MCMEMBER_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \ + .field_name = "sa_mcmember_rec:" #field + +static const struct ib_field mcmember_rec_table[] = { + { MCMEMBER_REC_FIELD(mgid), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 128 }, + { MCMEMBER_REC_FIELD(port_gid), + .offset_words = 4, + .offset_bits = 0, + .size_bits = 128 }, + { MCMEMBER_REC_FIELD(qkey), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 32 }, + { MCMEMBER_REC_FIELD(mlid), + .offset_words = 9, + .offset_bits = 0, + .size_bits = 16 }, + { MCMEMBER_REC_FIELD(mtu_selector), + .offset_words = 9, + .offset_bits = 16, + .size_bits = 2 }, + { MCMEMBER_REC_FIELD(mtu), + .offset_words = 9, + .offset_bits = 18, + .size_bits = 6 }, + { MCMEMBER_REC_FIELD(traffic_class), + .offset_words = 9, + .offset_bits = 24, + .size_bits = 8 }, + { MCMEMBER_REC_FIELD(pkey), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 16 }, + { MCMEMBER_REC_FIELD(rate_selector), + .offset_words = 10, + .offset_bits = 16, + .size_bits = 2 }, + { MCMEMBER_REC_FIELD(rate), + .offset_words = 10, + .offset_bits = 18, + .size_bits = 6 }, + { MCMEMBER_REC_FIELD(packet_life_time_selector), + .offset_words = 10, + .offset_bits = 24, + .size_bits = 2 }, + { MCMEMBER_REC_FIELD(packet_life_time), + .offset_words = 10, + .offset_bits = 26, + .size_bits = 6 }, + { MCMEMBER_REC_FIELD(sl), + .offset_words = 11, + .offset_bits = 0, + .size_bits = 4 }, + { MCMEMBER_REC_FIELD(flow_label), + 
.offset_words = 11, + .offset_bits = 4, + .size_bits = 20 }, + { MCMEMBER_REC_FIELD(hop_limit), + .offset_words = 11, + .offset_bits = 24, + .size_bits = 8 }, + { MCMEMBER_REC_FIELD(scope), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 4 }, + { MCMEMBER_REC_FIELD(join_state), + .offset_words = 12, + .offset_bits = 4, + .size_bits = 4 }, + { MCMEMBER_REC_FIELD(proxy_join), + .offset_words = 12, + .offset_bits = 8, + .size_bits = 1 }, + { RESERVED, + .offset_words = 12, + .offset_bits = 9, + .size_bits = 23 }, +}; + +static void free_sm_ah(struct kref *kref) +{ + struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); + + ib_destroy_ah(sm_ah->ah); + kfree(sm_ah); +} + +static void update_sm_ah(void *port_ptr) +{ + struct ib_sa_port *port = port_ptr; + struct ib_sa_sm_ah *new_ah, *old_ah; + struct ib_port_attr port_attr; + struct ib_ah_attr ah_attr; + + if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { + printk(KERN_WARNING "Couldn't query port\n"); + return; + } + + new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); + if (!new_ah) { + printk(KERN_WARNING "Couldn't allocate new SM AH\n"); + return; + } + + kref_init(&new_ah->ref); + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = port_attr.sm_lid; + ah_attr.sl = port_attr.sm_sl; + ah_attr.port_num = port->port_num; + + new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); + if (IS_ERR(new_ah->ah)) { + printk(KERN_WARNING "Couldn't create new SM AH\n"); + kfree(new_ah); + return; + } + + spin_lock_irq(&port->ah_lock); + old_ah = port->sm_ah; + port->sm_ah = new_ah; + spin_unlock_irq(&port->ah_lock); + + if (old_ah) + kref_put(&old_ah->ref, free_sm_ah); +} + +static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) +{ + if (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_SM_CHANGE) { + struct ib_sa_device *sa_dev = + ib_get_client_data(event->device, &sa_client); + + schedule_work(&sa_dev->port[event->element.port_num - + sa_dev->start_port].update_task); + } +} + +/** + * ib_sa_cancel_query - try to cancel an SA query + * @id:ID of query to cancel + * @query:query pointer to cancel + * + * Try to cancel an SA query. If the id and query don't match up or + * the query has already completed, nothing is done. Otherwise the + * query is canceled and will complete with a status of -EINTR. 
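+ *
+ * For example, a caller that saved the ID and query pointer returned
+ * by ib_sa_path_rec_get() could cancel the outstanding query like
+ * this (an illustrative sketch only; my_callback and my_context are
+ * placeholder names, not part of this patch):
+ *
+ *	struct ib_sa_query *sa_query;
+ *	int id;
+ *
+ *	id = ib_sa_path_rec_get(device, port_num, &rec, comp_mask,
+ *				100, GFP_KERNEL, my_callback,
+ *				my_context, &sa_query);
+ *	if (id >= 0)
+ *		ib_sa_cancel_query(id, sa_query);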
+ */ +void ib_sa_cancel_query(int id, struct ib_sa_query *query) +{ + unsigned long flags; + struct ib_mad_agent *agent; + + spin_lock_irqsave(&idr_lock, flags); + if (idr_find(&query_idr, id) != query) { + spin_unlock_irqrestore(&idr_lock, flags); + return; + } + agent = query->port->agent; + spin_unlock_irqrestore(&idr_lock, flags); + + ib_cancel_mad(agent, id); +} +EXPORT_SYMBOL(ib_sa_cancel_query); + +static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) +{ + unsigned long flags; + + memset(mad, 0, sizeof *mad); + + mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; + + spin_lock_irqsave(&tid_lock, flags); + mad->mad_hdr.tid = + cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++); + spin_unlock_irqrestore(&tid_lock, flags); +} + +static int send_mad(struct ib_sa_query *query, int timeout_ms) +{ + struct ib_sa_port *port = query->port; + unsigned long flags; + int ret; + struct ib_sge gather_list; + struct ib_send_wr *bad_wr, wr = { + .opcode = IB_WR_SEND, + .sg_list = &gather_list, + .num_sge = 1, + .send_flags = IB_SEND_SIGNALED, + .wr = { + .ud = { + .mad_hdr = &query->mad->mad_hdr, + .remote_qpn = 1, + .remote_qkey = IB_QP1_QKEY, + .timeout_ms = timeout_ms + } + } + }; + +retry: + if (!idr_pre_get(&query_idr, GFP_ATOMIC)) + return -ENOMEM; + spin_lock_irqsave(&idr_lock, flags); + ret = idr_get_new(&query_idr, query, &query->id); + spin_unlock_irqrestore(&idr_lock, flags); + if (ret == -EAGAIN) + goto retry; + if (ret) + return ret; + + wr.wr_id = query->id; + + spin_lock_irqsave(&port->ah_lock, flags); + kref_get(&port->sm_ah->ref); + query->sm_ah = port->sm_ah; + wr.wr.ud.ah = port->sm_ah->ah; + spin_unlock_irqrestore(&port->ah_lock, flags); + + gather_list.addr = dma_map_single(port->agent->device->dma_device, + query->mad, + sizeof (struct ib_sa_mad), + DMA_TO_DEVICE); + gather_list.length = sizeof (struct ib_sa_mad); + gather_list.lkey = port->mr->lkey; + pci_unmap_addr_set(query, mapping, gather_list.addr); + + ret = ib_post_send_mad(port->agent, &wr, &bad_wr); + if (ret) { + dma_unmap_single(port->agent->device->dma_device, + pci_unmap_addr(query, mapping), + sizeof (struct ib_sa_mad), + DMA_TO_DEVICE); + kref_put(&query->sm_ah->ref, free_sm_ah); + spin_lock_irqsave(&idr_lock, flags); + idr_remove(&query_idr, query->id); + spin_unlock_irqrestore(&idr_lock, flags); + } + + return ret; +} + +static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_path_query *query = + container_of(sa_query, struct ib_sa_path_query, sa_query); + + if (mad) { + struct ib_sa_path_rec rec; + + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) +{ + kfree(sa_query->mad); + kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); +} + +/** + * ib_sa_path_rec_get - Start a Path get query + * @device:device to send query on + * @port_num: port number to send query on + * @rec:Path Record to send in query + * @comp_mask:component mask to send in query + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when query completes, times out or is + * canceled + * @context:opaque user context passed to callback + * @sa_query:query context, used to 
cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path.  The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query.  The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_get() is negative, it is an
+ * error code.  Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+		       struct ib_sa_path_rec *rec,
+		       ib_sa_comp_mask comp_mask,
+		       int timeout_ms, int gfp_mask,
+		       void (*callback)(int status,
+					struct ib_sa_path_rec *resp,
+					void *context),
+		       void *context,
+		       struct ib_sa_query **sa_query)
+{
+	struct ib_sa_path_query *query;
+	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+	struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
+	struct ib_mad_agent *agent = port->agent;
+	int ret;
+
+	query = kmalloc(sizeof *query, gfp_mask);
+	if (!query)
+		return -ENOMEM;
+	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
+	if (!query->sa_query.mad) {
+		kfree(query);
+		return -ENOMEM;
+	}
+
+	query->callback = callback;
+	query->context  = context;
+
+	init_mad(query->sa_query.mad, agent);
+
+	query->sa_query.callback = ib_sa_path_rec_callback;
+	query->sa_query.release  = ib_sa_path_rec_release;
+	query->sa_query.port     = port;
+	query->sa_query.mad->mad_hdr.method  = IB_MGMT_METHOD_GET;
+	query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+
+	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+		rec, query->sa_query.mad->data);
+
+	*sa_query = &query->sa_query;
+	ret = send_mad(&query->sa_query, timeout_ms);
+	if (ret) {
+		*sa_query = NULL;
+		kfree(query->sa_query.mad);
+		kfree(query);
+	}
+
+	return ret ?
ret : query->sa_query.id; +} +EXPORT_SYMBOL(ib_sa_path_rec_get); + +static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_mcmember_query *query = + container_of(sa_query, struct ib_sa_mcmember_query, sa_query); + + if (mad) { + struct ib_sa_mcmember_rec rec; + + ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) +{ + kfree(sa_query->mad); + kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); +} + +int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_mcmember_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; + struct ib_mad_agent *agent = port->agent; + int ret; + + query = kmalloc(sizeof *query, gfp_mask); + if (!query) + return -ENOMEM; + query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); + if (!query->sa_query.mad) { + kfree(query); + return -ENOMEM; + } + + query->callback = callback; + query->context = context; + + init_mad(query->sa_query.mad, agent); + + query->sa_query.callback = ib_sa_mcmember_rec_callback; + query->sa_query.release = ib_sa_mcmember_rec_release; + query->sa_query.port = port; + query->sa_query.mad->mad_hdr.method = method; + query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); + query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), + rec, query->sa_query.mad->data); + + *sa_query = &query->sa_query; + ret = send_mad(&query->sa_query, timeout_ms); + if (ret) { + *sa_query = NULL; + kfree(query->sa_query.mad); + kfree(query); + } + + return ret ? 
ret : query->sa_query.id; +} +EXPORT_SYMBOL(ib_sa_mcmember_rec_query); + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_sa_query *query; + unsigned long flags; + + spin_lock_irqsave(&idr_lock, flags); + query = idr_find(&query_idr, mad_send_wc->wr_id); + spin_unlock_irqrestore(&idr_lock, flags); + + if (!query) + return; + + switch (mad_send_wc->status) { + case IB_WC_SUCCESS: + /* No callback -- already got recv */ + break; + case IB_WC_RESP_TIMEOUT_ERR: + query->callback(query, -ETIMEDOUT, NULL); + break; + case IB_WC_WR_FLUSH_ERR: + query->callback(query, -EINTR, NULL); + break; + default: + query->callback(query, -EIO, NULL); + break; + } + + dma_unmap_single(agent->device->dma_device, + pci_unmap_addr(query, mapping), + sizeof (struct ib_sa_mad), + DMA_TO_DEVICE); + kref_put(&query->sm_ah->ref, free_sm_ah); + + query->release(query); + + spin_lock_irqsave(&idr_lock, flags); + idr_remove(&query_idr, mad_send_wc->wr_id); + spin_unlock_irqrestore(&idr_lock, flags); +} + +static void recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_sa_query *query; + unsigned long flags; + + spin_lock_irqsave(&idr_lock, flags); + query = idr_find(&query_idr, mad_recv_wc->wc->wr_id); + spin_unlock_irqrestore(&idr_lock, flags); + + if (query) { + if (mad_recv_wc->wc->status == IB_WC_SUCCESS) + query->callback(query, + mad_recv_wc->recv_buf.mad->mad_hdr.status ? + -EINVAL : 0, + (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); + else + query->callback(query, -EIO, NULL); + } + + ib_free_recv_mad(mad_recv_wc); +} + +static void ib_sa_add_one(struct ib_device *device) +{ + struct ib_sa_device *sa_dev; + int s, e, i; + + if (device->node_type == IB_NODE_SWITCH) + s = e = 0; + else { + s = 1; + e = device->phys_port_cnt; + } + + sa_dev = kmalloc(sizeof *sa_dev + + (e - s + 1) * sizeof (struct ib_sa_port), + GFP_KERNEL); + if (!sa_dev) + return; + + sa_dev->start_port = s; + sa_dev->end_port = e; + + for (i = 0; i <= e - s; ++i) { + sa_dev->port[i].mr = NULL; + sa_dev->port[i].sm_ah = NULL; + sa_dev->port[i].port_num = i + s; + spin_lock_init(&sa_dev->port[i].ah_lock); + + sa_dev->port[i].agent = + ib_register_mad_agent(device, i + s, IB_QPT_GSI, + NULL, 0, send_handler, + recv_handler, sa_dev); + if (IS_ERR(sa_dev->port[i].agent)) + goto err; + + sa_dev->port[i].mr = ib_get_dma_mr(sa_dev->port[i].agent->qp->pd, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(sa_dev->port[i].mr)) { + ib_unregister_mad_agent(sa_dev->port[i].agent); + goto err; + } + + INIT_WORK(&sa_dev->port[i].update_task, + update_sm_ah, &sa_dev->port[i]); + } + + ib_set_client_data(device, &sa_client, sa_dev); + + /* + * We register our event handler after everything is set up, + * and then update our cached info after the event handler is + * registered to avoid any problems if a port changes state + * during our initialization. 
+ */ + + INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); + if (ib_register_event_handler(&sa_dev->event_handler)) + goto err; + + for (i = 0; i <= e - s; ++i) + update_sm_ah(&sa_dev->port[i]); + + return; + +err: + while (--i >= 0) { + ib_dereg_mr(sa_dev->port[i].mr); + ib_unregister_mad_agent(sa_dev->port[i].agent); + } + + kfree(sa_dev); + + return; +} + +static void ib_sa_remove_one(struct ib_device *device) +{ + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + int i; + + if (!sa_dev) + return; + + ib_unregister_event_handler(&sa_dev->event_handler); + + for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { + ib_unregister_mad_agent(sa_dev->port[i].agent); + kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + } + + kfree(sa_dev); +} + +static int __init ib_sa_init(void) +{ + int ret; + + spin_lock_init(&idr_lock); + spin_lock_init(&tid_lock); + + get_random_bytes(&tid, sizeof tid); + + ret = ib_register_client(&sa_client); + if (ret) + printk(KERN_ERR "Couldn't register ib_sa client\n"); + + return ret; +} + +static void __exit ib_sa_cleanup(void) +{ + ib_unregister_client(&sa_client); +} + +module_init(ib_sa_init); +module_exit(ib_sa_cleanup); diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c new file mode 100644 index 000000000000..6adf653704fc --- /dev/null +++ b/drivers/infiniband/core/smi.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $ + */ + +#include <ib_smi.h> + + +/* + * Fixup a directed route SMP for sending + * Return 0 if the SMP should be discarded + */ +int smi_handle_dr_smp_send(struct ib_smp *smp, + u8 node_type, + int port_num) +{ + u8 hop_ptr, hop_cnt; + + hop_ptr = smp->hop_ptr; + hop_cnt = smp->hop_cnt; + + /* See section 14.2.2.2, Vol 1 IB spec */ + if (!ib_get_smp_direction(smp)) { + /* C14-9:1 */ + if (hop_cnt && hop_ptr == 0) { + smp->hop_ptr++; + return (smp->initial_path[smp->hop_ptr] == + port_num); + } + + /* C14-9:2 */ + if (hop_ptr && hop_ptr < hop_cnt) { + if (node_type != IB_NODE_SWITCH) + return 0; + + /* smp->return_path set when received */ + smp->hop_ptr++; + return (smp->initial_path[smp->hop_ptr] == + port_num); + } + + /* C14-9:3 -- We're at the end of the DR segment of path */ + if (hop_ptr == hop_cnt) { + /* smp->return_path set when received */ + smp->hop_ptr++; + return (node_type == IB_NODE_SWITCH || + smp->dr_dlid == IB_LID_PERMISSIVE); + } + + /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ + /* C14-9:5 -- Fail unreasonable hop pointer */ + return (hop_ptr == hop_cnt + 1); + + } else { + /* C14-13:1 */ + if (hop_cnt && hop_ptr == hop_cnt + 1) { + smp->hop_ptr--; + return (smp->return_path[smp->hop_ptr] == + port_num); + } + + /* C14-13:2 */ + if (2 <= hop_ptr && hop_ptr <= hop_cnt) { + if (node_type != IB_NODE_SWITCH) + return 0; + + smp->hop_ptr--; + return (smp->return_path[smp->hop_ptr] == + port_num); + } + + /* C14-13:3 -- at the end of the DR segment of path */ + if (hop_ptr == 1) { + smp->hop_ptr--; + /* C14-13:3 -- SMPs destined for SM shouldn't be here */ + return (node_type == IB_NODE_SWITCH || + smp->dr_slid == IB_LID_PERMISSIVE); + } + + /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */ + if (hop_ptr == 0) + return 1; + + /* C14-13:5 -- Check for unreasonable hop pointer */ + return 0; + } +} + +/* + * Adjust information for a received SMP + * Return 0 if the SMP should be dropped + */ +int smi_handle_dr_smp_recv(struct ib_smp *smp, + u8 node_type, + int port_num, + int phys_port_cnt) +{ + u8 hop_ptr, hop_cnt; + + hop_ptr = smp->hop_ptr; + hop_cnt = smp->hop_cnt; + + /* See section 14.2.2.2, Vol 1 IB spec */ + if (!ib_get_smp_direction(smp)) { + /* C14-9:1 -- sender should have incremented hop_ptr */ + if (hop_cnt && hop_ptr == 0) + return 0; + + /* C14-9:2 -- intermediate hop */ + if (hop_ptr && hop_ptr < hop_cnt) { + if (node_type != IB_NODE_SWITCH) + return 0; + + smp->return_path[hop_ptr] = port_num; + /* smp->hop_ptr updated when sending */ + return (smp->initial_path[hop_ptr+1] <= phys_port_cnt); + } + + /* C14-9:3 -- We're at the end of the DR segment of path */ + if (hop_ptr == hop_cnt) { + if (hop_cnt) + smp->return_path[hop_ptr] = port_num; + /* smp->hop_ptr updated when sending */ + + return (node_type == IB_NODE_SWITCH || + smp->dr_dlid == IB_LID_PERMISSIVE); + } + + /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ + /* C14-9:5 -- fail unreasonable hop pointer */ + return (hop_ptr == hop_cnt + 1); + + } else { + + /* C14-13:1 */ + if (hop_cnt && hop_ptr == hop_cnt + 1) { + smp->hop_ptr--; + return (smp->return_path[smp->hop_ptr] == + port_num); + } + + /* C14-13:2 */ + if (2 <= hop_ptr && hop_ptr <= hop_cnt) { + if (node_type != IB_NODE_SWITCH) + return 0; + + /* smp->hop_ptr updated when sending */ + return (smp->return_path[hop_ptr-1] <= phys_port_cnt); + } + + /* C14-13:3 -- We're at the end of the DR segment of path */ + if (hop_ptr == 1) { + if (smp->dr_slid == 
IB_LID_PERMISSIVE) { + /* giving SMP to SM - update hop_ptr */ + smp->hop_ptr--; + return 1; + } + /* smp->hop_ptr updated when sending */ + return (node_type == IB_NODE_SWITCH); + } + + /* C14-13:4 -- hop_ptr = 0 -> give to SM */ + /* C14-13:5 -- Check for unreasonable hop pointer */ + return (hop_ptr == 0); + } +} + +/* + * Return 1 if the received DR SMP should be forwarded to the send queue + * Return 0 if the SMP should be completed up the stack + */ +int smi_check_forward_dr_smp(struct ib_smp *smp) +{ + u8 hop_ptr, hop_cnt; + + hop_ptr = smp->hop_ptr; + hop_cnt = smp->hop_cnt; + + if (!ib_get_smp_direction(smp)) { + /* C14-9:2 -- intermediate hop */ + if (hop_ptr && hop_ptr < hop_cnt) + return 1; + + /* C14-9:3 -- at the end of the DR segment of path */ + if (hop_ptr == hop_cnt) + return (smp->dr_dlid == IB_LID_PERMISSIVE); + + /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ + if (hop_ptr == hop_cnt + 1) + return 1; + } else { + /* C14-13:2 */ + if (2 <= hop_ptr && hop_ptr <= hop_cnt) + return 1; + + /* C14-13:3 -- at the end of the DR segment of path */ + if (hop_ptr == 1) + return (smp->dr_slid != IB_LID_PERMISSIVE); + } + return 0; +} diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h new file mode 100644 index 000000000000..db25503a0736 --- /dev/null +++ b/drivers/infiniband/core/smi.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: smi.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#ifndef __SMI_H_ +#define __SMI_H_ + +int smi_handle_dr_smp_recv(struct ib_smp *smp, + u8 node_type, + int port_num, + int phys_port_cnt); +extern int smi_check_forward_dr_smp(struct ib_smp *smp); +extern int smi_handle_dr_smp_send(struct ib_smp *smp, + u8 node_type, + int port_num); +extern int smi_check_local_dr_smp(struct ib_smp *smp, + struct ib_device *device, + int port_num); + +/* + * Return 1 if the SMP should be handled by the local SMA/SM via process_mad + */ +static inline int smi_check_local_smp(struct ib_mad_agent *mad_agent, + struct ib_smp *smp) +{ + /* C14-9:3 -- We're at the end of the DR segment of path */ + /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ + return ((mad_agent->device->process_mad && + !ib_get_smp_direction(smp) && + (smp->hop_ptr == smp->hop_cnt + 1))); +} + +#endif /* __SMI_H_ */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c new file mode 100644 index 000000000000..a200544a7931 --- /dev/null +++ b/drivers/infiniband/core/sysfs.c @@ -0,0 +1,725 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * $Id: sysfs.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include "core_priv.h"
+
+#include <ib_mad.h>
+
+struct ib_port {
+	struct kobject         kobj;
+	struct ib_device      *ibdev;
+	struct attribute_group gid_group;
+	struct attribute     **gid_attr;
+	struct attribute_group pkey_group;
+	struct attribute     **pkey_attr;
+	u8                     port_num;
+};
+
+struct port_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
+	ssize_t (*store)(struct ib_port *, struct port_attribute *,
+			 const char *buf, size_t count);
+};
+
+#define PORT_ATTR(_name, _mode, _show, _store) \
+struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
+#define PORT_ATTR_RO(_name) \
+struct port_attribute port_attr_##_name = __ATTR_RO(_name)
+
+struct port_table_attribute {
+	struct port_attribute attr;
+	int                   index;
+};
+
+static ssize_t port_attr_show(struct kobject *kobj,
+			      struct attribute *attr, char *buf)
+{
+	struct port_attribute *port_attr =
+		container_of(attr, struct port_attribute, attr);
+	struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+
+	if (!port_attr->show)
+		return 0;
+
+	return port_attr->show(p, port_attr, buf);
+}
+
+static struct sysfs_ops port_sysfs_ops = {
+	.show = port_attr_show
+};
+
+static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
+			  char *buf)
+{
+	struct ib_port_attr attr;
+	ssize_t ret;
+
+	static const char *state_name[] = {
+		[IB_PORT_NOP]          = "NOP",
+		[IB_PORT_DOWN]         = "DOWN",
+		[IB_PORT_INIT]         = "INIT",
+		[IB_PORT_ARMED]        = "ARMED",
+		[IB_PORT_ACTIVE]       = "ACTIVE",
+		[IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
+	};
+
+	ret = ib_query_port(p->ibdev, p->port_num, &attr);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%d: %s\n", attr.state,
+		       attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
+		       state_name[attr.state] : "UNKNOWN");
+}
+
+static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
+			char *buf)
+{
+	struct ib_port_attr attr;
+	ssize_t ret;
+
+	ret = ib_query_port(p->ibdev, p->port_num, &attr);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "0x%x\n", attr.lid);
+}
+
+static ssize_t lid_mask_count_show(struct ib_port *p,
+				   struct port_attribute *unused,
+				   char *buf)
+{
+	struct ib_port_attr attr;
+	ssize_t ret;
+
+	ret = ib_query_port(p->ibdev, p->port_num, &attr);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%d\n", attr.lmc);
+}
+
+static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
+			   char *buf)
+{
+	struct ib_port_attr attr;
+	ssize_t ret;
+
+	ret = ib_query_port(p->ibdev, p->port_num, &attr);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "0x%x\n", attr.sm_lid);
+}
+
+static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
+			  char *buf)
+{
+	struct ib_port_attr attr;
+	ssize_t ret;
+
+	ret = ib_query_port(p->ibdev, p->port_num, &attr);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%d\n", attr.sm_sl);
+}
+
+static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
+			     char *buf)
+{
+	struct ib_port_attr attr;
+	ssize_t ret;
+
+	ret = ib_query_port(p->ibdev, p->port_num, &attr);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
+}
+
+static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
+			 char *buf)
+{
+	struct ib_port_attr attr;
+	char *speed = "";
+	int rate;
+	ssize_t ret;
+
+	ret = ib_query_port(p->ibdev, p->port_num, &attr);
+	if (ret)
+		return ret;
+
+	switch (attr.active_speed) {
+	case 2: speed = " DDR"; break;
+	case 4: speed = " QDR"; break;
+	}
+
+	rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
+	if (rate < 0)
+		return -EINVAL;
+
+	return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
+		       rate / 10, rate % 10 ? ".5" : "",
+		       ib_width_enum_to_int(attr.active_width), speed);
+}
".5" : "", + ib_width_enum_to_int(attr.active_width), speed); +} + +static PORT_ATTR_RO(state); +static PORT_ATTR_RO(lid); +static PORT_ATTR_RO(lid_mask_count); +static PORT_ATTR_RO(sm_lid); +static PORT_ATTR_RO(sm_sl); +static PORT_ATTR_RO(cap_mask); +static PORT_ATTR_RO(rate); + +static struct attribute *port_default_attrs[] = { + &port_attr_state.attr, + &port_attr_lid.attr, + &port_attr_lid_mask_count.attr, + &port_attr_sm_lid.attr, + &port_attr_sm_sl.attr, + &port_attr_cap_mask.attr, + &port_attr_rate.attr, + NULL +}; + +static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, + char *buf) +{ + struct port_table_attribute *tab_attr = + container_of(attr, struct port_table_attribute, attr); + union ib_gid gid; + ssize_t ret; + + ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid); + if (ret) + return ret; + + return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(((u16 *) gid.raw)[0]), + be16_to_cpu(((u16 *) gid.raw)[1]), + be16_to_cpu(((u16 *) gid.raw)[2]), + be16_to_cpu(((u16 *) gid.raw)[3]), + be16_to_cpu(((u16 *) gid.raw)[4]), + be16_to_cpu(((u16 *) gid.raw)[5]), + be16_to_cpu(((u16 *) gid.raw)[6]), + be16_to_cpu(((u16 *) gid.raw)[7])); +} + +static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, + char *buf) +{ + struct port_table_attribute *tab_attr = + container_of(attr, struct port_table_attribute, attr); + u16 pkey; + ssize_t ret; + + ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey); + if (ret) + return ret; + + return sprintf(buf, "0x%04x\n", pkey); +} + +#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ +struct port_table_attribute port_pma_attr_##_name = { \ + .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \ + .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \ +} + +static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, + char *buf) +{ + struct port_table_attribute *tab_attr = + container_of(attr, struct port_table_attribute, attr); + int offset = tab_attr->index & 0xffff; + int width = (tab_attr->index >> 16) & 0xff; + struct ib_mad *in_mad = NULL; + struct ib_mad *out_mad = NULL; + ssize_t ret; + + if (!p->ibdev->process_mad) + return sprintf(buf, "N/A (no PMA)\n"); + + in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + if (!in_mad || !out_mad) { + ret = -ENOMEM; + goto out; + } + + memset(in_mad, 0, sizeof *in_mad); + in_mad->mad_hdr.base_version = 1; + in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT; + in_mad->mad_hdr.class_version = 1; + in_mad->mad_hdr.method = IB_MGMT_METHOD_GET; + in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */ + + in_mad->data[41] = p->port_num; /* PortSelect field */ + + if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY, p->port_num, 0xffff, + in_mad, out_mad) & + (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != + (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { + ret = -EINVAL; + goto out; + } + + switch (width) { + case 4: + ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >> + (offset % 4)) & 0xf); + break; + case 8: + ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]); + break; + case 16: + ret = sprintf(buf, "%u\n", + be16_to_cpup((u16 *)(out_mad->data + 40 + offset / 8))); + break; + case 32: + ret = sprintf(buf, "%u\n", + be32_to_cpup((u32 *)(out_mad->data + 40 + offset / 8))); + break; + default: + ret = 0; + } + +out: + kfree(in_mad); + kfree(out_mad); + + return ret; +} + +static 
PORT_PMA_ATTR(symbol_error , 0, 16, 32); +static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48); +static PORT_PMA_ATTR(link_downed , 2, 8, 56); +static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64); +static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80); +static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96); +static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112); +static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128); +static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136); +static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152); +static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156); +static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176); +static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192); +static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224); +static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256); +static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288); + +static struct attribute *pma_attrs[] = { + &port_pma_attr_symbol_error.attr.attr, + &port_pma_attr_link_error_recovery.attr.attr, + &port_pma_attr_link_downed.attr.attr, + &port_pma_attr_port_rcv_errors.attr.attr, + &port_pma_attr_port_rcv_remote_physical_errors.attr.attr, + &port_pma_attr_port_rcv_switch_relay_errors.attr.attr, + &port_pma_attr_port_xmit_discards.attr.attr, + &port_pma_attr_port_xmit_constraint_errors.attr.attr, + &port_pma_attr_port_rcv_constraint_errors.attr.attr, + &port_pma_attr_local_link_integrity_errors.attr.attr, + &port_pma_attr_excessive_buffer_overrun_errors.attr.attr, + &port_pma_attr_VL15_dropped.attr.attr, + &port_pma_attr_port_xmit_data.attr.attr, + &port_pma_attr_port_rcv_data.attr.attr, + &port_pma_attr_port_xmit_packets.attr.attr, + &port_pma_attr_port_rcv_packets.attr.attr, + NULL +}; + +static struct attribute_group pma_group = { + .name = "counters", + .attrs = pma_attrs +}; + +static void ib_port_release(struct kobject *kobj) +{ + struct ib_port *p = container_of(kobj, struct ib_port, kobj); + struct attribute *a; + int i; + + for (i = 0; (a = p->gid_attr[i]); ++i) { + kfree(a->name); + kfree(a); + } + + for (i = 0; (a = p->pkey_attr[i]); ++i) { + kfree(a->name); + kfree(a); + } + + kfree(p->gid_attr); + kfree(p); +} + +static struct kobj_type port_type = { + .release = ib_port_release, + .sysfs_ops = &port_sysfs_ops, + .default_attrs = port_default_attrs +}; + +static void ib_device_release(struct class_device *cdev) +{ + struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + + kfree(dev); +} + +static int ib_device_hotplug(struct class_device *cdev, char **envp, + int num_envp, char *buf, int size) +{ + struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + int i = 0, len = 0; + + if (add_hotplug_env_var(envp, num_envp, &i, buf, size, &len, + "NAME=%s", dev->name)) + return -ENOMEM; + + /* + * It might be nice to pass the node GUID to hotplug, but + * right now the only way to get it is to query the device + * provider, and this can crash during device removal because + * we are will be running after driver removal has started. + * We could add a node_guid field to struct ib_device, or we + * could just let the hotplug script read the node GUID from + * sysfs when devices are added. 
+ */ + + envp[i] = NULL; + return 0; +} + +static int alloc_group(struct attribute ***attr, + ssize_t (*show)(struct ib_port *, + struct port_attribute *, char *buf), + int len) +{ + struct port_table_attribute ***tab_attr = + (struct port_table_attribute ***) attr; + int i; + int ret; + + *tab_attr = kmalloc((1 + len) * sizeof *tab_attr, GFP_KERNEL); + if (!*tab_attr) + return -ENOMEM; + + memset(*tab_attr, 0, (1 + len) * sizeof *tab_attr); + + for (i = 0; i < len; ++i) { + (*tab_attr)[i] = kmalloc(sizeof *(*tab_attr)[i], GFP_KERNEL); + if (!(*tab_attr)[i]) { + ret = -ENOMEM; + goto err; + } + memset((*tab_attr)[i], 0, sizeof *(*tab_attr)[i]); + (*tab_attr)[i]->attr.attr.name = kmalloc(8, GFP_KERNEL); + if (!(*tab_attr)[i]->attr.attr.name) { + ret = -ENOMEM; + goto err; + } + + if (snprintf((*tab_attr)[i]->attr.attr.name, 8, "%d", i) >= 8) { + ret = -ENOMEM; + goto err; + } + + (*tab_attr)[i]->attr.attr.mode = S_IRUGO; + (*tab_attr)[i]->attr.attr.owner = THIS_MODULE; + (*tab_attr)[i]->attr.show = show; + (*tab_attr)[i]->index = i; + } + + return 0; + +err: + for (i = 0; i < len; ++i) { + if ((*tab_attr)[i]) + kfree((*tab_attr)[i]->attr.attr.name); + kfree((*tab_attr)[i]); + } + + kfree(*tab_attr); + + return ret; +} + +static int add_port(struct ib_device *device, int port_num) +{ + struct ib_port *p; + struct ib_port_attr attr; + int i; + int ret; + + ret = ib_query_port(device, port_num, &attr); + if (ret) + return ret; + + p = kmalloc(sizeof *p, GFP_KERNEL); + if (!p) + return -ENOMEM; + memset(p, 0, sizeof *p); + + p->ibdev = device; + p->port_num = port_num; + p->kobj.ktype = &port_type; + + p->kobj.parent = kobject_get(&device->ports_parent); + if (!p->kobj.parent) { + ret = -EBUSY; + goto err; + } + + ret = kobject_set_name(&p->kobj, "%d", port_num); + if (ret) + goto err_put; + + ret = kobject_register(&p->kobj); + if (ret) + goto err_put; + + ret = sysfs_create_group(&p->kobj, &pma_group); + if (ret) + goto err_put; + + ret = alloc_group(&p->gid_attr, show_port_gid, attr.gid_tbl_len); + if (ret) + goto err_remove_pma; + + p->gid_group.name = "gids"; + p->gid_group.attrs = p->gid_attr; + + ret = sysfs_create_group(&p->kobj, &p->gid_group); + if (ret) + goto err_free_gid; + + ret = alloc_group(&p->pkey_attr, show_port_pkey, attr.pkey_tbl_len); + if (ret) + goto err_remove_gid; + + p->pkey_group.name = "pkeys"; + p->pkey_group.attrs = p->pkey_attr; + + ret = sysfs_create_group(&p->kobj, &p->pkey_group); + if (ret) + goto err_free_pkey; + + list_add_tail(&p->kobj.entry, &device->port_list); + + return 0; + +err_free_pkey: + for (i = 0; i < attr.pkey_tbl_len; ++i) { + kfree(p->pkey_attr[i]->name); + kfree(p->pkey_attr[i]); + } + + kfree(p->pkey_attr); + +err_remove_gid: + sysfs_remove_group(&p->kobj, &p->gid_group); + +err_free_gid: + for (i = 0; i < attr.gid_tbl_len; ++i) { + kfree(p->gid_attr[i]->name); + kfree(p->gid_attr[i]); + } + + kfree(p->gid_attr); + +err_remove_pma: + sysfs_remove_group(&p->kobj, &pma_group); + +err_put: + kobject_put(&device->ports_parent); + +err: + kfree(p); + return ret; +} + +static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf) +{ + struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + struct ib_device_attr attr; + ssize_t ret; + + ret = ib_query_device(dev, &attr); + if (ret) + return ret; + + return sprintf(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(((u16 *) &attr.sys_image_guid)[0]), + be16_to_cpu(((u16 *) &attr.sys_image_guid)[1]), + be16_to_cpu(((u16 *) &attr.sys_image_guid)[2]), + be16_to_cpu(((u16 *) 
&attr.sys_image_guid)[3])); +} + +static ssize_t show_node_guid(struct class_device *cdev, char *buf) +{ + struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + struct ib_device_attr attr; + ssize_t ret; + + ret = ib_query_device(dev, &attr); + if (ret) + return ret; + + return sprintf(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(((u16 *) &attr.node_guid)[0]), + be16_to_cpu(((u16 *) &attr.node_guid)[1]), + be16_to_cpu(((u16 *) &attr.node_guid)[2]), + be16_to_cpu(((u16 *) &attr.node_guid)[3])); +} + +static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); +static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); + +static struct class_device_attribute *ib_class_attributes[] = { + &class_device_attr_sys_image_guid, + &class_device_attr_node_guid +}; + +static struct class ib_class = { + .name = "infiniband", + .release = ib_device_release, + .hotplug = ib_device_hotplug, +}; + +int ib_device_register_sysfs(struct ib_device *device) +{ + struct class_device *class_dev = &device->class_dev; + int ret; + int i; + + class_dev->class = &ib_class; + class_dev->class_data = device; + strlcpy(class_dev->class_id, device->name, BUS_ID_SIZE); + + INIT_LIST_HEAD(&device->port_list); + + ret = class_device_register(class_dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { + ret = class_device_create_file(class_dev, ib_class_attributes[i]); + if (ret) + goto err_unregister; + } + + device->ports_parent.parent = kobject_get(&class_dev->kobj); + if (!device->ports_parent.parent) { + ret = -EBUSY; + goto err_unregister; + } + ret = kobject_set_name(&device->ports_parent, "ports"); + if (ret) + goto err_put; + ret = kobject_register(&device->ports_parent); + if (ret) + goto err_put; + + if (device->node_type == IB_NODE_SWITCH) { + ret = add_port(device, 0); + if (ret) + goto err_put; + } else { + int i; + + for (i = 1; i <= device->phys_port_cnt; ++i) { + ret = add_port(device, i); + if (ret) + goto err_put; + } + } + + return 0; + +err_put: + { + struct kobject *p, *t; + struct ib_port *port; + + list_for_each_entry_safe(p, t, &device->port_list, entry) { + list_del(&p->entry); + port = container_of(p, struct ib_port, kobj); + sysfs_remove_group(p, &pma_group); + sysfs_remove_group(p, &port->pkey_group); + sysfs_remove_group(p, &port->gid_group); + kobject_unregister(p); + } + } + + kobject_put(&class_dev->kobj); + +err_unregister: + class_device_unregister(class_dev); + +err: + return ret; +} + +void ib_device_unregister_sysfs(struct ib_device *device) +{ + struct kobject *p, *t; + struct ib_port *port; + + list_for_each_entry_safe(p, t, &device->port_list, entry) { + list_del(&p->entry); + port = container_of(p, struct ib_port, kobj); + sysfs_remove_group(p, &pma_group); + sysfs_remove_group(p, &port->pkey_group); + sysfs_remove_group(p, &port->gid_group); + kobject_unregister(p); + } + + kobject_unregister(&device->ports_parent); + class_device_unregister(&device->class_dev); +} + +int ib_sysfs_setup(void) +{ + return class_register(&ib_class); +} + +void ib_sysfs_cleanup(void) +{ + class_unregister(&ib_class); +} diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c new file mode 100644 index 000000000000..dc4eb1db5e96 --- /dev/null +++ b/drivers/infiniband/core/ud_header.c @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ud_header.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/errno.h> + +#include <ib_pack.h> + +#define STRUCT_FIELD(header, field) \ + .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ + .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ + .field_name = #header ":" #field + +static const struct ib_field lrh_table[] = { + { STRUCT_FIELD(lrh, virtual_lane), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 4 }, + { STRUCT_FIELD(lrh, link_version), + .offset_words = 0, + .offset_bits = 4, + .size_bits = 4 }, + { STRUCT_FIELD(lrh, service_level), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 4 }, + { RESERVED, + .offset_words = 0, + .offset_bits = 12, + .size_bits = 2 }, + { STRUCT_FIELD(lrh, link_next_header), + .offset_words = 0, + .offset_bits = 14, + .size_bits = 2 }, + { STRUCT_FIELD(lrh, destination_lid), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { RESERVED, + .offset_words = 1, + .offset_bits = 0, + .size_bits = 5 }, + { STRUCT_FIELD(lrh, packet_length), + .offset_words = 1, + .offset_bits = 5, + .size_bits = 11 }, + { STRUCT_FIELD(lrh, source_lid), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 16 } +}; + +static const struct ib_field grh_table[] = { + { STRUCT_FIELD(grh, ip_version), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 4 }, + { STRUCT_FIELD(grh, traffic_class), + .offset_words = 0, + .offset_bits = 4, + .size_bits = 8 }, + { STRUCT_FIELD(grh, flow_label), + .offset_words = 0, + .offset_bits = 12, + .size_bits = 20 }, + { STRUCT_FIELD(grh, payload_length), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(grh, next_header), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 8 }, + { STRUCT_FIELD(grh, hop_limit), + .offset_words = 1, + .offset_bits = 24, + .size_bits = 8 }, + { STRUCT_FIELD(grh, source_gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { STRUCT_FIELD(grh, destination_gid), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 128 } +}; + +static const struct ib_field bth_table[] = { + { STRUCT_FIELD(bth, opcode), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 8 }, + { STRUCT_FIELD(bth, solicited_event), + .offset_words = 0, + .offset_bits = 8, + 
.size_bits = 1 }, + { STRUCT_FIELD(bth, mig_req), + .offset_words = 0, + .offset_bits = 9, + .size_bits = 1 }, + { STRUCT_FIELD(bth, pad_count), + .offset_words = 0, + .offset_bits = 10, + .size_bits = 2 }, + { STRUCT_FIELD(bth, transport_header_version), + .offset_words = 0, + .offset_bits = 12, + .size_bits = 4 }, + { STRUCT_FIELD(bth, pkey), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { RESERVED, + .offset_words = 1, + .offset_bits = 0, + .size_bits = 8 }, + { STRUCT_FIELD(bth, destination_qpn), + .offset_words = 1, + .offset_bits = 8, + .size_bits = 24 }, + { STRUCT_FIELD(bth, ack_req), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 1 }, + { RESERVED, + .offset_words = 2, + .offset_bits = 1, + .size_bits = 7 }, + { STRUCT_FIELD(bth, psn), + .offset_words = 2, + .offset_bits = 8, + .size_bits = 24 } +}; + +static const struct ib_field deth_table[] = { + { STRUCT_FIELD(deth, qkey), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 32 }, + { RESERVED, + .offset_words = 1, + .offset_bits = 0, + .size_bits = 8 }, + { STRUCT_FIELD(deth, source_qpn), + .offset_words = 1, + .offset_bits = 8, + .size_bits = 24 } +}; + +/** + * ib_ud_header_init - Initialize UD header structure + * @payload_bytes:Length of packet payload + * @grh_present:GRH flag (if non-zero, GRH will be included) + * @header:Structure to initialize + * + * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header, + * lrh.packet_length, grh.ip_version, grh.payload_length, + * grh.next_header, bth.opcode, bth.pad_count and + * bth.transport_header_version fields of a &struct ib_ud_header given + * the payload length and whether a GRH will be included. + */ +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header) +{ + int header_len; + + memset(header, 0, sizeof *header); + + header_len = + IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES; + if (grh_present) { + header_len += IB_GRH_BYTES; + } + + header->lrh.link_version = 0; + header->lrh.link_next_header = + grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; + header->lrh.packet_length = (IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4 + /* ICRC */ + 3) / 4; /* round up */ + + header->grh_present = grh_present; + if (grh_present) { + header->lrh.packet_length += IB_GRH_BYTES / 4; + + header->grh.ip_version = 6; + header->grh.payload_length = + cpu_to_be16((IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4 + /* ICRC */ + 3) & ~3); /* round up */ + header->grh.next_header = 0x1b; + } + + cpu_to_be16s(&header->lrh.packet_length); + + if (header->immediate_present) + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + else + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; + header->bth.pad_count = (4 - payload_bytes) & 3; + header->bth.transport_header_version = 0; +} +EXPORT_SYMBOL(ib_ud_header_init); + +/** + * ib_ud_header_pack - Pack UD header struct into wire format + * @header:UD header struct + * @buf:Buffer to pack into + * + * ib_ud_header_pack() packs the UD header structure @header into wire + * format in the buffer @buf. 
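+ *
+ * The caller supplies a buffer large enough for the packed header,
+ * and the return value is the number of bytes actually written.  As
+ * an illustrative sketch only (assuming no immediate data), a caller
+ * might size the buffer for the worst case:
+ *
+ *	u8 buf[IB_LRH_BYTES + IB_GRH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES];
+ *	int len = ib_ud_header_pack(&header, buf);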
+ */
+int ib_ud_header_pack(struct ib_ud_header *header,
+		      void *buf)
+{
+	int len = 0;
+
+	ib_pack(lrh_table, ARRAY_SIZE(lrh_table),
+		&header->lrh, buf);
+	len += IB_LRH_BYTES;
+
+	if (header->grh_present) {
+		ib_pack(grh_table, ARRAY_SIZE(grh_table),
+			&header->grh, buf + len);
+		len += IB_GRH_BYTES;
+	}
+
+	ib_pack(bth_table, ARRAY_SIZE(bth_table),
+		&header->bth, buf + len);
+	len += IB_BTH_BYTES;
+
+	ib_pack(deth_table, ARRAY_SIZE(deth_table),
+		&header->deth, buf + len);
+	len += IB_DETH_BYTES;
+
+	if (header->immediate_present) {
+		memcpy(buf + len, &header->immediate_data, sizeof header->immediate_data);
+		len += sizeof header->immediate_data;
+	}
+
+	return len;
+}
+EXPORT_SYMBOL(ib_ud_header_pack);
+
+/**
+ * ib_ud_header_unpack - Unpack UD header struct from wire format
+ * @header:UD header struct
+ * @buf:Buffer to unpack from
+ *
+ * ib_ud_header_unpack() unpacks the UD header structure @header from wire
+ * format in the buffer @buf.
+ */
+int ib_ud_header_unpack(void *buf,
+			struct ib_ud_header *header)
+{
+	ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
+		  buf, &header->lrh);
+	buf += IB_LRH_BYTES;
+
+	if (header->lrh.link_version != 0) {
+		printk(KERN_WARNING "Invalid LRH.link_version %d\n",
+		       header->lrh.link_version);
+		return -EINVAL;
+	}
+
+	switch (header->lrh.link_next_header) {
+	case IB_LNH_IBA_LOCAL:
+		header->grh_present = 0;
+		break;
+
+	case IB_LNH_IBA_GLOBAL:
+		header->grh_present = 1;
+		ib_unpack(grh_table, ARRAY_SIZE(grh_table),
+			  buf, &header->grh);
+		buf += IB_GRH_BYTES;
+
+		if (header->grh.ip_version != 6) {
+			printk(KERN_WARNING "Invalid GRH.ip_version %d\n",
+			       header->grh.ip_version);
+			return -EINVAL;
+		}
+		if (header->grh.next_header != 0x1b) {
+			printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n",
+			       header->grh.next_header);
+			return -EINVAL;
+		}
+		break;
+
+	default:
+		printk(KERN_WARNING "Invalid LRH.link_next_header %d\n",
+		       header->lrh.link_next_header);
+		return -EINVAL;
+	}
+
+	ib_unpack(bth_table, ARRAY_SIZE(bth_table),
+		  buf, &header->bth);
+	buf += IB_BTH_BYTES;
+
+	switch (header->bth.opcode) {
+	case IB_OPCODE_UD_SEND_ONLY:
+		header->immediate_present = 0;
+		break;
+	case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
+		header->immediate_present = 1;
+		break;
+	default:
+		printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n",
+		       header->bth.opcode);
+		return -EINVAL;
+	}
+
+	if (header->bth.transport_header_version != 0) {
+		printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n",
+		       header->bth.transport_header_version);
+		return -EINVAL;
+	}
+
+	ib_unpack(deth_table, ARRAY_SIZE(deth_table),
+		  buf, &header->deth);
+	buf += IB_DETH_BYTES;
+
+	if (header->immediate_present)
+		memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_ud_header_unpack);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
new file mode 100644
index 000000000000..b3a73924141f
--- /dev/null
+++ b/drivers/infiniband/core/user_mad.c
@@ -0,0 +1,738 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: user_mad.c 1389 2004-12-27 22:56:47Z roland $ + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/poll.h> +#include <linux/rwsem.h> +#include <linux/kref.h> +#include <linux/ioctl32.h> + +#include <asm/uaccess.h> + +#include <ib_mad.h> +#include <ib_user_mad.h> + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); +MODULE_LICENSE("Dual BSD/GPL"); + +enum { + IB_UMAD_MAX_PORTS = 256, + IB_UMAD_MAX_AGENTS = 32 +}; + +struct ib_umad_port { + int devnum; + struct cdev dev; + struct class_device class_dev; + struct ib_device *ib_dev; + struct ib_umad_device *umad_dev; + u8 port_num; +}; + +struct ib_umad_device { + int start_port, end_port; + struct kref ref; + struct ib_umad_port port[0]; +}; + +struct ib_umad_file { + struct ib_umad_port *port; + spinlock_t recv_lock; + struct list_head recv_list; + wait_queue_head_t recv_wait; + struct rw_semaphore agent_mutex; + struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; + struct ib_mr *mr[IB_UMAD_MAX_AGENTS]; +}; + +struct ib_umad_packet { + struct ib_user_mad mad; + struct ib_ah *ah; + struct list_head list; + DECLARE_PCI_UNMAP_ADDR(mapping) +}; + +static dev_t base_dev; +static spinlock_t map_lock; +static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); + +static void ib_umad_add_one(struct ib_device *device); +static void ib_umad_remove_one(struct ib_device *device); + +static int queue_packet(struct ib_umad_file *file, + struct ib_mad_agent *agent, + struct ib_umad_packet *packet) +{ + int ret = 1; + + down_read(&file->agent_mutex); + for (packet->mad.id = 0; + packet->mad.id < IB_UMAD_MAX_AGENTS; + packet->mad.id++) + if (agent == file->agent[packet->mad.id]) { + spin_lock_irq(&file->recv_lock); + list_add_tail(&packet->list, &file->recv_list); + spin_unlock_irq(&file->recv_lock); + wake_up_interruptible(&file->recv_wait); + ret = 0; + break; + } + + up_read(&file->agent_mutex); + + return ret; +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *send_wc) +{ + struct ib_umad_file *file = agent->context; + struct ib_umad_packet *packet = + (void *) (unsigned long) 
send_wc->wr_id; + + dma_unmap_single(agent->device->dma_device, + pci_unmap_addr(packet, mapping), + sizeof packet->mad.data, + DMA_TO_DEVICE); + ib_destroy_ah(packet->ah); + + if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { + packet->mad.status = ETIMEDOUT; + + if (!queue_packet(file, agent, packet)) + return; + } + + kfree(packet); +} + +static void recv_handler(struct ib_mad_agent *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_umad_file *file = agent->context; + struct ib_umad_packet *packet; + + if (mad_recv_wc->wc->status != IB_WC_SUCCESS) + goto out; + + packet = kmalloc(sizeof *packet, GFP_KERNEL); + if (!packet) + goto out; + + memset(packet, 0, sizeof *packet); + + memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data); + packet->mad.status = 0; + packet->mad.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); + packet->mad.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.sl = mad_recv_wc->wc->sl; + packet->mad.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); + if (packet->mad.grh_present) { + /* XXX parse GRH */ + packet->mad.gid_index = 0; + packet->mad.hop_limit = 0; + packet->mad.traffic_class = 0; + memset(packet->mad.gid, 0, 16); + packet->mad.flow_label = 0; + } + + if (queue_packet(file, agent, packet)) + kfree(packet); + +out: + ib_free_recv_mad(mad_recv_wc); +} + +static ssize_t ib_umad_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_umad_file *file = filp->private_data; + struct ib_umad_packet *packet; + ssize_t ret; + + if (count < sizeof (struct ib_user_mad)) + return -EINVAL; + + spin_lock_irq(&file->recv_lock); + + while (list_empty(&file->recv_list)) { + spin_unlock_irq(&file->recv_lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->recv_wait, + !list_empty(&file->recv_list))) + return -ERESTARTSYS; + + spin_lock_irq(&file->recv_lock); + } + + packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); + list_del(&packet->list); + + spin_unlock_irq(&file->recv_lock); + + if (copy_to_user(buf, &packet->mad, sizeof packet->mad)) + ret = -EFAULT; + else + ret = sizeof packet->mad; + + kfree(packet); + return ret; +} + +static ssize_t ib_umad_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_umad_file *file = filp->private_data; + struct ib_umad_packet *packet; + struct ib_mad_agent *agent; + struct ib_ah_attr ah_attr; + struct ib_sge gather_list; + struct ib_send_wr *bad_wr, wr = { + .opcode = IB_WR_SEND, + .sg_list = &gather_list, + .num_sge = 1, + .send_flags = IB_SEND_SIGNALED, + }; + u8 method; + u64 *tid; + int ret; + + if (count < sizeof (struct ib_user_mad)) + return -EINVAL; + + packet = kmalloc(sizeof *packet, GFP_KERNEL); + if (!packet) + return -ENOMEM; + + if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) { + kfree(packet); + return -EFAULT; + } + + if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) { + ret = -EINVAL; + goto err; + } + + down_read(&file->agent_mutex); + + agent = file->agent[packet->mad.id]; + if (!agent) { + ret = -EINVAL; + goto err_up; + } + + /* + * If userspace is generating a request that will generate a + * response, we need to make sure the high-order part of the + * transaction ID matches the agent being used to send the + * MAD. 
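+ *
+ * In other words, the TID sent on the wire ends up as (shown only to
+ * illustrate the rewrite performed by the code below):
+ *
+ *	tid = (u64) agent->hi_tid << 32 | (userspace TID & 0xffffffff)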
+ */ + method = ((struct ib_mad_hdr *) packet->mad.data)->method; + + if (!(method & IB_MGMT_METHOD_RESP) && + method != IB_MGMT_METHOD_TRAP_REPRESS && + method != IB_MGMT_METHOD_SEND) { + tid = &((struct ib_mad_hdr *) packet->mad.data)->tid; + *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | + (be64_to_cpup(tid) & 0xffffffff)); + } + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = be16_to_cpu(packet->mad.lid); + ah_attr.sl = packet->mad.sl; + ah_attr.src_path_bits = packet->mad.path_bits; + ah_attr.port_num = file->port->port_num; + if (packet->mad.grh_present) { + ah_attr.ah_flags = IB_AH_GRH; + memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16); + ah_attr.grh.flow_label = packet->mad.flow_label; + ah_attr.grh.hop_limit = packet->mad.hop_limit; + ah_attr.grh.traffic_class = packet->mad.traffic_class; + } + + packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); + if (IS_ERR(packet->ah)) { + ret = PTR_ERR(packet->ah); + goto err_up; + } + + gather_list.addr = dma_map_single(agent->device->dma_device, + packet->mad.data, + sizeof packet->mad.data, + DMA_TO_DEVICE); + gather_list.length = sizeof packet->mad.data; + gather_list.lkey = file->mr[packet->mad.id]->lkey; + pci_unmap_addr_set(packet, mapping, gather_list.addr); + + wr.wr.ud.mad_hdr = (struct ib_mad_hdr *) packet->mad.data; + wr.wr.ud.ah = packet->ah; + wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn); + wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey); + wr.wr.ud.timeout_ms = packet->mad.timeout_ms; + + wr.wr_id = (unsigned long) packet; + + ret = ib_post_send_mad(agent, &wr, &bad_wr); + if (ret) { + dma_unmap_single(agent->device->dma_device, + pci_unmap_addr(packet, mapping), + sizeof packet->mad.data, + DMA_TO_DEVICE); + goto err_up; + } + + up_read(&file->agent_mutex); + + return sizeof packet->mad; + +err_up: + up_read(&file->agent_mutex); + +err: + kfree(packet); + return ret; +} + +static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct ib_umad_file *file = filp->private_data; + + /* we will always be able to post a MAD send */ + unsigned int mask = POLLOUT | POLLWRNORM; + + poll_wait(filp, &file->recv_wait, wait); + + if (!list_empty(&file->recv_list)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) +{ + struct ib_user_mad_reg_req ureq; + struct ib_mad_reg_req req; + struct ib_mad_agent *agent; + int agent_id; + int ret; + + down_write(&file->agent_mutex); + + if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) { + ret = -EFAULT; + goto out; + } + + if (ureq.qpn != 0 && ureq.qpn != 1) { + ret = -EINVAL; + goto out; + } + + for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) + if (!file->agent[agent_id]) + goto found; + + ret = -ENOMEM; + goto out; + +found: + req.mgmt_class = ureq.mgmt_class; + req.mgmt_class_version = ureq.mgmt_class_version; + memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask); + memcpy(req.oui, ureq.oui, sizeof req.oui); + + agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, + ureq.qpn ? 
IB_QPT_GSI : IB_QPT_SMI, + &req, 0, send_handler, recv_handler, + file); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto out; + } + + file->agent[agent_id] = agent; + + file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(file->mr[agent_id])) { + ret = -ENOMEM; + goto err; + } + + if (put_user(agent_id, + (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { + ret = -EFAULT; + goto err_mr; + } + + ret = 0; + goto out; + +err_mr: + ib_dereg_mr(file->mr[agent_id]); + +err: + file->agent[agent_id] = NULL; + ib_unregister_mad_agent(agent); + +out: + up_write(&file->agent_mutex); + return ret; +} + +static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg) +{ + u32 id; + int ret = 0; + + down_write(&file->agent_mutex); + + if (get_user(id, (u32 __user *) arg)) { + ret = -EFAULT; + goto out; + } + + if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) { + ret = -EINVAL; + goto out; + } + + ib_dereg_mr(file->mr[id]); + ib_unregister_mad_agent(file->agent[id]); + file->agent[id] = NULL; + +out: + up_write(&file->agent_mutex); + return ret; +} + +static int ib_umad_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case IB_USER_MAD_REGISTER_AGENT: + return ib_umad_reg_agent(filp->private_data, arg); + case IB_USER_MAD_UNREGISTER_AGENT: + return ib_umad_unreg_agent(filp->private_data, arg); + default: + return -ENOIOCTLCMD; + } +} + +static int ib_umad_open(struct inode *inode, struct file *filp) +{ + struct ib_umad_port *port = + container_of(inode->i_cdev, struct ib_umad_port, dev); + struct ib_umad_file *file; + + file = kmalloc(sizeof *file, GFP_KERNEL); + if (!file) + return -ENOMEM; + + memset(file, 0, sizeof *file); + + spin_lock_init(&file->recv_lock); + init_rwsem(&file->agent_mutex); + INIT_LIST_HEAD(&file->recv_list); + init_waitqueue_head(&file->recv_wait); + + file->port = port; + filp->private_data = file; + + return 0; +} + +static int ib_umad_close(struct inode *inode, struct file *filp) +{ + struct ib_umad_file *file = filp->private_data; + int i; + + for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) + if (file->agent[i]) { + ib_dereg_mr(file->mr[i]); + ib_unregister_mad_agent(file->agent[i]); + } + + kfree(file); + + return 0; +} + +static struct file_operations umad_fops = { + .owner = THIS_MODULE, + .read = ib_umad_read, + .write = ib_umad_write, + .poll = ib_umad_poll, + .ioctl = ib_umad_ioctl, + .open = ib_umad_open, + .release = ib_umad_close +}; + +static struct ib_client umad_client = { + .name = "umad", + .add = ib_umad_add_one, + .remove = ib_umad_remove_one +}; + +static ssize_t show_dev(struct class_device *class_dev, char *buf) +{ + struct ib_umad_port *port = + container_of(class_dev, struct ib_umad_port, class_dev); + + return print_dev_t(buf, port->dev.dev); +} +static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); + +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +{ + struct ib_umad_port *port = + container_of(class_dev, struct ib_umad_port, class_dev); + + return sprintf(buf, "%s\n", port->ib_dev->name); +} +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); + +static ssize_t show_port(struct class_device *class_dev, char *buf) +{ + struct ib_umad_port *port = + container_of(class_dev, struct ib_umad_port, class_dev); + + return sprintf(buf, "%d\n", port->port_num); +} +static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); + +static void ib_umad_release_dev(struct kref *ref) +{ + struct ib_umad_device 
*dev = + container_of(ref, struct ib_umad_device, ref); + + kfree(dev); +} + +static void ib_umad_release_port(struct class_device *class_dev) +{ + struct ib_umad_port *port = + container_of(class_dev, struct ib_umad_port, class_dev); + + cdev_del(&port->dev); + clear_bit(port->devnum, dev_map); + kref_put(&port->umad_dev->ref, ib_umad_release_dev); +} + +static struct class umad_class = { + .name = "infiniband_mad", + .release = ib_umad_release_port +}; + +static ssize_t show_abi_version(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); +} +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static void ib_umad_add_one(struct ib_device *device) +{ + struct ib_umad_device *umad_dev; + int s, e, i; + + if (device->node_type == IB_NODE_SWITCH) + s = e = 0; + else { + s = 1; + e = device->phys_port_cnt; + } + + umad_dev = kmalloc(sizeof *umad_dev + + (e - s + 1) * sizeof (struct ib_umad_port), + GFP_KERNEL); + if (!umad_dev) + return; + + memset(umad_dev, 0, sizeof *umad_dev + + (e - s + 1) * sizeof (struct ib_umad_port)); + + kref_init(&umad_dev->ref); + + umad_dev->start_port = s; + umad_dev->end_port = e; + + for (i = s; i <= e; ++i) { + umad_dev->port[i - s].umad_dev = umad_dev; + kref_get(&umad_dev->ref); + + spin_lock(&map_lock); + umad_dev->port[i - s].devnum = + find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (umad_dev->port[i - s].devnum >= IB_UMAD_MAX_PORTS) { + spin_unlock(&map_lock); + goto err; + } + set_bit(umad_dev->port[i - s].devnum, dev_map); + spin_unlock(&map_lock); + + umad_dev->port[i - s].ib_dev = device; + umad_dev->port[i - s].port_num = i; + + cdev_init(&umad_dev->port[i - s].dev, &umad_fops); + umad_dev->port[i - s].dev.owner = THIS_MODULE; + kobject_set_name(&umad_dev->port[i - s].dev.kobj, + "umad%d", umad_dev->port[i - s].devnum); + if (cdev_add(&umad_dev->port[i - s].dev, base_dev + + umad_dev->port[i - s].devnum, 1)) + goto err; + + umad_dev->port[i - s].class_dev.class = &umad_class; + umad_dev->port[i - s].class_dev.dev = device->dma_device; + snprintf(umad_dev->port[i - s].class_dev.class_id, + BUS_ID_SIZE, "umad%d", umad_dev->port[i - s].devnum); + if (class_device_register(&umad_dev->port[i - s].class_dev)) + goto err_class; + + if (class_device_create_file(&umad_dev->port[i - s].class_dev, + &class_device_attr_dev)) + goto err_class; + if (class_device_create_file(&umad_dev->port[i - s].class_dev, + &class_device_attr_ibdev)) + goto err_class; + if (class_device_create_file(&umad_dev->port[i - s].class_dev, + &class_device_attr_port)) + goto err_class; + } + + ib_set_client_data(device, &umad_client, umad_dev); + + return; + +err_class: + cdev_del(&umad_dev->port[i - s].dev); + clear_bit(umad_dev->port[i - s].devnum, dev_map); + +err: + while (--i >= s) + class_device_unregister(&umad_dev->port[i - s].class_dev); + + kref_put(&umad_dev->ref, ib_umad_release_dev); +} + +static void ib_umad_remove_one(struct ib_device *device) +{ + struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client); + int i; + + if (!umad_dev) + return; + + for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) + class_device_unregister(&umad_dev->port[i].class_dev); + + kref_put(&umad_dev->ref, ib_umad_release_dev); +} + +static int __init ib_umad_init(void) +{ + int ret; + + spin_lock_init(&map_lock); + + ret = alloc_chrdev_region(&base_dev, 0, IB_UMAD_MAX_PORTS, + "infiniband_mad"); + if (ret) { + printk(KERN_ERR "user_mad: couldn't get device number\n"); + goto out; + } + + ret = 
class_register(&umad_class); + if (ret) { + printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); + goto out_chrdev; + } + + ret = class_create_file(&umad_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); + goto out_class; + } + + ret = ib_register_client(&umad_client); + if (ret) { + printk(KERN_ERR "user_mad: couldn't register ib_umad client\n"); + goto out_class; + } + + /* Our ioctls are 32/64 clean */ + ret = register_ioctl32_conversion(IB_USER_MAD_REGISTER_AGENT, NULL); + ret |= register_ioctl32_conversion(IB_USER_MAD_UNREGISTER_AGENT, NULL); + if (ret) { + printk(KERN_ERR "user_mad: couldn't register ioctl32 conversions\n"); + goto out_client; + } + + return 0; + +out_client: + ib_unregister_client(&umad_client); + +out_class: + class_unregister(&umad_class); + +out_chrdev: + unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS); + +out: + return ret; +} + +static void __exit ib_umad_cleanup(void) +{ + unregister_ioctl32_conversion(IB_USER_MAD_REGISTER_AGENT); + unregister_ioctl32_conversion(IB_USER_MAD_UNREGISTER_AGENT); + ib_unregister_client(&umad_client); + class_unregister(&umad_class); + unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS); +} + +module_init(ib_umad_init); +module_exit(ib_umad_cleanup); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c new file mode 100644 index 000000000000..4ed3633171cb --- /dev/null +++ b/drivers/infiniband/core/verbs.c @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/errno.h> +#include <linux/err.h> + +#include <ib_verbs.h> + +/* Protection domains */ + +struct ib_pd *ib_alloc_pd(struct ib_device *device) +{ + struct ib_pd *pd; + + pd = device->alloc_pd(device); + + if (!IS_ERR(pd)) { + pd->device = device; + atomic_set(&pd->usecnt, 0); + } + + return pd; +} +EXPORT_SYMBOL(ib_alloc_pd); + +int ib_dealloc_pd(struct ib_pd *pd) +{ + if (atomic_read(&pd->usecnt)) + return -EBUSY; + + return pd->device->dealloc_pd(pd); +} +EXPORT_SYMBOL(ib_dealloc_pd); + +/* Address handles */ + +struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct ib_ah *ah; + + ah = pd->device->create_ah(pd, ah_attr); + + if (!IS_ERR(ah)) { + ah->device = pd->device; + ah->pd = pd; + atomic_inc(&pd->usecnt); + } + + return ah; +} +EXPORT_SYMBOL(ib_create_ah); + +int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + return ah->device->modify_ah ? + ah->device->modify_ah(ah, ah_attr) : + -ENOSYS; +} +EXPORT_SYMBOL(ib_modify_ah); + +int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + return ah->device->query_ah ? + ah->device->query_ah(ah, ah_attr) : + -ENOSYS; +} +EXPORT_SYMBOL(ib_query_ah); + +int ib_destroy_ah(struct ib_ah *ah) +{ + struct ib_pd *pd; + int ret; + + pd = ah->pd; + ret = ah->device->destroy_ah(ah); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_destroy_ah); + +/* Queue pairs */ + +struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + struct ib_qp *qp; + + qp = pd->device->create_qp(pd, qp_init_attr); + + if (!IS_ERR(qp)) { + qp->device = pd->device; + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->recv_cq = qp_init_attr->recv_cq; + qp->srq = qp_init_attr->srq; + qp->event_handler = qp_init_attr->event_handler; + qp->qp_context = qp_init_attr->qp_context; + atomic_inc(&pd->usecnt); + atomic_inc(&qp_init_attr->send_cq->usecnt); + atomic_inc(&qp_init_attr->recv_cq->usecnt); + if (qp_init_attr->srq) + atomic_inc(&qp_init_attr->srq->usecnt); + } + + return qp; +} +EXPORT_SYMBOL(ib_create_qp); + +int ib_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask) +{ + return qp->device->modify_qp(qp, qp_attr, qp_attr_mask); +} +EXPORT_SYMBOL(ib_modify_qp); + +int ib_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + return qp->device->query_qp ? 
+ qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : + -ENOSYS; +} +EXPORT_SYMBOL(ib_query_qp); + +int ib_destroy_qp(struct ib_qp *qp) +{ + struct ib_pd *pd; + struct ib_cq *scq, *rcq; + struct ib_srq *srq; + int ret; + + pd = qp->pd; + scq = qp->send_cq; + rcq = qp->recv_cq; + srq = qp->srq; + + ret = qp->device->destroy_qp(qp); + if (!ret) { + atomic_dec(&pd->usecnt); + atomic_dec(&scq->usecnt); + atomic_dec(&rcq->usecnt); + if (srq) + atomic_dec(&srq->usecnt); + } + + return ret; +} +EXPORT_SYMBOL(ib_destroy_qp); + +/* Completion queues */ + +struct ib_cq *ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe) +{ + struct ib_cq *cq; + + cq = device->create_cq(device, cqe); + + if (!IS_ERR(cq)) { + cq->device = device; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_set(&cq->usecnt, 0); + } + + return cq; +} +EXPORT_SYMBOL(ib_create_cq); + +int ib_destroy_cq(struct ib_cq *cq) +{ + if (atomic_read(&cq->usecnt)) + return -EBUSY; + + return cq->device->destroy_cq(cq); +} +EXPORT_SYMBOL(ib_destroy_cq); + +int ib_resize_cq(struct ib_cq *cq, + int cqe) +{ + int ret; + + if (!cq->device->resize_cq) + return -ENOSYS; + + ret = cq->device->resize_cq(cq, &cqe); + if (!ret) + cq->cqe = cqe; + + return ret; +} +EXPORT_SYMBOL(ib_resize_cq); + +/* Memory regions */ + +struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *mr; + + mr = pd->device->get_dma_mr(pd, mr_access_flags); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + } + + return mr; +} +EXPORT_SYMBOL(ib_get_dma_mr); + +struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *mr; + + mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + } + + return mr; +} +EXPORT_SYMBOL(ib_reg_phys_mr); + +int ib_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_pd *old_pd; + int ret; + + if (!mr->device->rereg_phys_mr) + return -ENOSYS; + + if (atomic_read(&mr->usecnt)) + return -EBUSY; + + old_pd = mr->pd; + + ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd, + phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + + if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) { + atomic_dec(&old_pd->usecnt); + atomic_inc(&pd->usecnt); + } + + return ret; +} +EXPORT_SYMBOL(ib_rereg_phys_mr); + +int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + return mr->device->query_mr ? 
+ mr->device->query_mr(mr, mr_attr) : -ENOSYS; +} +EXPORT_SYMBOL(ib_query_mr); + +int ib_dereg_mr(struct ib_mr *mr) +{ + struct ib_pd *pd; + int ret; + + if (atomic_read(&mr->usecnt)) + return -EBUSY; + + pd = mr->pd; + ret = mr->device->dereg_mr(mr); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_dereg_mr); + +/* Memory windows */ + +struct ib_mw *ib_alloc_mw(struct ib_pd *pd) +{ + struct ib_mw *mw; + + if (!pd->device->alloc_mw) + return ERR_PTR(-ENOSYS); + + mw = pd->device->alloc_mw(pd); + if (!IS_ERR(mw)) { + mw->device = pd->device; + mw->pd = pd; + atomic_inc(&pd->usecnt); + } + + return mw; +} +EXPORT_SYMBOL(ib_alloc_mw); + +int ib_dealloc_mw(struct ib_mw *mw) +{ + struct ib_pd *pd; + int ret; + + pd = mw->pd; + ret = mw->device->dealloc_mw(mw); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_dealloc_mw); + +/* "Fast" memory regions */ + +struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *fmr; + + if (!pd->device->alloc_fmr) + return ERR_PTR(-ENOSYS); + + fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr); + if (!IS_ERR(fmr)) { + fmr->device = pd->device; + fmr->pd = pd; + atomic_inc(&pd->usecnt); + } + + return fmr; +} +EXPORT_SYMBOL(ib_alloc_fmr); + +int ib_unmap_fmr(struct list_head *fmr_list) +{ + struct ib_fmr *fmr; + + if (list_empty(fmr_list)) + return 0; + + fmr = list_entry(fmr_list->next, struct ib_fmr, list); + return fmr->device->unmap_fmr(fmr_list); +} +EXPORT_SYMBOL(ib_unmap_fmr); + +int ib_dealloc_fmr(struct ib_fmr *fmr) +{ + struct ib_pd *pd; + int ret; + + pd = fmr->pd; + ret = fmr->device->dealloc_fmr(fmr); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_dealloc_fmr); + +/* Multicast groups */ + +int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) +{ + return qp->device->attach_mcast ? + qp->device->attach_mcast(qp, gid, lid) : + -ENOSYS; +} +EXPORT_SYMBOL(ib_attach_mcast); + +int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) +{ + return qp->device->detach_mcast ? + qp->device->detach_mcast(qp, gid, lid) : + -ENOSYS; +} +EXPORT_SYMBOL(ib_detach_mcast); diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig new file mode 100644 index 000000000000..2f303e468764 --- /dev/null +++ b/drivers/infiniband/hw/mthca/Kconfig @@ -0,0 +1,26 @@ +config INFINIBAND_MTHCA + tristate "Mellanox HCA support" + depends on PCI && INFINIBAND + ---help--- + This is a low-level driver for Mellanox InfiniHost host + channel adapters (HCAs), including the MT23108 PCI-X HCA + ("Tavor") and the MT25208 PCI Express HCA ("Arbel"). + +config INFINIBAND_MTHCA_DEBUG + bool "Verbose debugging output" + depends on INFINIBAND_MTHCA + default n + ---help--- + This option causes the mthca driver to produce a bunch of + debug messages. Select this if you are developing the driver + or trying to diagnose a problem. + +config INFINIBAND_MTHCA_SSE_DOORBELL + bool "SSE doorbell code" + depends on INFINIBAND_MTHCA && X86 && !X86_64 + default n + ---help--- + This option will have the mthca driver use SSE instructions + to ring hardware doorbell registers. This may improve + performance for some workloads, but the driver will not run + on processors without SSE instructions. 
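+ +# Illustrative example only: with CONFIG_INFINIBAND enabled, a .config +# fragment such as +# +# CONFIG_INFINIBAND_MTHCA=m +# CONFIG_INFINIBAND_MTHCA_DEBUG=y +# +# builds this driver as the ib_mthca module with the debug messages +# described above compiled in.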
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile new file mode 100644 index 000000000000..e795cdbc71a2 --- /dev/null +++ b/drivers/infiniband/hw/mthca/Makefile @@ -0,0 +1,12 @@ +EXTRA_CFLAGS += -Idrivers/infiniband/include + +ifdef CONFIG_INFINIBAND_MTHCA_DEBUG +EXTRA_CFLAGS += -DDEBUG +endif + +obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o + +ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ + mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ + mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ + mthca_provider.o diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c new file mode 100644 index 000000000000..b1db48dd91d6 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/bitmap.h> + +#include "mthca_dev.h" + +/* Trivial bitmap-based allocator */ +u32 mthca_alloc(struct mthca_alloc *alloc) +{ + u32 obj; + + spin_lock(&alloc->lock); + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); + if (obj >= alloc->max) { + alloc->top = (alloc->top + alloc->max) & alloc->mask; + obj = find_first_zero_bit(alloc->table, alloc->max); + } + + if (obj < alloc->max) { + set_bit(obj, alloc->table); + obj |= alloc->top; + } else + obj = -1; + + spin_unlock(&alloc->lock); + + return obj; +} + +void mthca_free(struct mthca_alloc *alloc, u32 obj) +{ + obj &= alloc->max - 1; + spin_lock(&alloc->lock); + clear_bit(obj, alloc->table); + alloc->last = min(alloc->last, obj); + alloc->top = (alloc->top + alloc->max) & alloc->mask; + spin_unlock(&alloc->lock); +} + +int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, + u32 reserved) +{ + int i; + + /* num must be a power of 2 */ + if (num != 1 << (ffs(num) - 1)) + return -EINVAL; + + alloc->last = 0; + alloc->top = 0; + alloc->max = num; + alloc->mask = mask; + spin_lock_init(&alloc->lock); + alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long), + GFP_KERNEL); + if (!alloc->table) + return -ENOMEM; + + bitmap_zero(alloc->table, num); + for (i = 0; i < reserved; ++i) + set_bit(i, alloc->table); + + return 0; +} + +void mthca_alloc_cleanup(struct mthca_alloc *alloc) +{ + kfree(alloc->table); +} + +/* + * Array of pointers with lazy allocation of leaf pages. Callers of + * _get, _set and _clear methods must use a lock or otherwise + * serialize access to the array. + */ + +void *mthca_array_get(struct mthca_array *array, int index) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + if (array->page_list[p].page) { + int i = index & (PAGE_SIZE / sizeof (void *) - 1); + return array->page_list[p].page[i]; + } else + return NULL; +} + +int mthca_array_set(struct mthca_array *array, int index, void *value) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + /* Allocate with GFP_ATOMIC because we'll be called with locks held. 
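A sleeping allocation such as GFP_KERNEL must not be used under a spinlock.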
*/ + if (!array->page_list[p].page) + array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC); + + if (!array->page_list[p].page) + return -ENOMEM; + + array->page_list[p].page[index & (PAGE_SIZE / sizeof (void *) - 1)] = + value; + ++array->page_list[p].used; + + return 0; +} + +void mthca_array_clear(struct mthca_array *array, int index) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + if (--array->page_list[p].used == 0) { + free_page((unsigned long) array->page_list[p].page); + array->page_list[p].page = NULL; + } + + if (array->page_list[p].used < 0) + pr_debug("Array %p index %d page %d with ref count %d < 0\n", + array, index, p, array->page_list[p].used); +} + +int mthca_array_init(struct mthca_array *array, int nent) +{ + int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; + int i; + + array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL); + if (!array->page_list) + return -ENOMEM; + + for (i = 0; i < npage; ++i) { + array->page_list[i].page = NULL; + array->page_list[i].used = 0; + } + + return 0; +} + +void mthca_array_cleanup(struct mthca_array *array, int nent) +{ + int i; + + for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i) + free_page((unsigned long) array->page_list[i].page); + + kfree(array->page_list); +} diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c new file mode 100644 index 000000000000..c4865130f7c1 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/init.h> + +#include <ib_verbs.h> +#include <ib_cache.h> + +#include "mthca_dev.h" + +struct mthca_av { + u32 port_pd; + u8 reserved1; + u8 g_slid; + u16 dlid; + u8 reserved2; + u8 gid_index; + u8 msg_sr; + u8 hop_limit; + u32 sl_tclass_flowlabel; + u32 dgid[4]; +}; + +int mthca_create_ah(struct mthca_dev *dev, + struct mthca_pd *pd, + struct ib_ah_attr *ah_attr, + struct mthca_ah *ah) +{ + u32 index = -1; + struct mthca_av *av = NULL; + + ah->on_hca = 0; + + if (!atomic_read(&pd->sqp_count) && + !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + index = mthca_alloc(&dev->av_table.alloc); + + /* fall back to allocate in host memory */ + if (index == -1) + goto host_alloc; + + av = kmalloc(sizeof *av, GFP_KERNEL); + if (!av) + goto host_alloc; + + ah->on_hca = 1; + ah->avdma = dev->av_table.ddr_av_base + + index * MTHCA_AV_SIZE; + } + + host_alloc: + if (!ah->on_hca) { + ah->av = pci_pool_alloc(dev->av_table.pool, + SLAB_KERNEL, &ah->avdma); + if (!ah->av) + return -ENOMEM; + + av = ah->av; + } + + ah->key = pd->ntmr.ibmr.lkey; + + memset(av, 0, MTHCA_AV_SIZE); + + av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24)); + av->g_slid = ah_attr->src_path_bits; + av->dlid = cpu_to_be16(ah_attr->dlid); + av->msg_sr = (3 << 4) | /* 2K message */ + ah_attr->static_rate; + av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + if (ah_attr->ah_flags & IB_AH_GRH) { + av->g_slid |= 0x80; + av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len + + ah_attr->grh.sgid_index; + av->hop_limit = ah_attr->grh.hop_limit; + av->sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(av->dgid, ah_attr->grh.dgid.raw, 16); + } else { + /* Arbel workaround -- low byte of GID must be 2 */ + av->dgid[3] = cpu_to_be32(2); + } + + if (0) { + int j; + + mthca_dbg(dev, "Created UDAV at %p/%08lx:\n", + av, (unsigned long) ah->avdma); + for (j = 0; j < 8; ++j) + printk(KERN_DEBUG " [%2x] %08x\n", + j * 4, be32_to_cpu(((u32 *) av)[j])); + } + + if (ah->on_hca) { + memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE, + av, MTHCA_AV_SIZE); + kfree(av); + } + + return 0; +} + +int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah) +{ + if (ah->on_hca) + mthca_free(&dev->av_table.alloc, + (ah->avdma - dev->av_table.ddr_av_base) / + MTHCA_AV_SIZE); + else + pci_pool_free(dev->av_table.pool, ah->av, ah->avdma); + + return 0; +} + +int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, + struct ib_ud_header *header) +{ + if (ah->on_hca) + return -EINVAL; + + header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; + header->lrh.destination_lid = ah->av->dlid; + header->lrh.source_lid = ah->av->g_slid & 0x7f; + if (ah->av->g_slid & 0x80) { + header->grh_present = 1; + header->grh.traffic_class = + (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff; + header->grh.flow_label = + ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); + ib_cached_gid_get(&dev->ib_dev, + be32_to_cpu(ah->av->port_pd) >> 24, + ah->av->gid_index, + &header->grh.source_gid); + memcpy(header->grh.destination_gid.raw, + ah->av->dgid, 16); + } else { + header->grh_present = 0; + } + + return 0; +} + +int __devinit mthca_init_av_table(struct mthca_dev *dev) +{ + int err; + + err = mthca_alloc_init(&dev->av_table.alloc, + dev->av_table.num_ddr_avs, + dev->av_table.num_ddr_avs - 1, + 0); + if (err) + return err; + + dev->av_table.pool = 
pci_pool_create("mthca_av", dev->pdev, + MTHCA_AV_SIZE, + MTHCA_AV_SIZE, 0); + if (!dev->av_table.pool) + goto out_free_alloc; + + if (!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + dev->av_table.av_map = ioremap(pci_resource_start(dev->pdev, 4) + + dev->av_table.ddr_av_base - + dev->ddr_start, + dev->av_table.num_ddr_avs * + MTHCA_AV_SIZE); + if (!dev->av_table.av_map) + goto out_free_pool; + } else + dev->av_table.av_map = NULL; + + return 0; + + out_free_pool: + pci_pool_destroy(dev->av_table.pool); + + out_free_alloc: + mthca_alloc_cleanup(&dev->av_table.alloc); + return -ENOMEM; +} + +void __devexit mthca_cleanup_av_table(struct mthca_dev *dev) +{ + if (dev->av_table.av_map) + iounmap(dev->av_table.av_map); + pci_pool_destroy(dev->av_table.pool); + mthca_alloc_cleanup(&dev->av_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c new file mode 100644 index 000000000000..3b2211ddefab --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -0,0 +1,1573 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/errno.h> +#include <asm/io.h> + +#include "mthca_dev.h" +#include "mthca_config_reg.h" +#include "mthca_cmd.h" + +#define CMD_POLL_TOKEN 0xffff + +enum { + HCR_IN_PARAM_OFFSET = 0x00, + HCR_IN_MODIFIER_OFFSET = 0x08, + HCR_OUT_PARAM_OFFSET = 0x0c, + HCR_TOKEN_OFFSET = 0x14, + HCR_STATUS_OFFSET = 0x18, + + HCR_OPMOD_SHIFT = 12, + HCA_E_BIT = 22, + HCR_GO_BIT = 23 +}; + +enum { + /* initialization and general commands */ + CMD_SYS_EN = 0x1, + CMD_SYS_DIS = 0x2, + CMD_MAP_FA = 0xfff, + CMD_UNMAP_FA = 0xffe, + CMD_RUN_FW = 0xff6, + CMD_MOD_STAT_CFG = 0x34, + CMD_QUERY_DEV_LIM = 0x3, + CMD_QUERY_FW = 0x4, + CMD_ENABLE_LAM = 0xff8, + CMD_DISABLE_LAM = 0xff7, + CMD_QUERY_DDR = 0x5, + CMD_QUERY_ADAPTER = 0x6, + CMD_INIT_HCA = 0x7, + CMD_CLOSE_HCA = 0x8, + CMD_INIT_IB = 0x9, + CMD_CLOSE_IB = 0xa, + CMD_QUERY_HCA = 0xb, + CMD_SET_IB = 0xc, + CMD_ACCESS_DDR = 0x2e, + CMD_MAP_ICM = 0xffa, + CMD_UNMAP_ICM = 0xff9, + CMD_MAP_ICM_AUX = 0xffc, + CMD_UNMAP_ICM_AUX = 0xffb, + CMD_SET_ICM_SIZE = 0xffd, + + /* TPT commands */ + CMD_SW2HW_MPT = 0xd, + CMD_QUERY_MPT = 0xe, + CMD_HW2SW_MPT = 0xf, + CMD_READ_MTT = 0x10, + CMD_WRITE_MTT = 0x11, + CMD_SYNC_TPT = 0x2f, + + /* EQ commands */ + CMD_MAP_EQ = 0x12, + CMD_SW2HW_EQ = 0x13, + CMD_HW2SW_EQ = 0x14, + CMD_QUERY_EQ = 0x15, + + /* CQ commands */ + CMD_SW2HW_CQ = 0x16, + CMD_HW2SW_CQ = 0x17, + CMD_QUERY_CQ = 0x18, + CMD_RESIZE_CQ = 0x2c, + + /* SRQ commands */ + CMD_SW2HW_SRQ = 0x35, + CMD_HW2SW_SRQ = 0x36, + CMD_QUERY_SRQ = 0x37, + + /* QP/EE commands */ + CMD_RST2INIT_QPEE = 0x19, + CMD_INIT2RTR_QPEE = 0x1a, + CMD_RTR2RTS_QPEE = 0x1b, + CMD_RTS2RTS_QPEE = 0x1c, + CMD_SQERR2RTS_QPEE = 0x1d, + CMD_2ERR_QPEE = 0x1e, + CMD_RTS2SQD_QPEE = 0x1f, + CMD_SQD2SQD_QPEE = 0x38, + CMD_SQD2RTS_QPEE = 0x20, + CMD_ERR2RST_QPEE = 0x21, + CMD_QUERY_QPEE = 0x22, + CMD_INIT2INIT_QPEE = 0x2d, + CMD_SUSPEND_QPEE = 0x32, + CMD_UNSUSPEND_QPEE = 0x33, + /* special QPs and management commands */ + CMD_CONF_SPECIAL_QP = 0x23, + CMD_MAD_IFC = 0x24, + + /* multicast commands */ + CMD_READ_MGM = 0x25, + CMD_WRITE_MGM = 0x26, + CMD_MGID_HASH = 0x27, + + /* miscellaneous commands */ + CMD_DIAG_RPRT = 0x30, + CMD_NOP = 0x31, + + /* debug commands */ + CMD_QUERY_DEBUG_MSG = 0x2a, + CMD_SET_DEBUG_MSG = 0x2b, +}; + +/* + * According to Mellanox code, FW may be starved and never complete + * commands. So we can't use strict timeouts described in PRM -- we + * just arbitrarily select 60 seconds for now. 
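+ * (The strict PRM class A/B/C values are kept in the "#if 0" block + * below for reference.)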
+ */ +#if 0 +/* + * Round up and add 1 to make sure we get the full wait time (since we + * will be starting in the middle of a jiffy) + */ +enum { + CMD_TIME_CLASS_A = (HZ + 999) / 1000 + 1, + CMD_TIME_CLASS_B = (HZ + 99) / 100 + 1, + CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1 +}; +#else +enum { + CMD_TIME_CLASS_A = 60 * HZ, + CMD_TIME_CLASS_B = 60 * HZ, + CMD_TIME_CLASS_C = 60 * HZ +}; +#endif + +enum { + GO_BIT_TIMEOUT = HZ * 10 +}; + +struct mthca_cmd_context { + struct completion done; + struct timer_list timer; + int result; + int next; + u64 out_param; + u16 token; + u8 status; +}; + +static inline int go_bit(struct mthca_dev *dev) +{ + return readl(dev->hcr + HCR_STATUS_OFFSET) & + swab32(1 << HCR_GO_BIT); +} + +static int mthca_cmd_post(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + u16 token, + int event) +{ + int err = 0; + + if (down_interruptible(&dev->cmd.hcr_sem)) + return -EINTR; + + if (event) { + unsigned long end = jiffies + GO_BIT_TIMEOUT; + + while (go_bit(dev) && time_before(jiffies, end)) { + set_current_state(TASK_RUNNING); + schedule(); + } + } + + if (go_bit(dev)) { + err = -EAGAIN; + goto out; + } + + /* + * We use writel (instead of something like memcpy_toio) + * because writes of less than 32 bits to the HCR don't work + * (and some architectures such as ia64 implement memcpy_toio + * in terms of writeb). + */ + __raw_writel(cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4); + __raw_writel(cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4); + __raw_writel(cpu_to_be32(in_modifier), dev->hcr + 2 * 4); + __raw_writel(cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4); + __raw_writel(cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4); + __raw_writel(cpu_to_be32(token << 16), dev->hcr + 5 * 4); + + /* __raw_writel may not order writes. */ + wmb(); + + __raw_writel(cpu_to_be32((1 << HCR_GO_BIT) | + (event ? (1 << HCA_E_BIT) : 0) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), dev->hcr + 6 * 4); + +out: + up(&dev->cmd.hcr_sem); + return err; +} + +static int mthca_cmd_poll(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + int out_is_imm, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + int err = 0; + unsigned long end; + + if (down_interruptible(&dev->cmd.poll_sem)) + return -EINTR; + + err = mthca_cmd_post(dev, in_param, + out_param ? 
*out_param : 0, + in_modifier, op_modifier, + op, CMD_POLL_TOKEN, 0); + if (err) + goto out; + + end = timeout + jiffies; + while (go_bit(dev) && time_before(jiffies, end)) { + set_current_state(TASK_RUNNING); + schedule(); + } + + if (go_bit(dev)) { + err = -EBUSY; + goto out; + } + + if (out_is_imm) { + memcpy_fromio(out_param, dev->hcr + HCR_OUT_PARAM_OFFSET, sizeof (u64)); + be64_to_cpus(out_param); + } + + *status = be32_to_cpu(__raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; + +out: + up(&dev->cmd.poll_sem); + return err; +} + +void mthca_cmd_event(struct mthca_dev *dev, + u16 token, + u8 status, + u64 out_param) +{ + struct mthca_cmd_context *context = + &dev->cmd.context[token & dev->cmd.token_mask]; + + /* previously timed out command completing at long last */ + if (token != context->token) + return; + + context->result = 0; + context->status = status; + context->out_param = out_param; + + context->token += dev->cmd.token_mask + 1; + + complete(&context->done); +} + +static void event_timeout(unsigned long context_ptr) +{ + struct mthca_cmd_context *context = + (struct mthca_cmd_context *) context_ptr; + + context->result = -EBUSY; + complete(&context->done); +} + +static int mthca_cmd_wait(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + int out_is_imm, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + int err = 0; + struct mthca_cmd_context *context; + + if (down_interruptible(&dev->cmd.event_sem)) + return -EINTR; + + spin_lock(&dev->cmd.context_lock); + BUG_ON(dev->cmd.free_head < 0); + context = &dev->cmd.context[dev->cmd.free_head]; + dev->cmd.free_head = context->next; + spin_unlock(&dev->cmd.context_lock); + + init_completion(&context->done); + + err = mthca_cmd_post(dev, in_param, + out_param ? *out_param : 0, + in_modifier, op_modifier, + op, context->token, 1); + if (err) + goto out; + + context->timer.expires = jiffies + timeout; + add_timer(&context->timer); + + wait_for_completion(&context->done); + del_timer_sync(&context->timer); + + err = context->result; + if (err) + goto out; + + *status = context->status; + if (*status) + mthca_dbg(dev, "Command %02x completed with status %02x\n", + op, *status); + + if (out_is_imm) + *out_param = context->out_param; + +out: + spin_lock(&dev->cmd.context_lock); + context->next = dev->cmd.free_head; + dev->cmd.free_head = context - dev->cmd.context; + spin_unlock(&dev->cmd.context_lock); + + up(&dev->cmd.event_sem); + return err; +} + +/* Invoke a command with an output mailbox */ +static int mthca_cmd_box(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + if (dev->cmd.use_events) + return mthca_cmd_wait(dev, in_param, &out_param, 0, + in_modifier, op_modifier, op, + timeout, status); + else + return mthca_cmd_poll(dev, in_param, &out_param, 0, + in_modifier, op_modifier, op, + timeout, status); +} + +/* Invoke a command with no output parameter */ +static int mthca_cmd(struct mthca_dev *dev, + u64 in_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + return mthca_cmd_box(dev, in_param, 0, in_modifier, + op_modifier, op, timeout, status); +} + +/* + * Invoke a command with an immediate output parameter (and copy the + * output into the caller's out_param pointer after the command + * executes). 
+ */ +static int mthca_cmd_imm(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + if (dev->cmd.use_events) + return mthca_cmd_wait(dev, in_param, out_param, 1, + in_modifier, op_modifier, op, + timeout, status); + else + return mthca_cmd_poll(dev, in_param, out_param, 1, + in_modifier, op_modifier, op, + timeout, status); +} + +/* + * Switch to using events to issue FW commands (should be called after + * the event queue for command events has been initialized). + */ +int mthca_cmd_use_events(struct mthca_dev *dev) +{ + int i; + + dev->cmd.context = kmalloc(dev->cmd.max_cmds * + sizeof (struct mthca_cmd_context), + GFP_KERNEL); + if (!dev->cmd.context) + return -ENOMEM; + + for (i = 0; i < dev->cmd.max_cmds; ++i) { + dev->cmd.context[i].token = i; + dev->cmd.context[i].next = i + 1; + init_timer(&dev->cmd.context[i].timer); + dev->cmd.context[i].timer.data = + (unsigned long) &dev->cmd.context[i]; + dev->cmd.context[i].timer.function = event_timeout; + } + + dev->cmd.context[dev->cmd.max_cmds - 1].next = -1; + dev->cmd.free_head = 0; + + sema_init(&dev->cmd.event_sem, dev->cmd.max_cmds); + spin_lock_init(&dev->cmd.context_lock); + + for (dev->cmd.token_mask = 1; + dev->cmd.token_mask < dev->cmd.max_cmds; + dev->cmd.token_mask <<= 1) + ; /* nothing */ + --dev->cmd.token_mask; + + dev->cmd.use_events = 1; + down(&dev->cmd.poll_sem); + + return 0; +} + +/* + * Switch back to polling (used when shutting down the device) + */ +void mthca_cmd_use_polling(struct mthca_dev *dev) +{ + int i; + + dev->cmd.use_events = 0; + + for (i = 0; i < dev->cmd.max_cmds; ++i) + down(&dev->cmd.event_sem); + + kfree(dev->cmd.context); + + up(&dev->cmd.poll_sem); +} + +int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) +{ + u64 out; + int ret; + + ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status); + + if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR) + mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, " + "sladdr=%d, SPD source=%s\n", + (int) (out >> 6) & 0xf, (int) (out >> 4) & 3, + (int) (out >> 1) & 7, (int) out & 1 ? "NVMEM" : "DIMM"); + + return ret; +} + +int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status); +} + +int mthca_MAP_FA(struct mthca_dev *dev, int count, + struct scatterlist *sglist, u8 *status) +{ + u32 *inbox; + dma_addr_t indma; + int lg; + int nent = 0; + int i, j; + int err = 0; + int ts = 0; + + inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma); + if (!inbox) + return -ENOMEM; + memset(inbox, 0, PAGE_SIZE); + + for (i = 0; i < count; ++i) { + /* + * We have to pass pages that are aligned to their + * size, so find the least significant 1 in the + * address or size and use that as our log2 size. 
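+ * For example, a chunk at dma address 0x12000 with length 0x2000 + * gives ffs(0x12000 | 0x2000) - 1 = 13: one 8 KB page whose log2 + * size field is 13 - 12 = 1.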
+ */ + lg = ffs(sg_dma_address(sglist + i) | sg_dma_len(sglist + i)) - 1; + if (lg < 12) { + mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%x).\n", + (unsigned long long) sg_dma_address(sglist + i), + sg_dma_len(sglist + i)); + err = -EINVAL; + goto out; + } + for (j = 0; j < sg_dma_len(sglist + i) / (1 << lg); ++j) { + *((__be64 *) (inbox + nent * 4 + 2)) = + cpu_to_be64((sg_dma_address(sglist + i) + + (j << lg)) | + (lg - 12)); + ts += 1 << (lg - 10); + if (++nent == PAGE_SIZE / 16) { + err = mthca_cmd(dev, indma, nent, 0, CMD_MAP_FA, + CMD_TIME_CLASS_B, status); + if (err || *status) + goto out; + nent = 0; + } + } + } + + if (nent) { + err = mthca_cmd(dev, indma, nent, 0, CMD_MAP_FA, + CMD_TIME_CLASS_B, status); + } + + mthca_dbg(dev, "Mapped %d KB of host memory for FW.\n", ts); + +out: + pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma); + return err; +} + +int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status); +} + +int mthca_RUN_FW(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status); +} + +int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) +{ + u32 *outbox; + dma_addr_t outdma; + int err = 0; + u8 lg; + +#define QUERY_FW_OUT_SIZE 0x100 +#define QUERY_FW_VER_OFFSET 0x00 +#define QUERY_FW_MAX_CMD_OFFSET 0x0f +#define QUERY_FW_ERR_START_OFFSET 0x30 +#define QUERY_FW_ERR_SIZE_OFFSET 0x38 + +#define QUERY_FW_START_OFFSET 0x20 +#define QUERY_FW_END_OFFSET 0x28 + +#define QUERY_FW_SIZE_OFFSET 0x00 +#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20 +#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40 +#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48 + + outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma); + if (!outbox) { + return -ENOMEM; + } + + err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW, + CMD_TIME_CLASS_A, status); + + if (err) + goto out; + + MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET); + /* + * FW subminor version is at more significant bits than the minor + * version, so swap here. + */ + dev->fw_ver = (dev->fw_ver & 0xffff00000000ull) | + ((dev->fw_ver & 0xffff0000ull) >> 16) | + ((dev->fw_ver & 0x0000ffffull) << 16); + + MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); + dev->cmd.max_cmds = 1 << lg; + + mthca_dbg(dev, "FW version %012llx, max commands %d\n", + (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); + + if (dev->hca_type == ARBEL_NATIVE) { + MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); + MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); + MTHCA_GET(dev->fw.arbel.eq_arm_base, outbox, QUERY_FW_EQ_ARM_BASE_OFFSET); + MTHCA_GET(dev->fw.arbel.eq_set_ci_base, outbox, QUERY_FW_EQ_SET_CI_BASE_OFFSET); + mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2); + + /* + * Arbel page size is always 4 KB; round up number of + * system pages needed. 
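+ * For example, with 16 KB system pages (PAGE_SHIFT == 14), five + * firmware pages round up to (5 + 4 - 1) >> 2 = 2 system pages.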
+ */ + dev->fw.arbel.fw_pages = + (dev->fw.arbel.fw_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> + (PAGE_SHIFT - 12); + + mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n", + (unsigned long long) dev->fw.arbel.clr_int_base, + (unsigned long long) dev->fw.arbel.eq_arm_base, + (unsigned long long) dev->fw.arbel.eq_set_ci_base); + } else { + MTHCA_GET(dev->fw.tavor.fw_start, outbox, QUERY_FW_START_OFFSET); + MTHCA_GET(dev->fw.tavor.fw_end, outbox, QUERY_FW_END_OFFSET); + + mthca_dbg(dev, "FW size %d KB (start %llx, end %llx)\n", + (int) ((dev->fw.tavor.fw_end - dev->fw.tavor.fw_start) >> 10), + (unsigned long long) dev->fw.tavor.fw_start, + (unsigned long long) dev->fw.tavor.fw_end); + } + +out: + pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma); + return err; +} + +int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) +{ + u8 info; + u32 *outbox; + dma_addr_t outdma; + int err = 0; + +#define ENABLE_LAM_OUT_SIZE 0x100 +#define ENABLE_LAM_START_OFFSET 0x00 +#define ENABLE_LAM_END_OFFSET 0x08 +#define ENABLE_LAM_INFO_OFFSET 0x13 + +#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4) +#define ENABLE_LAM_INFO_ECC_MASK 0x3 + + outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma); + if (!outbox) + return -ENOMEM; + + err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM, + CMD_TIME_CLASS_C, status); + + if (err) + goto out; + + if (*status == MTHCA_CMD_STAT_LAM_NOT_PRE) + goto out; + + MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET); + MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET); + MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET); + + if (!!(info & ENABLE_LAM_INFO_HIDDEN_FLAG) != + !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + mthca_info(dev, "FW reports that HCA-attached memory " + "is %s hidden; does not match PCI config\n", + (info & ENABLE_LAM_INFO_HIDDEN_FLAG) ? + "" : "not"); + } + if (info & ENABLE_LAM_INFO_HIDDEN_FLAG) + mthca_dbg(dev, "HCA-attached memory is hidden.\n"); + + mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n", + (int) ((dev->ddr_end - dev->ddr_start) >> 10), + (unsigned long long) dev->ddr_start, + (unsigned long long) dev->ddr_end); + +out: + pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma); + return err; +} + +int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_DISABLE_LAM, CMD_TIME_CLASS_C, status); +} + +int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) +{ + u8 info; + u32 *outbox; + dma_addr_t outdma; + int err = 0; + +#define QUERY_DDR_OUT_SIZE 0x100 +#define QUERY_DDR_START_OFFSET 0x00 +#define QUERY_DDR_END_OFFSET 0x08 +#define QUERY_DDR_INFO_OFFSET 0x13 + +#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4) +#define QUERY_DDR_INFO_ECC_MASK 0x3 + + outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma); + if (!outbox) + return -ENOMEM; + + err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR, + CMD_TIME_CLASS_A, status); + + if (err) + goto out; + + MTHCA_GET(dev->ddr_start, outbox, QUERY_DDR_START_OFFSET); + MTHCA_GET(dev->ddr_end, outbox, QUERY_DDR_END_OFFSET); + MTHCA_GET(info, outbox, QUERY_DDR_INFO_OFFSET); + + if (!!(info & QUERY_DDR_INFO_HIDDEN_FLAG) != + !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + mthca_info(dev, "FW reports that HCA-attached memory " + "is %s hidden; does not match PCI config\n", + (info & QUERY_DDR_INFO_HIDDEN_FLAG) ? 
+ "" : "not"); + } + if (info & QUERY_DDR_INFO_HIDDEN_FLAG) + mthca_dbg(dev, "HCA-attached memory is hidden.\n"); + + mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n", + (int) ((dev->ddr_end - dev->ddr_start) >> 10), + (unsigned long long) dev->ddr_start, + (unsigned long long) dev->ddr_end); + +out: + pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma); + return err; +} + +int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, + struct mthca_dev_lim *dev_lim, u8 *status) +{ + u32 *outbox; + dma_addr_t outdma; + u8 field; + u16 size; + int err; + +#define QUERY_DEV_LIM_OUT_SIZE 0x100 +#define QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET 0x10 +#define QUERY_DEV_LIM_MAX_QP_SZ_OFFSET 0x11 +#define QUERY_DEV_LIM_RSVD_QP_OFFSET 0x12 +#define QUERY_DEV_LIM_MAX_QP_OFFSET 0x13 +#define QUERY_DEV_LIM_RSVD_SRQ_OFFSET 0x14 +#define QUERY_DEV_LIM_MAX_SRQ_OFFSET 0x15 +#define QUERY_DEV_LIM_RSVD_EEC_OFFSET 0x16 +#define QUERY_DEV_LIM_MAX_EEC_OFFSET 0x17 +#define QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET 0x19 +#define QUERY_DEV_LIM_RSVD_CQ_OFFSET 0x1a +#define QUERY_DEV_LIM_MAX_CQ_OFFSET 0x1b +#define QUERY_DEV_LIM_MAX_MPT_OFFSET 0x1d +#define QUERY_DEV_LIM_RSVD_EQ_OFFSET 0x1e +#define QUERY_DEV_LIM_MAX_EQ_OFFSET 0x1f +#define QUERY_DEV_LIM_RSVD_MTT_OFFSET 0x20 +#define QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET 0x21 +#define QUERY_DEV_LIM_RSVD_MRW_OFFSET 0x22 +#define QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET 0x23 +#define QUERY_DEV_LIM_MAX_AV_OFFSET 0x27 +#define QUERY_DEV_LIM_MAX_REQ_QP_OFFSET 0x29 +#define QUERY_DEV_LIM_MAX_RES_QP_OFFSET 0x2b +#define QUERY_DEV_LIM_MAX_RDMA_OFFSET 0x2f +#define QUERY_DEV_LIM_RSZ_SRQ_OFFSET 0x33 +#define QUERY_DEV_LIM_ACK_DELAY_OFFSET 0x35 +#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36 +#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37 +#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b +#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44 +#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48 +#define QUERY_DEV_LIM_UAR_SZ_OFFSET 0x49 +#define QUERY_DEV_LIM_PAGE_SZ_OFFSET 0x4b +#define QUERY_DEV_LIM_MAX_SG_OFFSET 0x51 +#define QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET 0x52 +#define QUERY_DEV_LIM_MAX_SG_RQ_OFFSET 0x55 +#define QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_LIM_MAX_QP_MCG_OFFSET 0x61 +#define QUERY_DEV_LIM_RSVD_MCG_OFFSET 0x62 +#define QUERY_DEV_LIM_MAX_MCG_OFFSET 0x63 +#define QUERY_DEV_LIM_RSVD_PD_OFFSET 0x64 +#define QUERY_DEV_LIM_MAX_PD_OFFSET 0x65 +#define QUERY_DEV_LIM_RSVD_RDD_OFFSET 0x66 +#define QUERY_DEV_LIM_MAX_RDD_OFFSET 0x67 +#define QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET 0x80 +#define QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET 0x82 +#define QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET 0x84 +#define QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET 0x86 +#define QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET 0x88 +#define QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET 0x8a +#define QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET 0x8c +#define QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET 0x8e +#define QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET 0x90 +#define QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET 0x92 +#define QUERY_DEV_LIM_PBL_SZ_OFFSET 0x96 +#define QUERY_DEV_LIM_BMME_FLAGS_OFFSET 0x97 +#define QUERY_DEV_LIM_RSVD_LKEY_OFFSET 0x98 +#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f +#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0 + + outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma); + if (!outbox) + return -ENOMEM; + + err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM, + CMD_TIME_CLASS_A, status); + + if (err) + goto out; + + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = 1 << field; + MTHCA_GET(field, 
outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); + dev_lim->reserved_qps = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); + dev_lim->max_qps = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_SRQ_OFFSET); + dev_lim->reserved_srqs = 1 << (field >> 4); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_OFFSET); + dev_lim->max_srqs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EEC_OFFSET); + dev_lim->reserved_eecs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EEC_OFFSET); + dev_lim->max_eecs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET); + dev_lim->max_cq_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_CQ_OFFSET); + dev_lim->reserved_cqs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_OFFSET); + dev_lim->max_cqs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MPT_OFFSET); + dev_lim->max_mpts = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EQ_OFFSET); + dev_lim->reserved_eqs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); + dev_lim->max_eqs = 1 << (field & 0x7); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); + dev_lim->reserved_mtts = 1 << (field >> 4); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); + dev_lim->max_mrw_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); + dev_lim->reserved_mrws = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET); + dev_lim->max_mtt_seg = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_REQ_QP_OFFSET); + dev_lim->max_requester_per_qp = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RES_QP_OFFSET); + dev_lim->max_responder_per_qp = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDMA_OFFSET); + dev_lim->max_rdma_global = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_ACK_DELAY_OFFSET); + dev_lim->local_ca_ack_delay = field & 0x1f; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MTU_WIDTH_OFFSET); + dev_lim->max_mtu = field >> 4; + dev_lim->max_port_width = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_VL_PORT_OFFSET); + dev_lim->max_vl = field >> 4; + dev_lim->num_ports = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET); + dev_lim->max_gids = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET); + dev_lim->max_pkeys = 1 << (field & 0xf); + MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_UAR_OFFSET); + dev_lim->reserved_uars = field >> 4; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_UAR_SZ_OFFSET); + dev_lim->uar_size = 1 << ((field & 0x3f) + 20); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_PAGE_SZ_OFFSET); + dev_lim->min_page_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_OFFSET); + dev_lim->max_sg = field; + + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET); + dev_lim->max_desc_sz = size; + + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_MCG_OFFSET); + dev_lim->max_qp_per_mcg = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MCG_OFFSET); + dev_lim->reserved_mgms = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MCG_OFFSET); + dev_lim->max_mcgs = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_PD_OFFSET); + dev_lim->reserved_pds = field >> 4; + 
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PD_OFFSET);
+ dev_lim->max_pds = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_RDD_OFFSET);
+ dev_lim->reserved_rdds = field >> 4;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDD_OFFSET);
+ dev_lim->max_rdds = 1 << (field & 0x3f);
+
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET);
+ dev_lim->eec_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET);
+ dev_lim->qpc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET);
+ dev_lim->eeec_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET);
+ dev_lim->eqpc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET);
+ dev_lim->eqc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET);
+ dev_lim->cqc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET);
+ dev_lim->srq_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET);
+ dev_lim->uar_scratch_entry_sz = size;
+
+ mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
+ dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz);
+ mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
+ dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz);
+ mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
+ dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz);
+ mthca_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
+ dev_lim->reserved_mrws, dev_lim->reserved_mtts);
+ mthca_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
+ dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
+ mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
+ dev_lim->max_qp_per_mcg, dev_lim->reserved_mgms);
+
+ mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
+ dev_lim->hca.arbel.resize_srq = field & 1;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET);
+ dev_lim->hca.arbel.mtt_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
+ dev_lim->hca.arbel.mpt_entry_sz = size;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
+ dev_lim->hca.arbel.max_pbl_sz = 1 << (field & 0x3f);
+ MTHCA_GET(dev_lim->hca.arbel.bmme_flags, outbox,
+ QUERY_DEV_LIM_BMME_FLAGS_OFFSET);
+ MTHCA_GET(dev_lim->hca.arbel.reserved_lkey, outbox,
+ QUERY_DEV_LIM_RSVD_LKEY_OFFSET);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_LAMR_OFFSET);
+ dev_lim->hca.arbel.lam_required = field & 1;
+ MTHCA_GET(dev_lim->hca.arbel.max_icm_sz, outbox,
+ QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET);
+
+ if (dev_lim->hca.arbel.bmme_flags & 1)
+ mthca_dbg(dev, "Base MM extensions: yes "
+ "(flags %d, max PBL %d, rsvd L_Key %08x)\n",
+ dev_lim->hca.arbel.bmme_flags,
+ dev_lim->hca.arbel.max_pbl_sz,
+ dev_lim->hca.arbel.reserved_lkey);
+ else
+ mthca_dbg(dev, "Base MM extensions: no\n");
+
+ mthca_dbg(dev, "Max ICM size %lld MB\n",
+ (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20);
+ } else {
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
+ dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
+ }
+
+out:
+ pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+ return err;
+}
+
+int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
+ struct mthca_adapter *adapter, u8 *status)
+{
+ u32 *outbox;
+ dma_addr_t outdma;
+ int err;
+
+#define QUERY_ADAPTER_OUT_SIZE 0x100
+#define QUERY_ADAPTER_VENDOR_ID_OFFSET 0x00
+#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04
+#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
+#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
+
+ outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma);
+ if (!outbox)
+ return -ENOMEM;
+
+ err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
+ MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
+ MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
+ MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+
+out:
+ pci_free_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, outbox, outdma);
+ return err;
+}
+
+int mthca_INIT_HCA(struct mthca_dev *dev,
+ struct mthca_init_hca_param *param,
+ u8 *status)
+{
+ u32 *inbox;
+ dma_addr_t indma;
+ int err;
+
+#define INIT_HCA_IN_SIZE 0x200
+#define INIT_HCA_FLAGS_OFFSET 0x014
+#define INIT_HCA_QPC_OFFSET 0x020
+#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
+#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
+#define INIT_HCA_EEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x20)
+#define INIT_HCA_LOG_EEC_OFFSET (INIT_HCA_QPC_OFFSET + 0x27)
+#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28)
+#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
+#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
+#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
+#define INIT_HCA_EQPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
+#define INIT_HCA_EEEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
+#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
+#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67)
+#define INIT_HCA_RDB_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70)
+#define INIT_HCA_UDAV_OFFSET 0x0b0
+#define INIT_HCA_UDAV_LKEY_OFFSET (INIT_HCA_UDAV_OFFSET + 0x0)
+#define INIT_HCA_UDAV_PD_OFFSET (INIT_HCA_UDAV_OFFSET + 0x4)
+#define INIT_HCA_MCAST_OFFSET 0x0c0
+#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
+#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
+#define INIT_HCA_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
+#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
+#define INIT_HCA_TPT_OFFSET 0x0f0
+#define INIT_HCA_MPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
+#define INIT_HCA_MTT_SEG_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x09)
+#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
+#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
+#define INIT_HCA_UAR_OFFSET 0x120
+#define INIT_HCA_UAR_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x00)
+#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b)
+#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
+
+ inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
+ memset(inbox, 0, INIT_HCA_IN_SIZE);
+
+#if defined(__LITTLE_ENDIAN)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
+#elif defined(__BIG_ENDIAN)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
+#else
+#error Host endianness not defined
+#endif
+ /* Check port for UD address vector: */
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
+
+ /* We leave wqe_quota, responder_exu, etc. as 0 (default) */
+
+ /* QPC/EEC/CQC/EQC/RDB attributes */
+
+ MTHCA_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET);
+
MTHCA_PUT(inbox, param->eec_base, INIT_HCA_EEC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_eecs, INIT_HCA_LOG_EEC_OFFSET); + MTHCA_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET); + MTHCA_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET); + MTHCA_PUT(inbox, param->eqpc_base, INIT_HCA_EQPC_BASE_OFFSET); + MTHCA_PUT(inbox, param->eeec_base, INIT_HCA_EEEC_BASE_OFFSET); + MTHCA_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); + MTHCA_PUT(inbox, param->rdb_base, INIT_HCA_RDB_BASE_OFFSET); + + /* UD AV attributes */ + + /* multicast attributes */ + + MTHCA_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MTHCA_PUT(inbox, param->mc_hash_sz, INIT_HCA_MC_HASH_SZ_OFFSET); + MTHCA_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + + /* TPT attributes */ + + MTHCA_PUT(inbox, param->mpt_base, INIT_HCA_MPT_BASE_OFFSET); + MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET); + MTHCA_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); + MTHCA_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); + + /* UAR attributes */ + { + u8 uar_page_sz = PAGE_SHIFT - 12; + MTHCA_PUT(inbox, uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET); + MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET); + } + + err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA, + HZ, status); + + pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + return err; +} + +int mthca_INIT_IB(struct mthca_dev *dev, + struct mthca_init_ib_param *param, + int port, u8 *status) +{ + u32 *inbox; + dma_addr_t indma; + int err; + u32 flags; + +#define INIT_IB_IN_SIZE 56 +#define INIT_IB_FLAGS_OFFSET 0x00 +#define INIT_IB_FLAG_SIG (1 << 18) +#define INIT_IB_FLAG_NG (1 << 17) +#define INIT_IB_FLAG_G0 (1 << 16) +#define INIT_IB_FLAG_1X (1 << 8) +#define INIT_IB_FLAG_4X (1 << 9) +#define INIT_IB_FLAG_12X (1 << 11) +#define INIT_IB_VL_SHIFT 4 +#define INIT_IB_MTU_SHIFT 12 +#define INIT_IB_MAX_GID_OFFSET 0x06 +#define INIT_IB_MAX_PKEY_OFFSET 0x0a +#define INIT_IB_GUID0_OFFSET 0x10 +#define INIT_IB_NODE_GUID_OFFSET 0x18 +#define INIT_IB_SI_GUID_OFFSET 0x20 + + inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma); + if (!inbox) + return -ENOMEM; + + memset(inbox, 0, INIT_IB_IN_SIZE); + + flags = 0; + flags |= param->enable_1x ? INIT_IB_FLAG_1X : 0; + flags |= param->enable_4x ? INIT_IB_FLAG_4X : 0; + flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0; + flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0; + flags |= param->set_si_guid ? 
INIT_IB_FLAG_SIG : 0;
+ flags |= param->vl_cap << INIT_IB_VL_SHIFT;
+ flags |= param->mtu_cap << INIT_IB_MTU_SHIFT;
+ MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET);
+
+ MTHCA_PUT(inbox, param->gid_cap, INIT_IB_MAX_GID_OFFSET);
+ MTHCA_PUT(inbox, param->pkey_cap, INIT_IB_MAX_PKEY_OFFSET);
+ MTHCA_PUT(inbox, param->guid0, INIT_IB_GUID0_OFFSET);
+ MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
+ MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET);
+
+ err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB,
+ CMD_TIME_CLASS_A, status);
+
+ pci_free_consistent(dev->pdev, INIT_IB_IN_SIZE, inbox, indma);
+ return err;
+}
+
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status)
+{
+ return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, HZ, status);
+}
+
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status);
+}
+
+int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+ int mpt_index, u8 *status)
+{
+ dma_addr_t indma;
+ int err;
+
+ indma = pci_map_single(dev->pdev, mpt_entry,
+ MTHCA_MPT_ENTRY_SIZE,
+ PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT,
+ CMD_TIME_CLASS_B, status);
+
+ pci_unmap_single(dev->pdev, indma,
+ MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE);
+ return err;
+}
+
+int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+ int mpt_index, u8 *status)
+{
+ dma_addr_t outdma = 0;
+ int err;
+
+ if (mpt_entry) {
+ outdma = pci_map_single(dev->pdev, mpt_entry,
+ MTHCA_MPT_ENTRY_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(outdma))
+ return -ENOMEM;
+ }
+
+ err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry,
+ CMD_HW2SW_MPT,
+ CMD_TIME_CLASS_B, status);
+
+ if (mpt_entry)
+ pci_unmap_single(dev->pdev, outdma,
+ MTHCA_MPT_ENTRY_SIZE,
+ PCI_DMA_FROMDEVICE);
+ return err;
+}
+
+int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+ int num_mtt, u8 *status)
+{
+ dma_addr_t indma;
+ int err;
+
+ indma = pci_map_single(dev->pdev, mtt_entry,
+ (num_mtt + 2) * 8,
+ PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(indma))
+ return -ENOMEM;
+
+ err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT,
+ CMD_TIME_CLASS_B, status);
+
+ pci_unmap_single(dev->pdev, indma,
+ (num_mtt + 2) * 8, PCI_DMA_TODEVICE);
+ return err;
+}
+
+int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
+ int eq_num, u8 *status)
+{
+ mthca_dbg(dev, "%s mask %016llx for eqn %d\n",
+ unmap ?
"Clearing" : "Setting", + (unsigned long long) event_mask, eq_num); + return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num, + 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status); +} + +int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, + int eq_num, u8 *status) +{ + dma_addr_t indma; + int err; + + indma = pci_map_single(dev->pdev, eq_context, + MTHCA_EQ_CONTEXT_SIZE, + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(indma)) + return -ENOMEM; + + err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ, + CMD_TIME_CLASS_A, status); + + pci_unmap_single(dev->pdev, indma, + MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE); + return err; +} + +int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, + int eq_num, u8 *status) +{ + dma_addr_t outdma = 0; + int err; + + outdma = pci_map_single(dev->pdev, eq_context, + MTHCA_EQ_CONTEXT_SIZE, + PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(outdma)) + return -ENOMEM; + + err = mthca_cmd_box(dev, 0, outdma, eq_num, 0, + CMD_HW2SW_EQ, + CMD_TIME_CLASS_A, status); + + pci_unmap_single(dev->pdev, outdma, + MTHCA_EQ_CONTEXT_SIZE, + PCI_DMA_FROMDEVICE); + return err; +} + +int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, + int cq_num, u8 *status) +{ + dma_addr_t indma; + int err; + + indma = pci_map_single(dev->pdev, cq_context, + MTHCA_CQ_CONTEXT_SIZE, + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(indma)) + return -ENOMEM; + + err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ, + CMD_TIME_CLASS_A, status); + + pci_unmap_single(dev->pdev, indma, + MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE); + return err; +} + +int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, + int cq_num, u8 *status) +{ + dma_addr_t outdma = 0; + int err; + + outdma = pci_map_single(dev->pdev, cq_context, + MTHCA_CQ_CONTEXT_SIZE, + PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(outdma)) + return -ENOMEM; + + err = mthca_cmd_box(dev, 0, outdma, cq_num, 0, + CMD_HW2SW_CQ, + CMD_TIME_CLASS_A, status); + + pci_unmap_single(dev->pdev, outdma, + MTHCA_CQ_CONTEXT_SIZE, + PCI_DMA_FROMDEVICE); + return err; +} + +int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, + int is_ee, void *qp_context, u32 optmask, + u8 *status) +{ + static const u16 op[] = { + [MTHCA_TRANS_RST2INIT] = CMD_RST2INIT_QPEE, + [MTHCA_TRANS_INIT2INIT] = CMD_INIT2INIT_QPEE, + [MTHCA_TRANS_INIT2RTR] = CMD_INIT2RTR_QPEE, + [MTHCA_TRANS_RTR2RTS] = CMD_RTR2RTS_QPEE, + [MTHCA_TRANS_RTS2RTS] = CMD_RTS2RTS_QPEE, + [MTHCA_TRANS_SQERR2RTS] = CMD_SQERR2RTS_QPEE, + [MTHCA_TRANS_ANY2ERR] = CMD_2ERR_QPEE, + [MTHCA_TRANS_RTS2SQD] = CMD_RTS2SQD_QPEE, + [MTHCA_TRANS_SQD2SQD] = CMD_SQD2SQD_QPEE, + [MTHCA_TRANS_SQD2RTS] = CMD_SQD2RTS_QPEE, + [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE + }; + u8 op_mod = 0; + + dma_addr_t indma; + int err; + + if (trans < 0 || trans >= ARRAY_SIZE(op)) + return -EINVAL; + + if (trans == MTHCA_TRANS_ANY2RST) { + indma = 0; + op_mod = 3; /* don't write outbox, any->reset */ + + /* For debugging */ + qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, + &indma); + op_mod = 2; /* write outbox, any->reset */ + } else { + indma = pci_map_single(dev->pdev, qp_context, + MTHCA_QP_CONTEXT_SIZE, + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(indma)) + return -ENOMEM; + + if (0) { + int i; + mthca_dbg(dev, "Dumping QP context:\n"); + printk(" %08x\n", be32_to_cpup(qp_context)); + for (i = 0; i < 0x100 / 4; ++i) { + if (i % 8 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); + if ((i + 1) % 8 == 0) + printk("\n"); + } + } + } + + if 
(trans == MTHCA_TRANS_ANY2RST) { + err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num, + op_mod, op[trans], CMD_TIME_CLASS_C, status); + + if (0) { + int i; + mthca_dbg(dev, "Dumping QP context:\n"); + printk(" %08x\n", be32_to_cpup(qp_context)); + for (i = 0; i < 0x100 / 4; ++i) { + if (i % 8 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); + if ((i + 1) % 8 == 0) + printk("\n"); + } + } + + } else + err = mthca_cmd(dev, indma, (!!is_ee << 24) | num, + op_mod, op[trans], CMD_TIME_CLASS_C, status); + + if (trans != MTHCA_TRANS_ANY2RST) + pci_unmap_single(dev->pdev, indma, + MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE); + else + pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, + qp_context, indma); + return err; +} + +int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, + void *qp_context, u8 *status) +{ + dma_addr_t outdma = 0; + int err; + + outdma = pci_map_single(dev->pdev, qp_context, + MTHCA_QP_CONTEXT_SIZE, + PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(outdma)) + return -ENOMEM; + + err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0, + CMD_QUERY_QPEE, + CMD_TIME_CLASS_A, status); + + pci_unmap_single(dev->pdev, outdma, + MTHCA_QP_CONTEXT_SIZE, + PCI_DMA_FROMDEVICE); + return err; +} + +int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, + u8 *status) +{ + u8 op_mod; + + switch (type) { + case IB_QPT_SMI: + op_mod = 0; + break; + case IB_QPT_GSI: + op_mod = 1; + break; + case IB_QPT_RAW_IPV6: + op_mod = 2; + break; + case IB_QPT_RAW_ETY: + op_mod = 3; + break; + default: + return -EINVAL; + } + + return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP, + CMD_TIME_CLASS_B, status); +} + +int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int port, + void *in_mad, void *response_mad, u8 *status) { + void *box; + dma_addr_t dma; + int err; + +#define MAD_IFC_BOX_SIZE 512 + + box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma); + if (!box) + return -ENOMEM; + + memcpy(box, in_mad, 256); + + err = mthca_cmd_box(dev, dma, dma + 256, port, !!ignore_mkey, + CMD_MAD_IFC, CMD_TIME_CLASS_C, status); + + if (!err && !*status) + memcpy(response_mad, box + 256, 256); + + pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma); + return err; +} + +int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, + u8 *status) +{ + dma_addr_t outdma = 0; + int err; + + outdma = pci_map_single(dev->pdev, mgm, + MTHCA_MGM_ENTRY_SIZE, + PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(outdma)) + return -ENOMEM; + + err = mthca_cmd_box(dev, 0, outdma, index, 0, + CMD_READ_MGM, + CMD_TIME_CLASS_A, status); + + pci_unmap_single(dev->pdev, outdma, + MTHCA_MGM_ENTRY_SIZE, + PCI_DMA_FROMDEVICE); + return err; +} + +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, + u8 *status) +{ + dma_addr_t indma; + int err; + + indma = pci_map_single(dev->pdev, mgm, + MTHCA_MGM_ENTRY_SIZE, + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(indma)) + return -ENOMEM; + + err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM, + CMD_TIME_CLASS_A, status); + + pci_unmap_single(dev->pdev, indma, + MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE); + return err; +} + +int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, + u8 *status) +{ + dma_addr_t indma; + u64 imm; + int err; + + indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(indma)) + return -ENOMEM; + + err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH, + CMD_TIME_CLASS_A, status); + *hash = imm; + + 
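+ /*
+ * The hash comes back in the low 16 bits of the command's 64-bit
+ * immediate output parameter, so the narrowing assignment above
+ * is intentional.
+ */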
pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
new file mode 100644
index 000000000000..9373b83cfe57
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_CMD_H
+#define MTHCA_CMD_H
+
+#include <ib_verbs.h>
+
+#define MTHCA_CMD_MAILBOX_ALIGN 16UL
+#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1)
+
+enum {
+ /* command completed successfully: */
+ MTHCA_CMD_STAT_OK = 0x00,
+ /* Internal error (such as a bus error) occurred while processing command: */
+ MTHCA_CMD_STAT_INTERNAL_ERR = 0x01,
+ /* Operation/command not supported or opcode modifier not supported: */
+ MTHCA_CMD_STAT_BAD_OP = 0x02,
+ /* Parameter not supported or parameter out of range: */
+ MTHCA_CMD_STAT_BAD_PARAM = 0x03,
+ /* System not enabled or bad system state: */
+ MTHCA_CMD_STAT_BAD_SYS_STATE = 0x04,
+ /* Attempt to access reserved or unallocated resource: */
+ MTHCA_CMD_STAT_BAD_RESOURCE = 0x05,
+ /* Requested resource is currently executing a command, or is otherwise busy: */
+ MTHCA_CMD_STAT_RESOURCE_BUSY = 0x06,
+ /* memory error: */
+ MTHCA_CMD_STAT_DDR_MEM_ERR = 0x07,
+ /* Required capability exceeds device limits: */
+ MTHCA_CMD_STAT_EXCEED_LIM = 0x08,
+ /* Resource is not in the appropriate state or ownership: */
+ MTHCA_CMD_STAT_BAD_RES_STATE = 0x09,
+ /* Index out of range: */
+ MTHCA_CMD_STAT_BAD_INDEX = 0x0a,
+ /* FW image corrupted: */
+ MTHCA_CMD_STAT_BAD_NVMEM = 0x0b,
+ /* Attempt to modify a QP/EE which is not in the presumed state: */
+ MTHCA_CMD_STAT_BAD_QPEE_STATE = 0x10,
+ /* Bad segment parameters (Address/Size): */
+ MTHCA_CMD_STAT_BAD_SEG_PARAM = 0x20,
+ /* Memory Region has Memory Windows bound to it: */
+ MTHCA_CMD_STAT_REG_BOUND = 0x21,
+ /* HCA local attached memory not present: */
+ MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
+ /* Bad management packet (silently discarded): */
+ MTHCA_CMD_STAT_BAD_PKT = 0x30,
+ /* More outstanding CQEs in CQ than new CQ size: */
+ MTHCA_CMD_STAT_BAD_SIZE =
0x40 +}; + +enum { + MTHCA_TRANS_INVALID = 0, + MTHCA_TRANS_RST2INIT, + MTHCA_TRANS_INIT2INIT, + MTHCA_TRANS_INIT2RTR, + MTHCA_TRANS_RTR2RTS, + MTHCA_TRANS_RTS2RTS, + MTHCA_TRANS_SQERR2RTS, + MTHCA_TRANS_ANY2ERR, + MTHCA_TRANS_RTS2SQD, + MTHCA_TRANS_SQD2SQD, + MTHCA_TRANS_SQD2RTS, + MTHCA_TRANS_ANY2RST, +}; + +enum { + DEV_LIM_FLAG_SRQ = 1 << 6 +}; + +struct mthca_dev_lim { + int max_srq_sz; + int max_qp_sz; + int reserved_qps; + int max_qps; + int reserved_srqs; + int max_srqs; + int reserved_eecs; + int max_eecs; + int max_cq_sz; + int reserved_cqs; + int max_cqs; + int max_mpts; + int reserved_eqs; + int max_eqs; + int reserved_mtts; + int max_mrw_sz; + int reserved_mrws; + int max_mtt_seg; + int max_requester_per_qp; + int max_responder_per_qp; + int max_rdma_global; + int local_ca_ack_delay; + int max_mtu; + int max_port_width; + int max_vl; + int num_ports; + int max_gids; + int max_pkeys; + u32 flags; + int reserved_uars; + int uar_size; + int min_page_sz; + int max_sg; + int max_desc_sz; + int max_qp_per_mcg; + int reserved_mgms; + int max_mcgs; + int reserved_pds; + int max_pds; + int reserved_rdds; + int max_rdds; + int eec_entry_sz; + int qpc_entry_sz; + int eeec_entry_sz; + int eqpc_entry_sz; + int eqc_entry_sz; + int cqc_entry_sz; + int srq_entry_sz; + int uar_scratch_entry_sz; + union { + struct { + int max_avs; + } tavor; + struct { + int resize_srq; + int mtt_entry_sz; + int mpt_entry_sz; + int max_pbl_sz; + u8 bmme_flags; + u32 reserved_lkey; + int lam_required; + u64 max_icm_sz; + } arbel; + } hca; +}; + +struct mthca_adapter { + u32 vendor_id; + u32 device_id; + u32 revision_id; + u8 inta_pin; +}; + +struct mthca_init_hca_param { + u64 qpc_base; + u8 log_num_qps; + u64 eec_base; + u8 log_num_eecs; + u64 srqc_base; + u8 log_num_srqs; + u64 cqc_base; + u8 log_num_cqs; + u64 eqpc_base; + u64 eeec_base; + u64 eqc_base; + u8 log_num_eqs; + u64 rdb_base; + u64 mc_base; + u16 log_mc_entry_sz; + u16 mc_hash_sz; + u8 log_mc_table_sz; + u64 mpt_base; + u8 mtt_seg_sz; + u8 log_mpt_sz; + u64 mtt_base; + u64 uar_scratch_base; +}; + +struct mthca_init_ib_param { + int enable_1x; + int enable_4x; + int vl_cap; + int mtu_cap; + u16 gid_cap; + u16 pkey_cap; + int set_guid0; + u64 guid0; + int set_node_guid; + u64 node_guid; + int set_si_guid; + u64 si_guid; +}; + +int mthca_cmd_use_events(struct mthca_dev *dev); +void mthca_cmd_use_polling(struct mthca_dev *dev); +void mthca_cmd_event(struct mthca_dev *dev, u16 token, + u8 status, u64 out_param); + +int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); +int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status); +int mthca_MAP_FA(struct mthca_dev *dev, int count, + struct scatterlist *sglist, u8 *status); +int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status); +int mthca_RUN_FW(struct mthca_dev *dev, u8 *status); +int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status); +int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status); +int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status); +int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status); +int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, + struct mthca_dev_lim *dev_lim, u8 *status); +int mthca_QUERY_ADAPTER(struct mthca_dev *dev, + struct mthca_adapter *adapter, u8 *status); +int mthca_INIT_HCA(struct mthca_dev *dev, + struct mthca_init_hca_param *param, + u8 *status); +int mthca_INIT_IB(struct mthca_dev *dev, + struct mthca_init_ib_param *param, + int port, u8 *status); +int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status); +int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 
*status); +int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, + int mpt_index, u8 *status); +int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, + int mpt_index, u8 *status); +int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, + int num_mtt, u8 *status); +int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, + int eq_num, u8 *status); +int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, + int eq_num, u8 *status); +int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, + int eq_num, u8 *status); +int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, + int cq_num, u8 *status); +int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, + int cq_num, u8 *status); +int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, + int is_ee, void *qp_context, u32 optmask, + u8 *status); +int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, + void *qp_context, u8 *status); +int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, + u8 *status); +int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int port, + void *in_mad, void *response_mad, u8 *status); +int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, + u8 *status); +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, + u8 *status); +int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, + u8 *status); + +#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN)) + +#endif /* MTHCA_CMD_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h new file mode 100644 index 000000000000..07881969dfd1 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef MTHCA_CONFIG_REG_H +#define MTHCA_CONFIG_REG_H + +#include <asm/page.h> + +#define MTHCA_HCR_BASE 0x80680 +#define MTHCA_HCR_SIZE 0x0001c +#define MTHCA_ECR_BASE 0x80700 +#define MTHCA_ECR_SIZE 0x00008 +#define MTHCA_ECR_CLR_BASE 0x80708 +#define MTHCA_ECR_CLR_SIZE 0x00008 +#define MTHCA_ECR_OFFSET (MTHCA_ECR_BASE - MTHCA_HCR_BASE) +#define MTHCA_ECR_CLR_OFFSET (MTHCA_ECR_CLR_BASE - MTHCA_HCR_BASE) +#define MTHCA_CLR_INT_BASE 0xf00d8 +#define MTHCA_CLR_INT_SIZE 0x00008 + +#define MTHCA_MAP_HCR_SIZE (MTHCA_ECR_CLR_BASE + \ + MTHCA_ECR_CLR_SIZE - \ + MTHCA_HCR_BASE) + +#endif /* MTHCA_CONFIG_REG_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c new file mode 100644 index 000000000000..02dcf5610f98 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -0,0 +1,836 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $ + */ + +#include <linux/init.h> + +#include <ib_pack.h> + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +enum { + MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE +}; + +enum { + MTHCA_CQ_ENTRY_SIZE = 0x20 +}; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. 
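+ * Without the packed attribute the compiler would insert padding
+ * before "start" to give the u64 its natural 8-byte alignment,
+ * shifting every later field away from the layout the hardware
+ * expects.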
+ */ +struct mthca_cq_context { + u32 flags; + u64 start; + u32 logsize_usrpage; + u32 error_eqn; + u32 comp_eqn; + u32 pd; + u32 lkey; + u32 last_notified_index; + u32 solicit_producer_index; + u32 consumer_index; + u32 producer_index; + u32 cqn; + u32 reserved[3]; +} __attribute__((packed)); + +#define MTHCA_CQ_STATUS_OK ( 0 << 28) +#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28) +#define MTHCA_CQ_STATUS_WRITE_FAIL (10 << 28) +#define MTHCA_CQ_FLAG_TR ( 1 << 18) +#define MTHCA_CQ_FLAG_OI ( 1 << 17) +#define MTHCA_CQ_STATE_DISARMED ( 0 << 8) +#define MTHCA_CQ_STATE_ARMED ( 1 << 8) +#define MTHCA_CQ_STATE_ARMED_SOL ( 4 << 8) +#define MTHCA_EQ_STATE_FIRED (10 << 8) + +enum { + MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe +}; + +enum { + SYNDROME_LOCAL_LENGTH_ERR = 0x01, + SYNDROME_LOCAL_QP_OP_ERR = 0x02, + SYNDROME_LOCAL_EEC_OP_ERR = 0x03, + SYNDROME_LOCAL_PROT_ERR = 0x04, + SYNDROME_WR_FLUSH_ERR = 0x05, + SYNDROME_MW_BIND_ERR = 0x06, + SYNDROME_BAD_RESP_ERR = 0x10, + SYNDROME_LOCAL_ACCESS_ERR = 0x11, + SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + SYNDROME_REMOTE_ACCESS_ERR = 0x13, + SYNDROME_REMOTE_OP_ERR = 0x14, + SYNDROME_RETRY_EXC_ERR = 0x15, + SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20, + SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21, + SYNDROME_REMOTE_ABORTED_ERR = 0x22, + SYNDROME_INVAL_EECN_ERR = 0x23, + SYNDROME_INVAL_EEC_STATE_ERR = 0x24 +}; + +struct mthca_cqe { + u32 my_qpn; + u32 my_ee; + u32 rqpn; + u16 sl_g_mlpath; + u16 rlid; + u32 imm_etype_pkey_eec; + u32 byte_cnt; + u32 wqe; + u8 opcode; + u8 is_send; + u8 reserved; + u8 owner; +}; + +struct mthca_err_cqe { + u32 my_qpn; + u32 reserved1[3]; + u8 syndrome; + u8 reserved2; + u16 db_cnt; + u32 reserved3; + u32 wqe; + u8 opcode; + u8 reserved4[2]; + u8 owner; +}; + +#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7) +#define MTHCA_CQ_ENTRY_OWNER_HW (1 << 7) + +#define MTHCA_CQ_DB_INC_CI (1 << 24) +#define MTHCA_CQ_DB_REQ_NOT (2 << 24) +#define MTHCA_CQ_DB_REQ_NOT_SOL (3 << 24) +#define MTHCA_CQ_DB_SET_CI (4 << 24) +#define MTHCA_CQ_DB_REQ_NOT_MULT (5 << 24) + +static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) +{ + if (cq->is_direct) + return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE); + else + return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf + + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE; +} + +static inline int cqe_sw(struct mthca_cq *cq, int i) +{ + return !(MTHCA_CQ_ENTRY_OWNER_HW & + get_cqe(cq, i)->owner); +} + +static inline int next_cqe_sw(struct mthca_cq *cq) +{ + return cqe_sw(cq, cq->cons_index); +} + +static inline void set_cqe_hw(struct mthca_cq *cq, int entry) +{ + get_cqe(cq, entry)->owner = MTHCA_CQ_ENTRY_OWNER_HW; +} + +static inline void inc_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, + int nent) +{ + u32 doorbell[2]; + + doorbell[0] = cpu_to_be32(MTHCA_CQ_DB_INC_CI | cq->cqn); + doorbell[1] = cpu_to_be32(nent - 1); + + mthca_write64(doorbell, + dev->kar + MTHCA_CQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +void mthca_cq_event(struct mthca_dev *dev, u32 cqn) +{ + struct mthca_cq *cq; + + spin_lock(&dev->cq_table.lock); + cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); + if (cq) + atomic_inc(&cq->refcount); + spin_unlock(&dev->cq_table.lock); + + if (!cq) { + mthca_warn(dev, "Completion event for bogus CQ %08x\n", cqn); + return; + } + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + + if (atomic_dec_and_test(&cq->refcount)) + wake_up(&cq->wait); +} + +void mthca_cq_clean(struct mthca_dev *dev, 
u32 cqn, u32 qpn) +{ + struct mthca_cq *cq; + struct mthca_cqe *cqe; + int prod_index; + int nfreed = 0; + + spin_lock_irq(&dev->cq_table.lock); + cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); + if (cq) + atomic_inc(&cq->refcount); + spin_unlock_irq(&dev->cq_table.lock); + + if (!cq) + return; + + spin_lock_irq(&cq->lock); + + /* + * First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->cons_index; + cqe_sw(cq, prod_index & cq->ibcq.cqe); + ++prod_index) + if (prod_index == cq->cons_index + cq->ibcq.cqe) + break; + + if (0) + mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n", + qpn, cqn, cq->cons_index, prod_index); + + /* + * Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while (prod_index > cq->cons_index) { + cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe); + if (cqe->my_qpn == cpu_to_be32(qpn)) + ++nfreed; + else if (nfreed) + memcpy(get_cqe(cq, (prod_index - 1 + nfreed) & + cq->ibcq.cqe), + cqe, + MTHCA_CQ_ENTRY_SIZE); + --prod_index; + } + + if (nfreed) { + wmb(); + inc_cons_index(dev, cq, nfreed); + cq->cons_index = (cq->cons_index + nfreed) & cq->ibcq.cqe; + } + + spin_unlock_irq(&cq->lock); + if (atomic_dec_and_test(&cq->refcount)) + wake_up(&cq->wait); +} + +static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, + struct mthca_qp *qp, int wqe_index, int is_send, + struct mthca_err_cqe *cqe, + struct ib_wc *entry, int *free_cqe) +{ + int err; + int dbd; + u32 new_wqe; + + if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) { + int j; + + mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n", + cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), + be32_to_cpu(cqe->wqe)); + + for (j = 0; j < 8; ++j) + printk(KERN_DEBUG " [%2x] %08x\n", + j * 4, be32_to_cpu(((u32 *) cqe)[j])); + } + + /* + * For completions in error, only work request ID, status (and + * freed resource count for RD) have to be set. 
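+ * The switch below only maps the hardware syndrome onto the
+ * corresponding IB_WC status code; the other ib_wc fields are
+ * left as they are for an error completion.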
+ */ + switch (cqe->syndrome) { + case SYNDROME_LOCAL_LENGTH_ERR: + entry->status = IB_WC_LOC_LEN_ERR; + break; + case SYNDROME_LOCAL_QP_OP_ERR: + entry->status = IB_WC_LOC_QP_OP_ERR; + break; + case SYNDROME_LOCAL_EEC_OP_ERR: + entry->status = IB_WC_LOC_EEC_OP_ERR; + break; + case SYNDROME_LOCAL_PROT_ERR: + entry->status = IB_WC_LOC_PROT_ERR; + break; + case SYNDROME_WR_FLUSH_ERR: + entry->status = IB_WC_WR_FLUSH_ERR; + break; + case SYNDROME_MW_BIND_ERR: + entry->status = IB_WC_MW_BIND_ERR; + break; + case SYNDROME_BAD_RESP_ERR: + entry->status = IB_WC_BAD_RESP_ERR; + break; + case SYNDROME_LOCAL_ACCESS_ERR: + entry->status = IB_WC_LOC_ACCESS_ERR; + break; + case SYNDROME_REMOTE_INVAL_REQ_ERR: + entry->status = IB_WC_REM_INV_REQ_ERR; + break; + case SYNDROME_REMOTE_ACCESS_ERR: + entry->status = IB_WC_REM_ACCESS_ERR; + break; + case SYNDROME_REMOTE_OP_ERR: + entry->status = IB_WC_REM_OP_ERR; + break; + case SYNDROME_RETRY_EXC_ERR: + entry->status = IB_WC_RETRY_EXC_ERR; + break; + case SYNDROME_RNR_RETRY_EXC_ERR: + entry->status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case SYNDROME_LOCAL_RDD_VIOL_ERR: + entry->status = IB_WC_LOC_RDD_VIOL_ERR; + break; + case SYNDROME_REMOTE_INVAL_RD_REQ_ERR: + entry->status = IB_WC_REM_INV_RD_REQ_ERR; + break; + case SYNDROME_REMOTE_ABORTED_ERR: + entry->status = IB_WC_REM_ABORT_ERR; + break; + case SYNDROME_INVAL_EECN_ERR: + entry->status = IB_WC_INV_EECN_ERR; + break; + case SYNDROME_INVAL_EEC_STATE_ERR: + entry->status = IB_WC_INV_EEC_STATE_ERR; + break; + default: + entry->status = IB_WC_GENERAL_ERR; + break; + } + + err = mthca_free_err_wqe(qp, is_send, wqe_index, &dbd, &new_wqe); + if (err) + return err; + + /* + * If we're at the end of the WQE chain, or we've used up our + * doorbell count, free the CQE. Otherwise just update it for + * the next poll operation. 
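+ * A recycled CQE has its syndrome forced to WR_FLUSH_ERR below so
+ * that the remaining WQEs of the chain are reported as flushed.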
+ */ + if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) + return 0; + + cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd); + cqe->wqe = new_wqe; + cqe->syndrome = SYNDROME_WR_FLUSH_ERR; + + *free_cqe = 0; + + return 0; +} + +static void dump_cqe(struct mthca_cqe *cqe) +{ + int j; + + for (j = 0; j < 8; ++j) + printk(KERN_DEBUG " [%2x] %08x\n", + j * 4, be32_to_cpu(((u32 *) cqe)[j])); +} + +static inline int mthca_poll_one(struct mthca_dev *dev, + struct mthca_cq *cq, + struct mthca_qp **cur_qp, + int *freed, + struct ib_wc *entry) +{ + struct mthca_wq *wq; + struct mthca_cqe *cqe; + int wqe_index; + int is_error = 0; + int is_send; + int free_cqe = 1; + int err = 0; + + if (!next_cqe_sw(cq)) + return -EAGAIN; + + rmb(); + + cqe = get_cqe(cq, cq->cons_index); + + if (0) { + mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n", + cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), + be32_to_cpu(cqe->wqe)); + + dump_cqe(cqe); + } + + if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK) { + is_error = 1; + is_send = cqe->opcode & 1; + } else + is_send = cqe->is_send & 0x80; + + if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) { + if (*cur_qp) { + if (*freed) { + wmb(); + inc_cons_index(dev, cq, *freed); + *freed = 0; + } + spin_unlock(&(*cur_qp)->lock); + if (atomic_dec_and_test(&(*cur_qp)->refcount)) + wake_up(&(*cur_qp)->wait); + } + + spin_lock(&dev->qp_table.lock); + *cur_qp = mthca_array_get(&dev->qp_table.qp, + be32_to_cpu(cqe->my_qpn) & + (dev->limits.num_qps - 1)); + if (*cur_qp) + atomic_inc(&(*cur_qp)->refcount); + spin_unlock(&dev->qp_table.lock); + + if (!*cur_qp) { + mthca_warn(dev, "CQ entry for unknown QP %06x\n", + be32_to_cpu(cqe->my_qpn) & 0xffffff); + err = -EINVAL; + goto out; + } + + spin_lock(&(*cur_qp)->lock); + } + + if (is_send) { + wq = &(*cur_qp)->sq; + wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset) + >> wq->wqe_shift); + entry->wr_id = (*cur_qp)->wrid[wqe_index + + (*cur_qp)->rq.max]; + } else { + wq = &(*cur_qp)->rq; + wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift; + entry->wr_id = (*cur_qp)->wrid[wqe_index]; + } + + if (wq->last_comp < wqe_index) + wq->cur -= wqe_index - wq->last_comp; + else + wq->cur -= wq->max - wq->last_comp + wqe_index; + + wq->last_comp = wqe_index; + + if (0) + mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n", + is_send ? 
"Send" : "Receive", + (*cur_qp)->qpn, wqe_index, wq->max); + + if (is_error) { + err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, + (struct mthca_err_cqe *) cqe, + entry, &free_cqe); + goto out; + } + + if (is_send) { + entry->opcode = IB_WC_SEND; /* XXX */ + } else { + entry->byte_len = be32_to_cpu(cqe->byte_cnt); + switch (cqe->opcode & 0x1f) { + case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE: + entry->wc_flags = IB_WC_WITH_IMM; + entry->imm_data = cqe->imm_etype_pkey_eec; + entry->opcode = IB_WC_RECV; + break; + case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: + entry->wc_flags = IB_WC_WITH_IMM; + entry->imm_data = cqe->imm_etype_pkey_eec; + entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; + break; + default: + entry->wc_flags = 0; + entry->opcode = IB_WC_RECV; + break; + } + entry->slid = be16_to_cpu(cqe->rlid); + entry->sl = be16_to_cpu(cqe->sl_g_mlpath) >> 12; + entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff; + entry->dlid_path_bits = be16_to_cpu(cqe->sl_g_mlpath) & 0x7f; + entry->pkey_index = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16; + entry->wc_flags |= be16_to_cpu(cqe->sl_g_mlpath) & 0x80 ? + IB_WC_GRH : 0; + } + + entry->status = IB_WC_SUCCESS; + + out: + if (free_cqe) { + set_cqe_hw(cq, cq->cons_index); + ++(*freed); + cq->cons_index = (cq->cons_index + 1) & cq->ibcq.cqe; + } + + return err; +} + +int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *entry) +{ + struct mthca_dev *dev = to_mdev(ibcq->device); + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_qp *qp = NULL; + unsigned long flags; + int err = 0; + int freed = 0; + int npolled; + + spin_lock_irqsave(&cq->lock, flags); + + for (npolled = 0; npolled < num_entries; ++npolled) { + err = mthca_poll_one(dev, cq, &qp, + &freed, entry + npolled); + if (err) + break; + } + + if (freed) { + wmb(); + inc_cons_index(dev, cq, freed); + } + + if (qp) { + spin_unlock(&qp->lock); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + + + spin_unlock_irqrestore(&cq->lock, flags); + + return err == 0 || err == -EAGAIN ? npolled : err; +} + +void mthca_arm_cq(struct mthca_dev *dev, struct mthca_cq *cq, + int solicited) +{ + u32 doorbell[2]; + + doorbell[0] = cpu_to_be32((solicited ? 
+ MTHCA_CQ_DB_REQ_NOT_SOL : + MTHCA_CQ_DB_REQ_NOT) | + cq->cqn); + doorbell[1] = 0xffffffff; + + mthca_write64(doorbell, + dev->kar + MTHCA_CQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_cq *cq) +{ + int size = nent * MTHCA_CQ_ENTRY_SIZE; + dma_addr_t t; + void *mailbox = NULL; + int npages, shift; + u64 *dma_list = NULL; + struct mthca_cq_context *cq_context; + int err = -ENOMEM; + u8 status; + int i; + + might_sleep(); + + mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, + GFP_KERNEL); + if (!mailbox) + goto err_out; + + cq_context = MAILBOX_ALIGN(mailbox); + + if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) { + if (0) + mthca_dbg(dev, "Creating direct CQ of size %d\n", size); + + cq->is_direct = 1; + npages = 1; + shift = get_order(size) + PAGE_SHIFT; + + cq->queue.direct.buf = pci_alloc_consistent(dev->pdev, + size, &t); + if (!cq->queue.direct.buf) + goto err_out; + + pci_unmap_addr_set(&cq->queue.direct, mapping, t); + + memset(cq->queue.direct.buf, 0, size); + + while (t & ((1 << shift) - 1)) { + --shift; + npages *= 2; + } + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out_free; + + for (i = 0; i < npages; ++i) + dma_list[i] = t + i * (1 << shift); + } else { + cq->is_direct = 0; + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + shift = PAGE_SHIFT; + + if (0) + mthca_dbg(dev, "Creating indirect CQ with %d pages\n", npages); + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out; + + cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list, + GFP_KERNEL); + if (!cq->queue.page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + cq->queue.page_list[i].buf = NULL; + + for (i = 0; i < npages; ++i) { + cq->queue.page_list[i].buf = + pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); + if (!cq->queue.page_list[i].buf) + goto err_out_free; + + dma_list[i] = t; + pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t); + + memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE); + } + } + + for (i = 0; i < nent; ++i) + set_cqe_hw(cq, i); + + cq->cqn = mthca_alloc(&dev->cq_table.alloc); + if (cq->cqn == -1) + goto err_out_free; + + err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, + dma_list, shift, npages, + 0, size, + MTHCA_MPT_FLAG_LOCAL_WRITE | + MTHCA_MPT_FLAG_LOCAL_READ, + &cq->mr); + if (err) + goto err_out_free_cq; + + spin_lock_init(&cq->lock); + atomic_set(&cq->refcount, 1); + init_waitqueue_head(&cq->wait); + + memset(cq_context, 0, sizeof *cq_context); + cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK | + MTHCA_CQ_STATE_DISARMED | + MTHCA_CQ_FLAG_TR); + cq_context->start = cpu_to_be64(0); + cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | + MTHCA_KAR_PAGE); + cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); + cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); + cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); + cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); + cq_context->cqn = cpu_to_be32(cq->cqn); + + err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status); + if (err) { + mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); + goto err_out_free_mr; + } + + if (status) { + mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n", + status); + err = -EINVAL; + goto err_out_free_mr; + } + + spin_lock_irq(&dev->cq_table.lock); + if (mthca_array_set(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1), + cq)) { + 
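+ /*
+ * Insertion into the CQ lookup table failed (mthca_array_set()
+ * may have to allocate a page of pointers), so unwind the MR
+ * and CQ number allocated above.
+ */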
spin_unlock_irq(&dev->cq_table.lock); + goto err_out_free_mr; + } + spin_unlock_irq(&dev->cq_table.lock); + + cq->cons_index = 0; + + kfree(dma_list); + kfree(mailbox); + + return 0; + + err_out_free_mr: + mthca_free_mr(dev, &cq->mr); + + err_out_free_cq: + mthca_free(&dev->cq_table.alloc, cq->cqn); + + err_out_free: + if (cq->is_direct) + pci_free_consistent(dev->pdev, size, + cq->queue.direct.buf, + pci_unmap_addr(&cq->queue.direct, mapping)); + else { + for (i = 0; i < npages; ++i) + if (cq->queue.page_list[i].buf) + pci_free_consistent(dev->pdev, PAGE_SIZE, + cq->queue.page_list[i].buf, + pci_unmap_addr(&cq->queue.page_list[i], + mapping)); + + kfree(cq->queue.page_list); + } + + err_out: + kfree(dma_list); + kfree(mailbox); + + return err; +} + +void mthca_free_cq(struct mthca_dev *dev, + struct mthca_cq *cq) +{ + void *mailbox; + int err; + u8 status; + + might_sleep(); + + mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, + GFP_KERNEL); + if (!mailbox) { + mthca_warn(dev, "No memory for mailbox to free CQ.\n"); + return; + } + + err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status); + if (err) + mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); + else if (status) + mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", + status); + + if (0) { + u32 *ctx = MAILBOX_ALIGN(mailbox); + int j; + + printk(KERN_ERR "context for CQN %x\n", cq->cqn); + for (j = 0; j < 16; ++j) + printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); + } + + spin_lock_irq(&dev->cq_table.lock); + mthca_array_clear(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1)); + spin_unlock_irq(&dev->cq_table.lock); + + atomic_dec(&cq->refcount); + wait_event(cq->wait, !atomic_read(&cq->refcount)); + + mthca_free_mr(dev, &cq->mr); + + if (cq->is_direct) + pci_free_consistent(dev->pdev, + (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, + cq->queue.direct.buf, + pci_unmap_addr(&cq->queue.direct, + mapping)); + else { + int i; + + for (i = 0; + i < ((cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE + PAGE_SIZE - 1) / + PAGE_SIZE; + ++i) + pci_free_consistent(dev->pdev, PAGE_SIZE, + cq->queue.page_list[i].buf, + pci_unmap_addr(&cq->queue.page_list[i], + mapping)); + + kfree(cq->queue.page_list); + } + + mthca_free(&dev->cq_table.alloc, cq->cqn); + kfree(mailbox); +} + +int __devinit mthca_init_cq_table(struct mthca_dev *dev) +{ + int err; + + spin_lock_init(&dev->cq_table.lock); + + err = mthca_alloc_init(&dev->cq_table.alloc, + dev->limits.num_cqs, + (1 << 24) - 1, + dev->limits.reserved_cqs); + if (err) + return err; + + err = mthca_array_init(&dev->cq_table.cq, + dev->limits.num_cqs); + if (err) + mthca_alloc_cleanup(&dev->cq_table.alloc); + + return err; +} + +void __devexit mthca_cleanup_cq_table(struct mthca_dev *dev) +{ + mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs); + mthca_alloc_cleanup(&dev->cq_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h new file mode 100644 index 000000000000..d7c12e23d3d1 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef MTHCA_DEV_H +#define MTHCA_DEV_H + +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <asm/semaphore.h> +#include <asm/scatterlist.h> + +#include "mthca_provider.h" +#include "mthca_doorbell.h" + +#define DRV_NAME "ib_mthca" +#define PFX DRV_NAME ": " +#define DRV_VERSION "0.06-pre" +#define DRV_RELDATE "November 8, 2004" + +/* Types of supported HCA */ +enum { + TAVOR, /* MT23108 */ + ARBEL_COMPAT, /* MT25208 in Tavor compat mode */ + ARBEL_NATIVE /* MT25208 with extended features */ +}; + +enum { + MTHCA_FLAG_DDR_HIDDEN = 1 << 1, + MTHCA_FLAG_SRQ = 1 << 2, + MTHCA_FLAG_MSI = 1 << 3, + MTHCA_FLAG_MSI_X = 1 << 4, + MTHCA_FLAG_NO_LAM = 1 << 5 +}; + +enum { + MTHCA_KAR_PAGE = 1, + MTHCA_MAX_PORTS = 2 +}; + +enum { + MTHCA_MPT_ENTRY_SIZE = 0x40, + MTHCA_EQ_CONTEXT_SIZE = 0x40, + MTHCA_CQ_CONTEXT_SIZE = 0x40, + MTHCA_QP_CONTEXT_SIZE = 0x200, + MTHCA_AV_SIZE = 0x20, + MTHCA_MGM_ENTRY_SIZE = 0x40 +}; + +enum { + MTHCA_EQ_CMD, + MTHCA_EQ_ASYNC, + MTHCA_EQ_COMP, + MTHCA_NUM_EQ +}; + +struct mthca_cmd { + int use_events; + struct semaphore hcr_sem; + struct semaphore poll_sem; + struct semaphore event_sem; + int max_cmds; + spinlock_t context_lock; + int free_head; + struct mthca_cmd_context *context; + u16 token_mask; +}; + +struct mthca_limits { + int num_ports; + int vl_cap; + int mtu_cap; + int gid_table_len; + int pkey_table_len; + int local_ca_ack_delay; + int max_sg; + int num_qps; + int reserved_qps; + int num_srqs; + int reserved_srqs; + int num_eecs; + int reserved_eecs; + int num_cqs; + int reserved_cqs; + int num_eqs; + int reserved_eqs; + int num_mpts; + int num_mtt_segs; + int mtt_seg_size; + int reserved_mtts; + int reserved_mrws; + int num_rdbs; + int reserved_uars; + int num_mgms; + int num_amgms; + int reserved_mcgs; + int num_pds; + int reserved_pds; +}; + +struct mthca_alloc { + u32 last; + u32 top; + u32 max; + u32 mask; + spinlock_t lock; + unsigned long *table; +}; + +struct mthca_array { + struct { + void **page; + int used; + } *page_list; +}; + +struct mthca_pd_table { + struct mthca_alloc alloc; +}; + +struct mthca_mr_table { + struct mthca_alloc mpt_alloc; + int max_mtt_order; + unsigned long **mtt_buddy; 
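+ /* mtt_buddy appears to hold one free-bitmap per order for a
+ * buddy-style allocator of MTT segments, with max_mtt_order
+ * orders; mtt_base is the base address of the MTT table. */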
+ u64 mtt_base; +}; + +struct mthca_eq_table { + struct mthca_alloc alloc; + void __iomem *clr_int; + u32 clr_mask; + struct mthca_eq eq[MTHCA_NUM_EQ]; + int have_irq; + u8 inta_pin; +}; + +struct mthca_cq_table { + struct mthca_alloc alloc; + spinlock_t lock; + struct mthca_array cq; +}; + +struct mthca_qp_table { + struct mthca_alloc alloc; + int sqp_start; + spinlock_t lock; + struct mthca_array qp; +}; + +struct mthca_av_table { + struct pci_pool *pool; + int num_ddr_avs; + u64 ddr_av_base; + void __iomem *av_map; + struct mthca_alloc alloc; +}; + +struct mthca_mcg_table { + struct semaphore sem; + struct mthca_alloc alloc; +}; + +struct mthca_dev { + struct ib_device ib_dev; + struct pci_dev *pdev; + + int hca_type; + unsigned long mthca_flags; + + u32 rev_id; + + /* firmware info */ + u64 fw_ver; + union { + struct { + u64 fw_start; + u64 fw_end; + } tavor; + struct { + u64 clr_int_base; + u64 eq_arm_base; + u64 eq_set_ci_base; + struct scatterlist *mem; + u16 fw_pages; + } arbel; + } fw; + + u64 ddr_start; + u64 ddr_end; + + MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock) + + void __iomem *hcr; + void __iomem *clr_base; + void __iomem *kar; + + struct mthca_cmd cmd; + struct mthca_limits limits; + + struct mthca_pd_table pd_table; + struct mthca_mr_table mr_table; + struct mthca_eq_table eq_table; + struct mthca_cq_table cq_table; + struct mthca_qp_table qp_table; + struct mthca_av_table av_table; + struct mthca_mcg_table mcg_table; + + struct mthca_pd driver_pd; + struct mthca_mr driver_mr; + + struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2]; + struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; + spinlock_t sm_lock; +}; + +#define mthca_dbg(mdev, format, arg...) \ + dev_dbg(&mdev->pdev->dev, format, ## arg) +#define mthca_err(mdev, format, arg...) \ + dev_err(&mdev->pdev->dev, format, ## arg) +#define mthca_info(mdev, format, arg...) \ + dev_info(&mdev->pdev->dev, format, ## arg) +#define mthca_warn(mdev, format, arg...) 
\ + dev_warn(&mdev->pdev->dev, format, ## arg) + +extern void __buggy_use_of_MTHCA_GET(void); +extern void __buggy_use_of_MTHCA_PUT(void); + +#define MTHCA_GET(dest, source, offset) \ + do { \ + void *__p = (char *) (source) + (offset); \ + switch (sizeof (dest)) { \ + case 1: (dest) = *(u8 *) __p; break; \ + case 2: (dest) = be16_to_cpup(__p); break; \ + case 4: (dest) = be32_to_cpup(__p); break; \ + case 8: (dest) = be64_to_cpup(__p); break; \ + default: __buggy_use_of_MTHCA_GET(); \ + } \ + } while (0) + +#define MTHCA_PUT(dest, source, offset) \ + do { \ + __typeof__(source) *__p = \ + (__typeof__(source) *) ((char *) (dest) + (offset)); \ + switch (sizeof(source)) { \ + case 1: *__p = (source); break; \ + case 2: *__p = cpu_to_be16(source); break; \ + case 4: *__p = cpu_to_be32(source); break; \ + case 8: *__p = cpu_to_be64(source); break; \ + default: __buggy_use_of_MTHCA_PUT(); \ + } \ + } while (0) + +int mthca_reset(struct mthca_dev *mdev); + +u32 mthca_alloc(struct mthca_alloc *alloc); +void mthca_free(struct mthca_alloc *alloc, u32 obj); +int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, + u32 reserved); +void mthca_alloc_cleanup(struct mthca_alloc *alloc); +void *mthca_array_get(struct mthca_array *array, int index); +int mthca_array_set(struct mthca_array *array, int index, void *value); +void mthca_array_clear(struct mthca_array *array, int index); +int mthca_array_init(struct mthca_array *array, int nent); +void mthca_array_cleanup(struct mthca_array *array, int nent); + +int mthca_init_pd_table(struct mthca_dev *dev); +int mthca_init_mr_table(struct mthca_dev *dev); +int mthca_init_eq_table(struct mthca_dev *dev); +int mthca_init_cq_table(struct mthca_dev *dev); +int mthca_init_qp_table(struct mthca_dev *dev); +int mthca_init_av_table(struct mthca_dev *dev); +int mthca_init_mcg_table(struct mthca_dev *dev); + +void mthca_cleanup_pd_table(struct mthca_dev *dev); +void mthca_cleanup_mr_table(struct mthca_dev *dev); +void mthca_cleanup_eq_table(struct mthca_dev *dev); +void mthca_cleanup_cq_table(struct mthca_dev *dev); +void mthca_cleanup_qp_table(struct mthca_dev *dev); +void mthca_cleanup_av_table(struct mthca_dev *dev); +void mthca_cleanup_mcg_table(struct mthca_dev *dev); + +int mthca_register_device(struct mthca_dev *dev); +void mthca_unregister_device(struct mthca_dev *dev); + +int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd); +void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); + +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_mr *mr); +int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, + u64 *buffer_list, int buffer_size_shift, + int list_len, u64 iova, u64 total_size, + u32 access, struct mthca_mr *mr); +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr); + +int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *entry); +void mthca_arm_cq(struct mthca_dev *dev, struct mthca_cq *cq, + int solicited); +int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_cq *cq); +void mthca_free_cq(struct mthca_dev *dev, + struct mthca_cq *cq); +void mthca_cq_event(struct mthca_dev *dev, u32 cqn); +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn); + +void mthca_qp_event(struct mthca_dev *dev, u32 qpn, + enum ib_event_type event_type); +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); +int mthca_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mthca_post_receive(struct ib_qp 
*ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int mthca_free_err_wqe(struct mthca_qp *qp, int is_send, + int index, int *dbd, u32 *new_wqe); +int mthca_alloc_qp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_qp_type type, + enum ib_sig_type send_policy, + enum ib_sig_type recv_policy, + struct mthca_qp *qp); +int mthca_alloc_sqp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + enum ib_sig_type recv_policy, + int qpn, + int port, + struct mthca_sqp *sqp); +void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); +int mthca_create_ah(struct mthca_dev *dev, + struct mthca_pd *pd, + struct ib_ah_attr *ah_attr, + struct mthca_ah *ah); +int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah); +int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, + struct ib_ud_header *header); + +int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); +int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int mthca_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + u16 slid, + struct ib_mad *in_mad, + struct ib_mad *out_mad); +int mthca_create_agents(struct mthca_dev *dev); +void mthca_free_agents(struct mthca_dev *dev); + +static inline struct mthca_dev *to_mdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct mthca_dev, ib_dev); +} + +#endif /* MTHCA_DEV_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h new file mode 100644 index 000000000000..6d5420767274 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/preempt.h> + +#define MTHCA_RD_DOORBELL 0x00 +#define MTHCA_SEND_DOORBELL 0x10 +#define MTHCA_RECEIVE_DOORBELL 0x18 +#define MTHCA_CQ_DOORBELL 0x20 +#define MTHCA_EQ_DOORBELL 0x28 + +#if BITS_PER_LONG == 64 +/* + * Assume that we can just write a 64-bit doorbell atomically. s390 + * actually doesn't have writeq() but S/390 systems don't even have + * PCI so we won't worry about it. + */ + +#define MTHCA_DECLARE_DOORBELL_LOCK(name) +#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0) +#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline void mthca_write64(u32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + __raw_writeq(*(u64 *) val, dest); +} + +#elif defined(CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL) +/* Use SSE to write 64 bits atomically without a lock. */ + +#define MTHCA_DECLARE_DOORBELL_LOCK(name) +#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0) +#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline unsigned long mthca_get_fpu(void) +{ + unsigned long cr0; + + preempt_disable(); + asm volatile("mov %%cr0,%0; clts" : "=r" (cr0)); + return cr0; +} + +static inline void mthca_put_fpu(unsigned long cr0) +{ + asm volatile("mov %0,%%cr0" : : "r" (cr0)); + preempt_enable(); +} + +static inline void mthca_write64(u32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + /* i386 stack is aligned to 8 bytes, so this should be OK: */ + u8 xmmsave[8] __attribute__((aligned(8))); + unsigned long cr0; + + cr0 = mthca_get_fpu(); + + asm volatile ( + "movlps %%xmm0,(%0); \n\t" + "movlps (%1),%%xmm0; \n\t" + "movlps %%xmm0,(%2); \n\t" + "movlps (%0),%%xmm0; \n\t" + : + : "r" (xmmsave), "r" (val), "r" (dest) + : "memory" ); + + mthca_put_fpu(cr0); +} + +#else +/* Just fall back to a spinlock to protect the doorbell */ + +#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name; +#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) +#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr) + +static inline void mthca_write64(u32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + unsigned long flags; + + spin_lock_irqsave(doorbell_lock, flags); + __raw_writel(val[0], dest); + __raw_writel(val[1], dest + 4); + spin_unlock_irqrestore(doorbell_lock, flags); +} + +#endif diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c new file mode 100644 index 000000000000..010d62454618 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -0,0 +1,690 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $ + */ + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/pci.h> + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_config_reg.h" + +enum { + MTHCA_NUM_ASYNC_EQE = 0x80, + MTHCA_NUM_CMD_EQE = 0x80, + MTHCA_EQ_ENTRY_SIZE = 0x20 +}; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. + */ +struct mthca_eq_context { + u32 flags; + u64 start; + u32 logsize_usrpage; + u32 pd; + u8 reserved1[3]; + u8 intr; + u32 lost_count; + u32 lkey; + u32 reserved2[2]; + u32 consumer_index; + u32 producer_index; + u32 reserved3[4]; +} __attribute__((packed)); + +#define MTHCA_EQ_STATUS_OK ( 0 << 28) +#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28) +#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28) +#define MTHCA_EQ_OWNER_SW ( 0 << 24) +#define MTHCA_EQ_OWNER_HW ( 1 << 24) +#define MTHCA_EQ_FLAG_TR ( 1 << 18) +#define MTHCA_EQ_FLAG_OI ( 1 << 17) +#define MTHCA_EQ_STATE_ARMED ( 1 << 8) +#define MTHCA_EQ_STATE_FIRED ( 2 << 8) +#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8) + +enum { + MTHCA_EVENT_TYPE_COMP = 0x00, + MTHCA_EVENT_TYPE_PATH_MIG = 0x01, + MTHCA_EVENT_TYPE_COMM_EST = 0x02, + MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, + MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, + MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, + MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, + MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09, + MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f, + MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e, + MTHCA_EVENT_TYPE_CMD = 0x0a +}; + +#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG) | \ + (1ULL << MTHCA_EVENT_TYPE_COMM_EST) | \ + (1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED) | \ + (1ULL << MTHCA_EVENT_TYPE_CQ_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ + (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) +#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE) +#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) + +#define MTHCA_EQ_DB_INC_CI (1 << 24) +#define MTHCA_EQ_DB_REQ_NOT (2 << 24) +#define MTHCA_EQ_DB_DISARM_CQ (3 << 24) +#define MTHCA_EQ_DB_SET_CI (4 << 24) +#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24) + +struct mthca_eqe { + u8 reserved1; + u8 type; + u8 reserved2; + u8 subtype; + union { + u32 raw[6]; + struct { + u32 cqn; + } __attribute__((packed)) comp; + struct { + u16 reserved1; + u16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + u64 out_param; + } __attribute__((packed)) cmd; + 
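+ /* QP async events carry the QPN in the low 24 bits: */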
struct { + u32 qpn; + } __attribute__((packed)) qp; + struct { + u32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; + } __attribute__((packed)) cq_err; + struct { + u32 reserved1[2]; + u32 port; + } __attribute__((packed)) port_change; + } event; + u8 reserved3[3]; + u8 owner; +} __attribute__((packed)); + +#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7) +#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7) + +static inline u64 async_mask(struct mthca_dev *dev) +{ + return dev->mthca_flags & MTHCA_FLAG_SRQ ? + MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK : + MTHCA_ASYNC_EVENT_MASK; +} + +static inline void set_eq_ci(struct mthca_dev *dev, int eqn, int ci) +{ + u32 doorbell[2]; + + doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eqn); + doorbell[1] = cpu_to_be32(ci); + + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline void eq_req_not(struct mthca_dev *dev, int eqn) +{ + u32 doorbell[2]; + + doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn); + doorbell[1] = 0; + + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) +{ + u32 doorbell[2]; + + doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn); + doorbell[1] = cpu_to_be32(cqn); + + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, int entry) +{ + return eq->page_list[entry * MTHCA_EQ_ENTRY_SIZE / PAGE_SIZE].buf + + (entry * MTHCA_EQ_ENTRY_SIZE) % PAGE_SIZE; +} + +static inline int next_eqe_sw(struct mthca_eq *eq) +{ + return !(MTHCA_EQ_ENTRY_OWNER_HW & + get_eqe(eq, eq->cons_index)->owner); +} + +static inline void set_eqe_hw(struct mthca_eq *eq, int entry) +{ + get_eqe(eq, entry)->owner = MTHCA_EQ_ENTRY_OWNER_HW; +} + +static void port_change(struct mthca_dev *dev, int port, int active) +{ + struct ib_event record; + + mthca_dbg(dev, "Port change to %s for port %d\n", + active ? "active" : "down", port); + + record.device = &dev->ib_dev; + record.event = active ? 
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + record.element.port_num = port; + + ib_dispatch_event(&record); +} + +static void mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) +{ + struct mthca_eqe *eqe; + int disarm_cqn; + + while (next_eqe_sw(eq)) { + int set_ci = 0; + eqe = get_eqe(eq, eq->cons_index); + + switch (eqe->type) { + case MTHCA_EVENT_TYPE_COMP: + disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff; + disarm_cq(dev, eq->eqn, disarm_cqn); + mthca_cq_event(dev, disarm_cqn); + break; + + case MTHCA_EVENT_TYPE_PATH_MIG: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_PATH_MIG); + break; + + case MTHCA_EVENT_TYPE_COMM_EST: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_COMM_EST); + break; + + case MTHCA_EVENT_TYPE_SQ_DRAINED: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_SQ_DRAINED); + break; + + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_FATAL); + break; + + case MTHCA_EVENT_TYPE_PATH_MIG_FAILED: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_PATH_MIG_ERR); + break; + + case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_REQ_ERR); + break; + + case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_ACCESS_ERR); + break; + + case MTHCA_EVENT_TYPE_CMD: + mthca_cmd_event(dev, + be16_to_cpu(eqe->event.cmd.token), + eqe->event.cmd.status, + be64_to_cpu(eqe->event.cmd.out_param)); + /* + * cmd_event() may add more commands. + * The card will think the queue has overflowed if + * we don't tell it we've been processing events. + */ + set_ci = 1; + break; + + case MTHCA_EVENT_TYPE_PORT_CHANGE: + port_change(dev, + (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3, + eqe->subtype == 0x4); + break; + + case MTHCA_EVENT_TYPE_CQ_ERROR: + mthca_warn(dev, "CQ %s on CQN %08x\n", + eqe->event.cq_err.syndrome == 1 ? + "overrun" : "access violation", + be32_to_cpu(eqe->event.cq_err.cqn)); + break; + + case MTHCA_EVENT_TYPE_EQ_OVERFLOW: + mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn); + break; + + case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR: + case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR: + case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR: + case MTHCA_EVENT_TYPE_ECC_DETECT: + default: + mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n", + eqe->type, eqe->subtype, eq->eqn); + break; + }; + + set_eqe_hw(eq, eq->cons_index); + eq->cons_index = (eq->cons_index + 1) & (eq->nent - 1); + + if (set_ci) { + wmb(); /* see comment below */ + set_eq_ci(dev, eq->eqn, eq->cons_index); + set_ci = 0; + } + } + + /* + * This barrier makes sure that all updates to + * ownership bits done by set_eqe_hw() hit memory + * before the consumer index is updated. set_eq_ci() + * allows the HCA to possibly write more EQ entries, + * and we want to avoid the exceedingly unlikely + * possibility of the HCA writing an entry and then + * having set_eqe_hw() overwrite the owner field. 
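+ *
+ * The required ordering is therefore:
+ *   1. set_eqe_hw() returns each entry to HCA ownership
+ *   2. wmb()
+ *   3. set_eq_ci() updates the consumer index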
+ */ + wmb(); + set_eq_ci(dev, eq->eqn, eq->cons_index); + eq_req_not(dev, eq->eqn); +} + +static irqreturn_t mthca_interrupt(int irq, void *dev_ptr, struct pt_regs *regs) +{ + struct mthca_dev *dev = dev_ptr; + u32 ecr; + int work = 0; + int i; + + if (dev->eq_table.clr_mask) + writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); + + while ((ecr = readl(dev->hcr + MTHCA_ECR_OFFSET + 4)) != 0) { + work = 1; + + writel(ecr, dev->hcr + MTHCA_ECR_CLR_OFFSET + 4); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (ecr & dev->eq_table.eq[i].ecr_mask) + mthca_eq_int(dev, &dev->eq_table.eq[i]); + } + + return IRQ_RETVAL(work); +} + +static irqreturn_t mthca_msi_x_interrupt(int irq, void *eq_ptr, + struct pt_regs *regs) +{ + struct mthca_eq *eq = eq_ptr; + struct mthca_dev *dev = eq->dev; + + writel(eq->ecr_mask, dev->hcr + MTHCA_ECR_CLR_OFFSET + 4); + mthca_eq_int(dev, eq); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} + +static int __devinit mthca_create_eq(struct mthca_dev *dev, + int nent, + u8 intr, + struct mthca_eq *eq) +{ + int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / + PAGE_SIZE; + u64 *dma_list = NULL; + dma_addr_t t; + void *mailbox = NULL; + struct mthca_eq_context *eq_context; + int err = -ENOMEM; + int i; + u8 status; + + /* Make sure EQ size is aligned to a power of 2 size. */ + for (i = 1; i < nent; i <<= 1) + ; /* nothing */ + nent = i; + + eq->dev = dev; + + eq->page_list = kmalloc(npages * sizeof *eq->page_list, + GFP_KERNEL); + if (!eq->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + eq->page_list[i].buf = NULL; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out_free; + + mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA, + GFP_KERNEL); + if (!mailbox) + goto err_out_free; + eq_context = MAILBOX_ALIGN(mailbox); + + for (i = 0; i < npages; ++i) { + eq->page_list[i].buf = pci_alloc_consistent(dev->pdev, + PAGE_SIZE, &t); + if (!eq->page_list[i].buf) + goto err_out_free; + + dma_list[i] = t; + pci_unmap_addr_set(&eq->page_list[i], mapping, t); + + memset(eq->page_list[i].buf, 0, PAGE_SIZE); + } + + for (i = 0; i < nent; ++i) + set_eqe_hw(eq, i); + + eq->eqn = mthca_alloc(&dev->eq_table.alloc); + if (eq->eqn == -1) + goto err_out_free; + + err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, + dma_list, PAGE_SHIFT, npages, + 0, npages * PAGE_SIZE, + MTHCA_MPT_FLAG_LOCAL_WRITE | + MTHCA_MPT_FLAG_LOCAL_READ, + &eq->mr); + if (err) + goto err_out_free_eq; + + eq->nent = nent; + + memset(eq_context, 0, sizeof *eq_context); + eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK | + MTHCA_EQ_OWNER_HW | + MTHCA_EQ_STATE_ARMED | + MTHCA_EQ_FLAG_TR); + eq_context->start = cpu_to_be64(0); + eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | + MTHCA_KAR_PAGE); + eq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); + eq_context->intr = intr; + eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey); + + err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status); + if (err) { + mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err); + goto err_out_free_mr; + } + if (status) { + mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n", + status); + err = -EINVAL; + goto err_out_free_mr; + } + + kfree(dma_list); + kfree(mailbox); + + eq->ecr_mask = swab32(1 << eq->eqn); + eq->cons_index = 0; + + eq_req_not(dev, eq->eqn); + + mthca_dbg(dev, "Allocated EQ %d with %d entries\n", + eq->eqn, nent); + + return err; + + err_out_free_mr: + mthca_free_mr(dev, &eq->mr); + + err_out_free_eq: + 
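+ /* The EQ was never handed to the hardware, so just return
+  * the EQN to the allocator. */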
mthca_free(&dev->eq_table.alloc, eq->eqn); + + err_out_free: + for (i = 0; i < npages; ++i) + if (eq->page_list[i].buf) + pci_free_consistent(dev->pdev, PAGE_SIZE, + eq->page_list[i].buf, + pci_unmap_addr(&eq->page_list[i], + mapping)); + + kfree(eq->page_list); + kfree(dma_list); + kfree(mailbox); + + err_out: + return err; +} + +static void mthca_free_eq(struct mthca_dev *dev, + struct mthca_eq *eq) +{ + void *mailbox = NULL; + int err; + u8 status; + int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / + PAGE_SIZE; + int i; + + mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA, + GFP_KERNEL); + if (!mailbox) + return; + + err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox), + eq->eqn, &status); + if (err) + mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err); + if (status) + mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", + status); + + if (0) { + mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn); + for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) { + if (i % 4 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4)); + if ((i + 1) % 4 == 0) + printk("\n"); + } + } + + + mthca_free_mr(dev, &eq->mr); + for (i = 0; i < npages; ++i) + pci_free_consistent(dev->pdev, PAGE_SIZE, + eq->page_list[i].buf, + pci_unmap_addr(&eq->page_list[i], mapping)); + + kfree(eq->page_list); + kfree(mailbox); +} + +static void mthca_free_irqs(struct mthca_dev *dev) +{ + int i; + + if (dev->eq_table.have_irq) + free_irq(dev->pdev->irq, dev); + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (dev->eq_table.eq[i].have_irq) + free_irq(dev->eq_table.eq[i].msi_x_vector, + dev->eq_table.eq + i); +} + +int __devinit mthca_init_eq_table(struct mthca_dev *dev) +{ + int err; + u8 status; + u8 intr; + int i; + + err = mthca_alloc_init(&dev->eq_table.alloc, + dev->limits.num_eqs, + dev->limits.num_eqs - 1, + dev->limits.reserved_eqs); + if (err) + return err; + + if (dev->mthca_flags & MTHCA_FLAG_MSI || + dev->mthca_flags & MTHCA_FLAG_MSI_X) { + dev->eq_table.clr_mask = 0; + } else { + dev->eq_table.clr_mask = + swab32(1 << (dev->eq_table.inta_pin & 31)); + dev->eq_table.clr_int = dev->clr_base + + (dev->eq_table.inta_pin < 31 ? 4 : 0); + } + + intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ? + 128 : dev->eq_table.inta_pin; + + err = mthca_create_eq(dev, dev->limits.num_cqs, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, + &dev->eq_table.eq[MTHCA_EQ_COMP]); + if (err) + goto err_out_free; + + err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr, + &dev->eq_table.eq[MTHCA_EQ_ASYNC]); + if (err) + goto err_out_comp; + + err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 
130 : intr, + &dev->eq_table.eq[MTHCA_EQ_CMD]); + if (err) + goto err_out_async; + + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { + static const char *eq_name[] = { + [MTHCA_EQ_COMP] = DRV_NAME " (comp)", + [MTHCA_EQ_ASYNC] = DRV_NAME " (async)", + [MTHCA_EQ_CMD] = DRV_NAME " (cmd)" + }; + + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + err = request_irq(dev->eq_table.eq[i].msi_x_vector, + mthca_msi_x_interrupt, 0, + eq_name[i], dev->eq_table.eq + i); + if (err) + goto err_out_cmd; + dev->eq_table.eq[i].have_irq = 1; + } + } else { + err = request_irq(dev->pdev->irq, mthca_interrupt, SA_SHIRQ, + DRV_NAME, dev); + if (err) + goto err_out_cmd; + dev->eq_table.have_irq = 1; + } + + err = mthca_MAP_EQ(dev, async_mask(dev), + 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); + if (err) + mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err); + if (status) + mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status); + + err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, + 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); + if (err) + mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err); + if (status) + mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status); + + return 0; + +err_out_cmd: + mthca_free_irqs(dev); + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]); + +err_out_async: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]); + +err_out_comp: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]); + +err_out_free: + mthca_alloc_cleanup(&dev->eq_table.alloc); + return err; +} + +void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev) +{ + u8 status; + int i; + + mthca_free_irqs(dev); + + mthca_MAP_EQ(dev, async_mask(dev), + 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); + mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, + 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + mthca_free_eq(dev, &dev->eq_table.eq[i]); + + mthca_alloc_cleanup(&dev->eq_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c new file mode 100644 index 000000000000..9edf2f07b941 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <ib_verbs.h> +#include <ib_mad.h> +#include <ib_smi.h> + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +enum { + MTHCA_VENDOR_CLASS1 = 0x9, + MTHCA_VENDOR_CLASS2 = 0xa +}; + +struct mthca_trap_mad { + struct ib_mad *mad; + DECLARE_PCI_UNMAP_ADDR(mapping) +}; + +static void update_sm_ah(struct mthca_dev *dev, + u8 port_num, u16 lid, u8 sl) +{ + struct ib_ah *new_ah; + struct ib_ah_attr ah_attr; + unsigned long flags; + + if (!dev->send_agent[port_num - 1][0]) + return; + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = lid; + ah_attr.sl = sl; + ah_attr.port_num = port_num; + + new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, + &ah_attr); + if (IS_ERR(new_ah)) + return; + + spin_lock_irqsave(&dev->sm_lock, flags); + if (dev->sm_ah[port_num - 1]) + ib_destroy_ah(dev->sm_ah[port_num - 1]); + dev->sm_ah[port_num - 1] = new_ah; + spin_unlock_irqrestore(&dev->sm_lock, flags); +} + +/* + * Snoop SM MADs for port info and P_Key table sets, so we can + * synthesize LID change and P_Key change events. + */ +static void smp_snoop(struct ib_device *ibdev, + u8 port_num, + struct ib_mad *mad) +{ + struct ib_event event; + + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_SET) { + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { + update_sm_ah(to_mdev(ibdev), port_num, + be16_to_cpup((__be16 *) (mad->data + 58)), + (*(u8 *) (mad->data + 76)) & 0xf); + + event.device = ibdev; + event.event = IB_EVENT_LID_CHANGE; + event.element.port_num = port_num; + ib_dispatch_event(&event); + } + + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { + event.device = ibdev; + event.event = IB_EVENT_PKEY_CHANGE; + event.element.port_num = port_num; + ib_dispatch_event(&event); + } + } +} + +static void forward_trap(struct mthca_dev *dev, + u8 port_num, + struct ib_mad *mad) +{ + int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; + struct mthca_trap_mad *tmad; + struct ib_sge gather_list; + struct ib_send_wr *bad_wr, wr = { + .opcode = IB_WR_SEND, + .sg_list = &gather_list, + .num_sge = 1, + .send_flags = IB_SEND_SIGNALED, + .wr = { + .ud = { + .remote_qpn = qpn, + .remote_qkey = qpn ? IB_QP1_QKEY : 0, + .timeout_ms = 0 + } + } + }; + struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; + int ret; + unsigned long flags; + + if (agent) { + tmad = kmalloc(sizeof *tmad, GFP_KERNEL); + if (!tmad) + return; + + tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL); + if (!tmad->mad) { + kfree(tmad); + return; + } + + memcpy(tmad->mad, mad, sizeof *mad); + + wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr; + wr.wr_id = (unsigned long) tmad; + + gather_list.addr = dma_map_single(agent->device->dma_device, + tmad->mad, + sizeof *tmad->mad, + DMA_TO_DEVICE); + gather_list.length = sizeof *tmad->mad; + gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey; + pci_unmap_addr_set(tmad, mapping, gather_list.addr); + + /* + * We rely here on the fact that MLX QPs don't use the + * address handle after the send is posted (this is + * wrong following the IB spec strictly, but we know + * it's OK for our devices). 
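+ *
+ * If that ever changes, sm_ah will need to be reference
+ * counted so it can't be destroyed while a send that uses
+ * it is still outstanding.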
+ */ + spin_lock_irqsave(&dev->sm_lock, flags); + wr.wr.ud.ah = dev->sm_ah[port_num - 1]; + if (wr.wr.ud.ah) + ret = ib_post_send_mad(agent, &wr, &bad_wr); + else + ret = -EINVAL; + spin_unlock_irqrestore(&dev->sm_lock, flags); + + if (ret) { + dma_unmap_single(agent->device->dma_device, + pci_unmap_addr(tmad, mapping), + sizeof *tmad->mad, + DMA_TO_DEVICE); + kfree(tmad->mad); + kfree(tmad); + } + } +} + +int mthca_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + u16 slid, + struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + int err; + u8 status; + + /* Forward locally generated traps to the SM */ + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && + slid == 0) { + forward_trap(to_mdev(ibdev), port_num, in_mad); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } + + /* + * Only handle SM gets, sets and trap represses for SM class + * + * Only handle PMA and Mellanox vendor-specific class gets and + * sets for other classes. + */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* + * Don't process SMInfo queries or vendor-specific + * MADs -- the SMA can't handle them. + */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + IB_SMP_ATTR_VENDOR_MASK)) + return IB_MAD_RESULT_SUCCESS; + } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || + in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } else + return IB_MAD_RESULT_SUCCESS; + + err = mthca_MAD_IFC(to_mdev(ibdev), + !!(mad_flags & IB_MAD_IGNORE_MKEY), + port_num, in_mad, out_mad, + &status); + if (err) { + mthca_err(to_mdev(ibdev), "MAD_IFC failed\n"); + return IB_MAD_RESULT_FAILURE; + } + if (status == MTHCA_CMD_STAT_BAD_PKT) + return IB_MAD_RESULT_SUCCESS; + if (status) { + mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n", + status); + return IB_MAD_RESULT_FAILURE; + } + + if (!out_mad->mad_hdr.status) + smp_snoop(ibdev, port_num, in_mad); + + /* set return bit in status of directed route responses */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + struct mthca_trap_mad *tmad = + (void *) (unsigned long) mad_send_wc->wr_id; + + dma_unmap_single(agent->device->dma_device, + pci_unmap_addr(tmad, mapping), + sizeof *tmad->mad, + DMA_TO_DEVICE); + kfree(tmad->mad); + kfree(tmad); +} + +int mthca_create_agents(struct mthca_dev *dev) +{ + struct ib_mad_agent *agent; + int p, q; + + spin_lock_init(&dev->sm_lock); + + for (p = 0; p < dev->limits.num_ports; ++p) + for (q = 0; q <= 1; ++q) { + agent = ib_register_mad_agent(&dev->ib_dev, p + 1, + q ? 
IB_QPT_GSI : IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) + goto err; + dev->send_agent[p][q] = agent; + } + + return 0; + +err: + for (p = 0; p < dev->limits.num_ports; ++p) + for (q = 0; q <= 1; ++q) + if (dev->send_agent[p][q]) + ib_unregister_mad_agent(dev->send_agent[p][q]); + + return PTR_ERR(agent); +} + +void mthca_free_agents(struct mthca_dev *dev) +{ + struct ib_mad_agent *agent; + int p, q; + + for (p = 0; p < dev->limits.num_ports; ++p) { + for (q = 0; q <= 1; ++q) { + agent = dev->send_agent[p][q]; + dev->send_agent[p][q] = NULL; + ib_unregister_mad_agent(agent); + } + + if (dev->sm_ah[p]) + ib_destroy_ah(dev->sm_ah[p]); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c new file mode 100644 index 000000000000..f82f7f0a2904 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -0,0 +1,936 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $ + */ + +#include <linux/config.h> +#include <linux/version.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/interrupt.h> + +#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL +#include <asm/cpufeature.h> +#endif + +#include "mthca_dev.h" +#include "mthca_config_reg.h" +#include "mthca_cmd.h" +#include "mthca_profile.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +#ifdef CONFIG_PCI_MSI + +static int msi_x = 0; +module_param(msi_x, int, 0444); +MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); + +static int msi = 0; +module_param(msi, int, 0444); +MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); + +#else /* CONFIG_PCI_MSI */ + +#define msi_x (0) +#define msi (0) + +#endif /* CONFIG_PCI_MSI */ + +static const char mthca_version[] __devinitdata = + "ib_mthca: Mellanox InfiniBand HCA driver v" + DRV_VERSION " (" DRV_RELDATE ")\n"; + +static int __devinit mthca_tune_pci(struct mthca_dev *mdev) +{ + int cap; + u16 val; + + /* First try to max out Read Byte Count */ + cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX); + if (cap) { + if (pci_read_config_word(mdev->pdev, cap + PCI_X_CMD, &val)) { + mthca_err(mdev, "Couldn't read PCI-X command register, " + "aborting.\n"); + return -ENODEV; + } + val = (val & ~PCI_X_CMD_MAX_READ) | (3 << 2); + if (pci_write_config_word(mdev->pdev, cap + PCI_X_CMD, val)) { + mthca_err(mdev, "Couldn't write PCI-X command register, " + "aborting.\n"); + return -ENODEV; + } + } else if (mdev->hca_type == TAVOR) + mthca_info(mdev, "No PCI-X capability, not setting RBC.\n"); + + cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP); + if (cap) { + if (pci_read_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, &val)) { + mthca_err(mdev, "Couldn't read PCI Express device control " + "register, aborting.\n"); + return -ENODEV; + } + val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12); + if (pci_write_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, val)) { + mthca_err(mdev, "Couldn't write PCI Express device control " + "register, aborting.\n"); + return -ENODEV; + } + } else if (mdev->hca_type == ARBEL_NATIVE || + mdev->hca_type == ARBEL_COMPAT) + mthca_info(mdev, "No PCI Express capability, " + "not setting Max Read Request Size.\n"); + + return 0; +} + +static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) +{ + int err; + u8 status; + + err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status); + if (err) { + mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + return err; + } + if (status) { + mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, " + "aborting.\n", status); + return -EINVAL; + } + if (dev_lim->min_page_sz > PAGE_SIZE) { + mthca_err(mdev, "HCA minimum page size of %d bigger than " + "kernel PAGE_SIZE of %ld, aborting.\n", + dev_lim->min_page_sz, PAGE_SIZE); + return -ENODEV; + } + if (dev_lim->num_ports > MTHCA_MAX_PORTS) { + mthca_err(mdev, "HCA has %d ports, but we only support %d, " + "aborting.\n", + dev_lim->num_ports, MTHCA_MAX_PORTS); + return -ENODEV; + } + + mdev->limits.num_ports = dev_lim->num_ports; + mdev->limits.vl_cap = dev_lim->max_vl; + mdev->limits.mtu_cap = dev_lim->max_mtu; + mdev->limits.gid_table_len = dev_lim->max_gids; + mdev->limits.pkey_table_len = dev_lim->max_pkeys; + mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; + 
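+ /* Most of the reserved_* values copied below are resources the
+  * firmware keeps for its own use; the allocators skip over
+  * them. */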
mdev->limits.max_sg = dev_lim->max_sg; + mdev->limits.reserved_qps = dev_lim->reserved_qps; + mdev->limits.reserved_srqs = dev_lim->reserved_srqs; + mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + mdev->limits.reserved_cqs = dev_lim->reserved_cqs; + mdev->limits.reserved_eqs = dev_lim->reserved_eqs; + mdev->limits.reserved_mtts = dev_lim->reserved_mtts; + mdev->limits.reserved_mrws = dev_lim->reserved_mrws; + mdev->limits.reserved_uars = dev_lim->reserved_uars; + mdev->limits.reserved_pds = dev_lim->reserved_pds; + + if (dev_lim->flags & DEV_LIM_FLAG_SRQ) + mdev->mthca_flags |= MTHCA_FLAG_SRQ; + + return 0; +} + +static int __devinit mthca_init_tavor(struct mthca_dev *mdev) +{ + u8 status; + int err; + struct mthca_dev_lim dev_lim; + struct mthca_init_hca_param init_hca; + struct mthca_adapter adapter; + + err = mthca_SYS_EN(mdev, &status); + if (err) { + mthca_err(mdev, "SYS_EN command failed, aborting.\n"); + return err; + } + if (status) { + mthca_err(mdev, "SYS_EN returned status 0x%02x, " + "aborting.\n", status); + return -EINVAL; + } + + err = mthca_QUERY_FW(mdev, &status); + if (err) { + mthca_err(mdev, "QUERY_FW command failed, aborting.\n"); + goto err_out_disable; + } + if (status) { + mthca_err(mdev, "QUERY_FW returned status 0x%02x, " + "aborting.\n", status); + err = -EINVAL; + goto err_out_disable; + } + err = mthca_QUERY_DDR(mdev, &status); + if (err) { + mthca_err(mdev, "QUERY_DDR command failed, aborting.\n"); + goto err_out_disable; + } + if (status) { + mthca_err(mdev, "QUERY_DDR returned status 0x%02x, " + "aborting.\n", status); + err = -EINVAL; + goto err_out_disable; + } + + err = mthca_dev_lim(mdev, &dev_lim); + + err = mthca_make_profile(mdev, &dev_lim, &init_hca); + if (err) + goto err_out_disable; + + err = mthca_INIT_HCA(mdev, &init_hca, &status); + if (err) { + mthca_err(mdev, "INIT_HCA command failed, aborting.\n"); + goto err_out_disable; + } + if (status) { + mthca_err(mdev, "INIT_HCA returned status 0x%02x, " + "aborting.\n", status); + err = -EINVAL; + goto err_out_disable; + } + + err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); + if (err) { + mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); + goto err_out_disable; + } + if (status) { + mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " + "aborting.\n", status); + err = -EINVAL; + goto err_out_close; + } + + mdev->eq_table.inta_pin = adapter.inta_pin; + mdev->rev_id = adapter.revision_id; + + return 0; + +err_out_close: + mthca_CLOSE_HCA(mdev, 0, &status); + +err_out_disable: + mthca_SYS_DIS(mdev, &status); + + return err; +} + +static int __devinit mthca_load_fw(struct mthca_dev *mdev) +{ + u8 status; + int err; + int num_ent, num_sg, fw_pages, cur_order; + int i; + + /* FIXME: use HCA-attached memory for FW if present */ + + mdev->fw.arbel.mem = kmalloc(sizeof *mdev->fw.arbel.mem * + mdev->fw.arbel.fw_pages, + GFP_KERNEL); + if (!mdev->fw.arbel.mem) { + mthca_err(mdev, "Couldn't allocate FW area, aborting.\n"); + return -ENOMEM; + } + + memset(mdev->fw.arbel.mem, 0, + sizeof *mdev->fw.arbel.mem * mdev->fw.arbel.fw_pages); + + fw_pages = mdev->fw.arbel.fw_pages; + num_ent = 0; + + /* + * We allocate in as big chunks as we can, up to a maximum of + * 256 KB per chunk. + */ + cur_order = get_order(1 << 18); + + while (fw_pages > 0) { + while (1 << cur_order > fw_pages) + --cur_order; + + /* + * We allocate with GFP_HIGHUSER because only the + * firmware is going to touch these pages, so there's + * no need for a kernel virtual address. 
We use + * __GFP_NOWARN because we'll deal with any allocation + * failures ourselves. + */ + mdev->fw.arbel.mem[num_ent].page = alloc_pages(GFP_HIGHUSER | __GFP_NOWARN, + cur_order); + mdev->fw.arbel.mem[num_ent].length = PAGE_SIZE << cur_order; + if (!mdev->fw.arbel.mem[num_ent].page) { + --cur_order; + if (cur_order < 0) { + mthca_err(mdev, "Couldn't allocate FW area, aborting.\n"); + err = -ENOMEM; + goto err_free; + } + } else { + ++num_ent; + fw_pages -= 1 << cur_order; + } + } + + num_sg = pci_map_sg(mdev->pdev, mdev->fw.arbel.mem, num_ent, + PCI_DMA_BIDIRECTIONAL); + if (num_sg <= 0) { + mthca_err(mdev, "Couldn't allocate FW area, aborting.\n"); + err = -ENOMEM; + goto err_free; + } + + err = mthca_MAP_FA(mdev, num_sg, mdev->fw.arbel.mem, &status); + if (err) { + mthca_err(mdev, "MAP_FA command failed, aborting.\n"); + goto err_unmap; + } + if (status) { + mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status); + err = -EINVAL; + goto err_unmap; + } + err = mthca_RUN_FW(mdev, &status); + if (err) { + mthca_err(mdev, "RUN_FW command failed, aborting.\n"); + goto err_unmap_fa; + } + if (status) { + mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status); + err = -EINVAL; + goto err_unmap_fa; + } + + return 0; + +err_unmap_fa: + mthca_UNMAP_FA(mdev, &status); + +err_unmap: + pci_unmap_sg(mdev->pdev, mdev->fw.arbel.mem, + mdev->fw.arbel.fw_pages, PCI_DMA_BIDIRECTIONAL); +err_free: + for (i = 0; i < mdev->fw.arbel.fw_pages; ++i) + if (mdev->fw.arbel.mem[i].page) + __free_pages(mdev->fw.arbel.mem[i].page, + get_order(mdev->fw.arbel.mem[i].length)); + kfree(mdev->fw.arbel.mem); + return err; +} + +static int __devinit mthca_init_arbel(struct mthca_dev *mdev) +{ + struct mthca_dev_lim dev_lim; + u8 status; + int err; + + err = mthca_QUERY_FW(mdev, &status); + if (err) { + mthca_err(mdev, "QUERY_FW command failed, aborting.\n"); + return err; + } + if (status) { + mthca_err(mdev, "QUERY_FW returned status 0x%02x, " + "aborting.\n", status); + return -EINVAL; + } + + err = mthca_ENABLE_LAM(mdev, &status); + if (err) { + mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n"); + return err; + } + if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) { + mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n"); + mdev->mthca_flags |= MTHCA_FLAG_NO_LAM; + } else if (status) { + mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, " + "aborting.\n", status); + return -EINVAL; + } + + err = mthca_load_fw(mdev); + if (err) { + mthca_err(mdev, "Failed to start FW, aborting.\n"); + goto err_out_disable; + } + + err = mthca_dev_lim(mdev, &dev_lim); + if (err) { + mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + goto err_out_disable; + } + + mthca_warn(mdev, "Sorry, native MT25208 mode support is not done, " + "aborting.\n"); + err = -ENODEV; + +err_out_disable: + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev, &status); + return err; +} + +static int __devinit mthca_init_hca(struct mthca_dev *mdev) +{ + if (mdev->hca_type == ARBEL_NATIVE) + return mthca_init_arbel(mdev); + else + return mthca_init_tavor(mdev); +} + +static int __devinit mthca_setup_hca(struct mthca_dev *dev) +{ + int err; + + MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock); + + err = mthca_init_pd_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "protection domain table, aborting.\n"); + return err; + } + + err = mthca_init_mr_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "memory region table, aborting.\n"); + goto 
err_out_pd_table_free; + } + + err = mthca_pd_alloc(dev, &dev->driver_pd); + if (err) { + mthca_err(dev, "Failed to create driver PD, " + "aborting.\n"); + goto err_out_mr_table_free; + } + + err = mthca_init_eq_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "event queue table, aborting.\n"); + goto err_out_pd_free; + } + + err = mthca_cmd_use_events(dev); + if (err) { + mthca_err(dev, "Failed to switch to event-driven " + "firmware commands, aborting.\n"); + goto err_out_eq_table_free; + } + + err = mthca_init_cq_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "completion queue table, aborting.\n"); + goto err_out_cmd_poll; + } + + err = mthca_init_qp_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "queue pair table, aborting.\n"); + goto err_out_cq_table_free; + } + + err = mthca_init_av_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "address vector table, aborting.\n"); + goto err_out_qp_table_free; + } + + err = mthca_init_mcg_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "multicast group table, aborting.\n"); + goto err_out_av_table_free; + } + + return 0; + +err_out_av_table_free: + mthca_cleanup_av_table(dev); + +err_out_qp_table_free: + mthca_cleanup_qp_table(dev); + +err_out_cq_table_free: + mthca_cleanup_cq_table(dev); + +err_out_cmd_poll: + mthca_cmd_use_polling(dev); + +err_out_eq_table_free: + mthca_cleanup_eq_table(dev); + +err_out_pd_free: + mthca_pd_free(dev, &dev->driver_pd); + +err_out_mr_table_free: + mthca_cleanup_mr_table(dev); + +err_out_pd_table_free: + mthca_cleanup_pd_table(dev); + return err; +} + +static int __devinit mthca_request_regions(struct pci_dev *pdev, + int ddr_hidden) +{ + int err; + + /* + * We request our first BAR in two chunks, since the MSI-X + * vector table is right in the middle. + * + * This is why we can't just use pci_request_regions() -- if + * we did then setting up MSI-X would fail, since the PCI core + * wants to do request_mem_region on the MSI-X vector table. 
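+ *
+ * So the HCR and clear-interrupt registers are requested
+ * explicitly here, and the rest of BAR 0 (including the
+ * MSI-X vector table) is left unclaimed.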
+ */ + if (!request_mem_region(pci_resource_start(pdev, 0) + + MTHCA_HCR_BASE, + MTHCA_MAP_HCR_SIZE, + DRV_NAME)) + return -EBUSY; + + if (!request_mem_region(pci_resource_start(pdev, 0) + + MTHCA_CLR_INT_BASE, + MTHCA_CLR_INT_SIZE, + DRV_NAME)) { + err = -EBUSY; + goto err_out_bar0_beg; + } + + err = pci_request_region(pdev, 2, DRV_NAME); + if (err) + goto err_out_bar0_end; + + if (!ddr_hidden) { + err = pci_request_region(pdev, 4, DRV_NAME); + if (err) + goto err_out_bar2; + } + + return 0; + +err_out_bar0_beg: + release_mem_region(pci_resource_start(pdev, 0) + + MTHCA_HCR_BASE, + MTHCA_MAP_HCR_SIZE); + +err_out_bar0_end: + release_mem_region(pci_resource_start(pdev, 0) + + MTHCA_CLR_INT_BASE, + MTHCA_CLR_INT_SIZE); + +err_out_bar2: + pci_release_region(pdev, 2); + return err; +} + +static void mthca_release_regions(struct pci_dev *pdev, + int ddr_hidden) +{ + release_mem_region(pci_resource_start(pdev, 0) + + MTHCA_HCR_BASE, + MTHCA_MAP_HCR_SIZE); + release_mem_region(pci_resource_start(pdev, 0) + + MTHCA_CLR_INT_BASE, + MTHCA_CLR_INT_SIZE); + pci_release_region(pdev, 2); + if (!ddr_hidden) + pci_release_region(pdev, 4); +} + +static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev) +{ + struct msix_entry entries[3]; + int err; + + entries[0].entry = 0; + entries[1].entry = 1; + entries[2].entry = 2; + + err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries)); + if (err) { + if (err > 0) + mthca_info(mdev, "Only %d MSI-X vectors available, " + "not using MSI-X\n", err); + return err; + } + + mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector; + mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector; + mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector; + + return 0; +} + +static void mthca_close_hca(struct mthca_dev *mdev) +{ + u8 status; + int i; + + mthca_CLOSE_HCA(mdev, 0, &status); + + if (mdev->hca_type == ARBEL_NATIVE) { + mthca_UNMAP_FA(mdev, &status); + + pci_unmap_sg(mdev->pdev, mdev->fw.arbel.mem, + mdev->fw.arbel.fw_pages, PCI_DMA_BIDIRECTIONAL); + + for (i = 0; i < mdev->fw.arbel.fw_pages; ++i) + if (mdev->fw.arbel.mem[i].page) + __free_pages(mdev->fw.arbel.mem[i].page, + get_order(mdev->fw.arbel.mem[i].length)); + kfree(mdev->fw.arbel.mem); + + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev, &status); + } else + mthca_SYS_DIS(mdev, &status); +} + +static int __devinit mthca_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + static int mthca_version_printed = 0; + int ddr_hidden = 0; + int err; + unsigned long mthca_base; + struct mthca_dev *mdev; + + if (!mthca_version_printed) { + printk(KERN_INFO "%s", mthca_version); + ++mthca_version_printed; + } + + printk(KERN_INFO PFX "Initializing %s (%s)\n", + pci_pretty_name(pdev), pci_name(pdev)); + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device, " + "aborting.\n"); + return err; + } + + /* + * Check for BARs. 
We expect 0: 1MB, 2: 8MB, 4: DDR (may not + * be present) + */ + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || + pci_resource_len(pdev, 0) != 1 << 20) { + dev_err(&pdev->dev, "Missing DCS, aborting."); + err = -ENODEV; + goto err_out_disable_pdev; + } + if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) || + pci_resource_len(pdev, 2) != 1 << 23) { + dev_err(&pdev->dev, "Missing UAR, aborting."); + err = -ENODEV; + goto err_out_disable_pdev; + } + if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM)) + ddr_hidden = 1; + + err = mthca_request_regions(pdev, ddr_hidden); + if (err) { + dev_err(&pdev->dev, "Cannot obtain PCI resources, " + "aborting.\n"); + goto err_out_disable_pdev; + } + + pci_set_master(pdev); + + err = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); + err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); + goto err_out_free_res; + } + } + err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " + "consistent PCI DMA mask.\n"); + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " + "aborting.\n"); + goto err_out_free_res; + } + } + + mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev); + if (!mdev) { + dev_err(&pdev->dev, "Device struct alloc failed, " + "aborting.\n"); + err = -ENOMEM; + goto err_out_free_res; + } + + mdev->pdev = pdev; + mdev->hca_type = id->driver_data; + + if (ddr_hidden) + mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; + + /* + * Now reset the HCA before we touch the PCI capabilities or + * attempt a firmware command, since a boot ROM may have left + * the HCA in an undefined state. 
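+ *
+ * (mthca_reset() is expected to save and restore PCI config
+ * space around the actual reset, so the device remains
+ * accessible afterwards.)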
+ */ + err = mthca_reset(mdev); + if (err) { + mthca_err(mdev, "Failed to reset HCA, aborting.\n"); + goto err_out_free_dev; + } + + if (msi_x && !mthca_enable_msi_x(mdev)) + mdev->mthca_flags |= MTHCA_FLAG_MSI_X; + if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) && + !pci_enable_msi(pdev)) + mdev->mthca_flags |= MTHCA_FLAG_MSI; + + sema_init(&mdev->cmd.hcr_sem, 1); + sema_init(&mdev->cmd.poll_sem, 1); + mdev->cmd.use_events = 0; + + mthca_base = pci_resource_start(pdev, 0); + mdev->hcr = ioremap(mthca_base + MTHCA_HCR_BASE, MTHCA_MAP_HCR_SIZE); + if (!mdev->hcr) { + mthca_err(mdev, "Couldn't map command register, " + "aborting.\n"); + err = -ENOMEM; + goto err_out_free_dev; + } + mdev->clr_base = ioremap(mthca_base + MTHCA_CLR_INT_BASE, + MTHCA_CLR_INT_SIZE); + if (!mdev->clr_base) { + mthca_err(mdev, "Couldn't map command register, " + "aborting.\n"); + err = -ENOMEM; + goto err_out_iounmap; + } + + mthca_base = pci_resource_start(pdev, 2); + mdev->kar = ioremap(mthca_base + PAGE_SIZE * MTHCA_KAR_PAGE, PAGE_SIZE); + if (!mdev->kar) { + mthca_err(mdev, "Couldn't map kernel access region, " + "aborting.\n"); + err = -ENOMEM; + goto err_out_iounmap_clr; + } + + err = mthca_tune_pci(mdev); + if (err) + goto err_out_iounmap_kar; + + err = mthca_init_hca(mdev); + if (err) + goto err_out_iounmap_kar; + + err = mthca_setup_hca(mdev); + if (err) + goto err_out_close; + + err = mthca_register_device(mdev); + if (err) + goto err_out_cleanup; + + err = mthca_create_agents(mdev); + if (err) + goto err_out_unregister; + + pci_set_drvdata(pdev, mdev); + + return 0; + +err_out_unregister: + mthca_unregister_device(mdev); + +err_out_cleanup: + mthca_cleanup_mcg_table(mdev); + mthca_cleanup_av_table(mdev); + mthca_cleanup_qp_table(mdev); + mthca_cleanup_cq_table(mdev); + mthca_cmd_use_polling(mdev); + mthca_cleanup_eq_table(mdev); + + mthca_pd_free(mdev, &mdev->driver_pd); + + mthca_cleanup_mr_table(mdev); + mthca_cleanup_pd_table(mdev); + +err_out_close: + mthca_close_hca(mdev); + +err_out_iounmap_kar: + iounmap(mdev->kar); + +err_out_iounmap_clr: + iounmap(mdev->clr_base); + +err_out_iounmap: + iounmap(mdev->hcr); + +err_out_free_dev: + if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) + pci_disable_msix(pdev); + if (mdev->mthca_flags & MTHCA_FLAG_MSI) + pci_disable_msi(pdev); + + ib_dealloc_device(&mdev->ib_dev); + +err_out_free_res: + mthca_release_regions(pdev, ddr_hidden); + +err_out_disable_pdev: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static void __devexit mthca_remove_one(struct pci_dev *pdev) +{ + struct mthca_dev *mdev = pci_get_drvdata(pdev); + u8 status; + int p; + + if (mdev) { + mthca_free_agents(mdev); + mthca_unregister_device(mdev); + + for (p = 1; p <= mdev->limits.num_ports; ++p) + mthca_CLOSE_IB(mdev, p, &status); + + mthca_cleanup_mcg_table(mdev); + mthca_cleanup_av_table(mdev); + mthca_cleanup_qp_table(mdev); + mthca_cleanup_cq_table(mdev); + mthca_cmd_use_polling(mdev); + mthca_cleanup_eq_table(mdev); + + mthca_pd_free(mdev, &mdev->driver_pd); + + mthca_cleanup_mr_table(mdev); + mthca_cleanup_pd_table(mdev); + + mthca_close_hca(mdev); + + iounmap(mdev->hcr); + iounmap(mdev->clr_base); + + if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) + pci_disable_msix(pdev); + if (mdev->mthca_flags & MTHCA_FLAG_MSI) + pci_disable_msi(pdev); + + ib_dealloc_device(&mdev->ib_dev); + mthca_release_regions(pdev, mdev->mthca_flags & + MTHCA_FLAG_DDR_HIDDEN); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + } +} + +static struct pci_device_id mthca_pci_table[] = 
{ + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR), + .driver_data = TAVOR }, + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR), + .driver_data = TAVOR }, + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT), + .driver_data = ARBEL_COMPAT }, + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT), + .driver_data = ARBEL_COMPAT }, + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL), + .driver_data = ARBEL_NATIVE }, + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL), + .driver_data = ARBEL_NATIVE }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mthca_pci_table); + +static struct pci_driver mthca_driver = { + .name = "ib_mthca", + .id_table = mthca_pci_table, + .probe = mthca_init_one, + .remove = __devexit_p(mthca_remove_one) +}; + +static int __init mthca_init(void) +{ + int ret; + + /* + * TODO: measure whether dynamically choosing doorbell code at + * runtime affects our performance. Is there a "magic" way to + * choose without having to follow a function pointer every + * time we ring a doorbell? + */ +#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL + if (!cpu_has_xmm) { + printk(KERN_ERR PFX "mthca was compiled with SSE doorbell code, but\n"); + printk(KERN_ERR PFX "the current CPU does not support SSE.\n"); + printk(KERN_ERR PFX "Turn off CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL " + "and recompile.\n"); + return -ENODEV; + } +#endif + + ret = pci_register_driver(&mthca_driver); + return ret < 0 ? ret : 0; +} + +static void __exit mthca_cleanup(void) +{ + pci_unregister_driver(&mthca_driver); +} + +module_init(mthca_init); +module_exit(mthca_cleanup); diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c new file mode 100644 index 000000000000..70a6553a588e --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/init.h> + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +enum { + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) +}; + +struct mthca_mgm { + u32 next_gid_index; + u32 reserved[3]; + u8 gid[16]; + u32 qp[MTHCA_QP_PER_MGM]; +}; + +static const u8 zero_gid[16]; /* automatically initialized to 0 */ + +/* + * Caller must hold MCG table semaphore. gid and mgm parameters must + * be properly aligned for command interface. + * + * Returns 0 unless a firmware command error occurs. + * + * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1 + * and *mgm holds MGM entry. + * + * if GID is found in AMGM, *index = index in AMGM, *prev = index of + * previous entry in hash chain and *mgm holds AMGM entry. + * + * If no AMGM exists for given gid, *index = -1, *prev = index of last + * entry in hash chain and *mgm holds end of hash chain. + */ +static int find_mgm(struct mthca_dev *dev, + u8 *gid, struct mthca_mgm *mgm, + u16 *hash, int *prev, int *index) +{ + void *mailbox; + u8 *mgid; + int err; + u8 status; + + mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); + if (!mailbox) + return -ENOMEM; + mgid = MAILBOX_ALIGN(mailbox); + + memcpy(mgid, gid, 16); + + err = mthca_MGID_HASH(dev, mgid, hash, &status); + if (err) + goto out; + if (status) { + mthca_err(dev, "MGID_HASH returned status %02x\n", status); + err = -EINVAL; + goto out; + } + + if (0) + mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:" + "%04x:%04x:%04x:%04x is %04x\n", + be16_to_cpu(((u16 *) gid)[0]), be16_to_cpu(((u16 *) gid)[1]), + be16_to_cpu(((u16 *) gid)[2]), be16_to_cpu(((u16 *) gid)[3]), + be16_to_cpu(((u16 *) gid)[4]), be16_to_cpu(((u16 *) gid)[5]), + be16_to_cpu(((u16 *) gid)[6]), be16_to_cpu(((u16 *) gid)[7]), + *hash); + + *index = *hash; + *prev = -1; + + do { + err = mthca_READ_MGM(dev, *index, mgm, &status); + if (err) + goto out; + if (status) { + mthca_err(dev, "READ_MGM returned status %02x\n", status); + return -EINVAL; + } + + if (!memcmp(mgm->gid, zero_gid, 16)) { + if (*index != *hash) { + mthca_err(dev, "Found zero MGID in AMGM.\n"); + err = -EINVAL; + } + goto out; + } + + if (!memcmp(mgm->gid, gid, 16)) + goto out; + + *prev = *index; + *index = be32_to_cpu(mgm->next_gid_index) >> 5; + } while (*index); + + *index = -1; + + out: + kfree(mailbox); + return err; +} + +int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + void *mailbox; + struct mthca_mgm *mgm; + u16 hash; + int index, prev; + int link = 0; + int i; + int err; + u8 status; + + mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); + if (!mailbox) + return -ENOMEM; + mgm = MAILBOX_ALIGN(mailbox); + + if (down_interruptible(&dev->mcg_table.sem)) + return -EINTR; + + err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); + if (err) + goto out; + + if (index != -1) { + if (!memcmp(mgm->gid, zero_gid, 16)) + memcpy(mgm->gid, gid->raw, 16); + } else { + link = 1; + + index = mthca_alloc(&dev->mcg_table.alloc); + if (index == -1) { + mthca_err(dev, "No AMGM entries left\n"); + err = -ENOMEM; + goto out; + } + + err = mthca_READ_MGM(dev, index, mgm, &status); + if (err) + goto out; + if (status) { + mthca_err(dev, "READ_MGM returned status %02x\n", status); + err = -EINVAL; + goto out; + } + + memcpy(mgm->gid, gid->raw, 16); + mgm->next_gid_index = 0; + } + + for (i = 0; i < MTHCA_QP_PER_MGM; ++i) + if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { + 
mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
+			break;
+		}
+
+	if (i == MTHCA_QP_PER_MGM) {
+		mthca_err(dev, "MGM at index %x is full.\n", index);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = mthca_WRITE_MGM(dev, index, mgm, &status);
+	if (err)
+		goto out;
+	if (status) {
+		mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+		err = -EINVAL;
+	}
+
+	if (!link)
+		goto out;
+
+	err = mthca_READ_MGM(dev, prev, mgm, &status);
+	if (err)
+		goto out;
+	if (status) {
+		mthca_err(dev, "READ_MGM returned status %02x\n", status);
+		err = -EINVAL;
+		goto out;
+	}
+
+	mgm->next_gid_index = cpu_to_be32(index << 5);
+
+	err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+	if (err)
+		goto out;
+	if (status) {
+		mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+		err = -EINVAL;
+	}
+
+ out:
+	up(&dev->mcg_table.sem);
+	kfree(mailbox);
+	return err;
+}
+
+int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct mthca_dev *dev = to_mdev(ibqp->device);
+	void *mailbox;
+	struct mthca_mgm *mgm;
+	u16 hash;
+	int prev, index;
+	int i, loc;
+	int err;
+	u8 status;
+
+	mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
+	if (!mailbox)
+		return -ENOMEM;
+	mgm = MAILBOX_ALIGN(mailbox);
+
+	if (down_interruptible(&dev->mcg_table.sem))
+		return -EINTR;
+
+	err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+	if (err)
+		goto out;
+
+	if (index == -1) {
+		mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+			  "not found\n",
+			  be16_to_cpu(((u16 *) gid->raw)[0]),
+			  be16_to_cpu(((u16 *) gid->raw)[1]),
+			  be16_to_cpu(((u16 *) gid->raw)[2]),
+			  be16_to_cpu(((u16 *) gid->raw)[3]),
+			  be16_to_cpu(((u16 *) gid->raw)[4]),
+			  be16_to_cpu(((u16 *) gid->raw)[5]),
+			  be16_to_cpu(((u16 *) gid->raw)[6]),
+			  be16_to_cpu(((u16 *) gid->raw)[7]));
+		err = -EINVAL;
+		goto out;
+	}
+
+	for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) {
+		if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31)))
+			loc = i;
+		if (!(mgm->qp[i] & cpu_to_be32(1 << 31)))
+			break;
+	}
+
+	if (loc == -1) {
+		mthca_err(dev, "QP %06x not found in MGM\n", ibqp->qp_num);
+		err = -EINVAL;
+		goto out;
+	}
+
+	mgm->qp[loc] = mgm->qp[i - 1];
+	mgm->qp[i - 1] = 0;
+
+	err = mthca_WRITE_MGM(dev, index, mgm, &status);
+	if (err)
+		goto out;
+	if (status) {
+		mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (i != 1)
+		goto out;
+
+	if (prev == -1) {
+		/* Remove entry from MGM */
+		if (be32_to_cpu(mgm->next_gid_index) >> 5) {
+			err = mthca_READ_MGM(dev,
+					     be32_to_cpu(mgm->next_gid_index) >> 5,
+					     mgm, &status);
+			if (err)
+				goto out;
+			if (status) {
+				mthca_err(dev, "READ_MGM returned status %02x\n",
+					  status);
+				err = -EINVAL;
+				goto out;
+			}
+		} else
+			memset(mgm->gid, 0, 16);
+
+		err = mthca_WRITE_MGM(dev, index, mgm, &status);
+		if (err)
+			goto out;
+		if (status) {
+			mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+			err = -EINVAL;
+			goto out;
+		}
+	} else {
+		/* Remove entry from AMGM */
+		index = be32_to_cpu(mgm->next_gid_index) >> 5;
+		err = mthca_READ_MGM(dev, prev, mgm, &status);
+		if (err)
+			goto out;
+		if (status) {
+			mthca_err(dev, "READ_MGM returned status %02x\n", status);
+			err = -EINVAL;
+			goto out;
+		}
+
+		mgm->next_gid_index = cpu_to_be32(index << 5);
+
+		err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+		if (err)
+			goto out;
+		if (status) {
+			mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+ out:
+	up(&dev->mcg_table.sem);
+	kfree(mailbox);
+	return err;
+}
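+
+/*
+ * The two functions above are wired up in mthca_provider.c as the
+ * attach_mcast/detach_mcast methods of struct ib_device, so a kernel
+ * consumer reaches them through the verbs layer.  Roughly (sketch
+ * only; the MGID and MLID come from the SA multicast group join):
+ *
+ *	union ib_gid mgid;
+ *	u16 mlid;
+ *	int err;
+ *
+ *	err = ib_attach_mcast(qp, &mgid, mlid);
+ *	...
+ *	err = ib_detach_mcast(qp, &mgid, mlid);
+ */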
+ +int __devinit mthca_init_mcg_table(struct mthca_dev *dev) +{ + int err; + + err = mthca_alloc_init(&dev->mcg_table.alloc, + dev->limits.num_amgms, + dev->limits.num_amgms - 1, + 0); + if (err) + return err; + + init_MUTEX(&dev->mcg_table.sem); + + return 0; +} + +void __devexit mthca_cleanup_mcg_table(struct mthca_dev *dev) +{ + mthca_alloc_cleanup(&dev->mcg_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c new file mode 100644 index 000000000000..869e70587e77 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/errno.h> + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +/* + * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. 
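+ *
+ * All fields are stored in big-endian byte order before being passed
+ * to the SW2HW_MPT firmware command.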
+ */ +struct mthca_mpt_entry { + u32 flags; + u32 page_size; + u32 key; + u32 pd; + u64 start; + u64 length; + u32 lkey; + u32 window_count; + u32 window_count_limit; + u64 mtt_seg; + u32 reserved[3]; +} __attribute__((packed)); + +#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MTHCA_MPT_FLAG_MIO (1 << 17) +#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15) +#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9) +#define MTHCA_MPT_FLAG_REGION (1 << 8) + +#define MTHCA_MTT_FLAG_PRESENT 1 + +/* + * Buddy allocator for MTT segments (currently not very efficient + * since it doesn't keep a free list and just searches linearly + * through the bitmaps) + */ + +static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order) +{ + int o; + int m; + u32 seg; + + spin_lock(&dev->mr_table.mpt_alloc.lock); + + for (o = order; o <= dev->mr_table.max_mtt_order; ++o) { + m = 1 << (dev->mr_table.max_mtt_order - o); + seg = find_first_bit(dev->mr_table.mtt_buddy[o], m); + if (seg < m) + goto found; + } + + spin_unlock(&dev->mr_table.mpt_alloc.lock); + return -1; + + found: + clear_bit(seg, dev->mr_table.mtt_buddy[o]); + + while (o > order) { + --o; + seg <<= 1; + set_bit(seg ^ 1, dev->mr_table.mtt_buddy[o]); + } + + spin_unlock(&dev->mr_table.mpt_alloc.lock); + + seg <<= order; + + return seg; +} + +static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order) +{ + seg >>= order; + + spin_lock(&dev->mr_table.mpt_alloc.lock); + + while (test_bit(seg ^ 1, dev->mr_table.mtt_buddy[order])) { + clear_bit(seg ^ 1, dev->mr_table.mtt_buddy[order]); + seg >>= 1; + ++order; + } + + set_bit(seg, dev->mr_table.mtt_buddy[order]); + + spin_unlock(&dev->mr_table.mpt_alloc.lock); +} + +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_mr *mr) +{ + void *mailbox; + struct mthca_mpt_entry *mpt_entry; + int err; + u8 status; + + might_sleep(); + + mr->order = -1; + mr->ibmr.lkey = mthca_alloc(&dev->mr_table.mpt_alloc); + if (mr->ibmr.lkey == -1) + return -ENOMEM; + mr->ibmr.rkey = mr->ibmr.lkey; + + mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA, + GFP_KERNEL); + if (!mailbox) { + mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey); + return -ENOMEM; + } + mpt_entry = MAILBOX_ALIGN(mailbox); + + mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | + MTHCA_MPT_FLAG_MIO | + MTHCA_MPT_FLAG_PHYSICAL | + MTHCA_MPT_FLAG_REGION | + access); + mpt_entry->page_size = 0; + mpt_entry->key = cpu_to_be32(mr->ibmr.lkey); + mpt_entry->pd = cpu_to_be32(pd); + mpt_entry->start = 0; + mpt_entry->length = ~0ULL; + + memset(&mpt_entry->lkey, 0, + sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); + + err = mthca_SW2HW_MPT(dev, mpt_entry, + mr->ibmr.lkey & (dev->limits.num_mpts - 1), + &status); + if (err) + mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); + else if (status) { + mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", + status); + err = -EINVAL; + } + + kfree(mailbox); + return err; +} + +int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, + u64 *buffer_list, int buffer_size_shift, + int list_len, u64 iova, u64 total_size, + u32 access, struct mthca_mr *mr) +{ + void *mailbox; + u64 *mtt_entry; + struct mthca_mpt_entry *mpt_entry; + int err = -ENOMEM; + u8 status; + int i; + + might_sleep(); + WARN_ON(buffer_size_shift >= 32); + + mr->ibmr.lkey = mthca_alloc(&dev->mr_table.mpt_alloc); + if (mr->ibmr.lkey == -1) + return -ENOMEM; + mr->ibmr.rkey = mr->ibmr.lkey; + + for (i = dev->limits.mtt_seg_size / 8, mr->order = 0; + i < list_len; + i <<= 1, ++mr->order) + /* nothing */ ; 
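+	/*
+	 * Each MTT segment holds mtt_seg_size / 8 (by default eight)
+	 * 64-bit entries, so the loop just above computes the smallest
+	 * buddy order that covers list_len: e.g. for a list_len of 100,
+	 * i walks 8, 16, 32, 64, 128 and mr->order ends up as 4, i.e.
+	 * 2^4 = 16 contiguous segments.
+	 */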
+ + mr->first_seg = mthca_alloc_mtt(dev, mr->order); + if (mr->first_seg == -1) + goto err_out_mpt_free; + + /* + * If list_len is odd, we add one more dummy entry for + * firmware efficiency. + */ + mailbox = kmalloc(max(sizeof *mpt_entry, + (size_t) 8 * (list_len + (list_len & 1) + 2)) + + MTHCA_CMD_MAILBOX_EXTRA, + GFP_KERNEL); + if (!mailbox) + goto err_out_free_mtt; + + mtt_entry = MAILBOX_ALIGN(mailbox); + + mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + + mr->first_seg * dev->limits.mtt_seg_size); + mtt_entry[1] = 0; + for (i = 0; i < list_len; ++i) + mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | + MTHCA_MTT_FLAG_PRESENT); + if (list_len & 1) { + mtt_entry[i + 2] = 0; + ++list_len; + } + + if (0) { + mthca_dbg(dev, "Dumping MPT entry\n"); + for (i = 0; i < list_len + 2; ++i) + printk(KERN_ERR "[%2d] %016llx\n", + i, (unsigned long long) be64_to_cpu(mtt_entry[i])); + } + + err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status); + if (err) { + mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); + goto err_out_mailbox_free; + } + if (status) { + mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", + status); + err = -EINVAL; + goto err_out_mailbox_free; + } + + mpt_entry = MAILBOX_ALIGN(mailbox); + + mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | + MTHCA_MPT_FLAG_MIO | + MTHCA_MPT_FLAG_REGION | + access); + + mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); + mpt_entry->key = cpu_to_be32(mr->ibmr.lkey); + mpt_entry->pd = cpu_to_be32(pd); + mpt_entry->start = cpu_to_be64(iova); + mpt_entry->length = cpu_to_be64(total_size); + memset(&mpt_entry->lkey, 0, + sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); + mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + + mr->first_seg * dev->limits.mtt_seg_size); + + if (0) { + mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { + if (i % 4 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i])); + if ((i + 1) % 4 == 0) + printk("\n"); + } + } + + err = mthca_SW2HW_MPT(dev, mpt_entry, + mr->ibmr.lkey & (dev->limits.num_mpts - 1), + &status); + if (err) + mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); + else if (status) { + mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", + status); + err = -EINVAL; + } + + kfree(mailbox); + return err; + + err_out_mailbox_free: + kfree(mailbox); + + err_out_free_mtt: + mthca_free_mtt(dev, mr->first_seg, mr->order); + + err_out_mpt_free: + mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey); + return err; +} + +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) +{ + int err; + u8 status; + + might_sleep(); + + err = mthca_HW2SW_MPT(dev, NULL, + mr->ibmr.lkey & (dev->limits.num_mpts - 1), + &status); + if (err) + mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err); + else if (status) + mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n", + status); + + if (mr->order >= 0) + mthca_free_mtt(dev, mr->first_seg, mr->order); + + mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey); +} + +int __devinit mthca_init_mr_table(struct mthca_dev *dev) +{ + int err; + int i, s; + + err = mthca_alloc_init(&dev->mr_table.mpt_alloc, + dev->limits.num_mpts, + ~0, dev->limits.reserved_mrws); + if (err) + return err; + + err = -ENOMEM; + + for (i = 1, dev->mr_table.max_mtt_order = 0; + i < dev->limits.num_mtt_segs; + i <<= 1, ++dev->mr_table.max_mtt_order) + /* nothing */ ; + + dev->mr_table.mtt_buddy = kmalloc((dev->mr_table.max_mtt_order + 1) * + sizeof (long *), + 
GFP_KERNEL); + if (!dev->mr_table.mtt_buddy) + goto err_out; + + for (i = 0; i <= dev->mr_table.max_mtt_order; ++i) + dev->mr_table.mtt_buddy[i] = NULL; + + for (i = 0; i <= dev->mr_table.max_mtt_order; ++i) { + s = BITS_TO_LONGS(1 << (dev->mr_table.max_mtt_order - i)); + dev->mr_table.mtt_buddy[i] = kmalloc(s * sizeof (long), + GFP_KERNEL); + if (!dev->mr_table.mtt_buddy[i]) + goto err_out_free; + bitmap_zero(dev->mr_table.mtt_buddy[i], + 1 << (dev->mr_table.max_mtt_order - i)); + } + + set_bit(0, dev->mr_table.mtt_buddy[dev->mr_table.max_mtt_order]); + + for (i = 0; i < dev->mr_table.max_mtt_order; ++i) + if (1 << i >= dev->limits.reserved_mtts) + break; + + if (i == dev->mr_table.max_mtt_order) { + mthca_err(dev, "MTT table of order %d is " + "too small.\n", i); + goto err_out_free; + } + + (void) mthca_alloc_mtt(dev, i); + + return 0; + + err_out_free: + for (i = 0; i <= dev->mr_table.max_mtt_order; ++i) + kfree(dev->mr_table.mtt_buddy[i]); + + err_out: + mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); + + return err; +} + +void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev) +{ + int i; + + /* XXX check if any MRs are still allocated? */ + for (i = 0; i <= dev->mr_table.max_mtt_order; ++i) + kfree(dev->mr_table.mtt_buddy[i]); + kfree(dev->mr_table.mtt_buddy); + mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c new file mode 100644 index 000000000000..ea66847e4ea3 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/init.h> +#include <linux/errno.h> + +#include "mthca_dev.h" + +int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) +{ + int err; + + might_sleep(); + + atomic_set(&pd->sqp_count, 0); + pd->pd_num = mthca_alloc(&dev->pd_table.alloc); + if (pd->pd_num == -1) + return -ENOMEM; + + err = mthca_mr_alloc_notrans(dev, pd->pd_num, + MTHCA_MPT_FLAG_LOCAL_READ | + MTHCA_MPT_FLAG_LOCAL_WRITE, + &pd->ntmr); + if (err) + mthca_free(&dev->pd_table.alloc, pd->pd_num); + + return err; +} + +void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) +{ + might_sleep(); + mthca_free_mr(dev, &pd->ntmr); + mthca_free(&dev->pd_table.alloc, pd->pd_num); +} + +int __devinit mthca_init_pd_table(struct mthca_dev *dev) +{ + return mthca_alloc_init(&dev->pd_table.alloc, + dev->limits.num_pds, + (1 << 24) - 1, + dev->limits.reserved_pds); +} + +void __devexit mthca_cleanup_pd_table(struct mthca_dev *dev) +{ + /* XXX check if any PDs are still allocated? */ + mthca_alloc_cleanup(&dev->pd_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c new file mode 100644 index 000000000000..701fff0e1906 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> + +#include "mthca_profile.h" + +static int default_profile[MTHCA_RES_NUM] = { + [MTHCA_RES_QP] = 1 << 16, + [MTHCA_RES_EQP] = 1 << 16, + [MTHCA_RES_CQ] = 1 << 16, + [MTHCA_RES_EQ] = 32, + [MTHCA_RES_RDB] = 1 << 18, + [MTHCA_RES_MCG] = 1 << 13, + [MTHCA_RES_MPT] = 1 << 17, + [MTHCA_RES_MTT] = 1 << 20, + [MTHCA_RES_UDAV] = 1 << 15 +}; + +enum { + MTHCA_RDB_ENTRY_SIZE = 32, + MTHCA_MTT_SEG_SIZE = 64 +}; + +enum { + MTHCA_NUM_PDS = 1 << 15 +}; + +int mthca_make_profile(struct mthca_dev *dev, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca) +{ + /* just use default profile for now */ + struct mthca_resource { + u64 size; + u64 start; + int type; + int num; + int log_num; + }; + + u64 total_size = 0; + struct mthca_resource *profile; + struct mthca_resource tmp; + int i, j; + + default_profile[MTHCA_RES_UAR] = dev_lim->uar_size / PAGE_SIZE; + + profile = kmalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL); + if (!profile) + return -ENOMEM; + + profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz; + profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz; + profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz; + profile[MTHCA_RES_CQ].size = dev_lim->cqc_entry_sz; + profile[MTHCA_RES_EQP].size = dev_lim->eqpc_entry_sz; + profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz; + profile[MTHCA_RES_EQ].size = dev_lim->eqc_entry_sz; + profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE; + profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE; + profile[MTHCA_RES_MPT].size = MTHCA_MPT_ENTRY_SIZE; + profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE; + profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz; + profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE; + + for (i = 0; i < MTHCA_RES_NUM; ++i) { + profile[i].type = i; + profile[i].num = default_profile[i]; + profile[i].log_num = max(ffs(default_profile[i]) - 1, 0); + profile[i].size *= default_profile[i]; + } + + /* + * Sort the resources in decreasing order of size. Since they + * all have sizes that are powers of 2, we'll be able to keep + * resources aligned to their size and pack them without gaps + * using the sorted order. 
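+ * For example, a 64 MB region placed first leaves the 16 MB region
+ * that follows starting on a 16 MB boundary automatically, since the
+ * running total stays a multiple of every smaller power-of-2 size.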
+ */ + for (i = MTHCA_RES_NUM; i > 0; --i) + for (j = 1; j < i; ++j) { + if (profile[j].size > profile[j - 1].size) { + tmp = profile[j]; + profile[j] = profile[j - 1]; + profile[j - 1] = tmp; + } + } + + for (i = 0; i < MTHCA_RES_NUM; ++i) { + if (profile[i].size) { + profile[i].start = dev->ddr_start + total_size; + total_size += profile[i].size; + } + if (total_size > dev->fw.tavor.fw_start - dev->ddr_start) { + mthca_err(dev, "Profile requires 0x%llx bytes; " + "won't fit between DDR start at 0x%016llx " + "and FW start at 0x%016llx.\n", + (unsigned long long) total_size, + (unsigned long long) dev->ddr_start, + (unsigned long long) dev->fw.tavor.fw_start); + kfree(profile); + return -ENOMEM; + } + + if (profile[i].size) + mthca_dbg(dev, "profile[%2d]--%2d/%2d @ 0x%16llx " + "(size 0x%8llx)\n", + i, profile[i].type, profile[i].log_num, + (unsigned long long) profile[i].start, + (unsigned long long) profile[i].size); + } + + mthca_dbg(dev, "HCA memory: allocated %d KB/%d KB (%d KB free)\n", + (int) (total_size >> 10), + (int) ((dev->fw.tavor.fw_start - dev->ddr_start) >> 10), + (int) ((dev->fw.tavor.fw_start - dev->ddr_start - total_size) >> 10)); + + for (i = 0; i < MTHCA_RES_NUM; ++i) { + switch (profile[i].type) { + case MTHCA_RES_QP: + dev->limits.num_qps = profile[i].num; + init_hca->qpc_base = profile[i].start; + init_hca->log_num_qps = profile[i].log_num; + break; + case MTHCA_RES_EEC: + dev->limits.num_eecs = profile[i].num; + init_hca->eec_base = profile[i].start; + init_hca->log_num_eecs = profile[i].log_num; + break; + case MTHCA_RES_SRQ: + dev->limits.num_srqs = profile[i].num; + init_hca->srqc_base = profile[i].start; + init_hca->log_num_srqs = profile[i].log_num; + break; + case MTHCA_RES_CQ: + dev->limits.num_cqs = profile[i].num; + init_hca->cqc_base = profile[i].start; + init_hca->log_num_cqs = profile[i].log_num; + break; + case MTHCA_RES_EQP: + init_hca->eqpc_base = profile[i].start; + break; + case MTHCA_RES_EEEC: + init_hca->eeec_base = profile[i].start; + break; + case MTHCA_RES_EQ: + dev->limits.num_eqs = profile[i].num; + init_hca->eqc_base = profile[i].start; + init_hca->log_num_eqs = profile[i].log_num; + break; + case MTHCA_RES_RDB: + dev->limits.num_rdbs = profile[i].num; + init_hca->rdb_base = profile[i].start; + break; + case MTHCA_RES_MCG: + dev->limits.num_mgms = profile[i].num >> 1; + dev->limits.num_amgms = profile[i].num >> 1; + init_hca->mc_base = profile[i].start; + init_hca->log_mc_entry_sz = ffs(MTHCA_MGM_ENTRY_SIZE) - 1; + init_hca->log_mc_table_sz = profile[i].log_num; + init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1); + break; + case MTHCA_RES_MPT: + dev->limits.num_mpts = profile[i].num; + init_hca->mpt_base = profile[i].start; + init_hca->log_mpt_sz = profile[i].log_num; + break; + case MTHCA_RES_MTT: + dev->limits.num_mtt_segs = profile[i].num; + dev->limits.mtt_seg_size = MTHCA_MTT_SEG_SIZE; + dev->mr_table.mtt_base = profile[i].start; + init_hca->mtt_base = profile[i].start; + init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7; + break; + case MTHCA_RES_UAR: + init_hca->uar_scratch_base = profile[i].start; + break; + case MTHCA_RES_UDAV: + dev->av_table.ddr_av_base = profile[i].start; + dev->av_table.num_ddr_avs = profile[i].num; + default: + break; + } + } + + /* + * PDs don't take any HCA memory, but we assign them as part + * of the HCA profile anyway. 
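+ * (mthca_pd_alloc in mthca_pd.c above simply hands out PD numbers
+ * from this fixed range.)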
+ */ + dev->limits.num_pds = MTHCA_NUM_PDS; + + kfree(profile); + return 0; +} diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h new file mode 100644 index 000000000000..553cf2a64308 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef MTHCA_PROFILE_H +#define MTHCA_PROFILE_H + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +enum { + MTHCA_RES_QP, + MTHCA_RES_EEC, + MTHCA_RES_SRQ, + MTHCA_RES_CQ, + MTHCA_RES_EQP, + MTHCA_RES_EEEC, + MTHCA_RES_EQ, + MTHCA_RES_RDB, + MTHCA_RES_MCG, + MTHCA_RES_MPT, + MTHCA_RES_MTT, + MTHCA_RES_UAR, + MTHCA_RES_UDAV, + MTHCA_RES_NUM +}; + +int mthca_make_profile(struct mthca_dev *mdev, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca); + +#endif /* MTHCA_PROFILE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c new file mode 100644 index 000000000000..57427770702d --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -0,0 +1,627 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $ + */ + +#include <ib_smi.h> + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +static int mthca_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u8 status; + + in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + props->fw_ver = to_mdev(ibdev)->fw_ver; + + memset(in_mad, 0, sizeof *in_mad); + in_mad->base_version = 1; + in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + in_mad->class_version = 1; + in_mad->method = IB_MGMT_METHOD_GET; + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, + 1, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) & + 0xffffff; + props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30)); + props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32)); + memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + memcpy(&props->node_guid, out_mad->data + 12, 8); + + err = 0; + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mthca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u8 status; + + in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(in_mad, 0, sizeof *in_mad); + in_mad->base_version = 1; + in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + in_mad->class_version = 1; + in_mad->method = IB_MGMT_METHOD_GET; + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, + port, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + props->lid = be16_to_cpup((u16 *) (out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = be16_to_cpup((u16 *) (out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->port_cap_flags = be32_to_cpup((u32 *) (out_mad->data + 20)); + props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; + props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; + props->qkey_viol_cntr = be16_to_cpup((u16 *) (out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mthca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + return 0; +} + +static int mthca_query_pkey(struct ib_device *ibdev, + u8 port, u16 index, u16 *pkey) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u8 status; + + 
in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	memset(in_mad, 0, sizeof *in_mad);
+	in_mad->base_version = 1;
+	in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	in_mad->class_version = 1;
+	in_mad->method = IB_MGMT_METHOD_GET;
+	in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
+	in_mad->attr_mod = cpu_to_be32(index / 32);
+
+	err = mthca_MAD_IFC(to_mdev(ibdev), 1,
+			    port, in_mad, out_mad,
+			    &status);
+	if (err)
+		goto out;
+	if (status) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]);
+
+ out:
+	kfree(in_mad);
+	kfree(out_mad);
+	return err;
+}
+
+static int mthca_query_gid(struct ib_device *ibdev, u8 port,
+			   int index, union ib_gid *gid)
+{
+	struct ib_smp *in_mad = NULL;
+	struct ib_smp *out_mad = NULL;
+	int err = -ENOMEM;
+	u8 status;
+
+	in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	memset(in_mad, 0, sizeof *in_mad);
+	in_mad->base_version = 1;
+	in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	in_mad->class_version = 1;
+	in_mad->method = IB_MGMT_METHOD_GET;
+	in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+	in_mad->attr_mod = cpu_to_be32(port);
+
+	err = mthca_MAD_IFC(to_mdev(ibdev), 1,
+			    port, in_mad, out_mad,
+			    &status);
+	if (err)
+		goto out;
+	if (status) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(gid->raw, out_mad->data + 8, 8);
+
+	memset(in_mad, 0, sizeof *in_mad);
+	in_mad->base_version = 1;
+	in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	in_mad->class_version = 1;
+	in_mad->method = IB_MGMT_METHOD_GET;
+	in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+	in_mad->attr_mod = cpu_to_be32(index / 8);
+
+	err = mthca_MAD_IFC(to_mdev(ibdev), 1,
+			    port, in_mad, out_mad,
+			    &status);
+	if (err)
+		goto out;
+	if (status) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
+
+ out:
+	kfree(in_mad);
+	kfree(out_mad);
+	return err;
+}
+
+static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
+{
+	struct mthca_pd *pd;
+	int err;
+
+	pd = kmalloc(sizeof *pd, GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	err = mthca_pd_alloc(to_mdev(ibdev), pd);
+	if (err) {
+		kfree(pd);
+		return ERR_PTR(err);
+	}
+
+	return &pd->ibpd;
+}
+
+static int mthca_dealloc_pd(struct ib_pd *pd)
+{
+	mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
+	kfree(pd);
+
+	return 0;
+}
+
+static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
+				     struct ib_ah_attr *ah_attr)
+{
+	int err;
+	struct mthca_ah *ah;
+
+	ah = kmalloc(sizeof *ah, GFP_KERNEL);
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
+
+	err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
+	if (err) {
+		kfree(ah);
+		return ERR_PTR(err);
+	}
+
+	return &ah->ibah;
+}
+
+static int mthca_ah_destroy(struct ib_ah *ah)
+{
+	mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
+	kfree(ah);
+
+	return 0;
+}
+
+static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
+				     struct ib_qp_init_attr *init_attr)
+{
+	struct mthca_qp *qp;
+	int err;
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+	case IB_QPT_UD:
+	{
+		qp = kmalloc(sizeof *qp, GFP_KERNEL);
+		if (!qp)
+			return ERR_PTR(-ENOMEM);
+
+		qp->sq.max = init_attr->cap.max_send_wr;
+		qp->rq.max = init_attr->cap.max_recv_wr;
+		qp->sq.max_gs = init_attr->cap.max_send_sge;
+		qp->rq.max_gs = init_attr->cap.max_recv_sge;
+
+		err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
+				     to_mcq(init_attr->send_cq),
+
to_mcq(init_attr->recv_cq), + init_attr->qp_type, init_attr->sq_sig_type, + init_attr->rq_sig_type, qp); + qp->ibqp.qp_num = qp->qpn; + break; + } + case IB_QPT_SMI: + case IB_QPT_GSI: + { + qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->sq.max = init_attr->cap.max_send_wr; + qp->rq.max = init_attr->cap.max_recv_wr; + qp->sq.max_gs = init_attr->cap.max_send_sge; + qp->rq.max_gs = init_attr->cap.max_recv_sge; + + qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; + + err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), + to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq), + init_attr->sq_sig_type, init_attr->rq_sig_type, + qp->ibqp.qp_num, init_attr->port_num, + to_msqp(qp)); + break; + } + default: + /* Don't support raw QPs */ + return ERR_PTR(-ENOSYS); + } + + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + init_attr->cap.max_inline_data = 0; + + return &qp->ibqp; +} + +static int mthca_destroy_qp(struct ib_qp *qp) +{ + mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); + kfree(qp); + return 0; +} + +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries) +{ + struct mthca_cq *cq; + int nent; + int err; + + cq = kmalloc(sizeof *cq, GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + for (nent = 1; nent <= entries; nent <<= 1) + ; /* nothing */ + + err = mthca_init_cq(to_mdev(ibdev), nent, cq); + if (err) { + kfree(cq); + cq = ERR_PTR(err); + } else + cq->ibcq.cqe = nent - 1; + + return &cq->ibcq; +} + +static int mthca_destroy_cq(struct ib_cq *cq) +{ + mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); + kfree(cq); + + return 0; +} + +static int mthca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify notify) +{ + mthca_arm_cq(to_mdev(cq->device), to_mcq(cq), + notify == IB_CQ_SOLICITED); + return 0; +} + +static inline u32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? 
MTHCA_MPT_FLAG_LOCAL_WRITE : 0) | + MTHCA_MPT_FLAG_LOCAL_READ; +} + +static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct mthca_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mthca_mr_alloc_notrans(to_mdev(pd->device), + to_mpd(pd)->pd_num, + convert_access(acc), mr); + + if (err) { + kfree(mr); + return ERR_PTR(err); + } + + return &mr->ibmr; +} + +static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, + u64 *iova_start) +{ + struct mthca_mr *mr; + u64 *page_list; + u64 total_size; + u64 mask; + int shift; + int npages; + int err; + int i, j, n; + + /* First check that we have enough alignment */ + if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) + return ERR_PTR(-EINVAL); + + if (num_phys_buf > 1 && + ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) + return ERR_PTR(-EINVAL); + + mask = 0; + total_size = 0; + for (i = 0; i < num_phys_buf; ++i) { + if (buffer_list[i].addr & ~PAGE_MASK) + return ERR_PTR(-EINVAL); + if (i != 0 && i != num_phys_buf - 1 && + (buffer_list[i].size & ~PAGE_MASK)) + return ERR_PTR(-EINVAL); + + total_size += buffer_list[i].size; + if (i > 0) + mask |= buffer_list[i].addr; + } + + /* Find largest page shift we can use to cover buffers */ + for (shift = PAGE_SHIFT; shift < 31; ++shift) + if (num_phys_buf > 1) { + if ((1ULL << shift) & mask) + break; + } else { + if (1ULL << shift >= + buffer_list[0].size + + (buffer_list[0].addr & ((1ULL << shift) - 1))) + break; + } + + buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1); + buffer_list[0].addr &= ~0ull << shift; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + npages = 0; + for (i = 0; i < num_phys_buf; ++i) + npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift; + + if (!npages) + return &mr->ibmr; + + page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL); + if (!page_list) { + kfree(mr); + return ERR_PTR(-ENOMEM); + } + + n = 0; + for (i = 0; i < num_phys_buf; ++i) + for (j = 0; + j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift; + ++j) + page_list[n++] = buffer_list[i].addr + ((u64) j << shift); + + mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) " + "in PD %x; shift %d, npages %d.\n", + (unsigned long long) buffer_list[0].addr, + (unsigned long long) *iova_start, + to_mpd(pd)->pd_num, + shift, npages); + + err = mthca_mr_alloc_phys(to_mdev(pd->device), + to_mpd(pd)->pd_num, + page_list, shift, npages, + *iova_start, total_size, + convert_access(acc), mr); + + if (err) { + kfree(mr); + return ERR_PTR(err); + } + + kfree(page_list); + return &mr->ibmr; +} + +static int mthca_dereg_mr(struct ib_mr *mr) +{ + mthca_free_mr(to_mdev(mr->device), to_mmr(mr)); + kfree(mr); + return 0; +} + +static ssize_t show_rev(struct class_device *cdev, char *buf) +{ + struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); + return sprintf(buf, "%x\n", dev->rev_id); +} + +static ssize_t show_fw_ver(struct class_device *cdev, char *buf) +{ + struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); + return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32), + (int) (dev->fw_ver >> 16) & 0xffff, + (int) dev->fw_ver & 0xffff); +} + +static ssize_t show_hca(struct class_device *cdev, char *buf) +{ + struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); + switch (dev->hca_type) { + 
case TAVOR: return sprintf(buf, "MT23108\n"); + case ARBEL_COMPAT: return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); + case ARBEL_NATIVE: return sprintf(buf, "MT25208\n"); + default: return sprintf(buf, "unknown\n"); + } +} + +static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); + +static struct class_device_attribute *mthca_class_attributes[] = { + &class_device_attr_hw_rev, + &class_device_attr_fw_ver, + &class_device_attr_hca_type +}; + +int mthca_register_device(struct mthca_dev *dev) +{ + int ret; + int i; + + strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.node_type = IB_NODE_CA; + dev->ib_dev.phys_port_cnt = dev->limits.num_ports; + dev->ib_dev.dma_device = &dev->pdev->dev; + dev->ib_dev.class_dev.dev = &dev->pdev->dev; + dev->ib_dev.query_device = mthca_query_device; + dev->ib_dev.query_port = mthca_query_port; + dev->ib_dev.modify_port = mthca_modify_port; + dev->ib_dev.query_pkey = mthca_query_pkey; + dev->ib_dev.query_gid = mthca_query_gid; + dev->ib_dev.alloc_pd = mthca_alloc_pd; + dev->ib_dev.dealloc_pd = mthca_dealloc_pd; + dev->ib_dev.create_ah = mthca_ah_create; + dev->ib_dev.destroy_ah = mthca_ah_destroy; + dev->ib_dev.create_qp = mthca_create_qp; + dev->ib_dev.modify_qp = mthca_modify_qp; + dev->ib_dev.destroy_qp = mthca_destroy_qp; + dev->ib_dev.post_send = mthca_post_send; + dev->ib_dev.post_recv = mthca_post_receive; + dev->ib_dev.create_cq = mthca_create_cq; + dev->ib_dev.destroy_cq = mthca_destroy_cq; + dev->ib_dev.poll_cq = mthca_poll_cq; + dev->ib_dev.req_notify_cq = mthca_req_notify_cq; + dev->ib_dev.get_dma_mr = mthca_get_dma_mr; + dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; + dev->ib_dev.dereg_mr = mthca_dereg_mr; + dev->ib_dev.attach_mcast = mthca_multicast_attach; + dev->ib_dev.detach_mcast = mthca_multicast_detach; + dev->ib_dev.process_mad = mthca_process_mad; + + ret = ib_register_device(&dev->ib_dev); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(mthca_class_attributes); ++i) { + ret = class_device_create_file(&dev->ib_dev.class_dev, + mthca_class_attributes[i]); + if (ret) { + ib_unregister_device(&dev->ib_dev); + return ret; + } + } + + return 0; +} + +void mthca_unregister_device(struct mthca_dev *dev) +{ + ib_unregister_device(&dev->ib_dev); +} diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h new file mode 100644 index 000000000000..e0cf07df5495 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_PROVIDER_H
+#define MTHCA_PROVIDER_H
+
+#include <ib_verbs.h>
+#include <ib_pack.h>
+
+#define MTHCA_MPT_FLAG_ATOMIC        (1 << 14)
+#define MTHCA_MPT_FLAG_REMOTE_WRITE  (1 << 13)
+#define MTHCA_MPT_FLAG_REMOTE_READ   (1 << 12)
+#define MTHCA_MPT_FLAG_LOCAL_WRITE   (1 << 11)
+#define MTHCA_MPT_FLAG_LOCAL_READ    (1 << 10)
+
+struct mthca_buf_list {
+	void *buf;
+	DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+struct mthca_mr {
+	struct ib_mr ibmr;
+	int order;
+	u32 first_seg;
+};
+
+struct mthca_pd {
+	struct ib_pd ibpd;
+	u32 pd_num;
+	atomic_t sqp_count;
+	struct mthca_mr ntmr;
+};
+
+struct mthca_eq {
+	struct mthca_dev *dev;
+	int eqn;
+	u32 ecr_mask;
+	u16 msi_x_vector;
+	u16 msi_x_entry;
+	int have_irq;
+	int nent;
+	int cons_index;
+	struct mthca_buf_list *page_list;
+	struct mthca_mr mr;
+};
+
+struct mthca_av;
+
+struct mthca_ah {
+	struct ib_ah ibah;
+	int on_hca;
+	u32 key;
+	struct mthca_av *av;
+	dma_addr_t avdma;
+};
+
+/*
+ * Quick description of our CQ/QP locking scheme:
+ *
+ * We have one global lock that protects dev->cq/qp_table.  Each
+ * struct mthca_cq/qp also has its own lock.  An individual qp lock
+ * may be taken inside of an individual cq lock.  Both cqs attached to
+ * a qp may be locked, with the send cq locked first.  No other
+ * nesting should be done.
+ *
+ * Each struct mthca_cq/qp also has an atomic_t ref count.  The
+ * pointer from the cq/qp_table to the struct counts as one reference.
+ * This reference also is good for access through the consumer API, so
+ * modifying the CQ/QP etc doesn't need to take another reference.
+ * Access because of a completion being polled does need a reference.
+ *
+ * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
+ * destroy function to sleep on.
+ *
+ * This means that access from the consumer API requires nothing but
+ * taking the struct's lock.
+ *
+ * Access because of a completion event should go as follows:
+ * - lock cq/qp_table and look up struct
+ * - increment ref count in struct
+ * - drop cq/qp_table lock
+ * - lock struct, do your thing, and unlock struct
+ * - decrement ref count; if zero, wake up waiters
+ *
+ * To destroy a CQ/QP, we can do the following:
+ * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
+ * - decrement ref count
+ * - wait_event until ref count is zero
+ *
+ * It is the consumer's responsibility to make sure that no QP
+ * operations (WQE posting or state modification) are pending when the
+ * QP is destroyed.  Also, the consumer must make sure that calls to
+ * qp_modify are serialized.
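+ *
+ * In code, the completion-event pattern above looks roughly like this
+ * (sketch only; mthca_qp_event in mthca_qp.c below is the real thing):
+ *
+ *	spin_lock(&dev->qp_table.lock);
+ *	qp = mthca_array_get(&dev->qp_table.qp, qpn);
+ *	if (qp)
+ *		atomic_inc(&qp->refcount);
+ *	spin_unlock(&dev->qp_table.lock);
+ *
+ *	... use qp ...
+ *
+ *	if (atomic_dec_and_test(&qp->refcount))
+ *		wake_up(&qp->wait);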
+ * + * Possible optimizations (wait for profile data to see if/where we + * have locks bouncing between CPUs): + * - split cq/qp table lock into n separate (cache-aligned) locks, + * indexed (say) by the page in the table + * - split QP struct lock into three (one for common info, one for the + * send queue and one for the receive queue) + */ + +struct mthca_cq { + struct ib_cq ibcq; + spinlock_t lock; + atomic_t refcount; + int cqn; + int cons_index; + int is_direct; + union { + struct mthca_buf_list direct; + struct mthca_buf_list *page_list; + } queue; + struct mthca_mr mr; + wait_queue_head_t wait; +}; + +struct mthca_wq { + int max; + int cur; + int next; + int last_comp; + void *last; + int max_gs; + int wqe_shift; + enum ib_sig_type policy; +}; + +struct mthca_qp { + struct ib_qp ibqp; + spinlock_t lock; + atomic_t refcount; + u32 qpn; + int transport; + enum ib_qp_state state; + int is_direct; + struct mthca_mr mr; + + struct mthca_wq rq; + struct mthca_wq sq; + int send_wqe_offset; + + u64 *wrid; + union { + struct mthca_buf_list direct; + struct mthca_buf_list *page_list; + } queue; + + wait_queue_head_t wait; +}; + +struct mthca_sqp { + struct mthca_qp qp; + int port; + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + int header_buf_size; + void *header_buf; + dma_addr_t header_dma; +}; + +static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mthca_mr, ibmr); +} + +static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mthca_pd, ibpd); +} + +static inline struct mthca_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mthca_ah, ibah); +} + +static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mthca_cq, ibcq); +} + +static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mthca_qp, ibqp); +} + +static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp) +{ + return container_of(qp, struct mthca_sqp, qp); +} + +#endif /* MTHCA_PROVIDER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c new file mode 100644 index 000000000000..2ad7332ebde9 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -0,0 +1,1536 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $ + */ + +#include <linux/init.h> + +#include <ib_verbs.h> +#include <ib_cache.h> +#include <ib_pack.h> + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +enum { + MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, + MTHCA_ACK_REQ_FREQ = 10, + MTHCA_FLIGHT_LIMIT = 9, + MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */ +}; + +enum { + MTHCA_QP_STATE_RST = 0, + MTHCA_QP_STATE_INIT = 1, + MTHCA_QP_STATE_RTR = 2, + MTHCA_QP_STATE_RTS = 3, + MTHCA_QP_STATE_SQE = 4, + MTHCA_QP_STATE_SQD = 5, + MTHCA_QP_STATE_ERR = 6, + MTHCA_QP_STATE_DRAINING = 7 +}; + +enum { + MTHCA_QP_ST_RC = 0x0, + MTHCA_QP_ST_UC = 0x1, + MTHCA_QP_ST_RD = 0x2, + MTHCA_QP_ST_UD = 0x3, + MTHCA_QP_ST_MLX = 0x7 +}; + +enum { + MTHCA_QP_PM_MIGRATED = 0x3, + MTHCA_QP_PM_ARMED = 0x0, + MTHCA_QP_PM_REARM = 0x1 +}; + +enum { + /* qp_context flags */ + MTHCA_QP_BIT_DE = 1 << 8, + /* params1 */ + MTHCA_QP_BIT_SRE = 1 << 15, + MTHCA_QP_BIT_SWE = 1 << 14, + MTHCA_QP_BIT_SAE = 1 << 13, + MTHCA_QP_BIT_SIC = 1 << 4, + MTHCA_QP_BIT_SSC = 1 << 3, + /* params2 */ + MTHCA_QP_BIT_RRE = 1 << 15, + MTHCA_QP_BIT_RWE = 1 << 14, + MTHCA_QP_BIT_RAE = 1 << 13, + MTHCA_QP_BIT_RIC = 1 << 4, + MTHCA_QP_BIT_RSC = 1 << 3 +}; + +struct mthca_qp_path { + u32 port_pkey; + u8 rnr_retry; + u8 g_mylmc; + u16 rlid; + u8 ackto; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + u32 sl_tclass_flowlabel; + u8 rgid[16]; +} __attribute__((packed)); + +struct mthca_qp_context { + u32 flags; + u32 sched_queue; + u32 mtu_msgmax; + u32 usr_page; + u32 local_qpn; + u32 remote_qpn; + u32 reserved1[2]; + struct mthca_qp_path pri_path; + struct mthca_qp_path alt_path; + u32 rdd; + u32 pd; + u32 wqe_base; + u32 wqe_lkey; + u32 params1; + u32 reserved2; + u32 next_send_psn; + u32 cqn_snd; + u32 next_snd_wqe[2]; + u32 last_acked_psn; + u32 ssn; + u32 params2; + u32 rnr_nextrecvpsn; + u32 ra_buff_indx; + u32 cqn_rcv; + u32 next_rcv_wqe[2]; + u32 qkey; + u32 srqn; + u32 rmsn; + u32 reserved3[19]; +} __attribute__((packed)); + +struct mthca_qp_param { + u32 opt_param_mask; + u32 reserved1; + struct mthca_qp_context context; + u32 reserved2[62]; +} __attribute__((packed)); + +enum { + MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, + MTHCA_QP_OPTPAR_RRE = 1 << 1, + MTHCA_QP_OPTPAR_RAE = 1 << 2, + MTHCA_QP_OPTPAR_REW = 1 << 3, + MTHCA_QP_OPTPAR_PKEY_INDEX = 1 << 4, + MTHCA_QP_OPTPAR_Q_KEY = 1 << 5, + MTHCA_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, + MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, + MTHCA_QP_OPTPAR_SRA_MAX = 1 << 8, + MTHCA_QP_OPTPAR_RRA_MAX = 1 << 9, + MTHCA_QP_OPTPAR_PM_STATE = 1 << 10, + MTHCA_QP_OPTPAR_PORT_NUM = 1 << 11, + MTHCA_QP_OPTPAR_RETRY_COUNT = 1 << 12, + MTHCA_QP_OPTPAR_ALT_RNR_RETRY = 1 << 13, + MTHCA_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MTHCA_QP_OPTPAR_RNR_RETRY = 1 << 15, + MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16 +}; + +enum { + MTHCA_OPCODE_NOP = 0x00, + MTHCA_OPCODE_RDMA_WRITE = 0x08, + MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09, + MTHCA_OPCODE_SEND = 0x0a, + MTHCA_OPCODE_SEND_IMM = 0x0b, + MTHCA_OPCODE_RDMA_READ = 0x10, + MTHCA_OPCODE_ATOMIC_CS = 0x11, + MTHCA_OPCODE_ATOMIC_FA = 0x12, + MTHCA_OPCODE_BIND_MW = 0x18, + MTHCA_OPCODE_INVALID = 0xff +}; + +enum { + MTHCA_NEXT_DBD = 1 << 7, + MTHCA_NEXT_FENCE = 1 << 6, + MTHCA_NEXT_CQ_UPDATE = 1 << 3, + 
MTHCA_NEXT_EVENT_GEN = 1 << 2, + MTHCA_NEXT_SOLICIT = 1 << 1, + + MTHCA_MLX_VL15 = 1 << 17, + MTHCA_MLX_SLR = 1 << 16 +}; + +struct mthca_next_seg { + u32 nda_op; /* [31:6] next WQE [4:0] next opcode */ + u32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ + u32 flags; /* [3] CQ [2] Event [1] Solicit */ + u32 imm; /* immediate data */ +}; + +struct mthca_ud_seg { + u32 reserved1; + u32 lkey; + u64 av_addr; + u32 reserved2[4]; + u32 dqpn; + u32 qkey; + u32 reserved3[2]; +}; + +struct mthca_bind_seg { + u32 flags; /* [31] Atomic [30] rem write [29] rem read */ + u32 reserved; + u32 new_rkey; + u32 lkey; + u64 addr; + u64 length; +}; + +struct mthca_raddr_seg { + u64 raddr; + u32 rkey; + u32 reserved; +}; + +struct mthca_atomic_seg { + u64 swap_add; + u64 compare; +}; + +struct mthca_data_seg { + u32 byte_count; + u32 lkey; + u64 addr; +}; + +struct mthca_mlx_seg { + u32 nda_op; + u32 nds; + u32 flags; /* [17] VL15 [16] SLR [14:12] static rate + [11:8] SL [3] C [2] E */ + u16 rlid; + u16 vcrc; +}; + +static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp) +{ + return qp->qpn >= dev->qp_table.sqp_start && + qp->qpn <= dev->qp_table.sqp_start + 3; +} + +static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp) +{ + return qp->qpn >= dev->qp_table.sqp_start && + qp->qpn <= dev->qp_table.sqp_start + 1; +} + +static void *get_recv_wqe(struct mthca_qp *qp, int n) +{ + if (qp->is_direct) + return qp->queue.direct.buf + (n << qp->rq.wqe_shift); + else + return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf + + ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1)); +} + +static void *get_send_wqe(struct mthca_qp *qp, int n) +{ + if (qp->is_direct) + return qp->queue.direct.buf + qp->send_wqe_offset + + (n << qp->sq.wqe_shift); + else + return qp->queue.page_list[(qp->send_wqe_offset + + (n << qp->sq.wqe_shift)) >> + PAGE_SHIFT].buf + + ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) & + (PAGE_SIZE - 1)); +} + +void mthca_qp_event(struct mthca_dev *dev, u32 qpn, + enum ib_event_type event_type) +{ + struct mthca_qp *qp; + struct ib_event event; + + spin_lock(&dev->qp_table.lock); + qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1)); + if (qp) + atomic_inc(&qp->refcount); + spin_unlock(&dev->qp_table.lock); + + if (!qp) { + mthca_warn(dev, "Async event for bogus QP %08x\n", qpn); + return; + } + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.qp = &qp->ibqp; + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&event, qp->ibqp.qp_context); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +static int to_mthca_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: return MTHCA_QP_STATE_RST; + case IB_QPS_INIT: return MTHCA_QP_STATE_INIT; + case IB_QPS_RTR: return MTHCA_QP_STATE_RTR; + case IB_QPS_RTS: return MTHCA_QP_STATE_RTS; + case IB_QPS_SQD: return MTHCA_QP_STATE_SQD; + case IB_QPS_SQE: return MTHCA_QP_STATE_SQE; + case IB_QPS_ERR: return MTHCA_QP_STATE_ERR; + default: return -1; + } +} + +enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS }; + +static int to_mthca_st(int transport) +{ + switch (transport) { + case RC: return MTHCA_QP_ST_RC; + case UC: return MTHCA_QP_ST_UC; + case UD: return MTHCA_QP_ST_UD; + case RD: return MTHCA_QP_ST_RD; + case MLX: return MTHCA_QP_ST_MLX; + default: return -1; + } +} + +static const struct { + int trans; + u32 req_param[NUM_TRANS]; + u32 opt_param[NUM_TRANS]; +} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + 
[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, + [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, + [IB_QPS_INIT] = { + .trans = MTHCA_TRANS_RST2INIT, + .req_param = { + [UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [MLX] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + }, + /* bug-for-bug compatibility with VAPI: */ + .opt_param = { + [MLX] = IB_QP_PORT + } + }, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, + [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, + [IB_QPS_INIT] = { + .trans = MTHCA_TRANS_INIT2INIT, + .opt_param = { + [UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [MLX] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + }, + [IB_QPS_RTR] = { + .trans = MTHCA_TRANS_INIT2RTR, + .req_param = { + [RC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), + }, + .opt_param = { + [UD] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [RC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [MLX] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + } + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, + [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, + [IB_QPS_RTS] = { + .trans = MTHCA_TRANS_RTR2RTS, + .req_param = { + [UD] = IB_QP_SQ_PSN, + [RC] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), + [MLX] = IB_QP_SQ_PSN, + }, + .opt_param = { + [UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [RC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [MLX] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + } + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, + [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, + [IB_QPS_RTS] = { + .trans = MTHCA_TRANS_RTS2RTS, + .opt_param = { + [UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [RC] = (IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), + [MLX] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + }, + [IB_QPS_SQD] = { + .trans = MTHCA_TRANS_RTS2SQD, + }, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, + [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, + [IB_QPS_RTS] = { + .trans = MTHCA_TRANS_SQD2RTS, + .opt_param = { + [UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [RC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [MLX] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + }, + [IB_QPS_SQD] = { + .trans = MTHCA_TRANS_SQD2SQD, + .opt_param = { + [UD] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [RC] = (IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [MLX] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + } + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, + [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }, + [IB_QPS_RTS] = { + .trans = MTHCA_TRANS_SQERR2RTS, + .opt_param = { + [UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [RC] = (IB_QP_CUR_STATE | + IB_QP_MIN_RNR_TIMER), + [MLX] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + } + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST }, + [IB_QPS_ERR] = { 
.trans = MTHCA_TRANS_ANY2ERR }
+	}
+};
+
+static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
+			int attr_mask)
+{
+	if (attr_mask & IB_QP_PKEY_INDEX)
+		sqp->pkey_index = attr->pkey_index;
+	if (attr_mask & IB_QP_QKEY)
+		sqp->qkey = attr->qkey;
+	if (attr_mask & IB_QP_SQ_PSN)
+		sqp->send_psn = attr->sq_psn;
+}
+
+static void init_port(struct mthca_dev *dev, int port)
+{
+	int err;
+	u8 status;
+	struct mthca_init_ib_param param;
+
+	memset(&param, 0, sizeof param);
+
+	param.enable_1x = 1;
+	param.enable_4x = 1;
+	param.vl_cap = dev->limits.vl_cap;
+	param.mtu_cap = dev->limits.mtu_cap;
+	param.gid_cap = dev->limits.gid_table_len;
+	param.pkey_cap = dev->limits.pkey_table_len;
+
+	err = mthca_INIT_IB(dev, &param, port, &status);
+	if (err)
+		mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
+	if (status)
+		mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
+}
+
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+{
+	struct mthca_dev *dev = to_mdev(ibqp->device);
+	struct mthca_qp *qp = to_mqp(ibqp);
+	enum ib_qp_state cur_state, new_state;
+	void *mailbox = NULL;
+	struct mthca_qp_param *qp_param;
+	struct mthca_qp_context *qp_context;
+	u32 req_param, opt_param;
+	u8 status;
+	int err;
+
+	if (attr_mask & IB_QP_CUR_STATE) {
+		if (attr->cur_qp_state != IB_QPS_RTR &&
+		    attr->cur_qp_state != IB_QPS_RTS &&
+		    attr->cur_qp_state != IB_QPS_SQD &&
+		    attr->cur_qp_state != IB_QPS_SQE)
+			return -EINVAL;
+		else
+			cur_state = attr->cur_qp_state;
+	} else {
+		spin_lock_irq(&qp->lock);
+		cur_state = qp->state;
+		spin_unlock_irq(&qp->lock);
+	}
+
+	if (attr_mask & IB_QP_STATE) {
+		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
+			return -EINVAL;
+		new_state = attr->qp_state;
+	} else
+		new_state = cur_state;
+
+	if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
+		mthca_dbg(dev, "Illegal QP transition "
+			  "%d->%d\n", cur_state, new_state);
+		return -EINVAL;
+	}
+
+	req_param = state_table[cur_state][new_state].req_param[qp->transport];
+	opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
+
+	if ((req_param & attr_mask) != req_param) {
+		mthca_dbg(dev, "QP transition "
+			  "%d->%d missing req attr 0x%08x\n",
+			  cur_state, new_state,
+			  req_param & ~attr_mask);
+		return -EINVAL;
+	}
+
+	if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
+		mthca_dbg(dev, "QP transition (transport %d) "
+			  "%d->%d has extra attr 0x%08x\n",
+			  qp->transport,
+			  cur_state, new_state,
+			  attr_mask & ~(req_param | opt_param |
+					IB_QP_STATE));
+		return -EINVAL;
+	}
+
+	mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
+	if (!mailbox)
+		return -ENOMEM;
+	qp_param = MAILBOX_ALIGN(mailbox);
+	qp_context = &qp_param->context;
+	memset(qp_param, 0, sizeof *qp_param);
+
+	qp_context->flags = cpu_to_be32((to_mthca_state(new_state) << 28) |
+					(to_mthca_st(qp->transport) << 16));
+	qp_context->flags |= cpu_to_be32(MTHCA_QP_BIT_DE);
+	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
+		qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
+	else {
+		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
+		switch (attr->path_mig_state) {
+		case IB_MIG_MIGRATED:
+			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
+			break;
+		case IB_MIG_REARM:
+			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
+			break;
+		case IB_MIG_ARMED:
+			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
+			break;
+		}
+	}
+	/* leave sched_queue as 0 */
+	if (qp->transport == MLX || qp->transport == UD)
+ qp_context->mtu_msgmax = cpu_to_be32((IB_MTU_2048 << 29) | + (11 << 24)); + else if (attr_mask & IB_QP_PATH_MTU) { + qp_context->mtu_msgmax = cpu_to_be32((attr->path_mtu << 29) | + (31 << 24)); + } + qp_context->usr_page = cpu_to_be32(MTHCA_KAR_PAGE); + qp_context->local_qpn = cpu_to_be32(qp->qpn); + if (attr_mask & IB_QP_DEST_QPN) { + qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); + } + + if (qp->transport == MLX) + qp_context->pri_path.port_pkey |= + cpu_to_be32(to_msqp(qp)->port << 24); + else { + if (attr_mask & IB_QP_PORT) { + qp_context->pri_path.port_pkey |= + cpu_to_be32(attr->port_num << 24); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM); + } + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + qp_context->pri_path.port_pkey |= + cpu_to_be32(attr->pkey_index); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX); + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp_context->pri_path.rnr_retry = attr->rnr_retry << 5; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY); + } + + if (attr_mask & IB_QP_AV) { + qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; + qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid); + qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3; + if (attr->ah_attr.ah_flags & IB_AH_GRH) { + qp_context->pri_path.g_mylmc |= 1 << 7; + qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; + qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit; + qp_context->pri_path.sl_tclass_flowlabel = + cpu_to_be32((attr->ah_attr.sl << 28) | + (attr->ah_attr.grh.traffic_class << 20) | + (attr->ah_attr.grh.flow_label)); + memcpy(qp_context->pri_path.rgid, + attr->ah_attr.grh.dgid.raw, 16); + } else { + qp_context->pri_path.sl_tclass_flowlabel = + cpu_to_be32(attr->ah_attr.sl << 28); + } + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); + } + + if (attr_mask & IB_QP_TIMEOUT) { + qp_context->pri_path.ackto = attr->timeout; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); + } + + /* XXX alt_path */ + + /* leave rdd as 0 */ + qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num); + /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */ + qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey); + qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) | + (MTHCA_FLIGHT_LIMIT << 24) | + MTHCA_QP_BIT_SRE | + MTHCA_QP_BIT_SWE | + MTHCA_QP_BIT_SAE); + if (qp->sq.policy == IB_SIGNAL_ALL_WR) + qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC); + if (attr_mask & IB_QP_RETRY_CNT) { + qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT); + } + + /* XXX initiator resources */ + + if (attr_mask & IB_QP_SQ_PSN) + qp_context->next_send_psn = cpu_to_be32(attr->sq_psn); + qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn); + + /* XXX RDMA/atomic enable, responder resources */ + + if (qp->rq.policy == IB_SIGNAL_ALL_WR) + qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT); + } + if (attr_mask & IB_QP_RQ_PSN) + qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + + /* XXX ra_buff_indx */ + + qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn); + + if (attr_mask & IB_QP_QKEY) { + qp_context->qkey = cpu_to_be32(attr->qkey); + 
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY); + } + + err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, + qp->qpn, 0, qp_param, 0, &status); + if (status) { + mthca_warn(dev, "modify QP %d returned status %02x.\n", + state_table[cur_state][new_state].trans, status); + err = -EINVAL; + } + + if (!err) + qp->state = new_state; + + kfree(mailbox); + + if (is_sqp(dev, qp)) + store_attrs(to_msqp(qp), attr, attr_mask); + + /* + * If we are moving QP0 to RTR, bring the IB link up; if we + * are moving QP0 to RESET or ERROR, bring the link back down. + */ + if (is_qp0(dev, qp)) { + if (cur_state != IB_QPS_RTR && + new_state == IB_QPS_RTR) + init_port(dev, to_msqp(qp)->port); + + if (cur_state != IB_QPS_RESET && + cur_state != IB_QPS_ERR && + (new_state == IB_QPS_RESET || + new_state == IB_QPS_ERR)) + mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status); + } + + return err; +} + +/* + * Allocate and register buffer for WQEs. qp->rq.max, sq.max, + * rq.max_gs and sq.max_gs must all be assigned. + * mthca_alloc_wqe_buf will calculate rq.wqe_shift and + * sq.wqe_shift (as well as send_wqe_offset, is_direct, and + * queue) + */ +static int mthca_alloc_wqe_buf(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int size; + int i; + int npages, shift; + dma_addr_t t; + u64 *dma_list = NULL; + int err = -ENOMEM; + + size = sizeof (struct mthca_next_seg) + + qp->rq.max_gs * sizeof (struct mthca_data_seg); + + for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; + qp->rq.wqe_shift++) + ; /* nothing */ + + size = sizeof (struct mthca_next_seg) + + qp->sq.max_gs * sizeof (struct mthca_data_seg); + if (qp->transport == MLX) + size += 2 * sizeof (struct mthca_data_seg); + else if (qp->transport == UD) + size += sizeof (struct mthca_ud_seg); + else /* bind seg is as big as atomic + raddr segs */ + size += sizeof (struct mthca_bind_seg); + + for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; + qp->sq.wqe_shift++) + ; /* nothing */ + + qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, + 1 << qp->sq.wqe_shift); + size = PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)); + + qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64), + GFP_KERNEL); + if (!qp->wrid) + goto err_out; + + if (size <= MTHCA_MAX_DIRECT_QP_SIZE) { + qp->is_direct = 1; + npages = 1; + shift = get_order(size) + PAGE_SHIFT; + + if (0) + mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n", + size, shift); + + qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t); + if (!qp->queue.direct.buf) + goto err_out; + + pci_unmap_addr_set(&qp->queue.direct, mapping, t); + + memset(qp->queue.direct.buf, 0, size); + + while (t & ((1 << shift) - 1)) { + --shift; + npages *= 2; + } + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out_free; + + for (i = 0; i < npages; ++i) + dma_list[i] = t + i * (1 << shift); + } else { + qp->is_direct = 0; + npages = size / PAGE_SIZE; + shift = PAGE_SHIFT; + + if (0) + mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages); + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out; + + qp->queue.page_list = kmalloc(npages * + sizeof *qp->queue.page_list, + GFP_KERNEL); + if (!qp->queue.page_list) + goto err_out; + + for (i = 0; i < npages; ++i) { + qp->queue.page_list[i].buf = + pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); + if (!qp->queue.page_list[i].buf) + goto err_out_free; + + memset(qp->queue.page_list[i].buf, 0, 
PAGE_SIZE); + + pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t); + dma_list[i] = t; + } + } + + err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift, + npages, 0, size, + MTHCA_MPT_FLAG_LOCAL_WRITE | + MTHCA_MPT_FLAG_LOCAL_READ, + &qp->mr); + if (err) + goto err_out_free; + + kfree(dma_list); + return 0; + + err_out_free: + if (qp->is_direct) { + pci_free_consistent(dev->pdev, size, + qp->queue.direct.buf, + pci_unmap_addr(&qp->queue.direct, mapping)); + } else + for (i = 0; i < npages; ++i) { + if (qp->queue.page_list[i].buf) + pci_free_consistent(dev->pdev, PAGE_SIZE, + qp->queue.page_list[i].buf, + pci_unmap_addr(&qp->queue.page_list[i], + mapping)); + + } + + err_out: + kfree(qp->wrid); + kfree(dma_list); + return err; +} + +static int mthca_alloc_qp_common(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + enum ib_sig_type recv_policy, + struct mthca_qp *qp) +{ + int err; + + spin_lock_init(&qp->lock); + atomic_set(&qp->refcount, 1); + qp->state = IB_QPS_RESET; + qp->sq.policy = send_policy; + qp->rq.policy = recv_policy; + qp->rq.cur = 0; + qp->sq.cur = 0; + qp->rq.next = 0; + qp->sq.next = 0; + qp->rq.last_comp = qp->rq.max - 1; + qp->sq.last_comp = qp->sq.max - 1; + qp->rq.last = NULL; + qp->sq.last = NULL; + + err = mthca_alloc_wqe_buf(dev, pd, qp); + return err; +} + +int mthca_alloc_qp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_qp_type type, + enum ib_sig_type send_policy, + enum ib_sig_type recv_policy, + struct mthca_qp *qp) +{ + int err; + + switch (type) { + case IB_QPT_RC: qp->transport = RC; break; + case IB_QPT_UC: qp->transport = UC; break; + case IB_QPT_UD: qp->transport = UD; break; + default: return -EINVAL; + } + + qp->qpn = mthca_alloc(&dev->qp_table.alloc); + if (qp->qpn == -1) + return -ENOMEM; + + err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, + send_policy, recv_policy, qp); + if (err) { + mthca_free(&dev->qp_table.alloc, qp->qpn); + return err; + } + + spin_lock_irq(&dev->qp_table.lock); + mthca_array_set(&dev->qp_table.qp, + qp->qpn & (dev->limits.num_qps - 1), qp); + spin_unlock_irq(&dev->qp_table.lock); + + return 0; +} + +int mthca_alloc_sqp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + enum ib_sig_type recv_policy, + int qpn, + int port, + struct mthca_sqp *sqp) +{ + int err = 0; + u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; + + sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; + sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, + &sqp->header_dma, GFP_KERNEL); + if (!sqp->header_buf) + return -ENOMEM; + + spin_lock_irq(&dev->qp_table.lock); + if (mthca_array_get(&dev->qp_table.qp, mqpn)) + err = -EBUSY; + else + mthca_array_set(&dev->qp_table.qp, mqpn, sqp); + spin_unlock_irq(&dev->qp_table.lock); + + if (err) + goto err_out; + + sqp->port = port; + sqp->qp.qpn = mqpn; + sqp->qp.transport = MLX; + + err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, + send_policy, recv_policy, + &sqp->qp); + if (err) + goto err_out_free; + + atomic_inc(&pd->sqp_count); + + return 0; + + err_out_free: + spin_lock_irq(&dev->qp_table.lock); + mthca_array_clear(&dev->qp_table.qp, mqpn); + spin_unlock_irq(&dev->qp_table.lock); + + err_out: + dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, + sqp->header_buf, sqp->header_dma); + + return err; +} + +void 
mthca_free_qp(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + u8 status; + int size; + int i; + + spin_lock_irq(&dev->qp_table.lock); + mthca_array_clear(&dev->qp_table.qp, + qp->qpn & (dev->limits.num_qps - 1)); + spin_unlock_irq(&dev->qp_table.lock); + + atomic_dec(&qp->refcount); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + if (qp->state != IB_QPS_RESET) + mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); + + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); + + mthca_free_mr(dev, &qp->mr); + + size = PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)); + + if (qp->is_direct) { + pci_free_consistent(dev->pdev, size, + qp->queue.direct.buf, + pci_unmap_addr(&qp->queue.direct, mapping)); + } else { + for (i = 0; i < size / PAGE_SIZE; ++i) { + pci_free_consistent(dev->pdev, PAGE_SIZE, + qp->queue.page_list[i].buf, + pci_unmap_addr(&qp->queue.page_list[i], + mapping)); + } + } + + kfree(qp->wrid); + + if (is_sqp(dev, qp)) { + atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); + dma_free_coherent(&dev->pdev->dev, + to_msqp(qp)->header_buf_size, + to_msqp(qp)->header_buf, + to_msqp(qp)->header_dma); + } + else + mthca_free(&dev->qp_table.alloc, qp->qpn); +} + +/* Create UD header for an MLX send and build a data segment for it */ +static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, + int ind, struct ib_send_wr *wr, + struct mthca_mlx_seg *mlx, + struct mthca_data_seg *data) +{ + int header_size; + int err; + + ib_ud_header_init(256, /* assume a MAD */ + sqp->ud_header.grh_present, + &sqp->ud_header); + + err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); + if (err) + return err; + mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == 0xffff ? + MTHCA_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + mlx->vcrc = 0; + + switch (wr->opcode) { + case IB_WR_SEND: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; + sqp->ud_header.immediate_present = 0; + break; + case IB_WR_SEND_WITH_IMM: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + sqp->ud_header.immediate_present = 1; + sqp->ud_header.immediate_data = wr->imm_data; + break; + default: + return -EINVAL; + } + + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + if (sqp->ud_header.lrh.destination_lid == 0xffff) + sqp->ud_header.lrh.source_lid = 0xffff; + sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + if (!sqp->qp.ibqp.qp_num) + ib_cached_pkey_get(&dev->ib_dev, sqp->port, + sqp->pkey_index, + &sqp->ud_header.bth.pkey); + else + ib_cached_pkey_get(&dev->ib_dev, sqp->port, + wr->wr.ud.pkey_index, + &sqp->ud_header.bth.pkey); + cpu_to_be16s(&sqp->ud_header.bth.pkey); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? 
+ sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + + header_size = ib_ud_header_pack(&sqp->ud_header, + sqp->header_buf + + ind * MTHCA_UD_HEADER_SIZE); + + data->byte_count = cpu_to_be32(header_size); + data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); + data->addr = cpu_to_be64(sqp->header_dma + + ind * MTHCA_UD_HEADER_SIZE); + + return 0; +} + +int mthca_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + void *wqe; + void *prev_wqe; + unsigned long flags; + int err = 0; + int nreq; + int i; + int size; + int size0 = 0; + u32 f0 = 0; + int ind; + u8 op0 = 0; + + static const u8 opcode[] = { + [IB_WR_SEND] = MTHCA_OPCODE_SEND, + [IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM, + [IB_WR_RDMA_WRITE] = MTHCA_OPCODE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = MTHCA_OPCODE_RDMA_WRITE_IMM, + [IB_WR_RDMA_READ] = MTHCA_OPCODE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = MTHCA_OPCODE_ATOMIC_CS, + [IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA, + }; + + spin_lock_irqsave(&qp->lock, flags); + + /* XXX check that state is OK to post send */ + + ind = qp->sq.next; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (qp->sq.cur + nreq >= qp->sq.max) { + mthca_err(dev, "SQ full (%d posted, %d max, %d nreq)\n", + qp->sq.cur, qp->sq.max, nreq); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind); + prev_wqe = qp->sq.last; + qp->sq.last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + ((struct mthca_next_seg *) wqe)->flags = + ((wr->send_flags & IB_SEND_SIGNALED) ? + cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_flags & IB_SEND_SOLICITED) ? 
+			 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) |
+			cpu_to_be32(1);
+		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+
+		wqe += sizeof (struct mthca_next_seg);
+		size = sizeof (struct mthca_next_seg) / 16;
+
+		switch (qp->transport) {
+		case RC:
+			switch (wr->opcode) {
+			case IB_WR_ATOMIC_CMP_AND_SWP:
+			case IB_WR_ATOMIC_FETCH_AND_ADD:
+				((struct mthca_raddr_seg *) wqe)->raddr =
+					cpu_to_be64(wr->wr.atomic.remote_addr);
+				((struct mthca_raddr_seg *) wqe)->rkey =
+					cpu_to_be32(wr->wr.atomic.rkey);
+				((struct mthca_raddr_seg *) wqe)->reserved = 0;
+
+				wqe += sizeof (struct mthca_raddr_seg);
+
+				if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+					((struct mthca_atomic_seg *) wqe)->swap_add =
+						cpu_to_be64(wr->wr.atomic.swap);
+					((struct mthca_atomic_seg *) wqe)->compare =
+						cpu_to_be64(wr->wr.atomic.compare_add);
+				} else {
+					((struct mthca_atomic_seg *) wqe)->swap_add =
+						cpu_to_be64(wr->wr.atomic.compare_add);
+					((struct mthca_atomic_seg *) wqe)->compare = 0;
+				}
+
+				wqe += sizeof (struct mthca_atomic_seg);
+				size += sizeof (struct mthca_raddr_seg) / 16 +
+					sizeof (struct mthca_atomic_seg) / 16;
+				break;
+
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+			case IB_WR_RDMA_READ:
+				((struct mthca_raddr_seg *) wqe)->raddr =
+					cpu_to_be64(wr->wr.rdma.remote_addr);
+				((struct mthca_raddr_seg *) wqe)->rkey =
+					cpu_to_be32(wr->wr.rdma.rkey);
+				((struct mthca_raddr_seg *) wqe)->reserved = 0;
+				wqe += sizeof (struct mthca_raddr_seg);
+				size += sizeof (struct mthca_raddr_seg) / 16;
+				break;
+
+			default:
+				/* No extra segments required for sends */
+				break;
+			}
+
+			break;
+
+		case UD:
+			((struct mthca_ud_seg *) wqe)->lkey =
+				cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
+			((struct mthca_ud_seg *) wqe)->av_addr =
+				cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
+			((struct mthca_ud_seg *) wqe)->dqpn =
+				cpu_to_be32(wr->wr.ud.remote_qpn);
+			((struct mthca_ud_seg *) wqe)->qkey =
+				cpu_to_be32(wr->wr.ud.remote_qkey);
+
+			wqe += sizeof (struct mthca_ud_seg);
+			size += sizeof (struct mthca_ud_seg) / 16;
+			break;
+
+		case MLX:
+			err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+					       wqe - sizeof (struct mthca_next_seg),
+					       wqe);
+			if (err) {
+				*bad_wr = wr;
+				goto out;
+			}
+			wqe += sizeof (struct mthca_data_seg);
+			size += sizeof (struct mthca_data_seg) / 16;
+			break;
+		}
+
+		if (wr->num_sge > qp->sq.max_gs) {
+			mthca_err(dev, "too many gathers\n");
+			err = -EINVAL;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		for (i = 0; i < wr->num_sge; ++i) {
+			((struct mthca_data_seg *) wqe)->byte_count =
+				cpu_to_be32(wr->sg_list[i].length);
+			((struct mthca_data_seg *) wqe)->lkey =
+				cpu_to_be32(wr->sg_list[i].lkey);
+			((struct mthca_data_seg *) wqe)->addr =
+				cpu_to_be64(wr->sg_list[i].addr);
+			wqe += sizeof (struct mthca_data_seg);
+			size += sizeof (struct mthca_data_seg) / 16;
+		}
+
+		/* Add one more inline data segment for ICRC */
+		if (qp->transport == MLX) {
+			((struct mthca_data_seg *) wqe)->byte_count =
+				cpu_to_be32((1 << 31) | 4);
+			((u32 *) wqe)[1] = 0;
+			wqe += sizeof (struct mthca_data_seg);
+			size += sizeof (struct mthca_data_seg) / 16;
+		}
+
+		qp->wrid[ind + qp->rq.max] = wr->wr_id;
+
+		if (wr->opcode >= ARRAY_SIZE(opcode)) {
+			mthca_err(dev, "opcode invalid\n");
+			err = -EINVAL;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (prev_wqe) {
+			((struct mthca_next_seg *) prev_wqe)->nda_op =
+				cpu_to_be32(((ind << qp->sq.wqe_shift) +
+					     qp->send_wqe_offset) |
+					    opcode[wr->opcode]);
+			smp_wmb();
+			((struct mthca_next_seg *) prev_wqe)->ee_nds =
+				cpu_to_be32((size0 ?
0 : MTHCA_NEXT_DBD) | size); + } + + if (!size0) { + size0 = size; + op0 = opcode[wr->opcode]; + } + + ++ind; + if (unlikely(ind >= qp->sq.max)) + ind -= qp->sq.max; + } + +out: + if (nreq) { + u32 doorbell[2]; + + doorbell[0] = cpu_to_be32(((qp->sq.next << qp->sq.wqe_shift) + + qp->send_wqe_offset) | f0 | op0); + doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); + + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + qp->sq.cur += nreq; + qp->sq.next = ind; + + spin_unlock_irqrestore(&qp->lock, flags); + return err; +} + +int mthca_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + unsigned long flags; + int err = 0; + int nreq; + int i; + int size; + int size0 = 0; + int ind; + void *wqe; + void *prev_wqe; + + spin_lock_irqsave(&qp->lock, flags); + + /* XXX check that state is OK to post receive */ + + ind = qp->rq.next; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (qp->rq.cur + nreq >= qp->rq.max) { + mthca_err(dev, "RQ %06x full\n", qp->qpn); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + wqe = get_recv_wqe(qp, ind); + prev_wqe = qp->rq.last; + qp->rq.last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD); + ((struct mthca_next_seg *) wqe)->flags = + (wr->recv_flags & IB_RECV_SIGNALED) ? + cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0; + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + if (wr->num_sge > qp->rq.max_gs) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + for (i = 0; i < wr->num_sge; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cpu_to_be32(wr->sg_list[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cpu_to_be32(wr->sg_list[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cpu_to_be64(wr->sg_list[i].addr); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + qp->wrid[ind] = wr->wr_id; + + if (prev_wqe) { + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32((ind << qp->rq.wqe_shift) | 1); + smp_wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD | size); + } + + if (!size0) + size0 = size; + + ++ind; + if (unlikely(ind >= qp->rq.max)) + ind -= qp->rq.max; + } + +out: + if (nreq) { + u32 doorbell[2]; + + doorbell[0] = cpu_to_be32((qp->rq.next << qp->rq.wqe_shift) | size0); + doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); + + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + qp->rq.cur += nreq; + qp->rq.next = ind; + + spin_unlock_irqrestore(&qp->lock, flags); + return err; +} + +int mthca_free_err_wqe(struct mthca_qp *qp, int is_send, + int index, int *dbd, u32 *new_wqe) +{ + struct mthca_next_seg *next; + + if (is_send) + next = get_send_wqe(qp, index); + else + next = get_recv_wqe(qp, index); + + *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); + if (next->ee_nds & cpu_to_be32(0x3f)) + *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) | + (next->ee_nds & cpu_to_be32(0x3f)); + else + *new_wqe = 0; + + return 0; +} + +int __devinit mthca_init_qp_table(struct mthca_dev *dev) +{ + int err; + u8 status; + int i; + + spin_lock_init(&dev->qp_table.lock); + + /* + * We reserve 2 extra QPs per port for the special QPs. 
The + * special QP for port 1 has to be even, so round up. + */ + dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL; + err = mthca_alloc_init(&dev->qp_table.alloc, + dev->limits.num_qps, + (1 << 24) - 1, + dev->qp_table.sqp_start + + MTHCA_MAX_PORTS * 2); + if (err) + return err; + + err = mthca_array_init(&dev->qp_table.qp, + dev->limits.num_qps); + if (err) { + mthca_alloc_cleanup(&dev->qp_table.alloc); + return err; + } + + for (i = 0; i < 2; ++i) { + err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI, + dev->qp_table.sqp_start + i * 2, + &status); + if (err) + goto err_out; + if (status) { + mthca_warn(dev, "CONF_SPECIAL_QP returned " + "status %02x, aborting.\n", + status); + err = -EINVAL; + goto err_out; + } + } + return 0; + + err_out: + for (i = 0; i < 2; ++i) + mthca_CONF_SPECIAL_QP(dev, i, 0, &status); + + mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); + mthca_alloc_cleanup(&dev->qp_table.alloc); + + return err; +} + +void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev) +{ + int i; + u8 status; + + for (i = 0; i < 2; ++i) + mthca_CONF_SPECIAL_QP(dev, i, 0, &status); + + mthca_alloc_cleanup(&dev->qp_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c new file mode 100644 index 000000000000..35df81c85df2 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/delay.h> + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +int mthca_reset(struct mthca_dev *mdev) +{ + int i; + int err = 0; + u32 *hca_header = NULL; + u32 *bridge_header = NULL; + struct pci_dev *bridge = NULL; + +#define MTHCA_RESET_OFFSET 0xf0010 +#define MTHCA_RESET_VALUE cpu_to_be32(1) + + /* + * Reset the chip. This is somewhat ugly because we have to + * save off the PCI header before reset and then restore it + * after the chip reboots. We skip config space offsets 22 + * and 23 since those have a special meaning. 
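+ *
+ * In outline (a reading aid only; the code below is authoritative):
+ *
+ *	1. save the first 64 dwords of config space (skipping 22 and 23)
+ *	2. write MTHCA_RESET_VALUE to BAR 0 + MTHCA_RESET_OFFSET
+ *	3. poll config reads until the device responds again
+ *	4. write the saved header back, leaving PCI_COMMAND for last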
+ * + * To make matters worse, for Tavor (PCI-X HCA) we have to + * find the associated bridge device and save off its PCI + * header as well. + */ + + if (mdev->hca_type == TAVOR) { + /* Look for the bridge -- its device ID will be 2 more + than HCA's device ID. */ + while ((bridge = pci_get_device(mdev->pdev->vendor, + mdev->pdev->device + 2, + bridge)) != NULL) { + if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE && + bridge->subordinate == mdev->pdev->bus) { + mthca_dbg(mdev, "Found bridge: %s (%s)\n", + pci_pretty_name(bridge), pci_name(bridge)); + break; + } + } + + if (!bridge) { + /* + * Didn't find a bridge for a Tavor device -- + * assume we're in no-bridge mode and hope for + * the best. + */ + mthca_warn(mdev, "No bridge found for %s (%s)\n", + pci_pretty_name(mdev->pdev), pci_name(mdev->pdev)); + } + + } + + /* For Arbel do we need to save off the full 4K PCI Express header?? */ + hca_header = kmalloc(256, GFP_KERNEL); + if (!hca_header) { + err = -ENOMEM; + mthca_err(mdev, "Couldn't allocate memory to save HCA " + "PCI header, aborting.\n"); + goto out; + } + + for (i = 0; i < 64; ++i) { + if (i == 22 || i == 23) + continue; + if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) { + err = -ENODEV; + mthca_err(mdev, "Couldn't save HCA " + "PCI header, aborting.\n"); + goto out; + } + } + + if (bridge) { + bridge_header = kmalloc(256, GFP_KERNEL); + if (!bridge_header) { + err = -ENOMEM; + mthca_err(mdev, "Couldn't allocate memory to save HCA " + "bridge PCI header, aborting.\n"); + goto out; + } + + for (i = 0; i < 64; ++i) { + if (i == 22 || i == 23) + continue; + if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) { + err = -ENODEV; + mthca_err(mdev, "Couldn't save HCA bridge " + "PCI header, aborting.\n"); + goto out; + } + } + } + + /* actually hit reset */ + { + void __iomem *reset = ioremap(pci_resource_start(mdev->pdev, 0) + + MTHCA_RESET_OFFSET, 4); + + if (!reset) { + err = -ENOMEM; + mthca_err(mdev, "Couldn't map HCA reset register, " + "aborting.\n"); + goto out; + } + + writel(MTHCA_RESET_VALUE, reset); + iounmap(reset); + } + + /* Docs say to wait one second before accessing device */ + msleep(1000); + + /* Now wait for PCI device to start responding again */ + { + u32 v; + int c = 0; + + for (c = 0; c < 100; ++c) { + if (pci_read_config_dword(bridge ? bridge : mdev->pdev, 0, &v)) { + err = -ENODEV; + mthca_err(mdev, "Couldn't access HCA after reset, " + "aborting.\n"); + goto out; + } + + if (v != 0xffffffff) + goto good; + + msleep(100); + } + + err = -ENODEV; + mthca_err(mdev, "PCI device did not come back after reset, " + "aborting.\n"); + goto out; + } + +good: + /* Now restore the PCI headers */ + if (bridge) { + /* + * Bridge control register is at 0x3e, so we'll + * naturally restore it last in this loop. 
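+ *
+ * PCI_COMMAND is skipped in the restore loops below and rewritten
+ * only after everything else (the BARs included) is back in place,
+ * so that memory and I/O decoding are not re-enabled while the rest
+ * of the header still holds post-reset values.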
+ */ + for (i = 0; i < 16; ++i) { + if (i * 4 == PCI_COMMAND) + continue; + + if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA bridge reg %x, " + "aborting.\n", i); + goto out; + } + } + + if (pci_write_config_dword(bridge, PCI_COMMAND, + bridge_header[PCI_COMMAND / 4])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, " + "aborting.\n"); + goto out; + } + } + + for (i = 0; i < 16; ++i) { + if (i * 4 == PCI_COMMAND) + continue; + + if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA reg %x, " + "aborting.\n", i); + goto out; + } + } + + if (pci_write_config_dword(mdev->pdev, PCI_COMMAND, + hca_header[PCI_COMMAND / 4])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA COMMAND, " + "aborting.\n"); + goto out; + } + +out: + if (bridge) + pci_dev_put(bridge); + kfree(bridge_header); + kfree(hca_header); + + return err; +} diff --git a/drivers/infiniband/include/ib_cache.h b/drivers/infiniband/include/ib_cache.h new file mode 100644 index 000000000000..211baa03bc8a --- /dev/null +++ b/drivers/infiniband/include/ib_cache.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef _IB_CACHE_H +#define _IB_CACHE_H + +#include <ib_verbs.h> + +int ib_cached_gid_get(struct ib_device *device, + u8 port, + int index, + union ib_gid *gid); +int ib_cached_pkey_get(struct ib_device *device_handle, + u8 port, + int index, + u16 *pkey); +int ib_cached_pkey_find(struct ib_device *device, + u8 port, + u16 pkey, + u16 *index); + +#endif /* _IB_CACHE_H */ diff --git a/drivers/infiniband/include/ib_fmr_pool.h b/drivers/infiniband/include/ib_fmr_pool.h new file mode 100644 index 000000000000..e8769657cbbb --- /dev/null +++ b/drivers/infiniband/include/ib_fmr_pool.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_fmr_pool.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#if !defined(IB_FMR_POOL_H) +#define IB_FMR_POOL_H + +#include <ib_verbs.h> + +struct ib_fmr_pool; + +/** + * struct ib_fmr_pool_param - Parameters for creating FMR pool + * @max_pages_per_fmr:Maximum number of pages per map request. + * @access:Access flags for FMRs in pool. + * @pool_size:Number of FMRs to allocate for pool. + * @dirty_watermark:Flush is triggered when @dirty_watermark dirty + * FMRs are present. + * @flush_function:Callback called when unmapped FMRs are flushed and + * more FMRs are possibly available for mapping + * @flush_arg:Context passed to user's flush function. + * @cache:If set, FMRs may be reused after unmapping for identical map + * requests. + */ +struct ib_fmr_pool_param { + int max_pages_per_fmr; + enum ib_access_flags access; + int pool_size; + int dirty_watermark; + void (*flush_function)(struct ib_fmr_pool *pool, + void * arg); + void *flush_arg; + unsigned cache:1; +}; + +struct ib_pool_fmr { + struct ib_fmr *fmr; + struct ib_fmr_pool *pool; + struct list_head list; + struct hlist_node cache_node; + int ref_count; + int remap_count; + u64 io_virtual_address; + int page_list_len; + u64 page_list[0]; +}; + +struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, + struct ib_fmr_pool_param *params); + +int ib_destroy_fmr_pool(struct ib_fmr_pool *pool); + +int ib_flush_fmr_pool(struct ib_fmr_pool *pool); + +struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, + u64 *page_list, + int list_len, + u64 *io_virtual_address); + +int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); + +#endif /* IB_FMR_POOL_H */ diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h new file mode 100644 index 000000000000..4a6bf6763a97 --- /dev/null +++ b/drivers/infiniband/include/ib_mad.h @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_mad.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#if !defined( IB_MAD_H ) +#define IB_MAD_H + +#include <ib_verbs.h> + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 + +#define IB_MGMT_METHOD_RESP 0x80 + +#define IB_MGMT_MAX_METHODS 128 + +#define IB_QP0 0 +#define IB_QP1 __constant_htonl(1) +#define IB_QP1_QKEY 0x80010000 + +struct ib_grh { + u32 version_tclass_flow; + u16 paylen; + u8 next_hdr; + u8 hop_limit; + union ib_gid sgid; + union ib_gid dgid; +} __attribute__ ((packed)); + +struct ib_mad_hdr { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + u16 status; + u16 class_specific; + u64 tid; + u16 attr_id; + u16 resv; + u32 attr_mod; +} __attribute__ ((packed)); + +struct ib_rmpp_hdr { + u8 rmpp_version; + u8 rmpp_type; + u8 rmpp_rtime_flags; + u8 rmpp_status; + u32 seg_num; + u32 paylen_newwin; +} __attribute__ ((packed)); + +struct ib_mad { + struct ib_mad_hdr mad_hdr; + u8 data[232]; +} __attribute__ ((packed)); + +struct ib_rmpp_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 data[220]; +} __attribute__ ((packed)); + +struct ib_vendor_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + u8 data[216]; +} __attribute__ ((packed)); + +struct ib_mad_agent; +struct ib_mad_send_wc; +struct ib_mad_recv_wc; + +/** + * ib_mad_send_handler - callback handler for a sent MAD. + * @mad_agent: MAD agent that sent the MAD. + * @mad_send_wc: Send work completion information on the sent MAD. 
+ */
+typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
+				    struct ib_mad_send_wc *mad_send_wc);
+
+/**
+ * ib_mad_snoop_handler - Callback handler for snooping sent MADs.
+ * @mad_agent: MAD agent that snooped the MAD.
+ * @send_wr: Work request information on the sent MAD.
+ * @mad_send_wc: Work completion information on the sent MAD. Valid
+ * only for snooping that occurs on a send completion.
+ *
+ * Clients snooping MADs should not modify data referenced by the @send_wr
+ * or @mad_send_wc.
+ */
+typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
+				     struct ib_send_wr *send_wr,
+				     struct ib_mad_send_wc *mad_send_wc);
+
+/**
+ * ib_mad_recv_handler - callback handler for a received MAD.
+ * @mad_agent: MAD agent requesting the received MAD.
+ * @mad_recv_wc: Received work completion information on the received MAD.
+ *
+ * MADs received in response to a send request operation will be handed to
+ * the user after the send operation completes. All data buffers given
+ * to registered agents through this routine are owned by the receiving
+ * client, except for snooping agents. Clients snooping MADs should not
+ * modify the data referenced by @mad_recv_wc.
+ */
+typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+				    struct ib_mad_recv_wc *mad_recv_wc);
+
+/**
+ * ib_mad_agent - Used to track MAD registration with the access layer.
+ * @device: Reference to device registration is on.
+ * @qp: Reference to QP used for sending and receiving MADs.
+ * @recv_handler: Callback handler for a received MAD.
+ * @send_handler: Callback handler for a sent MAD.
+ * @snoop_handler: Callback handler for snooped sent MADs.
+ * @context: User-specified context associated with this registration.
+ * @hi_tid: Access layer assigned transaction ID for this client.
+ * Unsolicited MADs sent by this client will have the upper 32-bits
+ * of their TID set to this value.
+ * @port_num: Port number on which QP is registered
+ */
+struct ib_mad_agent {
+	struct ib_device *device;
+	struct ib_qp *qp;
+	ib_mad_recv_handler recv_handler;
+	ib_mad_send_handler send_handler;
+	ib_mad_snoop_handler snoop_handler;
+	void *context;
+	u32 hi_tid;
+	u8 port_num;
+};
+
+/**
+ * ib_mad_send_wc - MAD send completion information.
+ * @wr_id: Work request identifier associated with the send MAD request.
+ * @status: Completion status.
+ * @vendor_err: Optional vendor error information returned with a failed
+ * request.
+ */
+struct ib_mad_send_wc {
+	u64 wr_id;
+	enum ib_wc_status status;
+	u32 vendor_err;
+};
+
+/**
+ * ib_mad_recv_buf - received MAD buffer information.
+ * @list: Reference to next data buffer for a received RMPP MAD.
+ * @grh: References a data buffer containing the global route header.
+ * The data referenced by this buffer is only valid if the GRH is
+ * valid.
+ * @mad: References the start of the received MAD.
+ */
+struct ib_mad_recv_buf {
+	struct list_head list;
+	struct ib_grh *grh;
+	struct ib_mad *mad;
+};
+
+/**
+ * ib_mad_recv_wc - received MAD information.
+ * @wc: Completion information for the received data.
+ * @recv_buf: Specifies the location of the received data buffer(s).
+ * @mad_len: The length of the received MAD, without duplicated headers.
+ *
+ * For a received response, the wr_id field of the wc is set to the wr_id
+ * for the corresponding send request.
+ */ +struct ib_mad_recv_wc { + struct ib_wc *wc; + struct ib_mad_recv_buf recv_buf; + int mad_len; +}; + +/** + * ib_mad_reg_req - MAD registration request + * @mgmt_class: Indicates which management class of MADs should be received + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. + * @mgmt_class_version: Indicates which version of MADs for the given + * management class to receive. + * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @method_mask: The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + */ +struct ib_mad_reg_req { + u8 mgmt_class; + u8 mgmt_class_version; + u8 oui[3]; + DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS); +}; + +/** + * ib_register_mad_agent - Register to send/receive MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP to access. Must be either + * IB_QPT_SMI or IB_QPT_GSI. + * @mad_reg_req: Specifies which unsolicited MADs should be received + * by the caller. This parameter may be NULL if the caller only + * wishes to receive solicited responses. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + struct ib_mad_reg_req *mad_reg_req, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +enum ib_mad_snoop_flags { + /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ + /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ + IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), + /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ + IB_MAD_SNOOP_RECVS = (1<<4) + /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ + /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ +}; + +/** + * ib_register_mad_snoop - Register to snoop sent and received MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP traffic to snoop. Must be either + * IB_QPT_SMI or IB_QPT_GSI. + * @mad_snoop_flags: Specifies where snooping occurs. + * @send_handler: The callback routine invoked for a snooped send. + * @recv_handler: The callback routine invoked for a snooped receive. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + int mad_snoop_flags, + ib_mad_snoop_handler snoop_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_unregister_mad_agent - Unregisters a client from using MAD services. + * @mad_agent: Corresponding MAD registration request to deregister. + * + * After invoking this routine, MAD services are no longer usable by the + * client on the associated QP. + */ +int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); + +/** + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client. 
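As a sketch of how a client might use the registration interface above, the following hypothetical module registers for unsolicited Performance Management Get and Set MADs on port 1's GSI QP. All my_* names are invented, and it assumes ib_register_mad_agent() reports failure through the usual ERR_PTR convention:

    static struct ib_mad_agent *perf_agent;

    static void my_mad_send_handler(struct ib_mad_agent *agent,
                                    struct ib_mad_send_wc *mad_send_wc)
    {
            /* ... release the send buffer identified by mad_send_wc->wr_id ... */
    }

    static void my_mad_recv_handler(struct ib_mad_agent *agent,
                                    struct ib_mad_recv_wc *mad_recv_wc)
    {
            /* ... examine mad_recv_wc->recv_buf.mad ... */
            ib_free_recv_mad(mad_recv_wc);  /* buffers must be returned (see below) */
    }

    static int my_register(struct ib_device *device)
    {
            struct ib_mad_reg_req req = { 0 };

            req.mgmt_class         = IB_MGMT_CLASS_PERF_MGMT;
            req.mgmt_class_version = 1;
            set_bit(IB_MGMT_METHOD_GET, req.method_mask);
            set_bit(IB_MGMT_METHOD_SET, req.method_mask);

            perf_agent = ib_register_mad_agent(device, 1, IB_QPT_GSI, &req,
                                               0 /* no RMPP */,
                                               my_mad_send_handler,
                                               my_mad_recv_handler, NULL);
            return IS_ERR(perf_agent) ? PTR_ERR(perf_agent) : 0;
    }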
+ * @mad_agent: Specifies the associated registration to post the send to. + * @send_wr: Specifies the information needed to send the MAD(s). + * @bad_send_wr: Specifies the MAD on which an error was encountered. + * + * Sent MADs are not guaranteed to complete in the order that they were posted. + */ +int ib_post_send_mad(struct ib_mad_agent *mad_agent, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +/** + * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. + * @mad_recv_wc: Work completion information for a received MAD. + * @buf: User-provided data buffer to receive the coalesced buffers. The + * referenced buffer should be at least the size of the mad_len specified + * by @mad_recv_wc. + * + * This call copies a chain of received RMPP MADs into a single data buffer, + * removing duplicated headers. + */ +void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, + void *buf); + +/** + * ib_free_recv_mad - Returns data buffers used to receive a MAD to the + * access layer. + * @mad_recv_wc: Work completion information for a received MAD. + * + * Clients receiving MADs through their ib_mad_recv_handler must call this + * routine to return the work completion buffers to the access layer. + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with the sent MAD. + * @wr_id: Indicates the work request identifier of the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. + */ +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + u64 wr_id); + +/** + * ib_redirect_mad_qp - Registers a QP for MAD services. + * @qp: Reference to a QP that requires MAD services. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + * + * Use of this call allows clients to use MAD services, such as RMPP, + * on user-owned QPs. After calling this routine, users may send + * MADs on the specified QP by calling ib_post_send_mad. + */ +struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_process_mad_wc - Processes a work completion associated with a + * MAD sent or received on a redirected QP. + * @mad_agent: Specifies the registered MAD service using the redirected QP. + * @wc: References a work completion associated with a sent or received + * MAD segment. + * + * This routine is used to complete or continue processing on a MAD request. + * If the work completion is associated with a send operation, calling + * this routine is required to continue an RMPP transfer or to wait for a + * corresponding response, if it is a request. If the work completion is + * associated with a receive operation, calling this routine is required to + * process an inbound or outbound RMPP transfer, or to match a response MAD + * with its corresponding request. 
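A sketch of a send through ib_post_send_mad(), filling the UD-specific members of struct ib_send_wr (defined later in ib_verbs.h) that the MAD layer consumes. my_send_mad() and its arguments are hypothetical, and the MAD buffer is assumed to be DMA-mapped already and covered by lkey:

    static int my_send_mad(struct ib_mad_agent *agent, struct ib_ah *ah,
                           u64 sge_addr, u32 lkey, struct ib_mad *mad)
    {
            struct ib_sge sge = {
                    .addr   = sge_addr,
                    .length = sizeof *mad,
                    .lkey   = lkey,
            };
            struct ib_send_wr wr = {
                    .wr_id      = (unsigned long) mad,
                    .sg_list    = &sge,
                    .num_sge    = 1,
                    .opcode     = IB_WR_SEND,
                    .send_flags = IB_SEND_SIGNALED,
            }, *bad_wr;

            wr.wr.ud.ah          = ah;
            wr.wr.ud.mad_hdr     = &mad->mad_hdr;
            wr.wr.ud.remote_qpn  = 1;               /* GSI */
            wr.wr.ud.remote_qkey = IB_QP1_QKEY;
            wr.wr.ud.timeout_ms  = 1000;            /* wait 1s for a response */

            return ib_post_send_mad(agent, &wr, &bad_wr);
    }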
+ */ +int ib_process_mad_wc(struct ib_mad_agent *mad_agent, + struct ib_wc *wc); + +#endif /* IB_MAD_H */ diff --git a/drivers/infiniband/include/ib_pack.h b/drivers/infiniband/include/ib_pack.h new file mode 100644 index 000000000000..fe480f3e8654 --- /dev/null +++ b/drivers/infiniband/include/ib_pack.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef IB_PACK_H +#define IB_PACK_H + +#include <ib_verbs.h> + +enum { + IB_LRH_BYTES = 8, + IB_GRH_BYTES = 40, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8 +}; + +struct ib_field { + size_t struct_offset_bytes; + size_t struct_size_bytes; + int offset_words; + int offset_bits; + int size_bits; + char *field_name; +}; + +#define RESERVED \ + .field_name = "reserved" + +/* + * This macro cleans up the definitions of constants for BTH opcodes. + * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, + * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives + * the correct value. + * + * In short, user code should use the constants defined using the + * macro rather than worrying about adding together other constants. 
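For example, IB_OPCODE(UD, SEND_ONLY) expands inside the enum below to

    IB_OPCODE_UD_SEND_ONLY = IB_OPCODE_UD + IB_OPCODE_SEND_ONLY

which evaluates to 0x60 + 0x04 = 0x64, the BTH opcode of a UD send-only packet.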
+*/ +#define IB_OPCODE(transport, op) \ + IB_OPCODE_ ## transport ## _ ## op = \ + IB_OPCODE_ ## transport + IB_OPCODE_ ## op + +enum { + /* transport types -- just used to define real constants */ + IB_OPCODE_RC = 0x00, + IB_OPCODE_UC = 0x20, + IB_OPCODE_RD = 0x40, + IB_OPCODE_UD = 0x60, + + /* operations -- just used to define real constants */ + IB_OPCODE_SEND_FIRST = 0x00, + IB_OPCODE_SEND_MIDDLE = 0x01, + IB_OPCODE_SEND_LAST = 0x02, + IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, + IB_OPCODE_SEND_ONLY = 0x04, + IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, + IB_OPCODE_RDMA_WRITE_FIRST = 0x06, + IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, + IB_OPCODE_RDMA_WRITE_LAST = 0x08, + IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, + IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, + IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, + IB_OPCODE_RDMA_READ_REQUEST = 0x0c, + IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, + IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, + IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, + IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, + IB_OPCODE_ACKNOWLEDGE = 0x11, + IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, + IB_OPCODE_COMPARE_SWAP = 0x13, + IB_OPCODE_FETCH_ADD = 0x14, + + /* real constants follow -- see comment about above IB_OPCODE() + macro for more details */ + + /* RC */ + IB_OPCODE(RC, SEND_FIRST), + IB_OPCODE(RC, SEND_MIDDLE), + IB_OPCODE(RC, SEND_LAST), + IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, SEND_ONLY), + IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_FIRST), + IB_OPCODE(RC, RDMA_WRITE_MIDDLE), + IB_OPCODE(RC, RDMA_WRITE_LAST), + IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_ONLY), + IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_READ_REQUEST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RC, ACKNOWLEDGE), + IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RC, COMPARE_SWAP), + IB_OPCODE(RC, FETCH_ADD), + + /* UC */ + IB_OPCODE(UC, SEND_FIRST), + IB_OPCODE(UC, SEND_MIDDLE), + IB_OPCODE(UC, SEND_LAST), + IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, SEND_ONLY), + IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_FIRST), + IB_OPCODE(UC, RDMA_WRITE_MIDDLE), + IB_OPCODE(UC, RDMA_WRITE_LAST), + IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_ONLY), + IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + + /* RD */ + IB_OPCODE(RD, SEND_FIRST), + IB_OPCODE(RD, SEND_MIDDLE), + IB_OPCODE(RD, SEND_LAST), + IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, SEND_ONLY), + IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_FIRST), + IB_OPCODE(RD, RDMA_WRITE_MIDDLE), + IB_OPCODE(RD, RDMA_WRITE_LAST), + IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_ONLY), + IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_READ_REQUEST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RD, ACKNOWLEDGE), + IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RD, COMPARE_SWAP), + IB_OPCODE(RD, FETCH_ADD), + + /* UD */ + IB_OPCODE(UD, SEND_ONLY), + IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) +}; + +enum { + IB_LNH_RAW = 0, + IB_LNH_IP = 1, + IB_LNH_IBA_LOCAL = 2, + IB_LNH_IBA_GLOBAL = 3 +}; + +struct ib_unpacked_lrh { + u8 virtual_lane; + u8 link_version; + u8 service_level; + u8 
link_next_header; + __be16 destination_lid; + __be16 packet_length; + __be16 source_lid; +}; + +struct ib_unpacked_grh { + u8 ip_version; + u8 traffic_class; + __be32 flow_label; + __be16 payload_length; + u8 next_header; + u8 hop_limit; + union ib_gid source_gid; + union ib_gid destination_gid; +}; + +struct ib_unpacked_bth { + u8 opcode; + u8 solicited_event; + u8 mig_req; + u8 pad_count; + u8 transport_header_version; + __be16 pkey; + __be32 destination_qpn; + u8 ack_req; + __be32 psn; +}; + +struct ib_unpacked_deth { + __be32 qkey; + __be32 source_qpn; +}; + +struct ib_ud_header { + struct ib_unpacked_lrh lrh; + int grh_present; + struct ib_unpacked_grh grh; + struct ib_unpacked_bth bth; + struct ib_unpacked_deth deth; + int immediate_present; + __be32 immediate_data; +}; + +void ib_pack(const struct ib_field *desc, + int desc_len, + void *structure, + void *buf); + +void ib_unpack(const struct ib_field *desc, + int desc_len, + void *buf, + void *structure); + +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header); + +int ib_ud_header_pack(struct ib_ud_header *header, + void *buf); + +int ib_ud_header_unpack(void *buf, + struct ib_ud_header *header); + +#endif /* IB_PACK_H */ diff --git a/drivers/infiniband/include/ib_sa.h b/drivers/infiniband/include/ib_sa.h new file mode 100644 index 000000000000..5f6a9fc99e82 --- /dev/null +++ b/drivers/infiniband/include/ib_sa.h @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_sa.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#ifndef IB_SA_H +#define IB_SA_H + +#include <linux/compiler.h> + +#include <ib_verbs.h> +#include <ib_mad.h> + +enum { + IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ + + IB_SA_METHOD_DELETE = 0x15 +}; + +enum ib_sa_selector { + IB_SA_GTE = 0, + IB_SA_LTE = 1, + IB_SA_EQ = 2, + /* + * The meaning of "best" depends on the attribute: for + * example, for MTU best will return the largest available + * MTU, while for packet life time, best will return the + * smallest available life time. 
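A sketch of the UD header helpers above in use: initialize the header for a fixed payload, fill in the addressing fields, and pack the result into a caller-supplied buffer. build_ud_headers() and its arguments are hypothetical; the byte-order helpers are the standard kernel cpu_to_be*() ones:

    /* 'buf' must have room for IB_LRH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES
     * (no GRH in this sketch). */
    static void build_ud_headers(u16 dlid, u32 remote_qpn, u32 local_qpn,
                                 void *buf)
    {
            struct ib_ud_header hdr;

            ib_ud_header_init(64 /* payload bytes */, 0 /* no GRH */, &hdr);

            hdr.lrh.destination_lid = cpu_to_be16(dlid);
            hdr.bth.destination_qpn = cpu_to_be32(remote_qpn);
            hdr.deth.qkey           = cpu_to_be32(IB_QP1_QKEY);
            hdr.deth.source_qpn     = cpu_to_be32(local_qpn);

            ib_ud_header_pack(&hdr, buf);
    }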
+ */ + IB_SA_BEST = 3 +}; + +typedef u64 __bitwise ib_sa_comp_mask; + +#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n)) + +/* + * Structures for SA records are named "struct ib_sa_xxx_rec." No + * attempt is made to pack structures to match the physical layout of + * SA records in SA MADs; all packing and unpacking is handled by the + * SA query code. + * + * For a record with structure ib_sa_xxx_rec, the naming convention + * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we + * never use different abbreviations or otherwise change the spelling + * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY). + * + * Reserved rows are indicated with comments to help maintainability. + */ + +/* reserved: 0 */ +/* reserved: 1 */ +#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) +#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) +#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) +#define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5) +#define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6) +/* reserved: 7 */ +#define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8) +#define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9) +#define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10) +#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) +#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) +#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) +/* reserved: 14 */ +#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) +#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) +#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) +#define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18) +#define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19) +#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20) +#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21) +#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) + +struct ib_sa_path_rec { + /* reserved */ + /* reserved */ + union ib_gid dgid; + union ib_gid sgid; + u16 dlid; + u16 slid; + int raw_traffic; + /* reserved */ + u32 flow_label; + u8 hop_limit; + u8 traffic_class; + int reversible; + u8 numb_path; + u16 pkey; + /* reserved */ + u8 sl; + u8 mtu_selector; + enum ib_mtu mtu; + u8 rate_selector; + u8 rate; + u8 packet_life_time_selector; + u8 packet_life_time; + u8 preference; +}; + +#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) +#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) +#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) +#define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3) +#define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4) +#define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5) +#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6) +#define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7) +#define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8) +#define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11) +#define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12) +#define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13) +#define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14) +#define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15) +#define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16) +#define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17) + +struct ib_sa_mcmember_rec { + union ib_gid mgid; + union ib_gid port_gid; + u32 qkey; + u16 mlid; + u8 mtu_selector; + enum ib_mtu mtu; + u8 traffic_class; + u16 pkey; + u8 rate_selector; + u8 rate; + u8 packet_life_time_selector; + 
u8 packet_life_time; + u8 sl; + u32 flow_label; + u8 hop_limit; + u8 scope; + u8 join_state; + int proxy_join; +}; + +struct ib_sa_query; + +void ib_sa_cancel_query(int id, struct ib_sa_query *query); + +int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_path_rec *resp, + void *context), + void *context, + struct ib_sa_query **query); + +int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query); + +/** + * ib_sa_mcmember_rec_set - Start an MCMember set query + * @device:device to send query on + * @port_num: port number to send query on + * @rec:MCMember Record to send in query + * @comp_mask:component mask to send in query + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when query completes, times out or is + * canceled + * @context:opaque user context passed to callback + * @query:query context, used to cancel query + * + * Send an MCMember Set query to the SA (e.g. to join a multicast + * group). The callback function will be called when the query + * completes (or fails); status is 0 for a successful response, -EINTR + * if the query is canceled, -ETIMEDOUT if the query timed out, or + * -EIO if an error occurred sending the query. The resp parameter of + * the callback is only valid if status is 0. + * + * If the return value of ib_sa_mcmember_rec_set() is negative, it is + * an error code. Otherwise it is a query ID that can be used to + * cancel the query. + */ +static inline int +ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query) +{ + return ib_sa_mcmember_rec_query(device, port_num, + IB_MGMT_METHOD_SET, + rec, comp_mask, + timeout_ms, gfp_mask, callback, + context, query); +} + +/** + * ib_sa_mcmember_rec_delete - Start an MCMember delete query + * @device:device to send query on + * @port_num: port number to send query on + * @rec:MCMember Record to send in query + * @comp_mask:component mask to send in query + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when query completes, times out or is + * canceled + * @context:opaque user context passed to callback + * @query:query context, used to cancel query + * + * Send an MCMember Delete query to the SA (e.g. to leave a multicast + * group). The callback function will be called when the query + * completes (or fails); status is 0 for a successful response, -EINTR + * if the query is canceled, -ETIMEDOUT if the query timed out, or + * -EIO if an error occurred sending the query. The resp parameter of + * the callback is only valid if status is 0. + * + * If the return value of ib_sa_mcmember_rec_delete() is negative, it + * is an error code. Otherwise it is a query ID that can be used to + * cancel the query. 
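A sketch of a path record lookup built on ib_sa_path_rec_get() above; my_get_path() and my_path_handler() are hypothetical, and the callback contract (resp valid only when status is 0) follows the convention documented for the MCMember queries above:

    static void my_path_handler(int status, struct ib_sa_path_rec *resp,
                                void *context)
    {
            /* resp is only valid when status == 0 */
            if (status)
                    return;
            /* ... cache *resp, create an address handle, etc ... */
    }

    static int my_get_path(struct ib_device *device, u8 port_num,
                           union ib_gid *sgid, union ib_gid *dgid,
                           struct ib_sa_query **query)
    {
            struct ib_sa_path_rec rec = {
                    .sgid      = *sgid,
                    .dgid      = *dgid,
                    .numb_path = 1,
            };

            return ib_sa_path_rec_get(device, port_num, &rec,
                                      IB_SA_PATH_REC_SGID |
                                      IB_SA_PATH_REC_DGID |
                                      IB_SA_PATH_REC_NUMB_PATH,
                                      1000 /* ms */, GFP_KERNEL,
                                      my_path_handler, NULL, query);
    }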
+ */ +static inline int +ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query) +{ + return ib_sa_mcmember_rec_query(device, port_num, + IB_SA_METHOD_DELETE, + rec, comp_mask, + timeout_ms, gfp_mask, callback, + context, query); +} + + +#endif /* IB_SA_H */ diff --git a/drivers/infiniband/include/ib_smi.h b/drivers/infiniband/include/ib_smi.h new file mode 100644 index 000000000000..ca8216514963 --- /dev/null +++ b/drivers/infiniband/include/ib_smi.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#if !defined( IB_SMI_H ) +#define IB_SMI_H + +#include <ib_mad.h> + +#define IB_LID_PERMISSIVE 0xFFFF + +#define IB_SMP_DATA_SIZE 64 +#define IB_SMP_MAX_PATH_HOPS 64 + +struct ib_smp { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + u16 status; + u8 hop_ptr; + u8 hop_cnt; + u64 tid; + u16 attr_id; + u16 resv; + u32 attr_mod; + u64 mkey; + u16 dr_slid; + u16 dr_dlid; + u8 reserved[28]; + u8 data[IB_SMP_DATA_SIZE]; + u8 initial_path[IB_SMP_MAX_PATH_HOPS]; + u8 return_path[IB_SMP_MAX_PATH_HOPS]; +} __attribute__ ((packed)); + +#define IB_SMP_DIRECTION __constant_htons(0x8000) + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE __constant_htons(0x0002) +#define IB_SMP_ATTR_NODE_DESC __constant_htons(0x0010) +#define IB_SMP_ATTR_NODE_INFO __constant_htons(0x0011) +#define IB_SMP_ATTR_SWITCH_INFO __constant_htons(0x0012) +#define IB_SMP_ATTR_GUID_INFO __constant_htons(0x0014) +#define IB_SMP_ATTR_PORT_INFO __constant_htons(0x0015) +#define IB_SMP_ATTR_PKEY_TABLE __constant_htons(0x0016) +#define IB_SMP_ATTR_SL_TO_VL_TABLE __constant_htons(0x0017) +#define IB_SMP_ATTR_VL_ARB_TABLE __constant_htons(0x0018) +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE __constant_htons(0x0019) +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE __constant_htons(0x001A) +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE __constant_htons(0x001B) +#define IB_SMP_ATTR_SM_INFO __constant_htons(0x0020) +#define IB_SMP_ATTR_VENDOR_DIAG __constant_htons(0x0030) +#define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031) +#define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00) + +static inline u8 +ib_get_smp_direction(struct ib_smp *smp) +{ + return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); +} + +#endif /* IB_SMI_H */ diff --git a/drivers/infiniband/include/ib_user_mad.h b/drivers/infiniband/include/ib_user_mad.h new file mode 100644 index 000000000000..06ad4a6075fa --- /dev/null +++ b/drivers/infiniband/include/ib_user_mad.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
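A sketch of filling in a directed-route SubnGet(PortInfo), the way a subnet manager might probe a node two hops away. build_dr_port_info_get() and the example path are hypothetical; per the IB spec, the hop pointer starts at 0 and initial_path[0] is unused:

    static void build_dr_port_info_get(struct ib_smp *smp, u64 tid)
    {
            memset(smp, 0, sizeof *smp);
            smp->base_version    = IB_MGMT_BASE_VERSION;
            smp->mgmt_class      = IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;
            smp->class_version   = 1;
            smp->method          = IB_MGMT_METHOD_GET;
            smp->tid             = tid;
            smp->attr_id         = IB_SMP_ATTR_PORT_INFO;
            smp->dr_slid         = IB_LID_PERMISSIVE;
            smp->dr_dlid         = IB_LID_PERMISSIVE;
            smp->hop_cnt         = 2;       /* two hops along initial_path */
            smp->initial_path[1] = 1;       /* egress port at hop 1 */
            smp->initial_path[2] = 3;       /* egress port at hop 2 */
    }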
+ * + * $Id: ib_user_mad.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#ifndef IB_USER_MAD_H +#define IB_USER_MAD_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_MAD_ABI_VERSION 2 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + */ + +/** + * ib_user_mad - MAD packet + * @data - Contents of MAD + * @id - ID of agent MAD received with/to be sent with + * @status - 0 on successful receive, ETIMEDOUT if no response + * received (transaction ID in data[] will be set to TID of original + * request) (ignored on send) + * @timeout_ms - Milliseconds to wait for response (unset on receive) + * @qpn - Remote QP number received from/to be sent to + * @qkey - Remote Q_Key to be sent with (unset on receive) + * @lid - Remote lid received from/to be sent to + * @sl - Service level received with/to be sent with + * @path_bits - Local path bits received with/to be sent with + * @grh_present - If set, GRH was received/should be sent + * @gid_index - Local GID index to send with (unset on receive) + * @hop_limit - Hop limit in GRH + * @traffic_class - Traffic class in GRH + * @gid - Remote GID in GRH + * @flow_label - Flow label in GRH + * + * All multi-byte quantities are stored in network (big endian) byte order. + */ +struct ib_user_mad { + __u8 data[256]; + __u32 id; + __u32 status; + __u32 timeout_ms; + __u32 qpn; + __u32 qkey; + __u16 lid; + __u8 sl; + __u8 path_bits; + __u8 grh_present; + __u8 gid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 gid[16]; + __u32 flow_label; +}; + +/** + * ib_user_mad_reg_req - MAD registration request + * @id - Set by the kernel; used to identify agent in future requests. + * @qpn - Queue pair number; must be 0 or 1. + * @method_mask - The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + * @mgmt_class - Indicates which management class of MADs should be received + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. + * @mgmt_class_version - Indicates which version of MADs for the given + * management class to receive. + * @oui - Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + */ +struct ib_user_mad_reg_req { + __u32 id; + __u32 method_mask[4]; + __u8 qpn; + __u8 mgmt_class; + __u8 mgmt_class_version; + __u8 oui[3]; +}; + +#define IB_IOCTL_MAGIC 0x1b + +#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ + struct ib_user_mad_reg_req) + +#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) + +#endif /* IB_USER_MAD_H */ diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h new file mode 100644 index 000000000000..78681c7ed3f0 --- /dev/null +++ b/drivers/infiniband/include/ib_verbs.h @@ -0,0 +1,1249 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
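In this userspace ABI, method_mask is a flat __u32[4] rather than the kernel-side DECLARE_BITMAP, so a caller sets bits by hand; a hypothetical helper:

    /* Hypothetical userspace helper: method_mask is 128 bits stored as
     * four __u32s, so bit 'method' lives in word method/32. */
    static void reg_req_set_method(struct ib_user_mad_reg_req *req,
                                   unsigned int method)
    {
            req->method_mask[method / 32] |= 1u << (method % 32);
    }

A caller would apply this to each method of interest before issuing the IB_USER_MAD_REGISTER_AGENT ioctl.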
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#if !defined(IB_VERBS_H) +#define IB_VERBS_H + +#include <linux/types.h> +#include <linux/device.h> +#include <asm/atomic.h> + +union ib_gid { + u8 raw[16]; + struct { + u64 subnet_prefix; + u64 interface_id; + } global; +}; + +enum ib_node_type { + IB_NODE_CA = 1, + IB_NODE_SWITCH, + IB_NODE_ROUTER +}; + +enum ib_device_cap_flags { + IB_DEVICE_RESIZE_MAX_WR = 1, + IB_DEVICE_BAD_PKEY_CNTR = (1<<1), + IB_DEVICE_BAD_QKEY_CNTR = (1<<2), + IB_DEVICE_RAW_MULTI = (1<<3), + IB_DEVICE_AUTO_PATH_MIG = (1<<4), + IB_DEVICE_CHANGE_PHY_PORT = (1<<5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), + IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), + IB_DEVICE_SHUTDOWN_PORT = (1<<8), + IB_DEVICE_INIT_TYPE = (1<<9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), + IB_DEVICE_SYS_IMAGE_GUID = (1<<11), + IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), + IB_DEVICE_SRQ_RESIZE = (1<<13), + IB_DEVICE_N_NOTIFY_CQ = (1<<14), + IB_DEVICE_RQ_SIG_TYPE = (1<<15) +}; + +enum ib_atomic_cap { + IB_ATOMIC_NONE, + IB_ATOMIC_HCA, + IB_ATOMIC_GLOB +}; + +struct ib_device_attr { + u64 fw_ver; + u64 node_guid; + u64 sys_image_guid; + u64 max_mr_size; + u64 page_size_cap; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + int max_qp; + int max_qp_wr; + int device_cap_flags; + int max_sge; + int max_sge_rd; + int max_cq; + int max_cqe; + int max_mr; + int max_pd; + int max_qp_rd_atom; + int max_ee_rd_atom; + int max_res_rd_atom; + int max_qp_init_rd_atom; + int max_ee_init_rd_atom; + enum ib_atomic_cap atomic_cap; + int max_ee; + int max_rdd; + int max_mw; + int max_raw_ipv6_qp; + int max_raw_ethy_qp; + int max_mcast_grp; + int max_mcast_qp_attach; + int max_total_mcast_qp_attach; + int max_ah; + int max_fmr; + int max_map_per_fmr; + int max_srq; + int max_srq_wr; + int max_srq_sge; + u16 max_pkeys; + u8 local_ca_ack_delay; +}; + +enum ib_mtu { + IB_MTU_256 = 1, + IB_MTU_512 = 2, + IB_MTU_1024 = 3, + IB_MTU_2048 = 4, + IB_MTU_4096 = 5 +}; + +static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) +{ + switch (mtu) { + case IB_MTU_256: return 256; + case IB_MTU_512: return 512; + case IB_MTU_1024: return 1024; + case IB_MTU_2048: return 2048; + case IB_MTU_4096: return 4096; + default: return -1; + } +} + +enum ib_port_state { + IB_PORT_NOP = 0, + IB_PORT_DOWN = 1, + 
IB_PORT_INIT = 2, + IB_PORT_ARMED = 3, + IB_PORT_ACTIVE = 4, + IB_PORT_ACTIVE_DEFER = 5 +}; + +enum ib_port_cap_flags { + IB_PORT_SM = (1<<31), + IB_PORT_NOTICE_SUP = (1<<30), + IB_PORT_TRAP_SUP = (1<<29), + IB_PORT_AUTO_MIGR_SUP = (1<<27), + IB_PORT_SL_MAP_SUP = (1<<26), + IB_PORT_MKEY_NVRAM = (1<<25), + IB_PORT_PKEY_NVRAM = (1<<24), + IB_PORT_LED_INFO_SUP = (1<<23), + IB_PORT_SM_DISABLED = (1<<22), + IB_PORT_SYS_IMAGE_GUID_SUP = (1<<21), + IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = (1<<20), + IB_PORT_CM_SUP = (1<<16), + IB_PORT_SNMP_TUNNEL_SUP = (1<<15), + IB_PORT_REINIT_SUP = (1<<14), + IB_PORT_DEVICE_MGMT_SUP = (1<<13), + IB_PORT_VENDOR_CLASS_SUP = (1<<12), + IB_PORT_DR_NOTICE_SUP = (1<<11), + IB_PORT_PORT_NOTICE_SUP = (1<<10), + IB_PORT_BOOT_MGMT_SUP = (1<<9) +}; + +enum ib_port_width { + IB_WIDTH_1X = 1, + IB_WIDTH_4X = 2, + IB_WIDTH_8X = 4, + IB_WIDTH_12X = 8 +}; + +static inline int ib_width_enum_to_int(enum ib_port_width width) +{ + switch (width) { + case IB_WIDTH_1X: return 1; + case IB_WIDTH_4X: return 4; + case IB_WIDTH_8X: return 8; + case IB_WIDTH_12X: return 12; + default: return -1; + } +} + +struct ib_port_attr { + enum ib_port_state state; + enum ib_mtu max_mtu; + enum ib_mtu active_mtu; + int gid_tbl_len; + u32 port_cap_flags; + u32 max_msg_sz; + u32 bad_pkey_cntr; + u32 qkey_viol_cntr; + u16 pkey_tbl_len; + u16 lid; + u16 sm_lid; + u8 lmc; + u8 max_vl_num; + u8 sm_sl; + u8 subnet_timeout; + u8 init_type_reply; + u8 active_width; + u8 active_speed; +}; + +enum ib_device_modify_flags { + IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 +}; + +struct ib_device_modify { + u64 sys_image_guid; +}; + +enum ib_port_modify_flags { + IB_PORT_SHUTDOWN = 1, + IB_PORT_INIT_TYPE = (1<<2), + IB_PORT_RESET_QKEY_CNTR = (1<<3) +}; + +struct ib_port_modify { + u32 set_port_cap_mask; + u32 clr_port_cap_mask; + u8 init_type; +}; + +enum ib_event_type { + IB_EVENT_CQ_ERR, + IB_EVENT_QP_FATAL, + IB_EVENT_QP_REQ_ERR, + IB_EVENT_QP_ACCESS_ERR, + IB_EVENT_COMM_EST, + IB_EVENT_SQ_DRAINED, + IB_EVENT_PATH_MIG, + IB_EVENT_PATH_MIG_ERR, + IB_EVENT_DEVICE_FATAL, + IB_EVENT_PORT_ACTIVE, + IB_EVENT_PORT_ERR, + IB_EVENT_LID_CHANGE, + IB_EVENT_PKEY_CHANGE, + IB_EVENT_SM_CHANGE +}; + +struct ib_event { + struct ib_device *device; + union { + struct ib_cq *cq; + struct ib_qp *qp; + u8 port_num; + } element; + enum ib_event_type event; +}; + +struct ib_event_handler { + struct ib_device *device; + void (*handler)(struct ib_event_handler *, struct ib_event *); + struct list_head list; +}; + +#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ + do { \ + (_ptr)->device = _device; \ + (_ptr)->handler = _handler; \ + INIT_LIST_HEAD(&(_ptr)->list); \ + } while (0) + +struct ib_global_route { + union ib_gid dgid; + u32 flow_label; + u8 sgid_index; + u8 hop_limit; + u8 traffic_class; +}; + +enum { + IB_MULTICAST_QPN = 0xffffff +}; + +enum ib_ah_flags { + IB_AH_GRH = 1 +}; + +struct ib_ah_attr { + struct ib_global_route grh; + u16 dlid; + u8 sl; + u8 src_path_bits; + u8 static_rate; + u8 ah_flags; + u8 port_num; +}; + +enum ib_wc_status { + IB_WC_SUCCESS, + IB_WC_LOC_LEN_ERR, + IB_WC_LOC_QP_OP_ERR, + IB_WC_LOC_EEC_OP_ERR, + IB_WC_LOC_PROT_ERR, + IB_WC_WR_FLUSH_ERR, + IB_WC_MW_BIND_ERR, + IB_WC_BAD_RESP_ERR, + IB_WC_LOC_ACCESS_ERR, + IB_WC_REM_INV_REQ_ERR, + IB_WC_REM_ACCESS_ERR, + IB_WC_REM_OP_ERR, + IB_WC_RETRY_EXC_ERR, + IB_WC_RNR_RETRY_EXC_ERR, + IB_WC_LOC_RDD_VIOL_ERR, + IB_WC_REM_INV_RD_REQ_ERR, + IB_WC_REM_ABORT_ERR, + IB_WC_INV_EECN_ERR, + IB_WC_INV_EEC_STATE_ERR, + IB_WC_FATAL_ERR, + IB_WC_RESP_TIMEOUT_ERR, + 
IB_WC_GENERAL_ERR +}; + +enum ib_wc_opcode { + IB_WC_SEND, + IB_WC_RDMA_WRITE, + IB_WC_RDMA_READ, + IB_WC_COMP_SWAP, + IB_WC_FETCH_ADD, + IB_WC_BIND_MW, +/* + * Set value of IB_WC_RECV so consumers can test if a completion is a + * receive by testing (opcode & IB_WC_RECV). + */ + IB_WC_RECV = 1 << 7, + IB_WC_RECV_RDMA_WITH_IMM +}; + +enum ib_wc_flags { + IB_WC_GRH = 1, + IB_WC_WITH_IMM = (1<<1) +}; + +struct ib_wc { + u64 wr_id; + enum ib_wc_status status; + enum ib_wc_opcode opcode; + u32 vendor_err; + u32 byte_len; + __be32 imm_data; + u32 src_qp; + int wc_flags; + u16 pkey_index; + u16 slid; + u8 sl; + u8 dlid_path_bits; + u8 port_num; /* valid only for DR SMPs on switches */ +}; + +enum ib_cq_notify { + IB_CQ_SOLICITED, + IB_CQ_NEXT_COMP +}; + +struct ib_qp_cap { + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +enum ib_sig_type { + IB_SIGNAL_ALL_WR, + IB_SIGNAL_REQ_WR +}; + +enum ib_qp_type { + /* + * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries + * here (and in that order) since the MAD layer uses them as + * indices into a 2-entry table. + */ + IB_QPT_SMI, + IB_QPT_GSI, + + IB_QPT_RC, + IB_QPT_UC, + IB_QPT_UD, + IB_QPT_RAW_IPV6, + IB_QPT_RAW_ETY +}; + +struct ib_qp_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + enum ib_sig_type rq_sig_type; + enum ib_qp_type qp_type; + u8 port_num; /* special QP types only */ +}; + +enum ib_rnr_timeout { + IB_RNR_TIMER_655_36 = 0, + IB_RNR_TIMER_000_01 = 1, + IB_RNR_TIMER_000_02 = 2, + IB_RNR_TIMER_000_03 = 3, + IB_RNR_TIMER_000_04 = 4, + IB_RNR_TIMER_000_06 = 5, + IB_RNR_TIMER_000_08 = 6, + IB_RNR_TIMER_000_12 = 7, + IB_RNR_TIMER_000_16 = 8, + IB_RNR_TIMER_000_24 = 9, + IB_RNR_TIMER_000_32 = 10, + IB_RNR_TIMER_000_48 = 11, + IB_RNR_TIMER_000_64 = 12, + IB_RNR_TIMER_000_96 = 13, + IB_RNR_TIMER_001_28 = 14, + IB_RNR_TIMER_001_92 = 15, + IB_RNR_TIMER_002_56 = 16, + IB_RNR_TIMER_003_84 = 17, + IB_RNR_TIMER_005_12 = 18, + IB_RNR_TIMER_007_68 = 19, + IB_RNR_TIMER_010_24 = 20, + IB_RNR_TIMER_015_36 = 21, + IB_RNR_TIMER_020_48 = 22, + IB_RNR_TIMER_030_72 = 23, + IB_RNR_TIMER_040_96 = 24, + IB_RNR_TIMER_061_44 = 25, + IB_RNR_TIMER_081_92 = 26, + IB_RNR_TIMER_122_88 = 27, + IB_RNR_TIMER_163_84 = 28, + IB_RNR_TIMER_245_76 = 29, + IB_RNR_TIMER_327_68 = 30, + IB_RNR_TIMER_491_52 = 31 +}; + +enum ib_qp_attr_mask { + IB_QP_STATE = 1, + IB_QP_CUR_STATE = (1<<1), + IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), + IB_QP_ACCESS_FLAGS = (1<<3), + IB_QP_PKEY_INDEX = (1<<4), + IB_QP_PORT = (1<<5), + IB_QP_QKEY = (1<<6), + IB_QP_AV = (1<<7), + IB_QP_PATH_MTU = (1<<8), + IB_QP_TIMEOUT = (1<<9), + IB_QP_RETRY_CNT = (1<<10), + IB_QP_RNR_RETRY = (1<<11), + IB_QP_RQ_PSN = (1<<12), + IB_QP_MAX_QP_RD_ATOMIC = (1<<13), + IB_QP_ALT_PATH = (1<<14), + IB_QP_MIN_RNR_TIMER = (1<<15), + IB_QP_SQ_PSN = (1<<16), + IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), + IB_QP_PATH_MIG_STATE = (1<<18), + IB_QP_CAP = (1<<19), + IB_QP_DEST_QPN = (1<<20) +}; + +enum ib_qp_state { + IB_QPS_RESET, + IB_QPS_INIT, + IB_QPS_RTR, + IB_QPS_RTS, + IB_QPS_SQD, + IB_QPS_SQE, + IB_QPS_ERR +}; + +enum ib_mig_state { + IB_MIG_MIGRATED, + IB_MIG_REARM, + IB_MIG_ARMED +}; + +struct ib_qp_attr { + enum ib_qp_state qp_state; + enum ib_qp_state cur_qp_state; + enum ib_mtu path_mtu; + enum ib_mig_state path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + int qp_access_flags; + 
struct ib_qp_cap cap; + struct ib_ah_attr ah_attr; + struct ib_ah_attr alt_ah_attr; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; +}; + +enum ib_wr_opcode { + IB_WR_RDMA_WRITE, + IB_WR_RDMA_WRITE_WITH_IMM, + IB_WR_SEND, + IB_WR_SEND_WITH_IMM, + IB_WR_RDMA_READ, + IB_WR_ATOMIC_CMP_AND_SWP, + IB_WR_ATOMIC_FETCH_AND_ADD +}; + +enum ib_send_flags { + IB_SEND_FENCE = 1, + IB_SEND_SIGNALED = (1<<1), + IB_SEND_SOLICITED = (1<<2), + IB_SEND_INLINE = (1<<3) +}; + +enum ib_recv_flags { + IB_RECV_SIGNALED = 1 +}; + +struct ib_sge { + u64 addr; + u32 length; + u32 lkey; +}; + +struct ib_send_wr { + struct ib_send_wr *next; + u64 wr_id; + struct ib_sge *sg_list; + int num_sge; + enum ib_wr_opcode opcode; + int send_flags; + u32 imm_data; + union { + struct { + u64 remote_addr; + u32 rkey; + } rdma; + struct { + u64 remote_addr; + u64 compare_add; + u64 swap; + u32 rkey; + } atomic; + struct { + struct ib_ah *ah; + struct ib_mad_hdr *mad_hdr; + u32 remote_qpn; + u32 remote_qkey; + int timeout_ms; /* valid for MADs only */ + u16 pkey_index; /* valid for GSI only */ + u8 port_num; /* valid for DR SMPs on switch only */ + } ud; + } wr; +}; + +struct ib_recv_wr { + struct ib_recv_wr *next; + u64 wr_id; + struct ib_sge *sg_list; + int num_sge; + int recv_flags; +}; + +enum ib_access_flags { + IB_ACCESS_LOCAL_WRITE = 1, + IB_ACCESS_REMOTE_WRITE = (1<<1), + IB_ACCESS_REMOTE_READ = (1<<2), + IB_ACCESS_REMOTE_ATOMIC = (1<<3), + IB_ACCESS_MW_BIND = (1<<4) +}; + +struct ib_phys_buf { + u64 addr; + u64 size; +}; + +struct ib_mr_attr { + struct ib_pd *pd; + u64 device_virt_addr; + u64 size; + int mr_access_flags; + u32 lkey; + u32 rkey; +}; + +enum ib_mr_rereg_flags { + IB_MR_REREG_TRANS = 1, + IB_MR_REREG_PD = (1<<1), + IB_MR_REREG_ACCESS = (1<<2) +}; + +struct ib_mw_bind { + struct ib_mr *mr; + u64 wr_id; + u64 addr; + u32 length; + int send_flags; + int mw_access_flags; +}; + +struct ib_fmr_attr { + int max_pages; + int max_maps; + u8 page_size; +}; + +struct ib_pd { + struct ib_device *device; + atomic_t usecnt; /* count all resources */ +}; + +struct ib_ah { + struct ib_device *device; + struct ib_pd *pd; +}; + +typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); + +struct ib_cq { + struct ib_device *device; + ib_comp_handler comp_handler; + void (*event_handler)(struct ib_event *, void *); + void * cq_context; + int cqe; + atomic_t usecnt; /* count number of work queues */ +}; + +struct ib_srq { + struct ib_device *device; + struct ib_pd *pd; + void *srq_context; + atomic_t usecnt; +}; + +struct ib_qp { + struct ib_device *device; + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + u32 qp_num; +}; + +struct ib_mr { + struct ib_device *device; + struct ib_pd *pd; + u32 lkey; + u32 rkey; + atomic_t usecnt; /* count number of MWs */ +}; + +struct ib_mw { + struct ib_device *device; + struct ib_pd *pd; + u32 rkey; +}; + +struct ib_fmr { + struct ib_device *device; + struct ib_pd *pd; + struct list_head list; + u32 lkey; + u32 rkey; +}; + +struct ib_mad; + +enum ib_process_mad_flags { + IB_MAD_IGNORE_MKEY = 1 +}; + +enum ib_mad_result { + IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ + IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ + 
IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ + IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ +}; + +#define IB_DEVICE_NAME_MAX 64 + +struct ib_cache { + rwlock_t lock; + struct ib_event_handler event_handler; + struct ib_pkey_cache **pkey_cache; + struct ib_gid_cache **gid_cache; +}; + +struct ib_device { + struct device *dma_device; + + char name[IB_DEVICE_NAME_MAX]; + + struct list_head event_handler_list; + spinlock_t event_handler_lock; + + struct list_head core_list; + struct list_head client_data_list; + spinlock_t client_data_lock; + + struct ib_cache cache; + + u32 flags; + + int (*query_device)(struct ib_device *device, + struct ib_device_attr *device_attr); + int (*query_port)(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr); + int (*query_gid)(struct ib_device *device, + u8 port_num, int index, + union ib_gid *gid); + int (*query_pkey)(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + int (*modify_device)(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + int (*modify_port)(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + struct ib_pd * (*alloc_pd)(struct ib_device *device); + int (*dealloc_pd)(struct ib_pd *pd); + struct ib_ah * (*create_ah)(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); + int (*modify_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*query_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*destroy_ah)(struct ib_ah *ah); + struct ib_qp * (*create_qp)(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr); + int (*modify_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask); + int (*query_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + int (*destroy_qp)(struct ib_qp *qp); + int (*post_send)(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + int (*post_recv)(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + struct ib_cq * (*create_cq)(struct ib_device *device, + int cqe); + int (*destroy_cq)(struct ib_cq *cq); + int (*resize_cq)(struct ib_cq *cq, int *cqe); + int (*poll_cq)(struct ib_cq *cq, int num_entries, + struct ib_wc *wc); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, + enum ib_cq_notify cq_notify); + int (*req_ncomp_notif)(struct ib_cq *cq, + int wc_cnt); + struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, + int mr_access_flags); + struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + int (*query_mr)(struct ib_mr *mr, + struct ib_mr_attr *mr_attr); + int (*dereg_mr)(struct ib_mr *mr); + int (*rereg_phys_mr)(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + int (*bind_mw)(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + int (*dealloc_mw)(struct ib_mw *mw); + struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + int (*map_phys_fmr)(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova); + int (*unmap_fmr)(struct list_head *fmr_list); + int (*dealloc_fmr)(struct ib_fmr *fmr); + int (*attach_mcast)(struct ib_qp *qp, + union 
ib_gid *gid, + u16 lid); + int (*detach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*process_mad)(struct ib_device *device, + int process_mad_flags, + u8 port_num, + u16 source_lid, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + + struct class_device class_dev; + struct kobject ports_parent; + struct list_head port_list; + + enum { + IB_DEV_UNINITIALIZED, + IB_DEV_REGISTERED, + IB_DEV_UNREGISTERED + } reg_state; + + u8 node_type; + u8 phys_port_cnt; +}; + +struct ib_client { + char *name; + void (*add) (struct ib_device *); + void (*remove)(struct ib_device *); + + struct list_head list; +}; + +struct ib_device *ib_alloc_device(size_t size); +void ib_dealloc_device(struct ib_device *device); + +int ib_register_device (struct ib_device *device); +void ib_unregister_device(struct ib_device *device); + +int ib_register_client (struct ib_client *client); +void ib_unregister_client(struct ib_client *client); + +void *ib_get_client_data(struct ib_device *device, struct ib_client *client); +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data); + +int ib_register_event_handler (struct ib_event_handler *event_handler); +int ib_unregister_event_handler(struct ib_event_handler *event_handler); +void ib_dispatch_event(struct ib_event *event); + +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr); + +int ib_query_port(struct ib_device *device, + u8 port_num, struct ib_port_attr *port_attr); + +int ib_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid); + +int ib_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + */ +struct ib_pd *ib_alloc_pd(struct ib_device *device); + +/** + * ib_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + */ +int ib_dealloc_pd(struct ib_pd *pd); + +/** + * ib_create_ah - Creates an address handle for the given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +/** + * ib_modify_ah - Modifies the address vector associated with an address + * handle. + * @ah: The address handle to modify. + * @ah_attr: The new address vector attributes to associate with the + * address handle. + */ +int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ib_query_ah - Queries the address vector associated with an address + * handle. + * @ah: The address handle to query. + * @ah_attr: The address vector attributes associated with the address + * handle. + */ +int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ib_destroy_ah - Destroys an address handle. + * @ah: The address handle to destroy. 
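A sketch of the consumer side of this interface: a hypothetical client that gets add/remove callbacks for every device in the system and keeps one PD per device via the client-data hooks. All my_* names are invented, and it assumes ib_alloc_pd() returns ERR_PTR values on failure:

    static struct ib_client my_client;

    static void my_add_one(struct ib_device *device)
    {
            struct ib_pd *pd = ib_alloc_pd(device);

            if (!IS_ERR(pd))
                    ib_set_client_data(device, &my_client, pd);
    }

    static void my_remove_one(struct ib_device *device)
    {
            struct ib_pd *pd = ib_get_client_data(device, &my_client);

            if (pd)
                    ib_dealloc_pd(pd);
    }

    static struct ib_client my_client = {
            .name   = "my_client",
            .add    = my_add_one,
            .remove = my_remove_one
    };

    /* module init would then call ib_register_client(&my_client),
     * and module exit ib_unregister_client(&my_client). */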
+ */ +int ib_destroy_ah(struct ib_ah *ah); + +/** + * ib_create_qp - Creates a QP associated with the specified protection + * domain. + * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the QP. + */ +struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr); + +/** + * ib_modify_qp - Modifies the attributes for the specified QP and then + * transitions the QP to the given state. + * @qp: The QP to modify. + * @qp_attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @qp_attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + */ +int ib_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask); + +/** + * ib_query_qp - Returns the attribute list and current values for the + * specified QP. + * @qp: The QP to query. + * @qp_attr: The attributes of the specified QP. + * @qp_attr_mask: A bit-mask used to select specific attributes to query. + * @qp_init_attr: Additional attributes of the selected QP. + * + * The qp_attr_mask may be used to limit the query to gathering only the + * selected attributes. + */ +int ib_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + +/** + * ib_destroy_qp - Destroys the specified QP. + * @qp: The QP to destroy. + */ +int ib_destroy_qp(struct ib_qp *qp); + +/** + * ib_post_send - Posts a list of work requests to the send queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @send_wr: A list of work requests to post on the send queue. + * @bad_send_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + return qp->device->post_send(qp, send_wr, bad_send_wr); +} + +/** + * ib_post_recv - Posts a list of work requests to the receive queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return qp->device->post_recv(qp, recv_wr, bad_recv_wr); +} + +/** + * ib_create_cq - Creates a CQ on the specified device. + * @device: The device on which to create the CQ. + * @comp_handler: A user-specified callback that is invoked when a + * completion event occurs on the CQ. + * @event_handler: A user-specified callback that is invoked when an + * asynchronous event not associated with a completion occurs on the CQ. + * @cq_context: Context associated with the CQ returned to the user via + * the associated completion and event handlers. + * @cqe: The minimum size of the CQ. + * + * Users can examine the cq structure to determine the actual CQ size. + */ +struct ib_cq *ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe); + +/** + * ib_resize_cq - Modifies the capacity of the CQ. + * @cq: The CQ to resize. + * @cqe: The minimum size of the CQ. + * + * Users can examine the cq structure to determine the actual CQ size. 
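Because ib_post_send() and ib_post_recv() are inline calls through the device's function pointers, the fast path pays no indirection beyond the indirect call itself. A sketch of replenishing a receive queue through the wrapper above; my_post_one_recv() and its arguments are hypothetical, and the buffer is assumed to be DMA-mapped and covered by lkey:

    static int my_post_one_recv(struct ib_qp *qp, u64 addr, u32 len,
                                u32 lkey, u64 wr_id)
    {
            struct ib_sge sge = { .addr = addr, .length = len, .lkey = lkey };
            struct ib_recv_wr wr = {
                    .wr_id   = wr_id,
                    .sg_list = &sge,
                    .num_sge = 1,
            }, *bad_wr;

            return ib_post_recv(qp, &wr, &bad_wr);
    }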
+ */
+int ib_resize_cq(struct ib_cq *cq, int cqe);
+
+/**
+ * ib_destroy_cq - Destroys the specified CQ.
+ * @cq: The CQ to destroy.
+ */
+int ib_destroy_cq(struct ib_cq *cq);
+
+/**
+ * ib_poll_cq - poll a CQ for completion(s)
+ * @cq: the CQ being polled
+ * @num_entries: maximum number of completions to return
+ * @wc: array of at least @num_entries &struct ib_wc where completions
+ *   will be returned
+ *
+ * Poll a CQ for (possibly multiple) completions.  If the return value
+ * is < 0, an error occurred.  If the return value is >= 0, it is the
+ * number of completions returned.  If the return value is
+ * non-negative and < num_entries, then the CQ was emptied.
+ */
+static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
+                             struct ib_wc *wc)
+{
+        return cq->device->poll_cq(cq, num_entries, wc);
+}
+
+/**
+ * ib_peek_cq - Returns the number of unreaped completions currently
+ *   on the specified CQ.
+ * @cq: The CQ to peek.
+ * @wc_cnt: A minimum number of unreaped completions to check for.
+ *
+ * If the number of unreaped completions is greater than or equal to wc_cnt,
+ * this function returns wc_cnt; otherwise, it returns the actual number of
+ * unreaped completions.
+ */
+int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+/**
+ * ib_req_notify_cq - Request completion notification on a CQ.
+ * @cq: The CQ to generate an event for.
+ * @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will
+ *   occur on the next solicited event.  If set to %IB_CQ_NEXT_COMP,
+ *   notification will occur on the next completion.
+ */
+static inline int ib_req_notify_cq(struct ib_cq *cq,
+                                   enum ib_cq_notify cq_notify)
+{
+        return cq->device->req_notify_cq(cq, cq_notify);
+}
+
+/**
+ * ib_req_ncomp_notif - Request completion notification when there are
+ *   at least the specified number of unreaped completions on the CQ.
+ * @cq: The CQ to generate an event for.
+ * @wc_cnt: The number of unreaped completions that should be on the
+ *   CQ before an event is generated.
+ */
+static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
+{
+        return cq->device->req_ncomp_notif ?
+                cq->device->req_ncomp_notif(cq, wc_cnt) :
+                -ENOSYS;
+}
+
+/**
+ * ib_get_dma_mr - Returns a memory region for system memory that is
+ *   usable for DMA.
+ * @pd: The protection domain associated with the memory region.
+ * @mr_access_flags: Specifies the memory access rights.
+ */
+struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+/**
+ * ib_reg_phys_mr - Prepares a virtually addressed memory region for use
+ *   by an HCA.
+ * @pd: The protection domain assigned to the registered region.
+ * @phys_buf_array: Specifies a list of physical buffers to use in the
+ *   memory region.
+ * @num_phys_buf: Specifies the size of the phys_buf_array.
+ * @mr_access_flags: Specifies the memory access rights.
+ * @iova_start: The offset of the region's starting I/O virtual address.
+ */
+struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
+                             struct ib_phys_buf *phys_buf_array,
+                             int num_phys_buf,
+                             int mr_access_flags,
+                             u64 *iova_start);
+
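  The declarations above form the memory registration half of the verbs
  API: a consumer allocates a protection domain, registers memory against
  it, and then quotes the returned lkey in the scatter/gather entries of
  work requests posted with ib_post_send() or ib_post_recv().  A minimal
  sketch of that flow for a send on a connected QP (error handling
  abbreviated; the ERR_PTR return convention and the IB_ACCESS_* flags
  are assumed from earlier in this header, and qp, buf_dma and buf_len
  are hypothetical):

        struct ib_pd *pd;
        struct ib_mr *mr;
        int err;

        pd = ib_alloc_pd(device);
        if (IS_ERR(pd))
                return PTR_ERR(pd);

        /* One region covering all of system memory, local access only */
        mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
        if (IS_ERR(mr)) {
                ib_dealloc_pd(pd);
                return PTR_ERR(mr);
        }

        {
                struct ib_sge sge = {
                        .addr   = buf_dma,      /* DMA address of the buffer */
                        .length = buf_len,
                        .lkey   = mr->lkey,     /* ties the SGE to the MR */
                };
                struct ib_send_wr wr = {
                        .wr_id      = 1,
                        .opcode     = IB_WR_SEND,
                        .sg_list    = &sge,
                        .num_sge    = 1,
                        .send_flags = IB_SEND_SIGNALED,
                };
                struct ib_send_wr *bad_wr;

                err = ib_post_send(qp, &wr, &bad_wr);
                /* on failure, bad_wr points at the first unposted request */
        }

  A datagram (UD) QP would additionally fill in the wr.ud fields with an
  address handle, remote QPN and Q_Key, exactly as the IPoIB data path
  does in ipoib_ib.c below.
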
+/**
+ * ib_rereg_phys_mr - Modifies the attributes of an existing memory region.
+ *   Conceptually, this call performs a deregister memory region operation
+ *   followed by a register physical memory region operation.  Where
+ *   possible, resources are reused instead of deallocated and reallocated.
+ * @mr: The memory region to modify.
+ * @mr_rereg_mask: A bit-mask used to indicate which of the following
+ *   properties of the memory region are being modified.
+ * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies
+ *   the new protection domain to associate with the memory region,
+ *   otherwise, this parameter is ignored.
+ * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
+ *   field specifies a list of physical buffers to use in the new
+ *   translation, otherwise, this parameter is ignored.
+ * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
+ *   field specifies the size of the phys_buf_array, otherwise, this
+ *   parameter is ignored.
+ * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this
+ *   field specifies the new memory access rights, otherwise, this
+ *   parameter is ignored.
+ * @iova_start: The offset of the region's starting I/O virtual address.
+ */
+int ib_rereg_phys_mr(struct ib_mr *mr,
+                     int mr_rereg_mask,
+                     struct ib_pd *pd,
+                     struct ib_phys_buf *phys_buf_array,
+                     int num_phys_buf,
+                     int mr_access_flags,
+                     u64 *iova_start);
+
+/**
+ * ib_query_mr - Retrieves information about a specific memory region.
+ * @mr: The memory region to retrieve information about.
+ * @mr_attr: The attributes of the specified memory region.
+ */
+int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+
+/**
+ * ib_dereg_mr - Deregisters a memory region and removes it from the
+ *   HCA translation table.
+ * @mr: The memory region to deregister.
+ */
+int ib_dereg_mr(struct ib_mr *mr);
+
+/**
+ * ib_alloc_mw - Allocates a memory window.
+ * @pd: The protection domain associated with the memory window.
+ */
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
+
+/**
+ * ib_bind_mw - Posts a work request to the send queue of the specified
+ *   QP, which binds the memory window to the given address range and
+ *   remote access attributes.
+ * @qp: QP to post the bind work request on.
+ * @mw: The memory window to bind.
+ * @mw_bind: Specifies information about the memory window, including
+ *   its address range, remote access rights, and associated memory region.
+ */
+static inline int ib_bind_mw(struct ib_qp *qp,
+                             struct ib_mw *mw,
+                             struct ib_mw_bind *mw_bind)
+{
+        /* XXX reference counting in corresponding MR? */
+        return mw->device->bind_mw ?
+                mw->device->bind_mw(qp, mw, mw_bind) :
+                -ENOSYS;
+}
+
+/**
+ * ib_dealloc_mw - Deallocates a memory window.
+ * @mw: The memory window to deallocate.
+ */
+int ib_dealloc_mw(struct ib_mw *mw);
+
+/**
+ * ib_alloc_fmr - Allocates an unmapped fast memory region.
+ * @pd: The protection domain associated with the unmapped region.
+ * @mr_access_flags: Specifies the memory access rights.
+ * @fmr_attr: Attributes of the unmapped region.
+ *
+ * A fast memory region must be mapped before it can be used as part of
+ * a work request.
+ */
+struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
+                            int mr_access_flags,
+                            struct ib_fmr_attr *fmr_attr);
+
+/**
+ * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
+ * @fmr: The fast memory region to associate with the pages.
+ * @page_list: An array of physical pages to map to the fast memory region.
+ * @list_len: The number of pages in page_list.
+ * @iova: The I/O virtual address to use with the mapped region.
+ */
+static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
+                                  u64 *page_list, int list_len,
+                                  u64 iova)
+{
+        return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
+}
+
+/**
+ * ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
+ * @fmr_list: A linked list of fast memory regions to unmap.
+ */
+int ib_unmap_fmr(struct list_head *fmr_list);
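  Fast memory regions trade registration cost for mapping cost: the
  region is allocated once and then repeatedly mapped and unmapped, with
  unmapping batched over a list.  A sketch of the lifecycle (assuming pd
  and a filled-in fmr_attr already exist, that these calls follow the
  ERR_PTR convention used by the other verbs, and that struct ib_fmr's
  list member is the linkage ib_unmap_fmr() traverses; page_dma and iova
  are hypothetical):

        struct ib_fmr *fmr;
        u64 page_list[1] = { page_dma };        /* DMA address of one page */
        LIST_HEAD(fmr_list);

        fmr = ib_alloc_fmr(pd, IB_ACCESS_LOCAL_WRITE, &fmr_attr);
        if (IS_ERR(fmr))
                return PTR_ERR(fmr);

        if (!ib_map_phys_fmr(fmr, page_list, 1, iova)) {
                /* ... post work requests that reference the mapping ... */

                list_add_tail(&fmr->list, &fmr_list);
                ib_unmap_fmr(&fmr_list);        /* unmaps every FMR on the list */
        }

        ib_dealloc_fmr(fmr);
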
+
+/**
+ * ib_dealloc_fmr - Deallocates a fast memory region.
+ * @fmr: The fast memory region to deallocate.
+ */
+int ib_dealloc_fmr(struct ib_fmr *fmr);
+
+/**
+ * ib_attach_mcast - Attaches the specified QP to a multicast group.
+ * @qp: QP to attach to the multicast group.  The QP must be type
+ *   IB_QPT_UD.
+ * @gid: Multicast group GID.
+ * @lid: Multicast group LID in host byte order.
+ *
+ * In order to send and receive multicast packets, subnet
+ * administration must have created the multicast group and configured
+ * the fabric appropriately.  The port associated with the specified
+ * QP must also be a member of the multicast group.
+ */
+int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+/**
+ * ib_detach_mcast - Detaches the specified QP from a multicast group.
+ * @qp: QP to detach from the multicast group.
+ * @gid: Multicast group GID.
+ * @lid: Multicast group LID in host byte order.
+ */
+int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+#endif /* IB_VERBS_H */
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
new file mode 100644
index 000000000000..8d2e04cac68e
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -0,0 +1,33 @@
+config INFINIBAND_IPOIB
+	tristate "IP-over-InfiniBand"
+	depends on INFINIBAND && NETDEVICES && INET
+	---help---
+	  Support for the IP-over-InfiniBand protocol (IPoIB). This
+	  transports IP packets over InfiniBand so you can use your IB
+	  device as a fancy NIC.
+
+	  The IPoIB protocol is defined by the IETF ipoib working
+	  group: <http://www.ietf.org/html.charters/ipoib-charter.html>.
+
+config INFINIBAND_IPOIB_DEBUG
+	bool "IP-over-InfiniBand debugging"
+	depends on INFINIBAND_IPOIB
+	---help---
+	  This option causes debugging code to be compiled into the
+	  IPoIB driver.  The output can be turned on via the
+	  debug_level and mcast_debug_level module parameters (which
+	  can also be set after the driver is loaded through sysfs).
+
+	  This option also creates a virtual filesystem, "ipoib_debugfs",
+	  which can be mounted to expose debugging information about
+	  IB multicast groups used by the IPoIB driver.
+
+config INFINIBAND_IPOIB_DEBUG_DATA
+	bool "IP-over-InfiniBand data path debugging"
+	depends on INFINIBAND_IPOIB_DEBUG
+	---help---
+	  This option compiles debugging code into the data path
+	  of the IPoIB driver.  The output can be turned on via the
+	  data_debug_level module parameter; however, even with output
+	  turned off, this debugging code will have some performance
+	  impact.
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
new file mode 100644
index 000000000000..394bc08abc6f
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -0,0 +1,11 @@
+EXTRA_CFLAGS += -Idrivers/infiniband/include
+
+obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o
+
+ib_ipoib-y := ipoib_main.o \
+              ipoib_ib.o \
+              ipoib_multicast.o \
+              ipoib_verbs.o \
+              ipoib_vlan.o
+ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o
+
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
new file mode 100644
index 000000000000..a1db3d177529
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $ + */ + +#ifndef _IPOIB_H +#define _IPOIB_H + +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/workqueue.h> +#include <linux/pci.h> +#include <linux/config.h> +#include <linux/kref.h> +#include <linux/if_infiniband.h> + +#include <net/neighbour.h> + +#include <asm/atomic.h> +#include <asm/semaphore.h> + +#include <ib_verbs.h> +#include <ib_pack.h> +#include <ib_sa.h> + +/* constants */ + +enum { + IPOIB_PACKET_SIZE = 2048, + IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, + + IPOIB_ENCAP_LEN = 4, + + IPOIB_RX_RING_SIZE = 128, + IPOIB_TX_RING_SIZE = 64, + + IPOIB_NUM_WC = 4, + + IPOIB_MAX_PATH_REC_QUEUE = 3, + IPOIB_MAX_MCAST_QUEUE = 3, + + IPOIB_FLAG_OPER_UP = 0, + IPOIB_FLAG_ADMIN_UP = 1, + IPOIB_PKEY_ASSIGNED = 2, + IPOIB_PKEY_STOP = 3, + IPOIB_FLAG_SUBINTERFACE = 4, + IPOIB_MCAST_RUN = 5, + IPOIB_STOP_REAPER = 6, + + IPOIB_MAX_BACKOFF_SECONDS = 16, + + IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ + IPOIB_MCAST_FLAG_SENDONLY = 1, + IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ + IPOIB_MCAST_FLAG_ATTACHED = 3, +}; + +/* structs */ + +struct ipoib_header { + u16 proto; + u16 reserved; +}; + +struct ipoib_pseudoheader { + u8 hwaddr[INFINIBAND_ALEN]; +}; + +struct ipoib_mcast; + +struct ipoib_buf { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(mapping) +}; + +/* + * Device private locking: tx_lock protects members used in TX fast + * path (and we use LLTX so upper layers don't do extra locking). + * lock protects everything else. lock nests inside of tx_lock (ie + * tx_lock must be acquired first if needed). 
+ */ +struct ipoib_dev_priv { + spinlock_t lock; + + struct net_device *dev; + + unsigned long flags; + + struct semaphore mcast_mutex; + struct semaphore vlan_mutex; + + struct rb_root path_tree; + struct list_head path_list; + + struct ipoib_mcast *broadcast; + struct list_head multicast_list; + struct rb_root multicast_tree; + + struct work_struct pkey_task; + struct work_struct mcast_task; + struct work_struct flush_task; + struct work_struct restart_task; + struct work_struct ah_reap_task; + + struct ib_device *ca; + u8 port; + u16 pkey; + struct ib_pd *pd; + struct ib_mr *mr; + struct ib_cq *cq; + struct ib_qp *qp; + u32 qkey; + + union ib_gid local_gid; + u16 local_lid; + + unsigned int admin_mtu; + unsigned int mcast_mtu; + + struct ipoib_buf *rx_ring; + + spinlock_t tx_lock; + struct ipoib_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + + struct ib_wc ibwc[IPOIB_NUM_WC]; + + struct list_head dead_ahs; + + struct ib_event_handler event_handler; + + struct net_device_stats stats; + + struct net_device *parent; + struct list_head child_intfs; + struct list_head list; + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + struct list_head fs_list; + struct dentry *mcg_dentry; +#endif +}; + +struct ipoib_ah { + struct net_device *dev; + struct ib_ah *ah; + struct list_head list; + struct kref ref; + unsigned last_send; +}; + +struct ipoib_path { + struct net_device *dev; + struct ib_sa_path_rec pathrec; + struct ipoib_ah *ah; + struct sk_buff_head queue; + + struct list_head neigh_list; + + int query_id; + struct ib_sa_query *query; + struct completion done; + + struct rb_node rb_node; + struct list_head list; +}; + +struct ipoib_neigh { + struct ipoib_ah *ah; + struct sk_buff_head queue; + + struct neighbour *neighbour; + + struct list_head list; +}; + +static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) +{ + return (struct ipoib_neigh **) (neigh->ha + 24 - + (offsetof(struct neighbour, ha) & 4)); +} + +extern struct workqueue_struct *ipoib_workqueue; + +/* functions */ + +void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); + +struct ipoib_ah *ipoib_create_ah(struct net_device *dev, + struct ib_pd *pd, struct ib_ah_attr *attr); +void ipoib_free_ah(struct kref *kref); +static inline void ipoib_put_ah(struct ipoib_ah *ah) +{ + kref_put(&ah->ref, ipoib_free_ah); +} + +int ipoib_add_pkey_attr(struct net_device *dev); + +void ipoib_send(struct net_device *dev, struct sk_buff *skb, + struct ipoib_ah *address, u32 qpn); +void ipoib_reap_ah(void *dev_ptr); + +void ipoib_flush_paths(struct net_device *dev); +struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); + +int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); +void ipoib_ib_dev_flush(void *dev); +void ipoib_ib_dev_cleanup(struct net_device *dev); + +int ipoib_ib_dev_open(struct net_device *dev); +int ipoib_ib_dev_up(struct net_device *dev); +int ipoib_ib_dev_down(struct net_device *dev); +int ipoib_ib_dev_stop(struct net_device *dev); + +int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); +void ipoib_dev_cleanup(struct net_device *dev); + +void ipoib_mcast_join_task(void *dev_ptr); +void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, + struct sk_buff *skb); + +void ipoib_mcast_restart_task(void *dev_ptr); +int ipoib_mcast_start_thread(struct net_device *dev); +int ipoib_mcast_stop_thread(struct net_device *dev); + +void ipoib_mcast_dev_down(struct net_device *dev); +void ipoib_mcast_dev_flush(struct net_device *dev); + +struct 
ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev); +void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter); +int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter); +void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, + union ib_gid *gid, + unsigned long *created, + unsigned int *queuelen, + unsigned int *complete, + unsigned int *send_only); + +int ipoib_mcast_attach(struct net_device *dev, u16 mlid, + union ib_gid *mgid); +int ipoib_mcast_detach(struct net_device *dev, u16 mlid, + union ib_gid *mgid); + +int ipoib_qp_create(struct net_device *dev); +int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); +void ipoib_transport_dev_cleanup(struct net_device *dev); + +void ipoib_event(struct ib_event_handler *handler, + struct ib_event *record); + +int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey); +int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); + +void ipoib_pkey_poll(void *dev); +int ipoib_pkey_dev_delay_open(struct net_device *dev); + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +int ipoib_create_debug_file(struct net_device *dev); +void ipoib_delete_debug_file(struct net_device *dev); +int ipoib_register_debugfs(void); +void ipoib_unregister_debugfs(void); +#else +static inline int ipoib_create_debug_file(struct net_device *dev) { return 0; } +static inline void ipoib_delete_debug_file(struct net_device *dev) { } +static inline int ipoib_register_debugfs(void) { return 0; } +static inline void ipoib_unregister_debugfs(void) { } +#endif + + +#define ipoib_printk(level, priv, format, arg...) \ + printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) +#define ipoib_warn(priv, format, arg...) \ + ipoib_printk(KERN_WARNING, priv, format , ## arg) + + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +extern int debug_level; + +#define ipoib_dbg(priv, format, arg...) \ + do { \ + if (debug_level > 0) \ + ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ + } while (0) +#define ipoib_dbg_mcast(priv, format, arg...) \ + do { \ + if (mcast_debug_level > 0) \ + ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ + } while (0) +#else /* CONFIG_INFINIBAND_IPOIB_DEBUG */ +#define ipoib_dbg(priv, format, arg...) \ + do { (void) (priv); } while (0) +#define ipoib_dbg_mcast(priv, format, arg...) \ + do { (void) (priv); } while (0) +#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA +#define ipoib_dbg_data(priv, format, arg...) \ + do { \ + if (data_debug_level > 0) \ + ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ + } while (0) +#else /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */ +#define ipoib_dbg_data(priv, format, arg...) \ + do { (void) (priv); } while (0) +#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */ + + +#define IPOIB_GID_FMT "%x:%x:%x:%x:%x:%x:%x:%x" + +#define IPOIB_GID_ARG(gid) be16_to_cpup((__be16 *) ((gid).raw + 0)), \ + be16_to_cpup((__be16 *) ((gid).raw + 2)), \ + be16_to_cpup((__be16 *) ((gid).raw + 4)), \ + be16_to_cpup((__be16 *) ((gid).raw + 6)), \ + be16_to_cpup((__be16 *) ((gid).raw + 8)), \ + be16_to_cpup((__be16 *) ((gid).raw + 10)), \ + be16_to_cpup((__be16 *) ((gid).raw + 12)), \ + be16_to_cpup((__be16 *) ((gid).raw + 14)) + +#endif /* _IPOIB_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c new file mode 100644 index 000000000000..044f2c78ef15 --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $ + */ + +#include <linux/pagemap.h> +#include <linux/seq_file.h> + +#include "ipoib.h" + +enum { + IPOIB_MAGIC = 0x49504942 /* "IPIB" */ +}; + +static DECLARE_MUTEX(ipoib_fs_mutex); +static struct dentry *ipoib_root; +static struct super_block *ipoib_sb; +static LIST_HEAD(ipoib_device_list); + +static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos) +{ + struct ipoib_mcast_iter *iter; + loff_t n = *pos; + + iter = ipoib_mcast_iter_init(file->private); + if (!iter) + return NULL; + + while (n--) { + if (ipoib_mcast_iter_next(iter)) { + ipoib_mcast_iter_free(iter); + return NULL; + } + } + + return iter; +} + +static void *ipoib_mcg_seq_next(struct seq_file *file, void *iter_ptr, + loff_t *pos) +{ + struct ipoib_mcast_iter *iter = iter_ptr; + + (*pos)++; + + if (ipoib_mcast_iter_next(iter)) { + ipoib_mcast_iter_free(iter); + return NULL; + } + + return iter; +} + +static void ipoib_mcg_seq_stop(struct seq_file *file, void *iter_ptr) +{ + /* nothing for now */ +} + +static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr) +{ + struct ipoib_mcast_iter *iter = iter_ptr; + char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"]; + union ib_gid mgid; + int i, n; + unsigned long created; + unsigned int queuelen, complete, send_only; + + if (iter) { + ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen, + &complete, &send_only); + + for (n = 0, i = 0; i < sizeof mgid / 2; ++i) { + n += sprintf(gid_buf + n, "%x", + be16_to_cpu(((u16 *)mgid.raw)[i])); + if (i < sizeof mgid / 2 - 1) + gid_buf[n++] = ':'; + } + } + + seq_printf(file, "GID: %*s", -(1 + (int) sizeof gid_buf), gid_buf); + + seq_printf(file, + " created: %10ld queuelen: %4d complete: %d send_only: %d\n", + created, queuelen, complete, send_only); + + return 0; +} + +static struct seq_operations ipoib_seq_ops = { + .start = ipoib_mcg_seq_start, + .next = ipoib_mcg_seq_next, + .stop = ipoib_mcg_seq_stop, + .show = ipoib_mcg_seq_show, +}; + +static int ipoib_mcg_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &ipoib_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + 
seq->private = inode->u.generic_ip; + + return 0; +} + +static struct file_operations ipoib_fops = { + .owner = THIS_MODULE, + .open = ipoib_mcg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static struct inode *ipoib_get_inode(void) +{ + struct inode *inode = new_inode(ipoib_sb); + + if (inode) { + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_fop = &ipoib_fops; + } + + return inode; +} + +static int __ipoib_create_debug_file(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct dentry *dentry; + struct inode *inode; + char name[IFNAMSIZ + sizeof "_mcg"]; + + snprintf(name, sizeof name, "%s_mcg", dev->name); + + dentry = d_alloc_name(ipoib_root, name); + if (!dentry) + return -ENOMEM; + + inode = ipoib_get_inode(); + if (!inode) { + dput(dentry); + return -ENOMEM; + } + + inode->u.generic_ip = dev; + priv->mcg_dentry = dentry; + + d_add(dentry, inode); + + return 0; +} + +int ipoib_create_debug_file(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + down(&ipoib_fs_mutex); + + list_add_tail(&priv->fs_list, &ipoib_device_list); + + if (!ipoib_sb) { + up(&ipoib_fs_mutex); + return 0; + } + + up(&ipoib_fs_mutex); + + return __ipoib_create_debug_file(dev); +} + +void ipoib_delete_debug_file(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + down(&ipoib_fs_mutex); + list_del(&priv->fs_list); + if (!ipoib_sb) { + up(&ipoib_fs_mutex); + return; + } + up(&ipoib_fs_mutex); + + if (priv->mcg_dentry) { + d_drop(priv->mcg_dentry); + simple_unlink(ipoib_root->d_inode, priv->mcg_dentry); + } +} + +static int ipoib_fill_super(struct super_block *sb, void *data, int silent) +{ + static struct tree_descr ipoib_files[] = { + { "" } + }; + struct ipoib_dev_priv *priv; + int ret; + + ret = simple_fill_super(sb, IPOIB_MAGIC, ipoib_files); + if (ret) + return ret; + + ipoib_root = sb->s_root; + + down(&ipoib_fs_mutex); + + ipoib_sb = sb; + + list_for_each_entry(priv, &ipoib_device_list, fs_list) { + ret = __ipoib_create_debug_file(priv->dev); + if (ret) + break; + } + + up(&ipoib_fs_mutex); + + return ret; +} + +static struct super_block *ipoib_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_single(fs_type, flags, data, ipoib_fill_super); +} + +static void ipoib_kill_sb(struct super_block *sb) +{ + down(&ipoib_fs_mutex); + ipoib_sb = NULL; + up(&ipoib_fs_mutex); + + kill_litter_super(sb); +} + +static struct file_system_type ipoib_fs_type = { + .owner = THIS_MODULE, + .name = "ipoib_debugfs", + .get_sb = ipoib_get_sb, + .kill_sb = ipoib_kill_sb, +}; + +int ipoib_register_debugfs(void) +{ + return register_filesystem(&ipoib_fs_type); +} + +void ipoib_unregister_debugfs(void) +{ + unregister_filesystem(&ipoib_fs_type); +} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c new file mode 100644 index 000000000000..fa13eda34fd8 --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -0,0 +1,678 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $ + */ + +#include <linux/delay.h> +#include <linux/dma-mapping.h> + +#include <ib_cache.h> + +#include "ipoib.h" + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA +int data_debug_level; + +module_param(data_debug_level, int, 0644); +MODULE_PARM_DESC(data_debug_level, + "Enable data path debug tracing if > 0"); +#endif + +#define IPOIB_OP_RECV (1ul << 31) + +static DECLARE_MUTEX(pkey_sem); + +struct ipoib_ah *ipoib_create_ah(struct net_device *dev, + struct ib_pd *pd, struct ib_ah_attr *attr) +{ + struct ipoib_ah *ah; + + ah = kmalloc(sizeof *ah, GFP_KERNEL); + if (!ah) + return NULL; + + ah->dev = dev; + ah->last_send = 0; + kref_init(&ah->ref); + + ah->ah = ib_create_ah(pd, attr); + if (IS_ERR(ah->ah)) { + kfree(ah); + ah = NULL; + } else + ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah); + + return ah; +} + +void ipoib_free_ah(struct kref *kref) +{ + struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref); + struct ipoib_dev_priv *priv = netdev_priv(ah->dev); + + unsigned long flags; + + if (ah->last_send <= priv->tx_tail) { + ipoib_dbg(priv, "Freeing ah %p\n", ah->ah); + ib_destroy_ah(ah->ah); + kfree(ah); + } else { + spin_lock_irqsave(&priv->lock, flags); + list_add_tail(&ah->list, &priv->dead_ahs); + spin_unlock_irqrestore(&priv->lock, flags); + } +} + +static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv, + unsigned int wr_id, + dma_addr_t addr) +{ + struct ib_sge list = { + .addr = addr, + .length = IPOIB_BUF_SIZE, + .lkey = priv->mr->lkey, + }; + struct ib_recv_wr param = { + .wr_id = wr_id | IPOIB_OP_RECV, + .sg_list = &list, + .num_sge = 1, + .recv_flags = IB_RECV_SIGNALED + }; + struct ib_recv_wr *bad_wr; + + return ib_post_recv(priv->qp, ¶m, &bad_wr); +} + +static int ipoib_ib_post_receive(struct net_device *dev, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sk_buff *skb; + dma_addr_t addr; + int ret; + + skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); + if (!skb) { + ipoib_warn(priv, "failed to allocate receive buffer\n"); + + priv->rx_ring[id].skb = NULL; + return -ENOMEM; + } + skb_reserve(skb, 4); /* 16 byte align IP header */ + priv->rx_ring[id].skb = skb; + addr = dma_map_single(priv->ca->dma_device, + skb->data, IPOIB_BUF_SIZE, + DMA_FROM_DEVICE); + 
pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); + + ret = ipoib_ib_receive(priv, id, addr); + if (ret) { + ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n", + id, ret); + priv->rx_ring[id].skb = NULL; + } + + return ret; +} + +static int ipoib_ib_post_receives(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { + if (ipoib_ib_post_receive(dev, i)) { + ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); + return -EIO; + } + } + + return 0; +} + +static void ipoib_ib_handle_wc(struct net_device *dev, + struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id; + + ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); + + if (wr_id & IPOIB_OP_RECV) { + wr_id &= ~IPOIB_OP_RECV; + + if (wr_id < IPOIB_RX_RING_SIZE) { + struct sk_buff *skb = priv->rx_ring[wr_id].skb; + + priv->rx_ring[wr_id].skb = NULL; + + dma_unmap_single(priv->ca->dma_device, + pci_unmap_addr(&priv->rx_ring[wr_id], + mapping), + IPOIB_BUF_SIZE, + DMA_FROM_DEVICE); + + if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + ipoib_warn(priv, "failed recv event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + dev_kfree_skb_any(skb); + return; + } + + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", + wc->byte_len, wc->slid); + + skb_put(skb, wc->byte_len); + skb_pull(skb, IB_GRH_BYTES); + + if (wc->slid != priv->local_lid || + wc->src_qp != priv->qp->qp_num) { + skb->protocol = ((struct ipoib_header *) skb->data)->proto; + + skb_pull(skb, IPOIB_ENCAP_LEN); + + dev->last_rx = jiffies; + ++priv->stats.rx_packets; + priv->stats.rx_bytes += skb->len; + + skb->dev = dev; + /* XXX get correct PACKET_ type here */ + skb->pkt_type = PACKET_HOST; + netif_rx_ni(skb); + } else { + ipoib_dbg_data(priv, "dropping loopback packet\n"); + dev_kfree_skb_any(skb); + } + + /* repost receive */ + if (ipoib_ib_post_receive(dev, wr_id)) + ipoib_warn(priv, "ipoib_ib_post_receive failed " + "for buf %d\n", wr_id); + } else + ipoib_warn(priv, "completion event with wrid %d\n", + wr_id); + + } else { + struct ipoib_buf *tx_req; + unsigned long flags; + + if (wr_id >= IPOIB_TX_RING_SIZE) { + ipoib_warn(priv, "completion event with wrid %d (> %d)\n", + wr_id, IPOIB_TX_RING_SIZE); + return; + } + + ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id); + + tx_req = &priv->tx_ring[wr_id]; + + dma_unmap_single(priv->ca->dma_device, + pci_unmap_addr(tx_req, mapping), + tx_req->skb->len, + DMA_TO_DEVICE); + + ++priv->stats.tx_packets; + priv->stats.tx_bytes += tx_req->skb->len; + + dev_kfree_skb_any(tx_req->skb); + + spin_lock_irqsave(&priv->tx_lock, flags); + ++priv->tx_tail; + if (netif_queue_stopped(dev) && + priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2) + netif_wake_queue(dev); + spin_unlock_irqrestore(&priv->tx_lock, flags); + + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR) + ipoib_warn(priv, "failed send event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + } +} + +void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) +{ + struct net_device *dev = (struct net_device *) dev_ptr; + struct ipoib_dev_priv *priv = netdev_priv(dev); + int n, i; + + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + do { + n = ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc); + for (i = 0; i < n; ++i) + ipoib_ib_handle_wc(dev, priv->ibwc + i); + } while (n == 
IPOIB_NUM_WC); +} + +static inline int post_send(struct ipoib_dev_priv *priv, + unsigned int wr_id, + struct ib_ah *address, u32 qpn, + dma_addr_t addr, int len) +{ + struct ib_sge list = { + .addr = addr, + .length = len, + .lkey = priv->mr->lkey, + }; + struct ib_send_wr param = { + .wr_id = wr_id, + .opcode = IB_WR_SEND, + .sg_list = &list, + .num_sge = 1, + .wr = { + .ud = { + .remote_qpn = qpn, + .remote_qkey = priv->qkey, + .ah = address + }, + }, + .send_flags = IB_SEND_SIGNALED, + }; + struct ib_send_wr *bad_wr; + + return ib_post_send(priv->qp, ¶m, &bad_wr); +} + +void ipoib_send(struct net_device *dev, struct sk_buff *skb, + struct ipoib_ah *address, u32 qpn) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_buf *tx_req; + dma_addr_t addr; + + if (skb->len > dev->mtu + INFINIBAND_ALEN) { + ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", + skb->len, dev->mtu + INFINIBAND_ALEN); + ++priv->stats.tx_dropped; + ++priv->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + + ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", + skb->len, address, qpn); + + /* + * We put the skb into the tx_ring _before_ we call post_send() + * because it's entirely possible that the completion handler will + * run before we execute anything after the post_send(). That + * means we have to make sure everything is properly recorded and + * our state is consistent before we call post_send(). + */ + tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)]; + tx_req->skb = skb; + addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len, + DMA_TO_DEVICE); + pci_unmap_addr_set(tx_req, mapping, addr); + + if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1), + address->ah, qpn, addr, skb->len))) { + ipoib_warn(priv, "post_send failed\n"); + ++priv->stats.tx_errors; + dma_unmap_single(priv->ca->dma_device, addr, skb->len, + DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + } else { + dev->trans_start = jiffies; + + address->last_send = priv->tx_head; + ++priv->tx_head; + + if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) { + ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); + netif_stop_queue(dev); + } + } +} + +void __ipoib_reap_ah(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_ah *ah, *tah; + LIST_HEAD(remove_list); + + spin_lock_irq(&priv->lock); + list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) + if (ah->last_send <= priv->tx_tail) { + list_del(&ah->list); + list_add_tail(&ah->list, &remove_list); + } + spin_unlock_irq(&priv->lock); + + list_for_each_entry_safe(ah, tah, &remove_list, list) { + ipoib_dbg(priv, "Reaping ah %p\n", ah->ah); + ib_destroy_ah(ah->ah); + kfree(ah); + } +} + +void ipoib_reap_ah(void *dev_ptr) +{ + struct net_device *dev = dev_ptr; + struct ipoib_dev_priv *priv = netdev_priv(dev); + + __ipoib_reap_ah(dev); + + if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) + queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); +} + +int ipoib_ib_dev_open(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int ret; + + ret = ipoib_qp_create(dev); + if (ret) { + ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret); + return -1; + } + + ret = ipoib_ib_post_receives(dev); + if (ret) { + ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); + return -1; + } + + clear_bit(IPOIB_STOP_REAPER, &priv->flags); + queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); + + return 0; +} + +int 
ipoib_ib_dev_up(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); + + return ipoib_mcast_start_thread(dev); +} + +int ipoib_ib_dev_down(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + ipoib_dbg(priv, "downing ib_dev\n"); + + clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); + netif_carrier_off(dev); + + /* Shutdown the P_Key thread if still active */ + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { + down(&pkey_sem); + set_bit(IPOIB_PKEY_STOP, &priv->flags); + cancel_delayed_work(&priv->pkey_task); + up(&pkey_sem); + flush_workqueue(ipoib_workqueue); + } + + ipoib_mcast_stop_thread(dev); + + /* + * Flush the multicast groups first so we stop any multicast joins. The + * completion thread may have already died and we may deadlock waiting + * for the completion thread to finish some multicast joins. + */ + ipoib_mcast_dev_flush(dev); + + /* Delete broadcast and local addresses since they will be recreated */ + ipoib_mcast_dev_down(dev); + + ipoib_flush_paths(dev); + + return 0; +} + +static int recvs_pending(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int pending = 0; + int i; + + for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) + if (priv->rx_ring[i].skb) + ++pending; + + return pending; +} + +int ipoib_ib_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_attr qp_attr; + int attr_mask; + unsigned long begin; + struct ipoib_buf *tx_req; + int i; + + /* Kill the existing QP and allocate a new one */ + qp_attr.qp_state = IB_QPS_ERR; + attr_mask = IB_QP_STATE; + if (ib_modify_qp(priv->qp, &qp_attr, attr_mask)) + ipoib_warn(priv, "Failed to modify QP to ERROR state\n"); + + /* Wait for all sends and receives to complete */ + begin = jiffies; + + while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) { + if (time_after(jiffies, begin + 5 * HZ)) { + ipoib_warn(priv, "timing out; %d sends %d receives not completed\n", + priv->tx_head - priv->tx_tail, recvs_pending(dev)); + + /* + * assume the HW is wedged and just free up + * all our pending work requests. 
+			 */
+			while (priv->tx_tail < priv->tx_head) {
+				tx_req = &priv->tx_ring[priv->tx_tail &
+							(IPOIB_TX_RING_SIZE - 1)];
+				dma_unmap_single(priv->ca->dma_device,
+						 pci_unmap_addr(tx_req, mapping),
+						 tx_req->skb->len,
+						 DMA_TO_DEVICE);
+				dev_kfree_skb_any(tx_req->skb);
+				++priv->tx_tail;
+			}
+
+			for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+				if (priv->rx_ring[i].skb) {
+					dma_unmap_single(priv->ca->dma_device,
+							 pci_unmap_addr(&priv->rx_ring[i],
+									mapping),
+							 IPOIB_BUF_SIZE,
+							 DMA_FROM_DEVICE);
+					dev_kfree_skb_any(priv->rx_ring[i].skb);
+					priv->rx_ring[i].skb = NULL;
+				}
+
+			goto timeout;
+		}
+
+		yield();
+	}
+
+	ipoib_dbg(priv, "All sends and receives done.\n");
+
+timeout:
+	qp_attr.qp_state = IB_QPS_RESET;
+	attr_mask = IB_QP_STATE;
+	if (ib_modify_qp(priv->qp, &qp_attr, attr_mask))
+		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
+
+	/* Wait for all AHs to be reaped */
+	set_bit(IPOIB_STOP_REAPER, &priv->flags);
+	cancel_delayed_work(&priv->ah_reap_task);
+	flush_workqueue(ipoib_workqueue);
+
+	begin = jiffies;
+
+	while (!list_empty(&priv->dead_ahs)) {
+		__ipoib_reap_ah(dev);
+
+		if (time_after(jiffies, begin + HZ)) {
+			ipoib_warn(priv, "timing out; will leak address handles\n");
+			break;
+		}
+
+		yield();
+	}
+
+	return 0;
+}
+
+int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	priv->ca = ca;
+	priv->port = port;
+	priv->qp = NULL;
+
+	if (ipoib_transport_dev_init(dev, ca)) {
+		printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
+		return -ENODEV;
+	}
+
+	if (dev->flags & IFF_UP) {
+		if (ipoib_ib_dev_open(dev)) {
+			ipoib_transport_dev_cleanup(dev);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+void ipoib_ib_dev_flush(void *_dev)
+{
+	struct net_device *dev = (struct net_device *)_dev;
+	struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv;
+
+	if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+		return;
+
+	ipoib_dbg(priv, "flushing\n");
+
+	ipoib_ib_dev_down(dev);
+
+	/*
+	 * The device could have been brought down between the start and when
+	 * we get here, don't bring it back up if it's not configured up
+	 */
+	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+		ipoib_ib_dev_up(dev);
+
+	/* Flush any child interfaces too */
+	list_for_each_entry(cpriv, &priv->child_intfs, list)
+		ipoib_ib_dev_flush(cpriv->dev);
+}
+
+void ipoib_ib_dev_cleanup(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	ipoib_dbg(priv, "cleaning up ib_dev\n");
+
+	ipoib_mcast_stop_thread(dev);
+
+	/* Delete the broadcast address and the local address */
+	ipoib_mcast_dev_down(dev);
+
+	ipoib_transport_dev_cleanup(dev);
+}
+
+/*
+ * Delayed P_Key Assignment Interim Support
+ *
+ * The following is an initial implementation of a delayed P_Key assignment
+ * mechanism.  It uses the same approach implemented for the multicast
+ * group join.  The single goal of this implementation is to quickly address
+ * Bug #2507.  This implementation will probably be removed when the P_Key
+ * change async notification is available.
+ */
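  In outline, the interim mechanism described above is just a P_Key table
  lookup plus self-requeueing delayed work, condensed here from the
  functions that follow:

        ipoib_pkey_dev_check_presence(dev);     /* ib_cached_pkey_find() sets
                                                   or clears IPOIB_PKEY_ASSIGNED */

        if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                ipoib_open(dev);        /* P_Key found: bring the interface up */
        else
                /* not in the port's P_Key table yet; poll again in a second */
                queue_delayed_work(ipoib_workqueue, &priv->pkey_task, HZ);
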
+int ipoib_open(struct net_device *dev);
+
+static void ipoib_pkey_dev_check_presence(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	u16 pkey_index = 0;
+
+	if (ib_cached_pkey_find(priv->ca, priv->port, priv->pkey, &pkey_index))
+		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+	else
+		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+}
+
+void ipoib_pkey_poll(void *dev_ptr)
+{
+	struct net_device *dev = dev_ptr;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	ipoib_pkey_dev_check_presence(dev);
+
+	if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+		ipoib_open(dev);
+	else {
+		down(&pkey_sem);
+		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
+			queue_delayed_work(ipoib_workqueue,
+					   &priv->pkey_task,
+					   HZ);
+		up(&pkey_sem);
+	}
+}
+
+int ipoib_pkey_dev_delay_open(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	/*
+	 * Look for the interface pkey value in the IB Port P_Key table
+	 * and set the interface pkey assignment flag.
+	 */
+	ipoib_pkey_dev_check_presence(dev);
+
+	/* P_Key value not assigned yet - start polling */
+	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+		down(&pkey_sem);
+		clear_bit(IPOIB_PKEY_STOP, &priv->flags);
+		queue_delayed_work(ipoib_workqueue,
+				   &priv->pkey_task,
+				   HZ);
+		up(&pkey_sem);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
new file mode 100644
index 000000000000..d225f457697b
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -0,0 +1,1079 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $ + */ + +#include "ipoib.h" + +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <linux/if_arp.h> /* For ARPHRD_xxx */ + +#include <linux/ip.h> +#include <linux/in.h> + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +int debug_level; + +module_param(debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); +#endif + +static const u8 ipv4_bcast_addr[] = { + 0x00, 0xff, 0xff, 0xff, + 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff +}; + +struct workqueue_struct *ipoib_workqueue; + +static void ipoib_add_one(struct ib_device *device); +static void ipoib_remove_one(struct ib_device *device); + +static struct ib_client ipoib_client = { + .name = "ipoib", + .add = ipoib_add_one, + .remove = ipoib_remove_one +}; + +int ipoib_open(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + ipoib_dbg(priv, "bringing up interface\n"); + + set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); + + if (ipoib_pkey_dev_delay_open(dev)) + return 0; + + if (ipoib_ib_dev_open(dev)) + return -EINVAL; + + if (ipoib_ib_dev_up(dev)) + return -EINVAL; + + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + struct ipoib_dev_priv *cpriv; + + /* Bring up any child interfaces too */ + down(&priv->vlan_mutex); + list_for_each_entry(cpriv, &priv->child_intfs, list) { + int flags; + + flags = cpriv->dev->flags; + if (flags & IFF_UP) + continue; + + dev_change_flags(cpriv->dev, flags | IFF_UP); + } + up(&priv->vlan_mutex); + } + + netif_start_queue(dev); + + return 0; +} + +static int ipoib_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + ipoib_dbg(priv, "stopping interface\n"); + + clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); + + netif_stop_queue(dev); + + ipoib_ib_dev_down(dev); + ipoib_ib_dev_stop(dev); + + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + struct ipoib_dev_priv *cpriv; + + /* Bring down any child interfaces too */ + down(&priv->vlan_mutex); + list_for_each_entry(cpriv, &priv->child_intfs, list) { + int flags; + + flags = cpriv->dev->flags; + if (!(flags & IFF_UP)) + continue; + + dev_change_flags(cpriv->dev, flags & ~IFF_UP); + } + up(&priv->vlan_mutex); + } + + return 0; +} + +static int ipoib_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + return -EINVAL; + + priv->admin_mtu = new_mtu; + + dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + + return 0; +} + +static struct ipoib_path *__path_find(struct net_device *dev, + union ib_gid *gid) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct rb_node *n = priv->path_tree.rb_node; + struct ipoib_path *path; + int ret; + + while (n) { + path = rb_entry(n, struct ipoib_path, rb_node); + + ret = memcmp(gid->raw, path->pathrec.dgid.raw, + sizeof (union ib_gid)); + + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + return path; + } + + return NULL; +} + +static int __path_add(struct net_device *dev, struct ipoib_path *path) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct rb_node **n = &priv->path_tree.rb_node; + struct rb_node *pn = NULL; + struct ipoib_path *tpath; + int ret; + + while (*n) 
{ + pn = *n; + tpath = rb_entry(pn, struct ipoib_path, rb_node); + + ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, + sizeof (union ib_gid)); + if (ret < 0) + n = &pn->rb_left; + else if (ret > 0) + n = &pn->rb_right; + else + return -EEXIST; + } + + rb_link_node(&path->rb_node, pn, n); + rb_insert_color(&path->rb_node, &priv->path_tree); + + list_add_tail(&path->list, &priv->path_list); + + return 0; +} + +static void __path_free(struct net_device *dev, struct ipoib_path *path) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh *neigh, *tn; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&path->queue))) + dev_kfree_skb_irq(skb); + + list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { + if (neigh->ah) + ipoib_put_ah(neigh->ah); + *to_ipoib_neigh(neigh->neighbour) = NULL; + neigh->neighbour->ops->destructor = NULL; + kfree(neigh); + } + + if (path->ah) + ipoib_put_ah(path->ah); + + rb_erase(&path->rb_node, &priv->path_tree); + list_del(&path->list); + kfree(path); +} + +void ipoib_flush_paths(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_path *path, *tp; + LIST_HEAD(remove_list); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + list_splice(&priv->path_list, &remove_list); + INIT_LIST_HEAD(&priv->path_list); + spin_unlock_irqrestore(&priv->lock, flags); + + list_for_each_entry_safe(path, tp, &remove_list, list) { + if (path->query) + ib_sa_cancel_query(path->query_id, path->query); + wait_for_completion(&path->done); + __path_free(dev, path); + } +} + +static void path_rec_completion(int status, + struct ib_sa_path_rec *pathrec, + void *path_ptr) +{ + struct ipoib_path *path = path_ptr; + struct net_device *dev = path->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_ah *ah = NULL; + struct ipoib_neigh *neigh; + struct sk_buff_head skqueue; + struct sk_buff *skb; + unsigned long flags; + + if (pathrec) + ipoib_dbg(priv, "PathRec LID 0x%04x for GID " IPOIB_GID_FMT "\n", + be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid)); + else + ipoib_dbg(priv, "PathRec status %d for GID " IPOIB_GID_FMT "\n", + status, IPOIB_GID_ARG(path->pathrec.dgid)); + + skb_queue_head_init(&skqueue); + + if (!status) { + /* + * For now we set static_rate to 0. This is not + * really correct: we should look at the rate + * component of the path member record, compare it + * with the rate of our local port (calculated from + * the active link speed and link width) and set an + * inter-packet delay appropriately. 
+ */ + struct ib_ah_attr av = { + .dlid = be16_to_cpu(pathrec->dlid), + .sl = pathrec->sl, + .static_rate = 0, + .port_num = priv->port + }; + + ah = ipoib_create_ah(dev, priv->pd, &av); + } + + spin_lock_irqsave(&priv->lock, flags); + + path->ah = ah; + + if (ah) { + path->pathrec = *pathrec; + + ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", + ah, be16_to_cpu(pathrec->dlid), pathrec->sl); + + while ((skb = __skb_dequeue(&path->queue))) + __skb_queue_tail(&skqueue, skb); + + list_for_each_entry(neigh, &path->neigh_list, list) { + kref_get(&path->ah->ref); + neigh->ah = path->ah; + + while ((skb = __skb_dequeue(&neigh->queue))) + __skb_queue_tail(&skqueue, skb); + } + } else + path->query = NULL; + + complete(&path->done); + + spin_unlock_irqrestore(&priv->lock, flags); + + while ((skb = __skb_dequeue(&skqueue))) { + skb->dev = dev; + if (dev_queue_xmit(skb)) + ipoib_warn(priv, "dev_queue_xmit failed " + "to requeue packet\n"); + } +} + +static struct ipoib_path *path_rec_create(struct net_device *dev, + union ib_gid *gid) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_path *path; + + path = kmalloc(sizeof *path, GFP_ATOMIC); + if (!path) + return NULL; + + path->dev = dev; + path->pathrec.dlid = 0; + + skb_queue_head_init(&path->queue); + + INIT_LIST_HEAD(&path->neigh_list); + path->query = NULL; + init_completion(&path->done); + + memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid)); + path->pathrec.sgid = priv->local_gid; + path->pathrec.pkey = cpu_to_be16(priv->pkey); + path->pathrec.numb_path = 1; + + __path_add(dev, path); + + return path; +} + +static int path_rec_start(struct net_device *dev, + struct ipoib_path *path) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(path->pathrec.dgid)); + + path->query_id = + ib_sa_path_rec_get(priv->ca, priv->port, + &path->pathrec, + IB_SA_PATH_REC_DGID | + IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_PKEY, + 1000, GFP_ATOMIC, + path_rec_completion, + path, &path->query); + if (path->query_id < 0) { + ipoib_warn(priv, "ib_sa_path_rec_get failed\n"); + path->query = NULL; + return path->query_id; + } + + return 0; +} + +static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_path *path; + struct ipoib_neigh *neigh; + + neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); + if (!neigh) { + ++priv->stats.tx_dropped; + dev_kfree_skb_any(skb); + return; + } + + skb_queue_head_init(&neigh->queue); + neigh->neighbour = skb->dst->neighbour; + *to_ipoib_neigh(skb->dst->neighbour) = neigh; + + /* + * We can only be called from ipoib_start_xmit, so we're + * inside tx_lock -- no need to save/restore flags. 
+	 */
+	spin_lock(&priv->lock);
+
+	path = __path_find(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4));
+	if (!path) {
+		path = path_rec_create(dev,
+				       (union ib_gid *) (skb->dst->neighbour->ha + 4));
+		if (!path)
+			goto err;
+	}
+
+	list_add_tail(&neigh->list, &path->neigh_list);
+
+	if (path->pathrec.dlid) {
+		kref_get(&path->ah->ref);
+		neigh->ah = path->ah;
+
+		ipoib_send(dev, skb, path->ah,
+			   be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
+	} else {
+		neigh->ah = NULL;
+		if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+			__skb_queue_tail(&neigh->queue, skb);
+		} else {
+			++priv->stats.tx_dropped;
+			dev_kfree_skb_any(skb);
+		}
+
+		if (!path->query && path_rec_start(dev, path))
+			goto err;
+	}
+
+	spin_unlock(&priv->lock);
+	return;
+
+err:
+	*to_ipoib_neigh(skb->dst->neighbour) = NULL;
+	list_del(&neigh->list);
+	neigh->neighbour->ops->destructor = NULL;
+	kfree(neigh);
+
+	++priv->stats.tx_dropped;
+	dev_kfree_skb_any(skb);
+
+	spin_unlock(&priv->lock);
+}
+
+static void path_lookup(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
+
+	/* Look up path record for unicasts */
+	if (skb->dst->neighbour->ha[4] != 0xff) {
+		neigh_add_path(skb, dev);
+		return;
+	}
+
+	/* Add in the P_Key for multicasts */
+	skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
+	skb->dst->neighbour->ha[9] = priv->pkey & 0xff;
+	ipoib_mcast_send(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4), skb);
+}
+
+static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
+			     struct ipoib_pseudoheader *phdr)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_path *path;
+
+	/*
+	 * We can only be called from ipoib_start_xmit, so we're
+	 * inside tx_lock -- no need to save/restore flags.
+	 */
+	spin_lock(&priv->lock);
+
+	path = __path_find(dev, (union ib_gid *) (phdr->hwaddr + 4));
+	if (!path) {
+		path = path_rec_create(dev,
+				       (union ib_gid *) (phdr->hwaddr + 4));
+		if (path) {
+			/* put pseudoheader back on for next time */
+			skb_push(skb, sizeof *phdr);
+			__skb_queue_tail(&path->queue, skb);
+
+			if (path_rec_start(dev, path))
+				__path_free(dev, path);
+		} else {
+			++priv->stats.tx_dropped;
+			dev_kfree_skb_any(skb);
+		}
+
+		spin_unlock(&priv->lock);
+		return;
+	}
+
+	if (path->pathrec.dlid) {
+		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
+			  be16_to_cpu(path->pathrec.dlid));
+
+		ipoib_send(dev, skb, path->ah,
+			   be32_to_cpup((__be32 *) phdr->hwaddr));
+	} else if ((path->query || !path_rec_start(dev, path)) &&
+		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+		/* put pseudoheader back on for next time */
+		skb_push(skb, sizeof *phdr);
+		__skb_queue_tail(&path->queue, skb);
+	} else {
+		++priv->stats.tx_dropped;
+		dev_kfree_skb_any(skb);
+	}
+
+	spin_unlock(&priv->lock);
+}
+
+static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_neigh *neigh;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (!spin_trylock(&priv->tx_lock)) {
+		local_irq_restore(flags);
+		return NETDEV_TX_LOCKED;
+	}
+
+	/*
+	 * Check if our queue is stopped.  Since we have the LLTX bit
+	 * set, we can't rely on netif_stop_queue() preventing our
+	 * xmit function from being called with a full queue.
+ */ + if (unlikely(netif_queue_stopped(dev))) { + spin_unlock_irqrestore(&priv->tx_lock, flags); + return NETDEV_TX_BUSY; + } + + if (skb->dst && skb->dst->neighbour) { + if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { + path_lookup(skb, dev); + goto out; + } + + neigh = *to_ipoib_neigh(skb->dst->neighbour); + + if (likely(neigh->ah)) { + ipoib_send(dev, skb, neigh->ah, + be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); + goto out; + } + + if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + spin_lock(&priv->lock); + __skb_queue_tail(&neigh->queue, skb); + spin_unlock(&priv->lock); + } else { + ++priv->stats.tx_dropped; + dev_kfree_skb_any(skb); + } + } else { + struct ipoib_pseudoheader *phdr = + (struct ipoib_pseudoheader *) skb->data; + skb_pull(skb, sizeof *phdr); + + if (phdr->hwaddr[4] == 0xff) { + /* Add in the P_Key for multicast*/ + phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; + phdr->hwaddr[9] = priv->pkey & 0xff; + + ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb); + } else { + /* unicast GID -- should be ARP reply */ + + if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) { + ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x " + IPOIB_GID_FMT "\n", + skb->dst ? "neigh" : "dst", + be16_to_cpup((u16 *) skb->data), + be32_to_cpup((u32 *) phdr->hwaddr), + IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4))); + dev_kfree_skb_any(skb); + ++priv->stats.tx_dropped; + goto out; + } + + unicast_arp_send(skb, dev, phdr); + } + } + +out: + spin_unlock_irqrestore(&priv->tx_lock, flags); + + return NETDEV_TX_OK; +} + +struct net_device_stats *ipoib_get_stats(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + return &priv->stats; +} + +static void ipoib_timeout(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + ipoib_warn(priv, "transmit timeout: latency %ld\n", + jiffies - dev->trans_start); + /* XXX reset QP, etc. */ +} + +static int ipoib_hard_header(struct sk_buff *skb, + struct net_device *dev, + unsigned short type, + void *daddr, void *saddr, unsigned len) +{ + struct ipoib_header *header; + + header = (struct ipoib_header *) skb_push(skb, sizeof *header); + + header->proto = htons(type); + header->reserved = 0; + + /* + * If we don't have a neighbour structure, stuff the + * destination address onto the front of the skb so we can + * figure out where to send the packet later. + */ + if (!skb->dst || !skb->dst->neighbour) { + struct ipoib_pseudoheader *phdr = + (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); + memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); + } + + return 0; +} + +static void ipoib_set_mcast_list(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + schedule_work(&priv->restart_task); +} + +static void ipoib_neigh_destructor(struct neighbour *n) +{ + struct ipoib_neigh *neigh = *to_ipoib_neigh(n); + struct ipoib_dev_priv *priv = netdev_priv(n->dev); + unsigned long flags; + + ipoib_dbg(priv, + "neigh_destructor for %06x " IPOIB_GID_FMT "\n", + be32_to_cpup((__be32 *) n->ha), + IPOIB_GID_ARG(*((union ib_gid *) (n->ha + 4)))); + + spin_lock_irqsave(&priv->lock, flags); + + if (neigh) { + if (neigh->ah) + ipoib_put_ah(neigh->ah); + list_del(&neigh->list); + *to_ipoib_neigh(n) = NULL; + kfree(neigh); + } + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int ipoib_neigh_setup(struct neighbour *neigh) +{ + /* + * Is this kosher? 
I can't find anybody in the kernel that + * sets neigh->destructor, so we should be able to set it here + * without trouble. + */ + neigh->ops->destructor = ipoib_neigh_destructor; + + return 0; +} + +static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) +{ + parms->neigh_setup = ipoib_neigh_setup; + + return 0; +} + +int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + /* Allocate RX/TX "rings" to hold queued skbs */ + + priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), + GFP_KERNEL); + if (!priv->rx_ring) { + printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", + ca->name, IPOIB_RX_RING_SIZE); + goto out; + } + memset(priv->rx_ring, 0, + IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); + + priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), + GFP_KERNEL); + if (!priv->tx_ring) { + printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", + ca->name, IPOIB_TX_RING_SIZE); + goto out_rx_ring_cleanup; + } + memset(priv->tx_ring, 0, + IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); + + /* priv->tx_head & tx_tail are already 0 */ + + if (ipoib_ib_dev_init(dev, ca, port)) + goto out_tx_ring_cleanup; + + return 0; + +out_tx_ring_cleanup: + kfree(priv->tx_ring); + +out_rx_ring_cleanup: + kfree(priv->rx_ring); + +out: + return -ENOMEM; +} + +void ipoib_dev_cleanup(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; + + ipoib_delete_debug_file(dev); + + /* Delete any child interfaces first */ + list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { + unregister_netdev(cpriv->dev); + ipoib_dev_cleanup(cpriv->dev); + free_netdev(cpriv->dev); + } + + ipoib_ib_dev_cleanup(dev); + + if (priv->rx_ring) { + kfree(priv->rx_ring); + priv->rx_ring = NULL; + } + + if (priv->tx_ring) { + kfree(priv->tx_ring); + priv->tx_ring = NULL; + } +} + +static void ipoib_setup(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + dev->open = ipoib_open; + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; + dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; + dev->neigh_setup = ipoib_neigh_setup_dev; + + dev->watchdog_timeo = HZ; + + dev->rebuild_header = NULL; + dev->set_mac_address = NULL; + dev->header_cache_update = NULL; + + dev->flags |= IFF_BROADCAST | IFF_MULTICAST; + + /* + * We add in INFINIBAND_ALEN to allow for the destination + * address "pseudoheader" for skbs without neighbour struct. 
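+	 *
+	 * After the two pushes the packet then starts with (layout
+	 * sketch):
+	 *
+	 *	| 20-byte hwaddr (4-byte QPN + 16-byte GID) |
+	 *	| 4-byte ipoib_header | payload ...          |
+	 *
+	 * and ipoib_start_xmit() pulls the pseudoheader back off
+	 * before looking at it.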
+ */ + dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; + dev->addr_len = INFINIBAND_ALEN; + dev->type = ARPHRD_INFINIBAND; + dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2; + dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; + + /* MTU will be reset when mcast join happens */ + dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; + priv->mcast_mtu = priv->admin_mtu = dev->mtu; + + memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); + + netif_carrier_off(dev); + + SET_MODULE_OWNER(dev); + + priv->dev = dev; + + spin_lock_init(&priv->lock); + spin_lock_init(&priv->tx_lock); + + init_MUTEX(&priv->mcast_mutex); + init_MUTEX(&priv->vlan_mutex); + + INIT_LIST_HEAD(&priv->path_list); + INIT_LIST_HEAD(&priv->child_intfs); + INIT_LIST_HEAD(&priv->dead_ahs); + INIT_LIST_HEAD(&priv->multicast_list); + + INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev); + INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev); + INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush, priv->dev); + INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev); + INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah, priv->dev); +} + +struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) +{ + struct net_device *dev; + + dev = alloc_netdev((int) sizeof (struct ipoib_dev_priv), name, + ipoib_setup); + if (!dev) + return NULL; + + return netdev_priv(dev); +} + +static ssize_t show_pkey(struct class_device *cdev, char *buf) +{ + struct ipoib_dev_priv *priv = + netdev_priv(container_of(cdev, struct net_device, class_dev)); + + return sprintf(buf, "0x%04x\n", priv->pkey); +} +static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); + +static ssize_t create_child(struct class_device *cdev, + const char *buf, size_t count) +{ + int pkey; + int ret; + + if (sscanf(buf, "%i", &pkey) != 1) + return -EINVAL; + + if (pkey < 0 || pkey > 0xffff) + return -EINVAL; + + ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev), + pkey); + + return ret ? ret : count; +} +static CLASS_DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child); + +static ssize_t delete_child(struct class_device *cdev, + const char *buf, size_t count) +{ + int pkey; + int ret; + + if (sscanf(buf, "%i", &pkey) != 1) + return -EINVAL; + + if (pkey < 0 || pkey > 0xffff) + return -EINVAL; + + ret = ipoib_vlan_delete(container_of(cdev, struct net_device, class_dev), + pkey); + + return ret ? 
ret : count;
+}
+static CLASS_DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child);
+
+int ipoib_add_pkey_attr(struct net_device *dev)
+{
+	return class_device_create_file(&dev->class_dev,
+					&class_device_attr_pkey);
+}
+
+static struct net_device *ipoib_add_port(const char *format,
+					 struct ib_device *hca, u8 port)
+{
+	struct ipoib_dev_priv *priv;
+	int result = -ENOMEM;
+
+	priv = ipoib_intf_alloc(format);
+	if (!priv)
+		goto alloc_mem_failed;
+
+	SET_NETDEV_DEV(priv->dev, hca->dma_device);
+
+	result = ib_query_pkey(hca, port, 0, &priv->pkey);
+	if (result) {
+		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
+		       hca->name, port, result);
+		/* the netdev is already allocated here, so don't leak it */
+		goto device_init_failed;
+	}
+
+	priv->dev->broadcast[8] = priv->pkey >> 8;
+	priv->dev->broadcast[9] = priv->pkey & 0xff;
+
+	result = ib_query_gid(hca, port, 0, &priv->local_gid);
+	if (result) {
+		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
+		       hca->name, port, result);
+		goto device_init_failed;
+	} else
+		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+
+	result = ipoib_dev_init(priv->dev, hca, port);
+	if (result < 0) {
+		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
+		       hca->name, port, result);
+		goto device_init_failed;
+	}
+
+	INIT_IB_EVENT_HANDLER(&priv->event_handler,
+			      priv->ca, ipoib_event);
+	result = ib_register_event_handler(&priv->event_handler);
+	if (result < 0) {
+		printk(KERN_WARNING "%s: ib_register_event_handler failed for "
+		       "port %d (ret = %d)\n",
+		       hca->name, port, result);
+		goto event_failed;
+	}
+
+	result = register_netdev(priv->dev);
+	if (result) {
+		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
+		       hca->name, port, result);
+		goto register_failed;
+	}
+
+	/* don't return ERR_PTR(0) if the debug or sysfs files fail */
+	result = -ENOMEM;
+
+	if (ipoib_create_debug_file(priv->dev))
+		goto debug_failed;
+
+	if (ipoib_add_pkey_attr(priv->dev))
+		goto sysfs_failed;
+	if (class_device_create_file(&priv->dev->class_dev,
+				     &class_device_attr_create_child))
+		goto sysfs_failed;
+	if (class_device_create_file(&priv->dev->class_dev,
+				     &class_device_attr_delete_child))
+		goto sysfs_failed;
+
+	return priv->dev;
+
+sysfs_failed:
+	ipoib_delete_debug_file(priv->dev);
+
+debug_failed:
+	unregister_netdev(priv->dev);
+
+register_failed:
+	ib_unregister_event_handler(&priv->event_handler);
+
+event_failed:
+	ipoib_dev_cleanup(priv->dev);
+
+device_init_failed:
+	free_netdev(priv->dev);
+
+alloc_mem_failed:
+	return ERR_PTR(result);
+}
+
+static void ipoib_add_one(struct ib_device *device)
+{
+	struct list_head *dev_list;
+	struct net_device *dev;
+	struct ipoib_dev_priv *priv;
+	int s, e, p;
+
+	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
+	if (!dev_list)
+		return;
+
+	INIT_LIST_HEAD(dev_list);
+
+	if (device->node_type == IB_NODE_SWITCH) {
+		s = 0;
+		e = 0;
+	} else {
+		s = 1;
+		e = device->phys_port_cnt;
+	}
+
+	for (p = s; p <= e; ++p) {
+		dev = ipoib_add_port("ib%d", device, p);
+		if (!IS_ERR(dev)) {
+			priv = netdev_priv(dev);
+			list_add_tail(&priv->list, dev_list);
+		}
+	}
+
+	ib_set_client_data(device, &ipoib_client, dev_list);
+}
+
+static void ipoib_remove_one(struct ib_device *device)
+{
+	struct ipoib_dev_priv *priv, *tmp;
+	struct list_head *dev_list;
+
+	dev_list = ib_get_client_data(device, &ipoib_client);
+
+	list_for_each_entry_safe(priv, tmp, dev_list, list) {
+		ib_unregister_event_handler(&priv->event_handler);
+
+		unregister_netdev(priv->dev);
+		ipoib_dev_cleanup(priv->dev);
+		free_netdev(priv->dev);
+	}
+
+	kfree(dev_list);
+}
+
+static int __init ipoib_init_module(void)
+{
+	int ret;
+
+	ret = ipoib_register_debugfs();
+	if (ret)
+		return ret;
+
+	/*
+	 * We create our own workqueue mainly because we want to be
+	 * able to flush it when devices are being removed.  We can't
+	 * use schedule_work()/flush_scheduled_work() because both
+	 * unregister_netdev() and linkwatch_event take the rtnl lock,
+	 * so flush_scheduled_work() can deadlock during device
+	 * removal.
+	 */
+	ipoib_workqueue = create_singlethread_workqueue("ipoib");
+	if (!ipoib_workqueue) {
+		ret = -ENOMEM;
+		goto err_fs;
+	}
+
+	ret = ib_register_client(&ipoib_client);
+	if (ret)
+		goto err_wq;
+
+	return 0;
+
+err_wq:
+	destroy_workqueue(ipoib_workqueue);
+
+err_fs:
+	ipoib_unregister_debugfs();
+
+	return ret;
+}
+
+static void __exit ipoib_cleanup_module(void)
+{
+	/* tear down in reverse order of ipoib_init_module(); the
+	 * workqueue and debugfs are still used while devices are
+	 * being removed */
+	ib_unregister_client(&ipoib_client);
+	destroy_workqueue(ipoib_workqueue);
+	ipoib_unregister_debugfs();
+}
+
+module_init(ipoib_init_module);
+module_exit(ipoib_cleanup_module);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
new file mode 100644
index 000000000000..804c5ba6f810
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -0,0 +1,981 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ + */ + +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/igmp.h> +#include <linux/inetdevice.h> +#include <linux/delay.h> +#include <linux/completion.h> + +#include "ipoib.h" + +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +int mcast_debug_level; + +module_param(mcast_debug_level, int, 0644); +MODULE_PARM_DESC(mcast_debug_level, + "Enable multicast debug tracing if > 0"); +#endif + +static DECLARE_MUTEX(mcast_mutex); + +/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ +struct ipoib_mcast { + struct ib_sa_mcmember_rec mcmember; + struct ipoib_ah *ah; + + struct rb_node rb_node; + struct list_head list; + struct completion done; + + int query_id; + struct ib_sa_query *query; + + unsigned long created; + unsigned long backoff; + + unsigned long flags; + unsigned char logcount; + + struct list_head neigh_list; + + struct sk_buff_head pkt_queue; + + struct net_device *dev; +}; + +struct ipoib_mcast_iter { + struct net_device *dev; + union ib_gid mgid; + unsigned long created; + unsigned int queuelen; + unsigned int complete; + unsigned int send_only; +}; + +static void ipoib_mcast_free(struct ipoib_mcast *mcast) +{ + struct net_device *dev = mcast->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh *neigh, *tmp; + unsigned long flags; + + ipoib_dbg_mcast(netdev_priv(dev), + "deleting multicast group " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(mcast->mcmember.mgid)); + + spin_lock_irqsave(&priv->lock, flags); + + list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { + ipoib_put_ah(neigh->ah); + *to_ipoib_neigh(neigh->neighbour) = NULL; + neigh->neighbour->ops->destructor = NULL; + kfree(neigh); + } + + spin_unlock_irqrestore(&priv->lock, flags); + + if (mcast->ah) + ipoib_put_ah(mcast->ah); + + while (!skb_queue_empty(&mcast->pkt_queue)) { + struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); + + skb->dev = dev; + dev_kfree_skb_any(skb); + } + + kfree(mcast); +} + +static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, + int can_sleep) +{ + struct ipoib_mcast *mcast; + + mcast = kmalloc(sizeof (*mcast), can_sleep ? 
GFP_KERNEL : GFP_ATOMIC);
+	if (!mcast)
+		return NULL;
+
+	memset(mcast, 0, sizeof (*mcast));
+
+	init_completion(&mcast->done);
+
+	mcast->dev = dev;
+	mcast->created = jiffies;
+	/* backoff is kept in seconds (see IPOIB_MAX_BACKOFF_SECONDS) */
+	mcast->backoff = 1;
+	mcast->logcount = 0;
+
+	INIT_LIST_HEAD(&mcast->list);
+	INIT_LIST_HEAD(&mcast->neigh_list);
+	skb_queue_head_init(&mcast->pkt_queue);
+
+	mcast->ah = NULL;
+	mcast->query = NULL;
+
+	return mcast;
+}
+
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct rb_node *n = priv->multicast_tree.rb_node;
+
+	while (n) {
+		struct ipoib_mcast *mcast;
+		int ret;
+
+		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
+
+		ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw,
+			     sizeof (union ib_gid));
+		if (ret < 0)
+			n = n->rb_left;
+		else if (ret > 0)
+			n = n->rb_right;
+		else
+			return mcast;
+	}
+
+	return NULL;
+}
+
+static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
+
+	while (*n) {
+		struct ipoib_mcast *tmcast;
+		int ret;
+
+		pn = *n;
+		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
+
+		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
+			     sizeof (union ib_gid));
+		if (ret < 0)
+			n = &pn->rb_left;
+		else if (ret > 0)
+			n = &pn->rb_right;
+		else
+			return -EEXIST;
+	}
+
+	rb_link_node(&mcast->rb_node, pn, n);
+	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
+
+	return 0;
+}
+
+static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
+				   struct ib_sa_mcmember_rec *mcmember)
+{
+	struct net_device *dev = mcast->dev;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int ret;
+
+	mcast->mcmember = *mcmember;
+
+	/* Set the cached Q_Key before we attach if it's the broadcast group */
+	if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
+		    sizeof (union ib_gid)))
+		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
+
+	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+			ipoib_warn(priv, "multicast group " IPOIB_GID_FMT
+				   " already attached\n",
+				   IPOIB_GID_ARG(mcast->mcmember.mgid));
+
+			return 0;
+		}
+
+		ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
+					 &mcast->mcmember.mgid);
+		if (ret < 0) {
+			ipoib_warn(priv, "couldn't attach QP to multicast group "
+				   IPOIB_GID_FMT "\n",
+				   IPOIB_GID_ARG(mcast->mcmember.mgid));
+
+			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
+			return ret;
+		}
+	}
+
+	{
+		/*
+		 * For now we set static_rate to 0.  This is not
+		 * really correct: we should look at the rate
+		 * component of the MC member record, compare it with
+		 * the rate of our local port (calculated from the
+		 * active link speed and link width) and set an
+		 * inter-packet delay appropriately.
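+		 *
+		 * Roughly (an illustrative sketch only; note that
+		 * static_rate is an IB inter-packet delay encoding,
+		 * not a simple ratio):
+		 *
+		 *	local rate = active width * active speed;
+		 *	if (mcmember rate < local rate)
+		 *		pick static_rate to throttle down to
+		 *		the member record's rate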
+ */ + struct ib_ah_attr av = { + .dlid = be16_to_cpu(mcast->mcmember.mlid), + .port_num = priv->port, + .sl = mcast->mcmember.sl, + .static_rate = 0, + .ah_flags = IB_AH_GRH, + .grh = { + .flow_label = be32_to_cpu(mcast->mcmember.flow_label), + .hop_limit = mcast->mcmember.hop_limit, + .sgid_index = 0, + .traffic_class = mcast->mcmember.traffic_class + } + }; + + av.grh.dgid = mcast->mcmember.mgid; + + mcast->ah = ipoib_create_ah(dev, priv->pd, &av); + if (!mcast->ah) { + ipoib_warn(priv, "ib_address_create failed\n"); + } else { + ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT + " AV %p, LID 0x%04x, SL %d\n", + IPOIB_GID_ARG(mcast->mcmember.mgid), + mcast->ah->ah, + be16_to_cpu(mcast->mcmember.mlid), + mcast->mcmember.sl); + } + } + + /* actually send any queued packets */ + while (!skb_queue_empty(&mcast->pkt_queue)) { + struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); + + skb->dev = dev; + + if (!skb->dst || !skb->dst->neighbour) { + /* put pseudoheader back on for next time */ + skb_push(skb, sizeof (struct ipoib_pseudoheader)); + } + + if (dev_queue_xmit(skb)) + ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); + } + + return 0; +} + +static void +ipoib_mcast_sendonly_join_complete(int status, + struct ib_sa_mcmember_rec *mcmember, + void *mcast_ptr) +{ + struct ipoib_mcast *mcast = mcast_ptr; + struct net_device *dev = mcast->dev; + + if (!status) + ipoib_mcast_join_finish(mcast, mcmember); + else { + if (mcast->logcount++ < 20) + ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " + IPOIB_GID_FMT ", status %d\n", + IPOIB_GID_ARG(mcast->mcmember.mgid), status); + + /* Flush out any queued packets */ + while (!skb_queue_empty(&mcast->pkt_queue)) { + struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); + + skb->dev = dev; + + dev_kfree_skb_any(skb); + } + + /* Clear the busy flag so we try again */ + clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + } + + complete(&mcast->done); +} + +static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) +{ + struct net_device *dev = mcast->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_sa_mcmember_rec rec = { +#if 0 /* Some SMs don't support send-only yet */ + .join_state = 4 +#else + .join_state = 1 +#endif + }; + int ret = 0; + + if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { + ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); + return -ENODEV; + } + + if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { + ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); + return -EBUSY; + } + + rec.mgid = mcast->mcmember.mgid; + rec.port_gid = priv->local_gid; + rec.pkey = be16_to_cpu(priv->pkey); + + ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, + IB_SA_MCMEMBER_REC_MGID | + IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_PKEY | + IB_SA_MCMEMBER_REC_JOIN_STATE, + 1000, GFP_ATOMIC, + ipoib_mcast_sendonly_join_complete, + mcast, &mcast->query); + if (ret < 0) { + ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", + ret); + } else { + ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT + ", starting join\n", + IPOIB_GID_ARG(mcast->mcmember.mgid)); + + mcast->query_id = ret; + } + + return ret; +} + +static void ipoib_mcast_join_complete(int status, + struct ib_sa_mcmember_rec *mcmember, + void *mcast_ptr) +{ + struct ipoib_mcast *mcast = mcast_ptr; + struct net_device *dev = mcast->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); + + ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT + " (status %d)\n", + 
IPOIB_GID_ARG(mcast->mcmember.mgid), status);
+
+	if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
+		mcast->backoff = 1;
+		down(&mcast_mutex);
+		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+			queue_work(ipoib_workqueue, &priv->mcast_task);
+		up(&mcast_mutex);
+		complete(&mcast->done);
+		return;
+	}
+
+	if (status == -EINTR) {
+		complete(&mcast->done);
+		return;
+	}
+
+	if (status && mcast->logcount++ < 20) {
+		if (status == -ETIMEDOUT || status == -EINTR) {
+			ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
+					", status %d\n",
+					IPOIB_GID_ARG(mcast->mcmember.mgid),
+					status);
+		} else {
+			ipoib_warn(priv, "multicast join failed for "
+				   IPOIB_GID_FMT ", status %d\n",
+				   IPOIB_GID_ARG(mcast->mcmember.mgid),
+				   status);
+		}
+	}
+
+	mcast->backoff *= 2;
+	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+
+	mcast->query = NULL;
+
+	down(&mcast_mutex);
+	if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
+		if (status == -ETIMEDOUT)
+			queue_work(ipoib_workqueue, &priv->mcast_task);
+		else
+			queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+					   mcast->backoff * HZ);
+	} else
+		complete(&mcast->done);
+	up(&mcast_mutex);
+
+	return;
+}
+
+static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
+			     int create)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_sa_mcmember_rec rec = {
+		.join_state = 1
+	};
+	ib_sa_comp_mask comp_mask;
+	int ret = 0;
+
+	ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n",
+			IPOIB_GID_ARG(mcast->mcmember.mgid));
+
+	rec.mgid = mcast->mcmember.mgid;
+	rec.port_gid = priv->local_gid;
+	rec.pkey = be16_to_cpu(priv->pkey);
+
+	comp_mask =
+		IB_SA_MCMEMBER_REC_MGID |
+		IB_SA_MCMEMBER_REC_PORT_GID |
+		IB_SA_MCMEMBER_REC_PKEY |
+		IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+	if (create) {
+		comp_mask |=
+			IB_SA_MCMEMBER_REC_QKEY |
+			IB_SA_MCMEMBER_REC_SL |
+			IB_SA_MCMEMBER_REC_FLOW_LABEL |
+			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+		rec.qkey = priv->broadcast->mcmember.qkey;
+		rec.sl = priv->broadcast->mcmember.sl;
+		rec.flow_label = priv->broadcast->mcmember.flow_label;
+		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+	}
+
+	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
+				     mcast->backoff * 1000, GFP_ATOMIC,
+				     ipoib_mcast_join_complete,
+				     mcast, &mcast->query);
+
+	if (ret < 0) {
+		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
+
+		mcast->backoff *= 2;
+		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+
+		down(&mcast_mutex);
+		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+			queue_delayed_work(ipoib_workqueue,
+					   &priv->mcast_task,
+					   mcast->backoff * HZ);
+		up(&mcast_mutex);
+	} else
+		mcast->query_id = ret;
+}
+
+void ipoib_mcast_join_task(void *dev_ptr)
+{
+	struct net_device *dev = dev_ptr;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
+		return;
+
+	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
+		ipoib_warn(priv, "ib_query_gid() failed\n");
+	else
+		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+
+	if (!priv->broadcast) {
+		priv->broadcast = ipoib_mcast_alloc(dev, 1);
+		if (!priv->broadcast) {
+			ipoib_warn(priv, "failed to allocate broadcast group\n");
+			down(&mcast_mutex);
+			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+				queue_delayed_work(ipoib_workqueue,
+						   &priv->mcast_task, HZ);
+			up(&mcast_mutex);
+			return;
+		}
+
+		memcpy(priv->broadcast->mcmember.mgid.raw,
priv->dev->broadcast + 4, + sizeof (union ib_gid)); + + spin_lock_irq(&priv->lock); + __ipoib_mcast_add(dev, priv->broadcast); + spin_unlock_irq(&priv->lock); + } + + if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { + ipoib_mcast_join(dev, priv->broadcast, 0); + return; + } + + while (1) { + struct ipoib_mcast *mcast = NULL; + + spin_lock_irq(&priv->lock); + list_for_each_entry(mcast, &priv->multicast_list, list) { + if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) + && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) + && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { + /* Found the next unjoined group */ + break; + } + } + spin_unlock_irq(&priv->lock); + + if (&mcast->list == &priv->multicast_list) { + /* All done */ + break; + } + + ipoib_mcast_join(dev, mcast, 1); + return; + } + + { + struct ib_port_attr attr; + + if (!ib_query_port(priv->ca, priv->port, &attr)) + priv->local_lid = attr.lid; + else + ipoib_warn(priv, "ib_query_port failed\n"); + } + + priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - + IPOIB_ENCAP_LEN; + dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + + ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); + + clear_bit(IPOIB_MCAST_RUN, &priv->flags); + netif_carrier_on(dev); +} + +int ipoib_mcast_start_thread(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + ipoib_dbg_mcast(priv, "starting multicast thread\n"); + + down(&mcast_mutex); + if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) + queue_work(ipoib_workqueue, &priv->mcast_task); + up(&mcast_mutex); + + return 0; +} + +int ipoib_mcast_stop_thread(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_mcast *mcast; + + ipoib_dbg_mcast(priv, "stopping multicast thread\n"); + + down(&mcast_mutex); + clear_bit(IPOIB_MCAST_RUN, &priv->flags); + cancel_delayed_work(&priv->mcast_task); + up(&mcast_mutex); + + flush_workqueue(ipoib_workqueue); + + if (priv->broadcast && priv->broadcast->query) { + ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); + priv->broadcast->query = NULL; + ipoib_dbg_mcast(priv, "waiting for bcast\n"); + wait_for_completion(&priv->broadcast->done); + } + + list_for_each_entry(mcast, &priv->multicast_list, list) { + if (mcast->query) { + ib_sa_cancel_query(mcast->query_id, mcast->query); + mcast->query = NULL; + ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(mcast->mcmember.mgid)); + wait_for_completion(&mcast->done); + } + } + + return 0; +} + +int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_sa_mcmember_rec rec = { + .join_state = 1 + }; + int ret = 0; + + if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) + return 0; + + ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(mcast->mcmember.mgid)); + + rec.mgid = mcast->mcmember.mgid; + rec.port_gid = priv->local_gid; + rec.pkey = be16_to_cpu(priv->pkey); + + /* Remove ourselves from the multicast group */ + ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), + &mcast->mcmember.mgid); + if (ret) + ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); + + /* + * Just make one shot at leaving and don't wait for a reply; + * if we fail, too bad. 
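+	 *
+	 * (That is also why the completion callback passed below is
+	 * NULL: nothing waits on this query, and if the request is
+	 * lost the stale membership is left for the SM/SA to clean
+	 * up on its own.)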
+ */ + ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, + IB_SA_MCMEMBER_REC_MGID | + IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_PKEY | + IB_SA_MCMEMBER_REC_JOIN_STATE, + 0, GFP_ATOMIC, NULL, + mcast, &mcast->query); + if (ret < 0) + ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed " + "for leave (result = %d)\n", ret); + + return 0; +} + +void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, + struct sk_buff *skb) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_mcast *mcast; + + /* + * We can only be called from ipoib_start_xmit, so we're + * inside tx_lock -- no need to save/restore flags. + */ + spin_lock(&priv->lock); + + mcast = __ipoib_mcast_find(dev, mgid); + if (!mcast) { + /* Let's create a new send only group now */ + ipoib_dbg_mcast(priv, "setting up send only multicast group for " + IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid)); + + mcast = ipoib_mcast_alloc(dev, 0); + if (!mcast) { + ipoib_warn(priv, "unable to allocate memory for " + "multicast structure\n"); + dev_kfree_skb_any(skb); + goto out; + } + + set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); + mcast->mcmember.mgid = *mgid; + __ipoib_mcast_add(dev, mcast); + list_add_tail(&mcast->list, &priv->multicast_list); + } + + if (!mcast->ah) { + if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) + skb_queue_tail(&mcast->pkt_queue, skb); + else + dev_kfree_skb_any(skb); + + if (mcast->query) + ipoib_dbg_mcast(priv, "no address vector, " + "but multicast join already started\n"); + else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) + ipoib_mcast_sendonly_join(mcast); + + /* + * If lookup completes between here and out:, don't + * want to send packet twice. + */ + mcast = NULL; + } + +out: + if (mcast && mcast->ah) { + if (skb->dst && + skb->dst->neighbour && + !*to_ipoib_neigh(skb->dst->neighbour)) { + struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); + + if (neigh) { + kref_get(&mcast->ah->ref); + neigh->ah = mcast->ah; + neigh->neighbour = skb->dst->neighbour; + *to_ipoib_neigh(skb->dst->neighbour) = neigh; + list_add_tail(&neigh->list, &mcast->neigh_list); + } + } + + ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); + } + + spin_unlock(&priv->lock); +} + +void ipoib_mcast_dev_flush(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + LIST_HEAD(remove_list); + struct ipoib_mcast *mcast, *tmcast, *nmcast; + unsigned long flags; + + ipoib_dbg_mcast(priv, "flushing multicast list\n"); + + spin_lock_irqsave(&priv->lock, flags); + list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { + nmcast = ipoib_mcast_alloc(dev, 0); + if (nmcast) { + nmcast->flags = + mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY); + + nmcast->mcmember.mgid = mcast->mcmember.mgid; + + /* Add the new group in before the to-be-destroyed group */ + list_add_tail(&nmcast->list, &mcast->list); + list_del_init(&mcast->list); + + rb_replace_node(&mcast->rb_node, &nmcast->rb_node, + &priv->multicast_tree); + + list_add_tail(&mcast->list, &remove_list); + } else { + ipoib_warn(priv, "could not reallocate multicast group " + IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(mcast->mcmember.mgid)); + } + } + + if (priv->broadcast) { + nmcast = ipoib_mcast_alloc(dev, 0); + if (nmcast) { + nmcast->mcmember.mgid = priv->broadcast->mcmember.mgid; + + rb_replace_node(&priv->broadcast->rb_node, + &nmcast->rb_node, + &priv->multicast_tree); + + list_add_tail(&priv->broadcast->list, &remove_list); + } + + priv->broadcast = nmcast; + } + + 
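	/*
+	 * The groups moved to remove_list are torn down below, once
+	 * the lock is dropped: ipoib_mcast_leave() sends SA requests
+	 * and takes a mutex, so it may sleep.
+	 */
+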
spin_unlock_irqrestore(&priv->lock, flags); + + list_for_each_entry(mcast, &remove_list, list) { + ipoib_mcast_leave(dev, mcast); + ipoib_mcast_free(mcast); + } +} + +void ipoib_mcast_dev_down(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned long flags; + + /* Delete broadcast since it will be recreated */ + if (priv->broadcast) { + ipoib_dbg_mcast(priv, "deleting broadcast group\n"); + + spin_lock_irqsave(&priv->lock, flags); + rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); + spin_unlock_irqrestore(&priv->lock, flags); + ipoib_mcast_leave(dev, priv->broadcast); + ipoib_mcast_free(priv->broadcast); + priv->broadcast = NULL; + } +} + +void ipoib_mcast_restart_task(void *dev_ptr) +{ + struct net_device *dev = dev_ptr; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct dev_mc_list *mclist; + struct ipoib_mcast *mcast, *tmcast; + LIST_HEAD(remove_list); + unsigned long flags; + + ipoib_dbg_mcast(priv, "restarting multicast task\n"); + + ipoib_mcast_stop_thread(dev); + + spin_lock_irqsave(&priv->lock, flags); + + /* + * Unfortunately, the networking core only gives us a list of all of + * the multicast hardware addresses. We need to figure out which ones + * are new and which ones have been removed + */ + + /* Clear out the found flag */ + list_for_each_entry(mcast, &priv->multicast_list, list) + clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); + + /* Mark all of the entries that are found or don't exist */ + for (mclist = dev->mc_list; mclist; mclist = mclist->next) { + union ib_gid mgid; + + memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); + + /* Add in the P_Key */ + mgid.raw[4] = (priv->pkey >> 8) & 0xff; + mgid.raw[5] = priv->pkey & 0xff; + + mcast = __ipoib_mcast_find(dev, &mgid); + if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { + struct ipoib_mcast *nmcast; + + /* Not found or send-only group, let's add a new entry */ + ipoib_dbg_mcast(priv, "adding multicast entry for mgid " + IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); + + nmcast = ipoib_mcast_alloc(dev, 0); + if (!nmcast) { + ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); + continue; + } + + set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); + + nmcast->mcmember.mgid = mgid; + + if (mcast) { + /* Destroy the send only entry */ + list_del(&mcast->list); + list_add_tail(&mcast->list, &remove_list); + + rb_replace_node(&mcast->rb_node, + &nmcast->rb_node, + &priv->multicast_tree); + } else + __ipoib_mcast_add(dev, nmcast); + + list_add_tail(&nmcast->list, &priv->multicast_list); + } + + if (mcast) + set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); + } + + /* Remove all of the entries don't exist anymore */ + list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { + if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && + !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { + ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", + IPOIB_GID_ARG(mcast->mcmember.mgid)); + + rb_erase(&mcast->rb_node, &priv->multicast_tree); + + /* Move to the remove list */ + list_del(&mcast->list); + list_add_tail(&mcast->list, &remove_list); + } + } + spin_unlock_irqrestore(&priv->lock, flags); + + /* We have to cancel outside of the spinlock */ + list_for_each_entry(mcast, &remove_list, list) { + ipoib_mcast_leave(mcast->dev, mcast); + ipoib_mcast_free(mcast); + } + + if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + ipoib_mcast_start_thread(dev); +} + +struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct 
net_device *dev) +{ + struct ipoib_mcast_iter *iter; + + iter = kmalloc(sizeof *iter, GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + memset(iter->mgid.raw, 0, sizeof iter->mgid); + + if (ipoib_mcast_iter_next(iter)) { + ipoib_mcast_iter_free(iter); + return NULL; + } + + return iter; +} + +void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter) +{ + kfree(iter); +} + +int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) +{ + struct ipoib_dev_priv *priv = netdev_priv(iter->dev); + struct rb_node *n; + struct ipoib_mcast *mcast; + int ret = 1; + + spin_lock_irq(&priv->lock); + + n = rb_first(&priv->multicast_tree); + + while (n) { + mcast = rb_entry(n, struct ipoib_mcast, rb_node); + + if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, + sizeof (union ib_gid)) < 0) { + iter->mgid = mcast->mcmember.mgid; + iter->created = mcast->created; + iter->queuelen = skb_queue_len(&mcast->pkt_queue); + iter->complete = !!mcast->ah; + iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); + + ret = 0; + + break; + } + + n = rb_next(n); + } + + spin_unlock_irq(&priv->lock); + + return ret; +} + +void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, + union ib_gid *mgid, + unsigned long *created, + unsigned int *queuelen, + unsigned int *complete, + unsigned int *send_only) +{ + *mgid = iter->mgid; + *created = iter->created; + *queuelen = iter->queuelen; + *complete = iter->complete; + *send_only = iter->send_only; +} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c new file mode 100644 index 000000000000..262f2598cfef --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <ib_cache.h> + +#include "ipoib.h" + +int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_attr *qp_attr; + int attr_mask; + int ret; + u16 pkey_index; + + ret = -ENOMEM; + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + goto out; + + if (ib_cached_pkey_find(priv->ca, priv->port, priv->pkey, &pkey_index)) { + clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + ret = -ENXIO; + goto out; + } + set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + + /* set correct QKey for QP */ + qp_attr->qkey = priv->qkey; + attr_mask = IB_QP_QKEY; + ret = ib_modify_qp(priv->qp, qp_attr, attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); + goto out; + } + + /* attach QP to multicast group */ + down(&priv->mcast_mutex); + ret = ib_attach_mcast(priv->qp, mgid, mlid); + up(&priv->mcast_mutex); + if (ret) + ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); + +out: + kfree(qp_attr); + return ret; +} + +int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int ret; + + down(&priv->mcast_mutex); + ret = ib_detach_mcast(priv->qp, mgid, mlid); + up(&priv->mcast_mutex); + if (ret) + ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); + + return ret; +} + +int ipoib_qp_create(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int ret; + u16 pkey_index; + struct ib_qp_attr qp_attr; + int attr_mask; + + /* + * Search through the port P_Key table for the requested pkey value. + * The port has to be assigned to the respective IB partition in + * advance. 
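+	 *
+	 * For example (hypothetical table): if the SM has added the
+	 * port to partition 0x8002, the port's P_Key table might read
+	 *
+	 *	index 0: 0xffff		(default partition)
+	 *	index 1: 0x8002
+	 *
+	 * and an interface created for pkey 0x8002 resolves to
+	 * pkey_index 1 here.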
+ */ + ret = ib_cached_pkey_find(priv->ca, priv->port, priv->pkey, &pkey_index); + if (ret) { + clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + return ret; + } + set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qkey = 0; + qp_attr.port_num = priv->port; + qp_attr.pkey_index = pkey_index; + attr_mask = + IB_QP_QKEY | + IB_QP_PORT | + IB_QP_PKEY_INDEX | + IB_QP_STATE; + ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret); + goto out_fail; + } + + qp_attr.qp_state = IB_QPS_RTR; + /* Can't set this in a INIT->RTR transition */ + attr_mask &= ~IB_QP_PORT; + ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret); + goto out_fail; + } + + qp_attr.qp_state = IB_QPS_RTS; + qp_attr.sq_psn = 0; + attr_mask |= IB_QP_SQ_PSN; + attr_mask &= ~IB_QP_PKEY_INDEX; + ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); + if (ret) { + ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret); + goto out_fail; + } + + return 0; + +out_fail: + ib_destroy_qp(priv->qp); + priv->qp = NULL; + + return -EINVAL; +} + +int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_qp_init_attr init_attr = { + .cap = { + .max_send_wr = IPOIB_TX_RING_SIZE, + .max_recv_wr = IPOIB_RX_RING_SIZE, + .max_send_sge = 1, + .max_recv_sge = 1 + }, + .sq_sig_type = IB_SIGNAL_ALL_WR, + .rq_sig_type = IB_SIGNAL_ALL_WR, + .qp_type = IB_QPT_UD + }; + + priv->pd = ib_alloc_pd(priv->ca); + if (IS_ERR(priv->pd)) { + printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); + return -ENODEV; + } + + priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, + IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1); + if (IS_ERR(priv->cq)) { + printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); + goto out_free_pd; + } + + if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) + goto out_free_cq; + + priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(priv->mr)) { + printk(KERN_WARNING "%s: ib_reg_phys_mr failed\n", ca->name); + goto out_free_cq; + } + + init_attr.send_cq = priv->cq; + init_attr.recv_cq = priv->cq, + + priv->qp = ib_create_qp(priv->pd, &init_attr); + if (IS_ERR(priv->qp)) { + printk(KERN_WARNING "%s: failed to create QP\n", ca->name); + goto out_free_mr; + } + + priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; + priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; + priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff; + + return 0; + +out_free_mr: + ib_dereg_mr(priv->mr); + +out_free_cq: + ib_destroy_cq(priv->cq); + +out_free_pd: + ib_dealloc_pd(priv->pd); + return -ENODEV; +} + +void ipoib_transport_dev_cleanup(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (priv->qp) { + if (ib_destroy_qp(priv->qp)) + ipoib_warn(priv, "ib_qp_destroy failed\n"); + + priv->qp = NULL; + clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + } + + if (ib_dereg_mr(priv->mr)) + ipoib_warn(priv, "ib_dereg_mr failed\n"); + + if (ib_destroy_cq(priv->cq)) + ipoib_warn(priv, "ib_cq_destroy failed\n"); + + if (ib_dealloc_pd(priv->pd)) + ipoib_warn(priv, "ib_dealloc_pd failed\n"); +} + +void ipoib_event(struct ib_event_handler *handler, + struct ib_event *record) +{ + struct ipoib_dev_priv *priv = + container_of(handler, struct ipoib_dev_priv, event_handler); + + if (record->event == IB_EVENT_PORT_ACTIVE || + 
record->event == IB_EVENT_LID_CHANGE || + record->event == IB_EVENT_SM_CHANGE) { + ipoib_dbg(priv, "Port active event\n"); + schedule_work(&priv->flush_task); + } +} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c new file mode 100644 index 000000000000..94b8ea812fef --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/seq_file.h> + +#include <asm/uaccess.h> + +#include "ipoib.h" + +static ssize_t show_parent(struct class_device *class_dev, char *buf) +{ + struct net_device *dev = + container_of(class_dev, struct net_device, class_dev); + struct ipoib_dev_priv *priv = netdev_priv(dev); + + return sprintf(buf, "%s\n", priv->parent->name); +} +static CLASS_DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); + +int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) +{ + struct ipoib_dev_priv *ppriv, *priv; + char intf_name[IFNAMSIZ]; + int result; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + ppriv = netdev_priv(pdev); + + down(&ppriv->vlan_mutex); + + /* + * First ensure this isn't a duplicate. We check the parent device and + * then all of the child interfaces to make sure the Pkey doesn't match. 
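+	 *
+	 * (Both the parent check and the walk over child_intfs run
+	 * with vlan_mutex held, so two racing writers to create_child
+	 * cannot both slip the same P_Key past this test.)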
+ */ + if (ppriv->pkey == pkey) { + result = -ENOTUNIQ; + goto err; + } + + list_for_each_entry(priv, &ppriv->child_intfs, list) { + if (priv->pkey == pkey) { + result = -ENOTUNIQ; + goto err; + } + } + + snprintf(intf_name, sizeof intf_name, "%s.%04x", + ppriv->dev->name, pkey); + priv = ipoib_intf_alloc(intf_name); + if (!priv) { + result = -ENOMEM; + goto err; + } + + set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); + + priv->pkey = pkey; + + memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); + priv->dev->broadcast[8] = pkey >> 8; + priv->dev->broadcast[9] = pkey & 0xff; + + result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port); + if (result < 0) { + ipoib_warn(ppriv, "failed to initialize subinterface: " + "device %s, port %d", + ppriv->ca->name, ppriv->port); + goto device_init_failed; + } + + result = register_netdev(priv->dev); + if (result) { + ipoib_warn(priv, "failed to initialize; error %i", result); + goto register_failed; + } + + priv->parent = ppriv->dev; + + if (ipoib_create_debug_file(priv->dev)) + goto debug_failed; + + if (ipoib_add_pkey_attr(priv->dev)) + goto sysfs_failed; + + if (class_device_create_file(&priv->dev->class_dev, + &class_device_attr_parent)) + goto sysfs_failed; + + list_add_tail(&priv->list, &ppriv->child_intfs); + + up(&ppriv->vlan_mutex); + + return 0; + +sysfs_failed: + ipoib_delete_debug_file(priv->dev); + +debug_failed: + unregister_netdev(priv->dev); + +register_failed: + ipoib_dev_cleanup(priv->dev); + +device_init_failed: + free_netdev(priv->dev); + +err: + up(&ppriv->vlan_mutex); + return result; +} + +int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) +{ + struct ipoib_dev_priv *ppriv, *priv, *tpriv; + int ret = -ENOENT; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + ppriv = netdev_priv(pdev); + + down(&ppriv->vlan_mutex); + list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { + if (priv->pkey == pkey) { + unregister_netdev(priv->dev); + ipoib_dev_cleanup(priv->dev); + + list_del(&priv->list); + + kfree(priv); + + ret = 0; + break; + } + } + up(&ppriv->vlan_mutex); + + return ret; +} diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 2a9b82002591..f5cdd69497bc 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -188,8 +188,6 @@ extern struct net_device *atrtr_get_dev(struct atalk_addr *sa); extern int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb, struct atalk_addr *sa, void *hwaddr); -extern void aarp_send_probe(struct net_device *dev, - struct atalk_addr *addr); extern void aarp_device_down(struct net_device *dev); extern void aarp_probe_network(struct atalk_iface *atif); extern int aarp_proxy_probe_network(struct atalk_iface *atif, diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 6af5ecf9787a..ca6a19b1479f 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -37,8 +37,6 @@ extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev unsigned char * haddr); extern int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh); -extern int eth_header_parse(struct sk_buff *skb, - unsigned char *haddr); extern struct net_device *alloc_etherdev(int sizeof_priv); static inline void eth_copy_and_sum (struct sk_buff *dest, diff --git a/include/linux/if_infiniband.h b/include/linux/if_infiniband.h new file mode 100644 index 000000000000..3e659ec7dfdd --- /dev/null +++ b/include/linux/if_infiniband.h @@ -0,0 +1,29 @@ +/* + * This software is available to you under a choice of 
one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available at + * <http://www.fsf.org/copyleft/gpl.html>, or the OpenIB.org BSD + * license, available in the LICENSE.TXT file accompanying this + * software. These details are also available at + * <http://openib.org/license.html>. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * $Id$ + */ + +#ifndef _LINUX_IF_INFINIBAND_H +#define _LINUX_IF_INFINIBAND_H + +#define INFINIBAND_ALEN 20 /* Octets in IPoIB HW addr */ + +#endif /* _LINUX_IF_INFINIBAND_H */ diff --git a/include/linux/ip.h b/include/linux/ip.h index 12d504ef8df0..3fe93474047d 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -107,7 +107,14 @@ struct ip_options { #define optlength(opt) (sizeof(struct ip_options) + opt->optlen) -struct inet_opt { +struct ipv6_pinfo; + +struct inet_sock { + /* sk and pinet6 has to be the first two members of inet_sock */ + struct sock sk; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct ipv6_pinfo *pinet6; +#endif /* Socket demultiplex comparisons on incoming packets. */ __u32 daddr; /* Foreign IPv4 addr */ __u32 rcv_saddr; /* Bound local IPv4 addr */ @@ -146,20 +153,9 @@ struct inet_opt { #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ -struct ipv6_pinfo; - -/* WARNING: don't change the layout of the members in inet_sock! */ -struct inet_sock { - struct sock sk; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct ipv6_pinfo *pinet6; -#endif - struct inet_opt inet; -}; - -static inline struct inet_opt * inet_sk(const struct sock *__sk) +static inline struct inet_sock *inet_sk(const struct sock *sk) { - return &((struct inet_sock *)__sk)->inet; + return (struct inet_sock *)sk; } #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d7d1673880d5..d7c28b9db4fd 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -256,32 +256,26 @@ struct raw6_opt { /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! 
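 *
 * (inet6_sk() below reaches pinet6 by casting the sock straight to
 * struct inet_sock, so "inet" must stay the first member of each of
 * these -- and sk/pinet6 the first members of inet_sock itself.)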
*/ struct raw6_sock { - struct sock sk; - struct ipv6_pinfo *pinet6; - struct inet_opt inet; + struct inet_sock inet; struct raw6_opt raw6; struct ipv6_pinfo inet6; }; struct udp6_sock { - struct sock sk; - struct ipv6_pinfo *pinet6; - struct inet_opt inet; + struct inet_sock inet; struct udp_opt udp; struct ipv6_pinfo inet6; }; struct tcp6_sock { - struct sock sk; - struct ipv6_pinfo *pinet6; - struct inet_opt inet; + struct inet_sock inet; struct tcp_opt tcp; struct ipv6_pinfo inet6; }; static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { - return ((struct raw6_sock *)__sk)->pinet6; + return inet_sk(__sk)->pinet6; } static inline struct raw6_opt * raw6_sk(const struct sock *__sk) diff --git a/include/linux/net.h b/include/linux/net.h index 368b0f9d3006..8bfb1243f5e2 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -187,10 +187,6 @@ extern int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); -extern int sock_readv_writev(int type, struct inode *inode, - struct file *file, - const struct iovec *iov, long count, - size_t size); extern int sock_map_fd(struct socket *sock); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock->file) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e969de36edaa..ee36f08c341e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -116,8 +116,6 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern int netlink_attach(int unit, int (*function)(int,struct sk_buff *skb)); -extern void netlink_detach(int unit); extern int netlink_post(int unit, struct sk_buff *skb); extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); @@ -129,7 +127,6 @@ extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ -struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid); struct sock *netlink_getsockbyfilp(struct file *filp); int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 57a2843faa21..b1d0c472d19c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1077,9 +1077,9 @@ static inline void kunmap_skb_frag(void *vaddr) } #define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next, prefetch(skb->next); \ - (skb != (struct sk_buff *)(queue)); \ - skb = skb->next, prefetch(skb->next)) + for (skb = (queue)->next; \ + prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + skb = skb->next) extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index a196e9b76793..97976c431d0b 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -114,8 +114,6 @@ extern struct rpc_authops authnull_ops; extern struct rpc_authops authdes_ops; #endif -u32 pseudoflavor_to_flavor(rpc_authflavor_t); - int rpcauth_register(struct rpc_authops *); int rpcauth_unregister(struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 
04ac3afcc6a9..b902425d2be5 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -257,8 +257,6 @@ RTN *FNAME ARGS \ -extern void cache_defer_req(struct cache_req *req, struct cache_head *item); -extern void cache_revisit_request(struct cache_head *item); extern void cache_clean_deferred(void *owner); static inline struct cache_head *cache_get(struct cache_head *h) @@ -286,14 +284,11 @@ extern void cache_fresh(struct cache_detail *detail, struct cache_head *head, time_t expiry); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); -extern int cache_clean(void); extern void cache_flush(void); extern void cache_purge(struct cache_detail *detail); #define NEVER (0x7FFFFFFF) extern void cache_register(struct cache_detail *cd); extern int cache_unregister(struct cache_detail *cd); -extern struct cache_detail *cache_find(char *name); -extern void cache_drop(struct cache_detail *detail); extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); diff --git a/include/linux/sunrpc/gss_asn1.h b/include/linux/sunrpc/gss_asn1.h index e0e4e1dc5656..3ccecd0ad229 100644 --- a/include/linux/sunrpc/gss_asn1.h +++ b/include/linux/sunrpc/gss_asn1.h @@ -71,8 +71,6 @@ u32 g_verify_token_header( unsigned char **buf_in, int toksize); -u32 g_get_mech_oid(struct xdr_netobj *mech, struct xdr_netobj * in_buf); - int g_token_size( struct xdr_netobj *mech, unsigned int body_size); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6959ed0d69d8..71d710450e84 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -222,7 +222,6 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); void * rpc_malloc(struct rpc_task *, size_t); -void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); void rpciod_wake_up(void); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 112738cca4e1..541dcf838abf 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -95,7 +95,6 @@ u32 * xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen); u32 * xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen); u32 * xdr_encode_netobj(u32 *p, const struct xdr_netobj *); u32 * xdr_decode_netobj(u32 *p, struct xdr_netobj *); -u32 * xdr_decode_netobj_fixed(u32 *p, void *obj, unsigned int len); void xdr_encode_pages(struct xdr_buf *, struct page **, unsigned int, unsigned int); @@ -135,8 +134,6 @@ xdr_adjust_iovec(struct kvec *iov, u32 *p) return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base); } -void xdr_shift_iovec(struct kvec *, int, size_t); - /* * Maximum number of iov's we use. 
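xdr_adjust_iovec(), kept by the xdr.h hunk above, sets an iovec's length to the bytes actually consumed by subtracting the buffer base from the current encode position. A standalone sketch of that arithmetic:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct kvec { void *iov_base; size_t iov_len; };

    /* Same arithmetic as xdr_adjust_iovec() above: the distance from the
     * buffer base to the current encode position is the length used. */
    static size_t adjust_iovec(struct kvec *iov, uint32_t *p)
    {
        return iov->iov_len = (size_t)((uint8_t *)p - (uint8_t *)iov->iov_base);
    }

    int main(void)
    {
        uint32_t buf[64];
        struct kvec iov = { buf, sizeof buf };
        uint32_t *p = buf;

        *p++ = 0x11111111;       /* pretend two XDR words were encoded */
        *p++ = 0x22222222;
        printf("%zu bytes used\n", adjust_iovec(&iov, p));   /* prints 8 */
        return 0;
    }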
*/ @@ -145,10 +142,7 @@ void xdr_shift_iovec(struct kvec *, int, size_t); /* * XDR buffer helper functions */ -extern int xdr_kmap(struct kvec *, struct xdr_buf *, size_t); -extern void xdr_kunmap(struct xdr_buf *, size_t); extern void xdr_shift_buf(struct xdr_buf *, size_t); -extern void _copy_from_pages(char *, struct page **, size_t, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 298b18486729..78dbc7bedd7b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -201,8 +201,6 @@ struct rpc_xprt { struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *toparms); int xprt_destroy(struct rpc_xprt *); -void xprt_shutdown(struct rpc_xprt *); -void xprt_default_timeout(struct rpc_timeout *, int); void xprt_set_timeout(struct rpc_timeout *, unsigned int, unsigned long); @@ -213,7 +211,6 @@ void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release(struct rpc_task *); void xprt_connect(struct rpc_task *); -int xprt_clear_backlog(struct rpc_xprt *); void xprt_sock_setbufsize(struct rpc_xprt *); #define XPRT_LOCKED 0 diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0902b9f496c3..61de59d3cef4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -440,11 +440,7 @@ struct tcp_opt { /* WARNING: don't change the layout of the members in tcp_sock! */ struct tcp_sock { - struct sock sk; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct ipv6_pinfo *pinet6; -#endif - struct inet_opt inet; + struct inet_sock inet; struct tcp_opt tcp; }; diff --git a/include/linux/udp.h b/include/linux/udp.h index facf661dd65a..831a3f532a10 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -53,11 +53,7 @@ struct udp_opt { /* WARNING: don't change the layout of the members in udp_sock! 
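The tcp_sock and udp_sock hunks above collapse three hand-duplicated members into a single embedded inet_sock; the WARNING comments survive because the cast-based accessors only work while inet stays the first member (and sk the first member of that). A hedged sketch of how such a layout invariant can be expressed as a compile-time check; LAYOUT_CHECK is an illustrative macro, not a kernel API:

    #include <stddef.h>

    /* Illustrative stand-ins for the kernel structures. */
    struct sock { int x; };
    struct inet_sock { struct sock sk; unsigned int daddr; };
    struct tcp_opt { unsigned int snd_una; };

    struct tcp_sock {
        struct inet_sock inet;   /* must stay first */
        struct tcp_opt tcp;
    };

    /* Compiles only when cond holds: a negative array size is an error. */
    #define LAYOUT_CHECK(name, cond) typedef char name[(cond) ? 1 : -1]

    LAYOUT_CHECK(check_inet_first, offsetof(struct tcp_sock, inet) == 0);
    LAYOUT_CHECK(check_sk_first, offsetof(struct inet_sock, sk) == 0);

    int main(void) { return 0; }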
*/ struct udp_sock { - struct sock sk; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct ipv6_pinfo *pinet6; -#endif - struct inet_opt inet; + struct inet_sock inet; struct udp_opt udp; }; diff --git a/include/net/act_api.h b/include/net/act_api.h index 221ea5ed6b34..12736d17d3a3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -82,9 +82,6 @@ extern int tcf_action_dump(struct sk_buff *skb, struct tc_action *a, int, int); extern int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); extern int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); extern int tcf_action_copy_stats (struct sk_buff *,struct tc_action *); -extern int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,struct tc_action *,int , int ); -extern int tcf_act_police_dump(struct sk_buff *, struct tc_action *, int, int); -extern int tcf_act_police(struct sk_buff **skb, struct tc_action *a); #endif /* CONFIG_NET_CLS_ACT */ extern int tcf_police(struct sk_buff *skb, struct tcf_police *p); diff --git a/include/net/ax25.h b/include/net/ax25.h index bf86bf67bdb9..ab43611b59ba 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -231,7 +231,6 @@ extern void ax25_send_to_raw(ax25_address *, struct sk_buff *, int); extern void ax25_destroy_socket(ax25_cb *); extern ax25_cb *ax25_create_cb(void); extern void ax25_fillin_cb(ax25_cb *, ax25_dev *); -extern int ax25_create(struct socket *, int); extern struct sock *ax25_make_new(struct sock *, struct ax25_dev *); /* ax25_addr.c */ @@ -239,7 +238,6 @@ extern ax25_address null_ax25_address; extern char *ax2asc(ax25_address *); extern ax25_address *asc2ax(char *); extern int ax25cmp(ax25_address *, ax25_address *); -extern int ax25digicmp(ax25_digi *, ax25_digi *); extern unsigned char *ax25_addr_parse(unsigned char *, int, ax25_address *, ax25_address *, ax25_digi *, int *, int *); extern int ax25_addr_build(unsigned char *, ax25_address *, ax25_address *, ax25_digi *, int, int); extern int ax25_addr_size(ax25_digi *); @@ -268,7 +266,6 @@ extern int ax25_ds_frame_in(ax25_cb *, struct sk_buff *, int); extern void ax25_ds_nr_error_recovery(ax25_cb *); extern void ax25_ds_enquiry_response(ax25_cb *); extern void ax25_ds_establish_data_link(ax25_cb *); -extern void ax25_dev_dama_on(ax25_dev *); extern void ax25_dev_dama_off(ax25_dev *); extern void ax25_dama_on(ax25_cb *); extern void ax25_dama_off(ax25_cb *); diff --git a/include/net/icmp.h b/include/net/icmp.h index aa260fcad767..32b159f7ae22 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -50,15 +50,9 @@ struct raw_opt { struct icmp_filter filter; }; -struct ipv6_pinfo; - /* WARNING: don't change the layout of the members in raw_sock! 
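The raw_opt retained in the icmp.h hunk below embeds the per-socket icmp_filter bitmap; from userspace that bitmap is loaded through the ICMP_FILTER socket option. A hedged example of setting it (raw sockets need root or CAP_NET_RAW):

    #include <stdio.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <linux/icmp.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
        struct icmp_filter filt;

        if (fd < 0) {
            perror("socket");   /* needs CAP_NET_RAW */
            return 1;
        }

        /* A set bit discards that ICMP type; here, echo replies. */
        filt.data = 1U << ICMP_ECHOREPLY;
        if (setsockopt(fd, SOL_RAW, ICMP_FILTER, &filt, sizeof(filt)) < 0)
            perror("setsockopt(ICMP_FILTER)");
        return 0;
    }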
*/ struct raw_sock { - struct sock sk; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct ipv6_pinfo *pinet6; -#endif - struct inet_opt inet; + struct inet_sock inet; struct raw_opt raw4; }; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 76ce5f8b6c1e..e97a9accb71d 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -266,5 +266,20 @@ static inline void ipv6_arcnet_mc_map(const struct in6_addr *addr, char *buf) { buf[0] = 0x00; } + +static inline void ipv6_ib_mc_map(struct in6_addr *addr, char *buf) +{ + buf[0] = 0; /* Reserved */ + buf[1] = 0xff; /* Multicast QPN */ + buf[2] = 0xff; + buf[3] = 0xff; + buf[4] = 0xff; + buf[5] = 0x12; /* link local scope */ + buf[6] = 0x60; /* IPv6 signature */ + buf[7] = 0x1b; + buf[8] = 0; /* P_Key */ + buf[9] = 0; + memcpy(buf + 10, addr->s6_addr + 6, 10); +} #endif #endif diff --git a/include/net/ip.h b/include/net/ip.h index 15a1c6a2e534..deeab8c10759 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -229,6 +229,39 @@ static inline void ip_eth_mc_map(u32 addr, char *buf) buf[3]=addr&0x7F; } +/* + * Map a multicast IP onto multicast MAC for type IP-over-InfiniBand. + * Leave P_Key as 0 to be filled in by driver. + */ + +static inline void ip_ib_mc_map(u32 addr, char *buf) +{ + buf[0] = 0; /* Reserved */ + buf[1] = 0xff; /* Multicast QPN */ + buf[2] = 0xff; + buf[3] = 0xff; + addr = ntohl(addr); + buf[4] = 0xff; + buf[5] = 0x12; /* link local scope */ + buf[6] = 0x40; /* IPv4 signature */ + buf[7] = 0x1b; + buf[8] = 0; /* P_Key */ + buf[9] = 0; + buf[10] = 0; + buf[11] = 0; + buf[12] = 0; + buf[13] = 0; + buf[14] = 0; + buf[15] = 0; + buf[19] = addr & 0xff; + addr >>= 8; + buf[18] = addr & 0xff; + addr >>= 8; + buf[17] = addr & 0xff; + addr >>= 8; + buf[16] = addr & 0x0f; +} + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include <linux/ipv6.h> #endif @@ -295,8 +328,6 @@ extern void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, extern void ip_local_error(struct sock *sk, int err, u32 daddr, u16 dport, u32 info); -extern int ipv4_proc_init(void); - /* sysctl helpers - any sysctl which holds a value that ends up being * fed into the routing cache should use these handlers. 
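ip_ib_mc_map() above mirrors ipv6_ib_mc_map() from the if_inet6.h hunk: the low 28 bits of the IPv4 group address land in the last four octets of the 20-octet IPoIB hardware address (compare INFINIBAND_ALEN earlier), the scope and signature bytes are fixed, and the P_Key octets stay zero for the driver to fill in. A small userspace harness, not kernel code, that reproduces the mapping and prints the address for 224.0.0.1:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    static void map(uint32_t addr, unsigned char *buf)
    {
        buf[0] = 0;            /* reserved */
        buf[1] = 0xff;         /* multicast QPN */
        buf[2] = 0xff;
        buf[3] = 0xff;
        addr = ntohl(addr);
        buf[4] = 0xff;
        buf[5] = 0x12;         /* link-local scope */
        buf[6] = 0x40;         /* IPv4 signature */
        buf[7] = 0x1b;
        buf[8] = buf[9] = 0;   /* P_Key, filled in by the driver */
        for (int i = 10; i < 16; i++)
            buf[i] = 0;
        buf[19] = addr & 0xff; addr >>= 8;
        buf[18] = addr & 0xff; addr >>= 8;
        buf[17] = addr & 0xff; addr >>= 8;
        buf[16] = addr & 0x0f; /* only the low 28 bits are mapped */
    }

    int main(void)
    {
        unsigned char hw[20];

        map(inet_addr("224.0.0.1"), hw);
        for (int i = 0; i < 20; i++)
            printf("%02x%s", hw[i], i == 19 ? "\n" : ":");
        return 0;
    }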
*/ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 48a6ea4c2565..9f28dfffb6c6 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -200,7 +200,6 @@ extern void fib_select_default(const struct flowi *flp, struct fib_result *res); /* Exported by fib_frontend.c */ extern void ip_fib_init(void); -extern void fib_flush(void); extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); @@ -226,7 +225,6 @@ extern struct fib_table *fib_hash_init(int id); extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); -extern u32 fib_rules_map_destination(u32 daddr, struct fib_result *res); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); #endif diff --git a/include/net/ipconfig.h b/include/net/ipconfig.h index faa9cef707c3..2a1fe996fbc6 100644 --- a/include/net/ipconfig.h +++ b/include/net/ipconfig.h @@ -8,14 +8,10 @@ /* The following are initdata: */ -extern int ic_enable; /* Enable or disable the whole shebang */ - extern int ic_proto_enabled; /* Protocols enabled (see IC_xxx) */ -extern int ic_host_name_set; /* Host name set by ipconfig? */ extern int ic_set_manually; /* IPconfig parameters set manually */ extern u32 ic_myaddr; /* My IP address */ -extern u32 ic_netmask; /* Netmask for local subnet */ extern u32 ic_gateway; /* Gateway IP address */ extern u32 ic_servaddr; /* Boot server IP address */ @@ -24,13 +20,6 @@ extern u32 root_server_addr; /* Address of NFS server */ extern u8 root_server_path[]; /* Path to mount as root */ - -/* The following are persistent (not initdata): */ - -extern int ic_proto_used; /* Protocol used, if any */ -extern u32 ic_nameserver; /* DNS server IP address */ -extern u8 ic_domain[]; /* DNS (not NIS) domain name */ - /* bits in ic_proto_{enabled,used} */ #define IC_PROTO 0xFF /* Protocols mask: */ #define IC_BOOTP 0x01 /* BOOTP (or DHCP, see below) */ diff --git a/include/net/ipx.h b/include/net/ipx.h index 064e5f7a4154..006a31bfa2d1 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -139,14 +139,6 @@ static __inline__ void ipxitf_put(struct ipx_interface *intrfc) ipxitf_down(intrfc); } -extern void __ipxitf_down(struct ipx_interface *intrfc); - -static __inline__ void __ipxitf_put(struct ipx_interface *intrfc) -{ - if (atomic_dec_and_test(&intrfc->refcnt)) - __ipxitf_down(intrfc); -} - static __inline__ void ipxrtr_hold(struct ipx_route *rt) { atomic_inc(&rt->refcnt); diff --git a/include/net/irda/ircomm_event.h b/include/net/irda/ircomm_event.h index 9f0ca85aaabf..c290447872d1 100644 --- a/include/net/irda/ircomm_event.h +++ b/include/net/irda/ircomm_event.h @@ -75,7 +75,6 @@ struct ircomm_info { }; extern char *ircomm_state[]; -extern char *ircomm_event[]; struct ircomm_cb; /* Forward decl. 
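ipxitf_put() in the ipx.h hunk above keeps the usual hold/put reference discipline even as its __ipxitf_put variant goes away: whoever drops the count to zero tears the interface down. The same shape, sketched with C11 atomics standing in for the kernel's atomic_t:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct iface {
        atomic_int refcnt;
        char name[16];
    };

    static void iface_hold(struct iface *i)
    {
        atomic_fetch_add(&i->refcnt, 1);
    }

    static void iface_put(struct iface *i)
    {
        /* fetch_sub returns the old value: 1 means we dropped the
         * last reference and must tear the object down, exactly once. */
        if (atomic_fetch_sub(&i->refcnt, 1) == 1) {
            printf("downing %s\n", i->name);
            free(i);
        }
    }

    int main(void)
    {
        struct iface *i = malloc(sizeof(*i));

        if (!i)
            return 1;
        atomic_init(&i->refcnt, 1);
        strcpy(i->name, "ipx0");
        iface_hold(i);                  /* a second user appears */
        iface_put(i);                   /* ...and goes away again */
        iface_put(i);                   /* creator's put frees it */
        return 0;
    }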
*/ diff --git a/include/net/irda/ircomm_lmp.h b/include/net/irda/ircomm_lmp.h index fd738be7c792..ae02106be590 100644 --- a/include/net/irda/ircomm_lmp.h +++ b/include/net/irda/ircomm_lmp.h @@ -32,34 +32,7 @@ #define IRCOMM_LMP_H #include <net/irda/ircomm_core.h> -#include <net/irda/ircomm_event.h> int ircomm_open_lsap(struct ircomm_cb *self); -int ircomm_lmp_connect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info); -int ircomm_lmp_connect_response(struct ircomm_cb *self, struct sk_buff *skb); -int ircomm_lmp_disconnect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info); -int ircomm_lmp_data_request(struct ircomm_cb *self, struct sk_buff *skb, - int clen); -int ircomm_lmp_control_request(struct ircomm_cb *self, - struct sk_buff *userdata); -int ircomm_lmp_data_indication(void *instance, void *sap, - struct sk_buff *skb); -void ircomm_lmp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -void ircomm_lmp_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -void ircomm_lmp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); #endif diff --git a/include/net/irda/ircomm_param.h b/include/net/irda/ircomm_param.h index 5f46fb2581e4..e6678800c41f 100644 --- a/include/net/irda/ircomm_param.h +++ b/include/net/irda/ircomm_param.h @@ -141,7 +141,6 @@ struct ircomm_params { struct ircomm_tty_cb; /* Forward decl. */ -int ircomm_param_flush(struct ircomm_tty_cb *self); int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush); extern pi_param_info_t ircomm_param_info; diff --git a/include/net/irda/ircomm_ttp.h b/include/net/irda/ircomm_ttp.h index 014d7f56eb04..403081ed725c 100644 --- a/include/net/irda/ircomm_ttp.h +++ b/include/net/irda/ircomm_ttp.h @@ -32,39 +32,8 @@ #define IRCOMM_TTP_H #include <net/irda/ircomm_core.h> -#include <net/irda/ircomm_event.h> int ircomm_open_tsap(struct ircomm_cb *self); -int ircomm_ttp_connect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info); -int ircomm_ttp_connect_response(struct ircomm_cb *self, struct sk_buff *skb); -int ircomm_ttp_disconnect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info); -int ircomm_ttp_data_request(struct ircomm_cb *self, struct sk_buff *skb, - int clen); -int ircomm_ttp_control_request(struct ircomm_cb *self, - struct sk_buff *userdata); -int ircomm_ttp_data_indication(void *instance, void *sap, - struct sk_buff *skb); -void ircomm_ttp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -void ircomm_ttp_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -void ircomm_ttp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); -void ircomm_ttp_flow_indication(void *instance, void *sap, LOCAL_FLOW cmd); #endif - - - diff --git a/include/net/irda/ircomm_tty.h b/include/net/irda/ircomm_tty.h index 2ab995d723ac..87699cb4ef8c 100644 --- a/include/net/irda/ircomm_tty.h +++ b/include/net/irda/ircomm_tty.h @@ -118,10 +118,8 @@ struct ircomm_tty_cb { }; void ircomm_tty_start(struct tty_struct *tty); -void ircomm_tty_stop(struct tty_struct *tty); void 
ircomm_tty_check_modem_status(struct ircomm_tty_cb *self); -extern void ircomm_tty_change_speed(struct ircomm_tty_cb *self); extern int ircomm_tty_tiocmget(struct tty_struct *tty, struct file *file); extern int ircomm_tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); diff --git a/include/net/irda/ircomm_tty_attach.h b/include/net/irda/ircomm_tty_attach.h index 7f4656165adc..f91a5695aa44 100644 --- a/include/net/irda/ircomm_tty_attach.h +++ b/include/net/irda/ircomm_tty_attach.h @@ -67,7 +67,6 @@ struct ircomm_tty_info { }; extern char *ircomm_state[]; -extern char *ircomm_event[]; extern char *ircomm_tty_state[]; int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h index 2b245440b551..3b216f186f18 100644 --- a/include/net/irda/irda_device.h +++ b/include/net/irda/irda_device.h @@ -227,8 +227,6 @@ static inline int irda_device_txqueue_empty(const struct net_device *dev) return (skb_queue_len(&dev->qdisc->q) == 0); } int irda_device_set_raw_mode(struct net_device* self, int status); -int irda_device_set_dtr_rts(struct net_device *dev, int dtr, int rts); -int irda_device_change_speed(struct net_device *dev, __u32 speed); struct net_device *alloc_irdadev(int sizeof_priv); /* Dongle interface */ diff --git a/include/net/irda/iriap.h b/include/net/irda/iriap.h index ccd692fdef36..2007c5a0a43f 100644 --- a/include/net/irda/iriap.h +++ b/include/net/irda/iriap.h @@ -97,22 +97,12 @@ void iriap_close(struct iriap_cb *self); int iriap_getvaluebyclass_request(struct iriap_cb *self, __u32 saddr, __u32 daddr, char *name, char *attr); -void iriap_getvaluebyclass_confirm(struct iriap_cb *self, struct sk_buff *skb); void iriap_connect_request(struct iriap_cb *self); void iriap_send_ack( struct iriap_cb *self); void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb); void iriap_register_server(void); -void iriap_watchdog_timer_expired(void *data); - -static inline void iriap_start_watchdog_timer(struct iriap_cb *self, - int timeout) -{ - irda_start_timer(&self->watchdog_timer, timeout, self, - iriap_watchdog_timer_expired); -} - #endif diff --git a/include/net/irda/irlan_client.h b/include/net/irda/irlan_client.h index fb9389cebdd8..736dabe211e3 100644 --- a/include/net/irda/irlan_client.h +++ b/include/net/irda/irlan_client.h @@ -33,12 +33,9 @@ #include <net/irda/irias_object.h> #include <net/irda/irlan_event.h> -void irlan_client_start_kick_timer(struct irlan_cb *self, int timeout); void irlan_client_discovery_indication(discinfo_t *, DISCOVERY_MODE, void *); void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr); -void irlan_client_open_ctrl_tsap( struct irlan_cb *self); - void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb); void irlan_client_get_value_confirm(int result, __u16 obj_id, struct ias_value *value, void *priv); diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 2c1548bdc87d..1c73bdbc3eb3 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -190,7 +190,6 @@ struct irlan_cb { struct timer_list watchdog_timer; }; -struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr); void irlan_close(struct irlan_cb *self); void irlan_close_tsaps(struct irlan_cb *self); @@ -204,13 +203,11 @@ int irlan_run_ctrl_tx_queue(struct irlan_cb *self); struct irlan_cb *irlan_get_any(void); void irlan_get_provider_info(struct irlan_cb *self); -void 
irlan_get_unicast_addr(struct irlan_cb *self); void irlan_get_media_char(struct irlan_cb *self); void irlan_open_data_channel(struct irlan_cb *self); void irlan_close_data_channel(struct irlan_cb *self); void irlan_set_multicast_filter(struct irlan_cb *self, int status); void irlan_set_broadcast_filter(struct irlan_cb *self, int status); -void irlan_open_unicast_addr(struct irlan_cb *self); int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value); int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value); diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index c7c2aecaf637..f55e86e75030 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -253,10 +253,8 @@ int irlap_validate_ns_received(struct irlap_cb *, int ns); int irlap_generate_rand_time_slot(int S, int s); void irlap_initiate_connection_state(struct irlap_cb *); void irlap_flush_all_queues(struct irlap_cb *); -void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now); void irlap_wait_min_turn_around(struct irlap_cb *, struct qos_info *); -void irlap_init_qos_capabilities(struct irlap_cb *, struct qos_info *); void irlap_apply_default_connection_parameters(struct irlap_cb *self); void irlap_apply_connection_parameters(struct irlap_cb *self, int now); diff --git a/include/net/irda/irlap_frame.h b/include/net/irda/irlap_frame.h index 94c83bc2c4cc..3452ae257c84 100644 --- a/include/net/irda/irlap_frame.h +++ b/include/net/irda/irlap_frame.h @@ -133,7 +133,6 @@ void irlap_send_data_secondary_final(struct irlap_cb *, struct sk_buff *); void irlap_resend_rejected_frames(struct irlap_cb *, int command); void irlap_resend_rejected_frame(struct irlap_cb *self, int command); -void irlap_send_i_frame(struct irlap_cb *, struct sk_buff *, int command); void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb, __u8 caddr, int command); diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index cf8d5af5cb36..86aefb1fda5e 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -242,12 +242,9 @@ int irlmp_connless_data_request(struct lsap_cb *, struct sk_buff *, __u8); void irlmp_connless_data_indication(struct lsap_cb *, struct sk_buff *); #endif /* CONFIG_IRDA_ULTRA */ -void irlmp_status_request(void); void irlmp_status_indication(struct lap_cb *, LINK_STATUS link, LOCK_STATUS lock); void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow); -int irlmp_slsap_inuse(__u8 slsap); -__u8 irlmp_find_free_slsap(void); LM_REASON irlmp_convert_lap_reason(LAP_REASON); static inline __u32 irlmp_get_saddr(const struct lsap_cb *self) diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h index 600d5411da17..a899e5837be8 100644 --- a/include/net/irda/irttp.h +++ b/include/net/irda/irttp.h @@ -167,9 +167,6 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *skb, int priority); void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow); -void irttp_status_indication(void *instance, - LINK_STATUS link, LOCK_STATUS lock); -void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow); struct tsap_cb *irttp_dup(struct tsap_cb *self, void *instance); static __inline __u32 irttp_get_saddr(struct tsap_cb *self) diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h index 3938975c0764..3a605d37ddbf 100644 --- a/include/net/irda/parameters.h +++ b/include/net/irda/parameters.h @@ -90,11 +90,9 @@ typedef struct { } 
pi_param_info_t; int irda_param_pack(__u8 *buf, char *fmt, ...); -int irda_param_unpack(__u8 *buf, char *fmt, ...); int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len, pi_param_info_t *info); -int irda_param_extract(void *self, __u8 *buf, int len, pi_param_info_t *info); int irda_param_extract_all(void *self, __u8 *buf, int len, pi_param_info_t *info); diff --git a/include/net/irda/qos.h b/include/net/irda/qos.h index 41ee53f94c52..9ae3d6bc2423 100644 --- a/include/net/irda/qos.h +++ b/include/net/irda/qos.h @@ -87,7 +87,6 @@ void irda_init_max_qos_capabilies(struct qos_info *qos); void irda_qos_compute_intersection(struct qos_info *, struct qos_info *); __u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time); -__u32 irlap_requested_line_capacity(struct qos_info *qos); void irda_qos_bits_to_value(struct qos_info *qos); diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h index 45f020aa69d5..df83f69d2de4 100644 --- a/include/net/llc_c_ac.h +++ b/include/net/llc_c_ac.h @@ -96,7 +96,6 @@ extern int llc_conn_ac_data_ind(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_disc_ind(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_rst_ind(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_rst_confirm(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_report_status(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_clear_remote_busy_if_f_eq_1(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2(struct sock* sk, @@ -107,8 +106,6 @@ extern int llc_conn_ac_send_dm_rsp_f_set_p(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_send_dm_rsp_f_set_1(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_send_dm_rsp_f_set_f_flag(struct sock* sk, - struct sk_buff *skb); extern int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock* sk, @@ -116,11 +113,6 @@ extern int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock* sk, extern int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_send_i_cmd_p_set_1(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_send_i_cmd_p_set_0(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_resend_i_cmd_p_set_1(struct sock* sk, - struct sk_buff *skb); -extern int llc_conn_ac_resend_i_cmd_p_set_1_or_send_rr(struct sock* sk, - struct sk_buff *skb); extern int llc_conn_ac_send_i_xxx_x_set_0(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_resend_i_xxx_x_set_0(struct sock* sk, struct sk_buff *skb); @@ -145,8 +137,6 @@ extern int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_send_rr_cmd_p_set_1(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_send_ack_cmd_p_set_1(struct sock* sk, - struct sk_buff *skb); extern int llc_conn_ac_send_rr_rsp_f_set_1(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_send_ack_rsp_f_set_1(struct sock* sk, @@ -157,8 +147,6 @@ extern int llc_conn_ac_send_ack_xxx_x_set_0(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_send_ua_rsp_f_set_f_flag(struct sock* sk, - struct sk_buff *skb); extern int llc_conn_ac_send_ua_rsp_f_set_p(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_set_s_flag_0(struct sock* sk, struct sk_buff *skb); @@ -183,7 +171,6 @@ extern int 
llc_conn_ac_set_data_flag_1(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_set_data_flag_1_if_data_flag_eq_0(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_set_p_flag_0(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_set_p_flag_1(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_set_remote_busy_0(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_set_retry_cnt_0(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_set_cause_flag_0(struct sock* sk, struct sk_buff *skb); @@ -195,20 +182,14 @@ extern int llc_conn_ac_set_vs_0(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_set_vs_nr(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_rst_vs(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_upd_vs(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_set_f_flag_p(struct sock* sk, struct sk_buff *skb); extern int llc_conn_disc(struct sock* sk, struct sk_buff *skb); extern int llc_conn_reset(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_disc_confirm(struct sock* sk, struct sk_buff *skb); extern u8 llc_circular_between(u8 a, u8 b, u8 c); extern int llc_conn_ac_send_ack_if_needed(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_inc_npta_value(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_adjust_npta_by_rr(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_adjust_npta_by_rnr(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_rst_sendack_flag(struct sock* sk, struct sk_buff *skb); -extern int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock* sk, - struct sk_buff *skb); -extern int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock* sk, - struct sk_buff *skb); extern int llc_conn_ac_send_i_rsp_as_ack(struct sock* sk, struct sk_buff *skb); extern int llc_conn_ac_send_i_as_ack(struct sock* sk, struct sk_buff *skb); diff --git a/include/net/llc_c_ev.h b/include/net/llc_c_ev.h index 3deeb4235605..23a409381fa9 100644 --- a/include/net/llc_c_ev.h +++ b/include/net/llc_c_ev.h @@ -129,11 +129,9 @@ typedef int (*llc_conn_ev_t)(struct sock *sk, struct sk_buff *skb); typedef int (*llc_conn_ev_qfyr_t)(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_conn_req(struct sock *sk, struct sk_buff *skb); -extern int llc_conn_ev_conn_resp(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_data_req(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_disc_req(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_rst_req(struct sock *sk, struct sk_buff *skb); -extern int llc_conn_ev_rst_resp(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_local_busy_detected(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_local_busy_cleared(struct sock *sk, struct sk_buff *skb); @@ -162,7 +160,6 @@ extern int llc_conn_ev_rx_xxx_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_rx_xxx_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb); -extern int llc_conn_ev_rx_xxx_yyy(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_rx_zzz_rsp_fbit_set_x_inval_nr(struct sock *sk, @@ -171,13 +168,10 @@ extern int llc_conn_ev_p_tmr_exp(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_ack_tmr_exp(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_rej_tmr_exp(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_busy_tmr_exp(struct sock *sk, 
struct sk_buff *skb); -extern int llc_conn_ev_any_tmr_exp(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_sendack_tmr_exp(struct sock *sk, struct sk_buff *skb); /* NOT_USED functions and their variations */ extern int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb); -extern int llc_conn_ev_rx_xxx_cmd_pbit_set_0(struct sock *sk, - struct sk_buff *skb); extern int llc_conn_ev_rx_xxx_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns(struct sock *sk, @@ -252,20 +246,14 @@ extern int llc_conn_ev_qlfy_cause_flag_eq_1(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_qlfy_cause_flag_eq_0(struct sock *sk, struct sk_buff *skb); -extern int llc_conn_ev_qlfy_init_p_f_cycle(struct sock *sk, - struct sk_buff *skb); extern int llc_conn_ev_qlfy_set_status_conn(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_qlfy_set_status_disc(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_qlfy_set_status_failed(struct sock *sk, struct sk_buff *skb); -extern int llc_conn_ev_qlfy_set_status_impossible(struct sock *sk, - struct sk_buff *skb); extern int llc_conn_ev_qlfy_set_status_remote_busy(struct sock *sk, struct sk_buff *skb); -extern int llc_conn_ev_qlfy_set_status_received(struct sock *sk, - struct sk_buff *skb); extern int llc_conn_ev_qlfy_set_status_refuse(struct sock *sk, struct sk_buff *skb); extern int llc_conn_ev_qlfy_set_status_conflict(struct sock *sk, diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 06fb01bccdbf..bbc0af5232ed 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -91,7 +91,6 @@ extern struct sock *llc_sk_alloc(int family, int priority); extern void llc_sk_free(struct sock *sk); extern void llc_sk_reset(struct sock *sk); -extern int llc_sk_init(struct sock *sk); /* Access to a connection */ extern int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); @@ -106,8 +105,6 @@ extern int llc_conn_remove_acked_pdus(struct sock *conn, u8 nr, extern struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, struct llc_addr *laddr); -extern struct sock *llc_lookup_listener(struct llc_sap *sap, - struct llc_addr *laddr); extern void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk); extern void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk); diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index cbbfc373447d..f45c37d89cf7 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -419,7 +419,6 @@ struct llc_frmr_info { extern void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 type); extern void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value); extern void llc_pdu_decode_pf_bit(struct sk_buff *skb, u8 *pf_bit); -extern void llc_pdu_decode_cr_bit(struct sk_buff *skb, u8 *cr_bit); extern void llc_pdu_init_as_disc_cmd(struct sk_buff *skb, u8 p_bit); extern void llc_pdu_init_as_i_cmd(struct sk_buff *skb, u8 p_bit, u8 ns, u8 nr); extern void llc_pdu_init_as_rej_cmd(struct sk_buff *skb, u8 p_bit, u8 nr); diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index dc60acce8e55..353baaa627f3 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -14,7 +14,6 @@ struct llc_sap; struct sk_buff; -extern void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb); extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); extern void llc_save_primitive(struct sk_buff* skb, unsigned char prim); extern struct sk_buff *llc_alloc_frame(void); diff 
--git a/include/net/rose.h b/include/net/rose.h index f3382358e5ee..32fb79817d31 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -162,7 +162,6 @@ extern int rose_rx_call_request(struct sk_buff *, struct net_device *, struct r extern void rose_destroy_socket(struct sock *); /* rose_dev.c */ -extern int rose_rx_ip(struct sk_buff *, struct net_device *); extern void rose_setup(struct net_device *); /* rose_in.c */ @@ -170,15 +169,10 @@ extern int rose_process_rx_frame(struct sock *, struct sk_buff *); /* rose_link.c */ extern void rose_start_ftimer(struct rose_neigh *); -extern void rose_start_t0timer(struct rose_neigh *); extern void rose_stop_ftimer(struct rose_neigh *); extern void rose_stop_t0timer(struct rose_neigh *); extern int rose_ftimer_running(struct rose_neigh *); -extern int rose_t0timer_running(struct rose_neigh *); extern void rose_link_rx_restart(struct sk_buff *, struct rose_neigh *, unsigned short); -extern void rose_transmit_restart_request(struct rose_neigh *); -extern void rose_transmit_restart_confirmation(struct rose_neigh *); -extern void rose_transmit_diagnostic(struct rose_neigh *, unsigned char); extern void rose_transmit_clear_request(struct rose_neigh *, unsigned int, unsigned char, unsigned char); extern void rose_transmit_link(struct sk_buff *, struct rose_neigh *); @@ -205,7 +199,6 @@ extern void rose_link_device_down(struct net_device *); extern struct net_device *rose_dev_first(void); extern struct net_device *rose_dev_get(rose_address *); extern struct rose_route *rose_route_free_lci(unsigned int, struct rose_neigh *); -extern struct net_device *rose_ax25_dev_get(char *); extern struct rose_neigh *rose_get_neigh(rose_address *, unsigned char *, unsigned char *); extern int rose_rt_ioctl(unsigned int, void __user *); extern void rose_link_failed(ax25_cb *, int); @@ -220,7 +213,6 @@ extern int rose_validate_nr(struct sock *, unsigned short); extern void rose_write_internal(struct sock *, int); extern int rose_decode(struct sk_buff *, int *, int *, int *, int *, int *); extern int rose_parse_facilities(unsigned char *, struct rose_facilities_struct *); -extern int rose_create_facilities(unsigned char *, rose_cb *); extern void rose_disconnect(struct sock *, int, int, int); /* rose_timer.c */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 208c6aea06a1..f34b3aa1aa08 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -584,26 +584,20 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) /* WARNING: Do not change the layout of the members in sctp_sock! */ struct sctp_sock { - struct sock sk; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct ipv6_pinfo *pinet6; -#endif /* CONFIG_IPV6 */ - struct inet_opt inet; + struct inet_sock inet; struct sctp_opt sctp; }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct sctp6_sock { - struct sock sk; - struct ipv6_pinfo *pinet6; - struct inet_opt inet; + struct inet_sock inet; struct sctp_opt sctp; struct ipv6_pinfo inet6; }; #endif /* CONFIG_IPV6 */ #define sctp_sk(__sk) (&((struct sctp_sock *)__sk)->sctp) -#define sctp_opt2sk(__sp) &container_of(__sp, struct sctp_sock, sctp)->sk +#define sctp_opt2sk(__sp) &container_of(__sp, struct sctp_sock, sctp)->inet.sk /* Is a socket of this style? 
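sctp_opt2sk() above now routes through container_of(): from a pointer to the embedded sctp member it recovers the enclosing sctp_sock, then takes the struct sock nested inside inet. A userspace sketch of that pointer arithmetic, with illustrative members:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct sock { int num; };
    struct inet_sock { struct sock sk; };
    struct sctp_opt { int assoc_id; };

    struct sctp_sock {
        struct inet_sock inet;
        struct sctp_opt sctp;
    };

    int main(void)
    {
        struct sctp_sock s = { .inet.sk.num = 9899 };
        struct sctp_opt *sp = &s.sctp;

        /* Equivalent of sctp_opt2sk(sp): back out to the enclosing
         * sctp_sock, then down into the embedded struct sock. */
        struct sock *sk = &container_of(sp, struct sctp_sock, sctp)->inet.sk;

        printf("local port %d\n", sk->num);
        return 0;
    }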
*/ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) diff --git a/include/net/tcp.h b/include/net/tcp.h index f5bac4c9b704..932b700b26be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -196,7 +196,7 @@ struct tcp_tw_bucket { unsigned char tw_rcv_wscale; __u16 tw_sport; /* Socket demultiplex comparisons on incoming packets. */ - /* these five are in inet_opt */ + /* these five are in inet_sock */ __u32 tw_daddr __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); __u32 tw_rcv_saddr; @@ -315,7 +315,6 @@ static inline void tcp_tw_put(struct tcp_tw_bucket *tw) extern atomic_t tcp_orphan_count; extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo); extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); @@ -2020,21 +2019,6 @@ static inline void tcp_westwood_update_rtt(struct tcp_opt *tp, __u32 rtt_seq) tp->westwood.rtt = rtt_seq; } -void __tcp_westwood_fast_bw(struct sock *, struct sk_buff *); -void __tcp_westwood_slow_bw(struct sock *, struct sk_buff *); - -static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb) -{ - if (tcp_is_westwood(tcp_sk(sk))) - __tcp_westwood_fast_bw(sk, skb); -} - -static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) -{ - if (tcp_is_westwood(tcp_sk(sk))) - __tcp_westwood_slow_bw(sk, skb); -} - static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp) { return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) / diff --git a/include/net/x25.h b/include/net/x25.h index 53a19c468c1b..4d64a0bc2b0d 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -162,7 +162,6 @@ extern int x25_addr_ntoa(unsigned char *, struct x25_address *, struct x25_address *); extern int x25_addr_aton(unsigned char *, struct x25_address *, struct x25_address *); -extern unsigned int x25_new_lci(struct x25_neigh *); extern struct sock *x25_find_socket(unsigned int, struct x25_neigh *); extern void x25_destroy_socket(struct sock *); extern int x25_rx_call_request(struct sk_buff *, struct x25_neigh *, unsigned int); @@ -171,7 +170,6 @@ extern void x25_kill_by_neigh(struct x25_neigh *); /* x25_dev.c */ extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); -extern int x25_llc_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); extern void x25_establish_link(struct x25_neigh *); extern void x25_terminate_link(struct x25_neigh *); @@ -191,9 +189,6 @@ extern void x25_link_device_up(struct net_device *); extern void x25_link_device_down(struct net_device *); extern void x25_link_established(struct x25_neigh *); extern void x25_link_terminated(struct x25_neigh *); -extern void x25_transmit_restart_request(struct x25_neigh *); -extern void x25_transmit_restart_confirmation(struct x25_neigh *); -extern void x25_transmit_diagnostic(struct x25_neigh *, unsigned char); extern void x25_transmit_clear_request(struct x25_neigh *, unsigned int, unsigned char); extern void x25_transmit_link(struct sk_buff *, struct x25_neigh *); extern int x25_subscr_ioctl(unsigned int, void __user *); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7078e743b732..d29831f449c1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -782,7 +782,6 @@ struct xfrm6_tunnel { extern void xfrm_init(void); extern void xfrm4_init(void); -extern void xfrm4_fini(void); extern void xfrm6_init(void); extern 
void xfrm6_fini(void); extern void xfrm_state_init(void); @@ -843,7 +842,6 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig } #endif -void xfrm_policy_init(void); struct xfrm_policy *xfrm_policy_alloc(int gfp); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); @@ -858,12 +856,9 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family); extern void xfrm_policy_flush(void); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl); extern int xfrm_flush_bundles(void); extern wait_queue_head_t km_waitq; -extern void km_state_expired(struct xfrm_state *x, int hard); -extern int km_query(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *pol); extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard); @@ -875,7 +870,6 @@ extern int xfrm_count_auth_supported(void); extern int xfrm_count_enc_supported(void); extern struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx); extern struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx); -extern struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx); extern struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id); extern struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id); extern struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id); diff --git a/include/rxrpc/call.h b/include/rxrpc/call.h index f9b0a7cbd0a3..f48f27e9e0ab 100644 --- a/include/rxrpc/call.h +++ b/include/rxrpc/call.h @@ -20,9 +20,6 @@ #define RXRPC_CALL_ACK_WINDOW_SIZE 16 extern unsigned rxrpc_call_rcv_timeout; /* receive activity timeout (secs) */ -extern unsigned rxrpc_call_acks_timeout; /* pending ACK (retransmit) timeout (secs) */ -extern unsigned rxrpc_call_dfr_ack_timeout; /* deferred ACK timeout (secs) */ -extern unsigned short rxrpc_call_max_resend; /* maximum consecutive resend count */ /* application call state * - only state 0 and ffff are reserved, the state is set to 1 after an opid is received @@ -210,8 +207,6 @@ extern int rxrpc_call_write_data(struct rxrpc_call *call, int dup_data, size_t *size_sent); -extern int rxrpc_call_flush(struct rxrpc_call *call); - extern void rxrpc_call_handle_error(struct rxrpc_call *conn, int local, int errno); #endif /* _LINUX_RXRPC_CALL_H */ diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index f9224d3635be..1447f0aaa0eb 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -124,6 +124,4 @@ struct rxrpc_ackpacket } __attribute__((packed)); -extern const char *rxrpc_acks[]; - #endif /* _LINUX_RXRPC_PACKET_H */ diff --git a/include/rxrpc/transport.h b/include/rxrpc/transport.h index 486371459550..7c7b9683fa39 100644 --- a/include/rxrpc/transport.h +++ b/include/rxrpc/transport.h @@ -103,6 +103,4 @@ extern int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans, struct rxrpc_message *msg, int error); -extern void rxrpc_clear_transport(struct rxrpc_transport *trans); - #endif /* _LINUX_RXRPC_TRANSPORT_H */ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 8383c1d581cd..cd888bcf67d7 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -199,7 +199,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us, * aarp_proxy_probe_network. 
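struct rxrpc_ackpacket, left intact by the packet.h hunk above, is declared __attribute__((packed)), the usual treatment for wire-format structs whose in-memory layout must match the bytes on the network. A quick illustration of what packing changes:

    #include <stdint.h>
    #include <stdio.h>

    struct loose { uint8_t flags; uint32_t serial; };
    struct wire  { uint8_t flags; uint32_t serial; } __attribute__((packed));

    int main(void)
    {
        /* The padding after 'flags' disappears in the packed variant. */
        printf("loose=%zu packed=%zu\n", sizeof(struct loose),
               sizeof(struct wire));          /* typically 8 vs 5 */
        return 0;
    }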
*/ -void aarp_send_probe(struct net_device *dev, struct atalk_addr *us) +static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us) { struct elapaarp *eah; int len = dev->hard_header_len + sizeof(*eah) + aarp_dl->header_length; @@ -429,7 +429,7 @@ static struct atalk_addr *__aarp_proxy_find(struct net_device *dev, * Probe a Phase 1 device or a device that requires its Net:Node to * be set via an ioctl. */ -void aarp_send_probe_phase1(struct atalk_iface *iface) +static void aarp_send_probe_phase1(struct atalk_iface *iface) { struct ifreq atreq; struct sockaddr_at *sa = (struct sockaddr_at *)&atreq.ifr_addr; diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 4d20501fad77..dc4048dd98c1 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -205,21 +205,21 @@ out: return 0; } -struct seq_operations atalk_seq_interface_ops = { +static struct seq_operations atalk_seq_interface_ops = { .start = atalk_seq_interface_start, .next = atalk_seq_interface_next, .stop = atalk_seq_interface_stop, .show = atalk_seq_interface_show, }; -struct seq_operations atalk_seq_route_ops = { +static struct seq_operations atalk_seq_route_ops = { .start = atalk_seq_route_start, .next = atalk_seq_route_next, .stop = atalk_seq_route_stop, .show = atalk_seq_route_show, }; -struct seq_operations atalk_seq_socket_ops = { +static struct seq_operations atalk_seq_socket_ops = { .start = atalk_seq_socket_start, .next = atalk_seq_socket_next, .stop = atalk_seq_socket_stop, diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 14a7a0198313..d903b5070f06 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -612,7 +612,7 @@ out: * Called when a device is downed. Just throw away any routes * via it. */ -void atrtr_device_down(struct net_device *dev) +static void atrtr_device_down(struct net_device *dev) { struct atalk_route **r = &atalk_routes; struct atalk_route *tmp; @@ -1854,12 +1854,12 @@ static struct notifier_block ddp_notifier = { .notifier_call = ddp_device_event, }; -struct packet_type ltalk_packet_type = { +static struct packet_type ltalk_packet_type = { .type = __constant_htons(ETH_P_LOCALTALK), .func = ltalk_rcv, }; -struct packet_type ppptalk_packet_type = { +static struct packet_type ppptalk_packet_type = { .type = __constant_htons(ETH_P_PPPTALK), .func = atalk_rcv, }; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 8d1315ffdcd0..1d7b66a457f9 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -755,7 +755,7 @@ out: return res; } -int ax25_create(struct socket *sock, int protocol) +static int ax25_create(struct socket *sock, int protocol) { struct sock *sk; ax25_cb *ax25; diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index f4fa6dfb846e..4db0a89c4b3b 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -121,26 +121,6 @@ int ax25cmp(ax25_address *a, ax25_address *b) } /* - * Compare two AX.25 digipeater paths. 
- */ -int ax25digicmp(ax25_digi *digi1, ax25_digi *digi2) -{ - int i; - - if (digi1->ndigi != digi2->ndigi) - return 1; - - if (digi1->lastrepeat != digi2->lastrepeat) - return 1; - - for (i = 0; i < digi1->ndigi; i++) - if (ax25cmp(&digi1->calls[i], &digi2->calls[i]) != 0) - return 1; - - return 0; -} - -/* * Given an AX.25 address pull of to, from, digi list, command/response and the start of data * */ diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index 89c07ce023f2..10ffd2beba3f 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -174,7 +174,7 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev) return res; } -void ax25_dev_dama_on(ax25_dev *ax25_dev) +static void ax25_dev_dama_on(ax25_dev *ax25_dev) { if (ax25_dev == NULL) return; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5a3a14a72e6d..16c4234cbe12 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -208,7 +208,7 @@ unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev) return htons(ETH_P_802_2); } -int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) +static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) { struct ethhdr *eth = eth_hdr(skb); memcpy(haddr, eth->h_source, ETH_ALEN); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index f3721ac39144..8fe95800fe83 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -355,7 +355,10 @@ config IP_TCPDIAG default y ---help--- Support for TCP socket monitoring interface used by native Linux - tools such as ss. + tools such as ss. ss is included in iproute2, currently downloadable + at <http://developer.osdl.org/dev/iproute2>. If you want IPv6 support + and have selected IPv6 as a module, you need to build this as a + module too. If unsure, say Y. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 56872058ebce..4ec92ccca253 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -131,7 +131,7 @@ static spinlock_t inetsw_lock = SPIN_LOCK_UNLOCKED; void inet_sock_destruct(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_error_queue); @@ -173,7 +173,7 @@ void inet_sock_destruct(struct sock *sk) static int inet_autobind(struct sock *sk) { - struct inet_opt *inet; + struct inet_sock *inet; /* We may need to bind the socket. 
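inet_autobind(), continuing below, takes the socket lock before testing whether a local port has been assigned, so two paths racing on first use cannot both bind. The shape of that check, sketched with a pthread mutex standing in for lock_sock()/release_sock():

    #include <pthread.h>
    #include <stdio.h>

    struct psock {
        pthread_mutex_t lock;
        unsigned short num;              /* 0: no local port bound yet */
    };

    static int autobind(struct psock *sk)
    {
        pthread_mutex_lock(&sk->lock);   /* lock_sock(sk) */
        if (sk->num == 0)
            sk->num = 32768;             /* grab an ephemeral port */
        pthread_mutex_unlock(&sk->lock); /* release_sock(sk) */
        return sk->num ? 0 : -1;
    }

    int main(void)
    {
        struct psock sk = { PTHREAD_MUTEX_INITIALIZER, 0 };

        autobind(&sk);
        printf("bound to %u\n", sk.num);
        return 0;
    }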
*/ lock_sock(sk); inet = inet_sk(sk); @@ -232,7 +232,7 @@ static int inet_create(struct socket *sock, int protocol) struct sock *sk; struct list_head *p; struct inet_protosw *answer; - struct inet_opt *inet; + struct inet_sock *inet; struct proto *answer_prot; unsigned char answer_flags; char answer_no_check; @@ -389,7 +389,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); unsigned short snum; int chk_addr_ret; int err; @@ -623,7 +623,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sock *sk = sock->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; sin->sin_family = AF_INET; @@ -659,7 +659,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, } -ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) +static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { struct sock *sk = sock->sk; @@ -1011,7 +1011,7 @@ static int __init init_ipv4_mibs(void) return 0; } -int ipv4_proc_init(void); +static int ipv4_proc_init(void); extern void ipfrag_init(void); static int __init inet_init(void) @@ -1136,7 +1136,7 @@ extern void tcp4_proc_exit(void); extern int udp4_proc_init(void); extern void udp4_proc_exit(void); -int __init ipv4_proc_init(void) +static int __init ipv4_proc_init(void) { int rc = 0; @@ -1166,7 +1166,7 @@ out_raw: } #else /* CONFIG_PROC_FS */ -int __init ipv4_proc_init(void) +static int __init ipv4_proc_init(void) { return 0; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9b5d1e813652..9b4b93364164 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -213,6 +213,9 @@ int arp_mc_map(u32 addr, u8 *haddr, struct net_device *dev, int dir) case ARPHRD_IEEE802_TR: ip_tr_mc_map(addr, haddr); return 0; + case ARPHRD_INFINIBAND: + ip_ib_mc_map(addr, haddr); + return 0; default: if (dir) { memcpy(haddr, dev->broadcast, dev->addr_len); @@ -704,7 +707,7 @@ static void parp_redo(struct sk_buff *skb) * Process an arp request. */ -int arp_process(struct sk_buff *skb) +static int arp_process(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev = in_dev_get(dev); @@ -961,7 +964,7 @@ out_of_mem: * Set (create) an ARP cache entry. 
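The ipv4_proc_init() hunks above keep the existing pattern of a no-op fallback when CONFIG_PROC_FS is off, so callers stay free of #ifdefs; only the linkage changes to static. A sketch of that compile-time stub pattern, with a made-up HAVE_PROCFS switch:

    #include <stdio.h>

    #define HAVE_PROCFS 1   /* flip to 0 to use the stub */

    #if HAVE_PROCFS
    static int proc_init(void)
    {
        puts("registering /proc entries");
        return 0;
    }
    #else
    static int proc_init(void)
    {
        return 0;           /* nothing to do without procfs */
    }
    #endif

    int main(void)
    {
        return proc_init(); /* the caller is identical either way */
    }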
*/ -int arp_req_set(struct arpreq *r, struct net_device * dev) +static int arp_req_set(struct arpreq *r, struct net_device * dev) { u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; @@ -1075,7 +1078,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -int arp_req_delete(struct arpreq *r, struct net_device * dev) +static int arp_req_delete(struct arpreq *r, struct net_device * dev) { int err; u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; @@ -1207,7 +1210,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo return NOTIFY_DONE; } -struct notifier_block arp_netdev_notifier = { +static struct notifier_block arp_netdev_notifier = { .notifier_call = arp_netdev_event, }; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f84e1041d2d9..b1db561f2542 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -22,7 +22,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; struct rtable *rt; u32 saddr; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cb697aba93a2..7b6054d49271 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -380,7 +380,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, return NULL; } -int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct rtattr **rta = arg; struct in_device *in_dev; @@ -412,7 +412,7 @@ out: return -EADDRNOTAVAIL; } -int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct rtattr **rta = arg; struct net_device *dev; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4ccd02f535f7..563e7d612706 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -75,7 +75,7 @@ struct fib_table *__fib_new_table(int id) #endif /* CONFIG_IP_MULTIPLE_TABLES */ -void fib_flush(void) +static void fib_flush(void) { int flushed = 0; #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -585,11 +585,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo return NOTIFY_DONE; } -struct notifier_block fib_inetaddr_notifier = { +static struct notifier_block fib_inetaddr_notifier = { .notifier_call =fib_inetaddr_event, }; -struct notifier_block fib_netdev_notifier = { +static struct notifier_block fib_netdev_notifier = { .notifier_call =fib_netdev_event, }; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index e4b3af075657..f6675eef21cf 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -245,12 +245,6 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return 0; } -u32 fib_rules_map_destination(u32 daddr, struct fib_result *res) -{ - u32 mask = inet_make_mask(res->prefixlen); - return (daddr&~mask)|res->fi->fib_nh->nh_gw; -} - #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) { @@ -368,7 +362,7 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, voi } -struct notifier_block fib_rules_notifier = { +static struct notifier_block fib_rules_notifier = { .notifier_call =fib_rules_event, }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 74089ab9f74e..dc2e14faedb8 100644 --- a/net/ipv4/icmp.c +++ 
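arp_netdev_notifier and the two fib notifier blocks above are callbacks hung on notifier chains and invoked on device or address events; this merge only narrows their linkage. A minimal userspace sketch of the chain pattern itself, with illustrative names throughout:

    #include <stdio.h>

    struct notifier_blk {
        int (*call)(struct notifier_blk *nb, unsigned long event, void *data);
        struct notifier_blk *next;
    };

    static struct notifier_blk *chain;

    static void notifier_register(struct notifier_blk *nb)
    {
        nb->next = chain;
        chain = nb;
    }

    static void notify(unsigned long event, void *data)
    {
        for (struct notifier_blk *nb = chain; nb; nb = nb->next)
            nb->call(nb, event, data);
    }

    static int on_netdev_event(struct notifier_blk *nb, unsigned long ev, void *d)
    {
        (void)nb;
        printf("netdev event %lu on %s\n", ev, (const char *)d);
        return 0;
    }

    static struct notifier_blk netdev_notifier = { .call = on_netdev_event };

    int main(void)
    {
        notifier_register(&netdev_notifier);
        notify(1 /* NETDEV_UP-ish */, "eth0");
        return 0;
    }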
b/net/ipv4/icmp.c @@ -327,8 +327,8 @@ static void icmp_out_count(int type) * Checksum each fragment, and on the first include the headers and final * checksum. */ -int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, - struct sk_buff *skb) +static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, + struct sk_buff *skb) { struct icmp_bxm *icmp_param = (struct icmp_bxm *)from; unsigned int csum; @@ -377,7 +377,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct sock *sk = icmp_socket->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = (struct rtable *)skb->dst; u32 daddr; @@ -1097,7 +1097,7 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { void __init icmp_init(struct net_proto_family *ops) { - struct inet_opt *inet; + struct inet_sock *inet; int i; for (i = 0; i < NR_CPUS; i++) { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index fc0c27f25f6a..1f3183168a90 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -143,8 +143,8 @@ static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); #endif static void ip_mc_clear_src(struct ip_mc_list *pmc); -int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta); +static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, + int sfcount, __u32 *psfsrc, int delta); static void ip_ma_put(struct ip_mc_list *im) { @@ -1384,8 +1384,8 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #define igmp_ifc_event(x) do { } while (0) #endif -int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, + int sfcount, __u32 *psfsrc, int delta) { struct ip_mc_list *pmc; int changerec = 0; @@ -1520,8 +1520,8 @@ static int sf_setstate(struct ip_mc_list *pmc) /* * Add multicast source filter list to the interface list */ -int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, + int sfcount, __u32 *psfsrc, int delta) { struct ip_mc_list *pmc; int isexclude; @@ -1617,7 +1617,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) u32 addr = imr->imr_multiaddr.s_addr; struct ip_mc_socklist *iml, *i; struct in_device *in_dev; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int count = 0; if (!MULTICAST(addr)) @@ -1667,8 +1667,8 @@ done: return err; } -int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, - struct in_device *in_dev) +static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, + struct in_device *in_dev) { int err; @@ -1691,7 +1691,7 @@ int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml, **imlp; rtnl_lock(); @@ -1734,7 +1734,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct u32 addr = mreqs->imr_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev = NULL; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; int i, j, rv; @@ -1852,7 +1852,7 @@ 
int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) u32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *newpsl, *psl; if (!MULTICAST(addr)) @@ -1922,7 +1922,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, u32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; if (!MULTICAST(addr)) @@ -1980,7 +1980,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, struct sockaddr_in *psin; u32 addr; struct ip_mc_socklist *pmc; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; psin = (struct sockaddr_in *)&gsf->gf_group; @@ -2033,7 +2033,7 @@ done: */ int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; @@ -2069,7 +2069,7 @@ int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif) void ip_mc_drop_socket(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; if (inet->mc_list == NULL) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2c768ecc0a84..ce1e88c4c248 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -304,7 +304,7 @@ static void ipgre_tunnel_uninit(struct net_device *dev) } -void ipgre_err(struct sk_buff *skb, u32 info) +static void ipgre_err(struct sk_buff *skb, u32 info) { #ifndef I_WISH_WORLD_WERE_PERFECT @@ -552,7 +552,7 @@ ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) return INET_ECN_encapsulate(tos, inner); } -int ipgre_rcv(struct sk_buff *skb) +static int ipgre_rcv(struct sk_buff *skb) { struct iphdr *iph; u8 *h; @@ -1279,7 +1279,7 @@ err1: goto out; } -void ipgre_fini(void) +static void ipgre_fini(void) { if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d223d1251c1f..74f840473d70 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -115,7 +115,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) return 0; } -static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst) +static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { int ttl = inet->uc_ttl; @@ -131,7 +131,7 @@ static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst) int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, struct ip_options *opt) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; @@ -297,7 +297,7 @@ int ip_output(struct sk_buff *skb) int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { struct sock *sk = skb->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = inet->opt; struct rtable *rt; struct iphdr *iph; @@ -712,7 +712,7 @@ int ip_append_data(struct sock *sk, struct ipcm_cookie *ipc, struct rtable *rt, unsigned int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; struct ip_options *opt = NULL; @@ -973,7 
+973,7 @@ error: ssize_t ip_append_page(struct sock *sk, struct page *page, int offset, size_t size, int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; struct rtable *rt; struct ip_options *opt = NULL; @@ -1112,7 +1112,7 @@ int ip_push_pending_frames(struct sock *sk) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = NULL; struct rtable *rt = inet->cork.rt; struct iphdr *iph; @@ -1217,7 +1217,7 @@ error: */ void ip_flush_pending_frames(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) @@ -1260,7 +1260,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct { struct ip_options opt; char data[40]; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d352252326c1..ee4e03628465 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -92,7 +92,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) } -void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) +static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options * opt = (struct ip_options*)optbuf; @@ -112,7 +112,7 @@ void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - struct inet_opt *inet = inet_sk(skb->sk); + struct inet_sock *inet = inet_sk(skb->sk); unsigned flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ @@ -234,7 +234,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, u16 port, u32 info, u8 *payload) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; if (!inet->recverr) @@ -263,7 +263,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, void ip_local_error(struct sock *sk, int err, u32 daddr, u16 port, u32 info) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; struct iphdr *iph; struct sk_buff *skb; @@ -342,7 +342,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) sin = &errhdr.offender; sin->sin_family = AF_UNSPEC; if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; sin->sin_addr.s_addr = skb->nh.iph->saddr; @@ -383,7 +383,7 @@ out: int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int val=0,err; if (level != SOL_IP) @@ -875,7 +875,7 @@ e_inval: int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int val; int len; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f1c60e44a8da..86ade3d3e970 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -109,7 +109,7 @@ */ int 
ic_set_manually __initdata = 0; /* IPconfig parameters set manually */ -int ic_enable __initdata = 0; /* IP config enabled? */ +static int ic_enable __initdata = 0; /* IP config enabled? */ /* Protocol choice */ int ic_proto_enabled __initdata = 0 @@ -124,10 +124,10 @@ int ic_proto_enabled __initdata = 0 #endif ; -int ic_host_name_set __initdata = 0; /* Host name set by us? */ +static int ic_host_name_set __initdata = 0; /* Host name set by us? */ u32 ic_myaddr = INADDR_NONE; /* My IP address */ -u32 ic_netmask = INADDR_NONE; /* Netmask for local subnet */ +static u32 ic_netmask = INADDR_NONE; /* Netmask for local subnet */ u32 ic_gateway = INADDR_NONE; /* Gateway IP address */ u32 ic_servaddr = INADDR_NONE; /* Boot server IP address */ @@ -137,9 +137,9 @@ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ /* Persistent data: */ -int ic_proto_used; /* Protocol used, if any */ -u32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */ -u8 ic_domain[64]; /* DNS (not NIS) domain name */ +static int ic_proto_used; /* Protocol used, if any */ +static u32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */ +static u8 ic_domain[64]; /* DNS (not NIS) domain name */ /* * Private state. diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 9c00cf358d0d..d9212addd193 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -65,7 +65,7 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) /* * Allocate/initialize app incarnation and register it in proto apps. */ -int +static int ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) { struct ip_vs_protocol *pp; diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 95f4568f8afa..fd6feb5499fe 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -64,7 +64,7 @@ struct ip_vs_aligned_lock } __attribute__((__aligned__(SMP_CACHE_BYTES))); /* lock array for conn table */ -struct ip_vs_aligned_lock +static struct ip_vs_aligned_lock __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; static inline void ct_read_lock(unsigned key) diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 47388b29affd..dc00cccca423 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -62,7 +62,7 @@ static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED; /* 1/rate drop and drop-entry variables */ int ip_vs_drop_rate = 0; int ip_vs_drop_counter = 0; -atomic_t ip_vs_dropentry = ATOMIC_INIT(0); +static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); /* number of virtual services */ static int ip_vs_num_services = 0; diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index dfd0a7dd3b75..253c46252bd5 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -45,7 +45,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; /* * register an ipvs protocol */ -int register_ip_vs_protocol(struct ip_vs_protocol *pp) +static int register_ip_vs_protocol(struct ip_vs_protocol *pp) { unsigned hash = IP_VS_PROTO_HASH(pp->protocol); @@ -62,7 +62,7 @@ int register_ip_vs_protocol(struct ip_vs_protocol *pp) /* * unregister an ipvs protocol */ -int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) +static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) { struct ip_vs_protocol **pp_p; unsigned hash = IP_VS_PROTO_HASH(pp->protocol); diff --git a/net/ipv4/ipvs/ip_vs_proto_icmp.c b/net/ipv4/ipvs/ip_vs_proto_icmp.c index 747e0333f5de..191e94aa1c1f 
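The ipconfig.c hunks narrow data rather than functions: boot-time configuration flags keep their __initdata placement, so the storage is discarded with the init sections after boot, and additionally become static. A sketch of the combination, with illustrative names (ipconfig itself is driven by an "ip=" __setup handler in the same spirit):

    #include <linux/init.h>

    /* Boot-only flag: file-local, and freed along with the .init
     * sections once the kernel has finished booting. */
    static int example_enabled __initdata = 0;

    static int __init example_setup(char *str)
    {
            example_enabled = 1;
            return 1;       /* option consumed */
    }
    __setup("example", example_setup);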
100644 --- a/net/ipv4/ipvs/ip_vs_proto_icmp.c +++ b/net/ipv4/ipvs/ip_vs_proto_icmp.c @@ -22,7 +22,7 @@ static int icmp_timeouts[1] = { 1*60*HZ }; static char * icmp_state_name_table[1] = { "ICMP" }; -struct ip_vs_conn * +static struct ip_vs_conn * icmp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, @@ -49,7 +49,7 @@ icmp_conn_in_get(const struct sk_buff *skb, #endif } -struct ip_vs_conn * +static struct ip_vs_conn * icmp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 45ab05a7f4d5..a7389993318c 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -343,7 +343,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) */ static void set_mcast_loop(struct sock *sk, u_char loop) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ lock_sock(sk); @@ -356,7 +356,7 @@ static void set_mcast_loop(struct sock *sk, u_char loop) */ static void set_mcast_ttl(struct sock *sk, u_char ttl) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ lock_sock(sk); @@ -370,7 +370,7 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) static int set_mcast_if(struct sock *sk, char *ifname) { struct net_device *dev; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if ((dev = __dev_get_by_name(ifname)) == NULL) return -ENODEV; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index d14a4e342de3..5f859e657576 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1229,7 +1229,7 @@ ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), static int getorigdst(struct sock *sk, int optval, void __user *user, int *len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_conntrack_tuple_hash *h; struct ip_conntrack_tuple tuple; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c6dd971b10a..c6294ee6f42e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -109,7 +109,7 @@ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, struct hlist_node *node; sk_for_each_from(sk, node) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && @@ -181,7 +181,7 @@ out: void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int type = skb->h.icmph->type; int code = skb->h.icmph->code; int err = 0; @@ -263,7 +263,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, int length, struct rtable *rt, unsigned int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int hh_len; struct iphdr *iph; struct sk_buff *skb; @@ -374,7 +374,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; @@ -537,7 +537,7 @@ static void raw_close(struct sock *sk, long timeout) /* This gets rid of all the 
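The ip_vs_sync helpers above also show how kernel-owned sockets set their multicast options: rather than routing through setsockopt(), they take the socket lock and write the inet_sock field directly, as the comments left in the code note. Condensed from the hunk, with error handling and the interface lookup omitted:

    #include <linux/types.h>
    #include <linux/ip.h>
    #include <net/sock.h>

    /* In-kernel equivalent of
     *   setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
    static void example_set_mcast_ttl(struct sock *sk, u_char ttl)
    {
            struct inet_sock *inet = inet_sk(sk);

            lock_sock(sk);
            inet->mc_ttl = ttl;
            release_sock(sk);
    }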
nasties in af_inet. -DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; int ret = -EINVAL; int chk_addr_ret; @@ -562,10 +562,10 @@ out: return ret; * we return it, otherwise we block. */ -int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); size_t copied = 0; int err = -EOPNOTSUPP; struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; @@ -802,7 +802,7 @@ static void raw_seq_stop(struct seq_file *seq, void *v) static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) { - struct inet_opt *inet = inet_sk(sp); + struct inet_sock *inet = inet_sk(sp); unsigned int dest = inet->daddr, src = inet->rcv_saddr; __u16 destp = 0, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4de83141d5bc..9c438efe1249 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -108,22 +108,22 @@ #define RT_GC_TIMEOUT (300*HZ) -int ip_rt_min_delay = 2 * HZ; -int ip_rt_max_delay = 10 * HZ; -int ip_rt_max_size; -int ip_rt_gc_timeout = RT_GC_TIMEOUT; -int ip_rt_gc_interval = 60 * HZ; -int ip_rt_gc_min_interval = HZ / 2; -int ip_rt_redirect_number = 9; -int ip_rt_redirect_load = HZ / 50; -int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1)); -int ip_rt_error_cost = HZ; -int ip_rt_error_burst = 5 * HZ; -int ip_rt_gc_elasticity = 8; -int ip_rt_mtu_expires = 10 * 60 * HZ; -int ip_rt_min_pmtu = 512 + 20 + 20; -int ip_rt_min_advmss = 256; -int ip_rt_secret_interval = 10 * 60 * HZ; +static int ip_rt_min_delay = 2 * HZ; +static int ip_rt_max_delay = 10 * HZ; +static int ip_rt_max_size; +static int ip_rt_gc_timeout = RT_GC_TIMEOUT; +static int ip_rt_gc_interval = 60 * HZ; +static int ip_rt_gc_min_interval = HZ / 2; +static int ip_rt_redirect_number = 9; +static int ip_rt_redirect_load = HZ / 50; +static int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1)); +static int ip_rt_error_cost = HZ; +static int ip_rt_error_burst = 5 * HZ; +static int ip_rt_gc_elasticity = 8; +static int ip_rt_mtu_expires = 10 * 60 * HZ; +static int ip_rt_min_pmtu = 512 + 20 + 20; +static int ip_rt_min_advmss = 256; +static int ip_rt_secret_interval = 10 * 60 * HZ; static unsigned long rt_deadline; #define RTprint(a...) 
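The long run of route.c tunables can go static because nothing outside the file touches the variables directly: user space reaches them through the sysctl table defined in the same file, which stores their addresses. A sketch of that arrangement, with a hypothetical name and a hypothetical binary-sysctl id (table fields as in 2.6-era kernels):

    #include <linux/param.h>
    #include <linux/sysctl.h>

    static int example_timeout = 300 * HZ;

    static struct ctl_table example_table[] = {
            {
                    .ctl_name     = 1,      /* hypothetical id */
                    .procname     = "example_timeout",
                    .data         = &example_timeout,
                    .maxlen       = sizeof(int),
                    .mode         = 0644,
                    .proc_handler = &proc_dointvec_jiffies,
            },
            { .ctl_name = 0 }
    };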
printk(KERN_DEBUG a) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4c10aa8c3b8d..f1bff9647a2b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -460,7 +460,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) int tcp_listen_start(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcp_listen_opt *lopt; @@ -1772,7 +1772,7 @@ static inline int tcp_need_reset(int state) int tcp_disconnect(struct sock *sk, int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); int err = 0; int old_state = sk->sk_state; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index be78fa284c0d..cbaa439ee83c 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -55,7 +55,7 @@ static struct sock *tcpnl; static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, int ext, u32 pid, u32 seq, u16 nlmsg_flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; @@ -427,7 +427,7 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { struct tcpdiag_entry entry; struct rtattr *bc = (struct rtattr *)(r + 1); - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); entry.family = sk->sk_family; #ifdef CONFIG_IP_TCPDIAG_IPV6 @@ -458,7 +458,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, struct open_request *req, u32 pid, u32 seq) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); unsigned char *b = skb->tail; struct tcpdiagmsg *r; struct nlmsghdr *nlh; @@ -515,7 +515,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct tcp_opt *tp = tcp_sk(sk); struct tcp_listen_opt *lopt; struct rtattr *bc = NULL; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int j, s_j; int reqnum, s_reqnum; int err = 0; @@ -609,7 +609,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = 0; sk_for_each(sk, node, &tcp_listening_hash[i]) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (num < s_num) { num++; @@ -670,7 +670,7 @@ skip_listen_ht: num = 0; sk_for_each(sk, node, &head->chain) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (num < s_num) goto next_normal; @@ -692,7 +692,7 @@ next_normal: if (r->tcpdiag_states&TCPF_TIME_WAIT) { sk_for_each(sk, node, &tcp_ehash[i + tcp_ehash_size].chain) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (num < s_num) goto next_dying; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a0f598569886..15c906067266 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1647,7 +1647,7 @@ static __inline__ int tcp_packet_delayed(struct tcp_opt *tp) #if FASTRETRANS_DEBUG > 1 static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, NIPQUAD(inet->daddr), ntohs(inet->dport), @@ -2786,7 +2786,7 @@ static void westwood_update_window(struct sock *sk, __u32 now) * straight forward and doesn't need any particular care. 
*/ -void __tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb) +static void __tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb) { struct tcp_opt *tp = tcp_sk(sk); @@ -2797,6 +2797,12 @@ void __tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb) tp->westwood.rtt_min = westwood_update_rttmin(sk); } +static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb) +{ + if (tcp_is_westwood(tcp_sk(sk))) + __tcp_westwood_fast_bw(sk, skb); +} + /* * @westwood_dupack_update @@ -2867,7 +2873,7 @@ static inline __u32 westwood_acked_count(struct sock *sk) * dupack. But we need to be careful in such case. */ -void __tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) +static void __tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) { struct tcp_opt *tp = tcp_sk(sk); @@ -2877,6 +2883,12 @@ void __tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) tp->westwood.rtt_min = westwood_update_rttmin(sk); } +static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) +{ + if (tcp_is_westwood(tcp_sk(sk))) + __tcp_westwood_slow_bw(sk, skb); +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3062579ef720..6c5b44af65b2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -115,7 +115,7 @@ static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, static __inline__ int tcp_sk_hashfn(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); __u32 laddr = inet->rcv_saddr; __u16 lport = inet->num; __u32 faddr = inet->daddr; @@ -300,7 +300,7 @@ fail: */ static void __tcp_put_port(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)]; struct tcp_bind_bucket *tb; @@ -420,7 +420,7 @@ static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, hiscore=-1; sk_for_each(sk, node, head) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { __u32 rcv_saddr = inet->rcv_saddr; @@ -457,7 +457,7 @@ static inline struct sock *tcp_v4_lookup_listener(u32 daddr, read_lock(&tcp_lhash_lock); head = &tcp_listening_hash[tcp_lhashfn(hnum)]; if (!hlist_empty(head)) { - struct inet_opt *inet = inet_sk((sk = __sk_head(head))); + struct inet_sock *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && @@ -549,7 +549,7 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) static int __tcp_v4_check_established(struct sock *sk, __u16 lport, struct tcp_tw_bucket **twp) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); u32 daddr = inet->rcv_saddr; u32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; @@ -638,7 +638,7 @@ not_unique: static inline u32 connect_port_offset(const struct sock *sk) { - const struct inet_opt *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, inet->dport); @@ -743,7 +743,7 @@ out: /* This will initiate an outgoing connection. 
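The Westwood changes do more than flip linkage: each heavyweight worker (__tcp_westwood_fast_bw, __tcp_westwood_slow_bw) becomes static, and the patch adds a tiny inline wrapper that tests tcp_is_westwood() before calling it, so sockets not using Westwood pay only a predicate check per ACK. The same guard-wrapper idiom in a generalized sketch, where example_enabled() is a stand-in for the real predicate:

    #include <net/tcp.h>

    static inline int example_enabled(struct tcp_opt *tp)
    {
            return 0;       /* stand-in predicate */
    }

    /* Out-of-line worker: runs only when the feature is active. */
    static void __example_update(struct sock *sk, struct sk_buff *skb)
    {
            /* expensive per-ack bookkeeping here */
    }

    /* Cheap guard, inlined at every call site. */
    static inline void example_update(struct sock *sk, struct sk_buff *skb)
    {
            if (example_enabled(tcp_sk(sk)))
                    __example_update(sk, skb);
    }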
*/ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; @@ -917,7 +917,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) { struct dst_entry *dst; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs @@ -980,7 +980,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) struct iphdr *iph = (struct iphdr *)skb->data; struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); struct tcp_opt *tp; - struct inet_opt *inet; + struct inet_sock *inet; int type = skb->h.icmph->type; int code = skb->h.icmph->code; struct sock *sk; @@ -1127,7 +1127,7 @@ out: void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (skb->ip_summed == CHECKSUM_HW) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); @@ -1549,7 +1549,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct open_request *req, struct dst_entry *dst) { - struct inet_opt *newinet; + struct inet_sock *newinet; struct tcp_opt *newtp; struct sock *newsk; @@ -1856,7 +1856,7 @@ static void __tcp_v4_rehash(struct sock *sk) static int tcp_v4_reselect_saddr(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int err; struct rtable *rt; __u32 old_saddr = inet->saddr; @@ -1907,7 +1907,7 @@ static int tcp_v4_reselect_saddr(struct sock *sk) int tcp_v4_rebuild_header(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); u32 daddr; int err; @@ -1956,7 +1956,7 @@ int tcp_v4_rebuild_header(struct sock *sk) static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; sin->sin_addr.s_addr = inet->daddr; @@ -1971,7 +1971,7 @@ static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) int tcp_v4_remember_stamp(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct rtable *rt = (struct rtable *)__sk_dst_get(sk); struct inet_peer *peer = NULL; @@ -2474,7 +2474,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) int timer_active; unsigned long timer_expires; struct tcp_opt *tp = tcp_sk(sp); - struct inet_opt *inet = inet_sk(sp); + struct inet_sock *inet = inet_sk(sp); unsigned int dest = inet->daddr; unsigned int src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d5b6ea321305..1e0970332a21 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -41,6 +41,8 @@ int sysctl_tcp_max_tw_buckets = NR_FILE*2; int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_abort_on_overflow; +static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo); + static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -337,7 +339,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw = 
kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); if(tw != NULL) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int rto = (tp->rto<<2) - (tp->rto>>1); /* Give us an identity. */ @@ -551,7 +553,7 @@ static struct timer_list tcp_twcal_timer = TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; -void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) +static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) { struct hlist_head *list; int slot; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0b08a3499b29..6c6fcf22bf79 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -266,7 +266,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (skb != NULL) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); int tcp_header_size = tp->tcp_header_len; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3b4d82176a84..48fca593c24e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -332,7 +332,7 @@ static void tcp_retransmit_timer(struct sock *sk) */ #ifdef TCP_DEBUG if (net_ratelimit()) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", NIPQUAD(inet->daddr), htons(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); @@ -653,6 +653,3 @@ EXPORT_SYMBOL(tcp_clear_xmit_timers); EXPORT_SYMBOL(tcp_delete_keepalive_timer); EXPORT_SYMBOL(tcp_init_xmit_timers); EXPORT_SYMBOL(tcp_reset_keepalive_timer); -#ifdef TCP_DEBUG -EXPORT_SYMBOL(tcp_timer_bug_msg); -#endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4a1bfb39c4b0..078db102c0a6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -124,7 +124,7 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) { struct hlist_node *node; struct sock *sk2; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); write_lock_bh(&udp_hash_lock); if (snum == 0) { @@ -171,7 +171,7 @@ gotit: } else { sk_for_each(sk2, node, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_opt *inet2 = inet_sk(sk2); + struct inet_sock *inet2 = inet_sk(sk2); if (inet2->num == snum && sk2 != sk && @@ -219,7 +219,8 @@ static void udp_v4_unhash(struct sock *sk) /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, + u32 daddr, u16 dport, int dif) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -227,7 +228,7 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i int badness = -1; sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 
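tcp_tw_schedule() shows the one wrinkle in making a function static when its callers appear earlier in the file: the patch adds a static forward declaration near the top of tcp_minisocks.c so the definition further down can lose external linkage without reordering the file. The shape of that fix, reduced to a toy:

    struct example_bucket {
            int slot;
    };

    /* forward declaration added near the top of the file */
    static void example_schedule(struct example_bucket *tw, int timeo);

    static void example_caller(struct example_bucket *tw)
    {
            example_schedule(tw, 1);        /* used before defined */
    }

    /* definition, now static, stays where it was */
    static void example_schedule(struct example_bucket *tw, int timeo)
    {
            tw->slot = timeo;
    }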
1 : 0); @@ -263,7 +264,8 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i return result; } -__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, + u32 daddr, u16 dport, int dif) { struct sock *sk; @@ -285,7 +287,7 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned short hnum = ntohs(loc_port); sk_for_each_from(s, node) { - struct inet_opt *inet = inet_sk(s); + struct inet_sock *inet = inet_sk(s); if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || @@ -316,7 +318,7 @@ found: void udp_err(struct sk_buff *skb, u32 info) { - struct inet_opt *inet; + struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); int type = skb->h.icmph->type; @@ -398,7 +400,7 @@ static void udp_flush_pending_frames(struct sock *sk) */ static int udp_push_pending_frames(struct sock *sk, struct udp_opt *up) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; struct sk_buff *skb; struct udphdr *uh; @@ -480,7 +482,7 @@ static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct udp_opt *up = udp_sk(sk); int ulen = len; struct ipcm_cookie ipc; @@ -667,7 +669,8 @@ do_confirm: goto out; } -int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) +static int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { struct udp_opt *up = udp_sk(sk); int ret; @@ -770,10 +773,10 @@ static __inline__ int udp_checksum_complete(struct sk_buff *skb) * return it, otherwise we block. */ -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; int copied, err; @@ -864,7 +867,7 @@ csum_copy_err: int udp_disconnect(struct sock *sk, int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); /* * 1003.1g - break association. 
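udp_v4_lookup_longway(), made static above, is a best-match search over everything bound to the port: each field a socket has bound must match the packet exactly and raises the socket's score, and the highest-scoring socket wins, so a connected socket beats a wildcard bind. The scoring loop in simplified form (the weights here are illustrative; the real function also scores the bound device and skips IPv6-only sockets):

    #include <linux/types.h>

    struct example_sock {
            u32 rcv_saddr;          /* local address bound, or 0 */
            u32 daddr;              /* connected peer, or 0 */
    };

    static struct example_sock *example_best_match(struct example_sock *tbl,
                                                   int n, u32 saddr, u32 daddr)
    {
            struct example_sock *result = NULL;
            int badness = -1;
            int i;

            for (i = 0; i < n; i++) {
                    struct example_sock *s = &tbl[i];
                    int score = 0;

                    if (s->rcv_saddr) {
                            if (s->rcv_saddr != daddr)
                                    continue;       /* bound elsewhere */
                            score += 2;
                    }
                    if (s->daddr) {
                            if (s->daddr != saddr)
                                    continue;       /* connected elsewhere */
                            score += 2;
                    }
                    if (score > badness) {
                            result = s;
                            badness = score;
                    }
            }
            return result;
    }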
*/ @@ -1503,7 +1506,7 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) { - struct inet_opt *inet = inet_sk(sp); + struct inet_sock *inet = inet_sk(sp); unsigned int dest = inet->daddr; unsigned int src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3ce69883bcc4..012ef304ce63 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -279,10 +279,3 @@ void __init xfrm4_init(void) xfrm4_policy_init(); } -void __exit xfrm4_fini(void) -{ - //xfrm4_input_fini(); - xfrm4_policy_fini(); - xfrm4_state_fini(); -} - diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1003256c641..5fa3fdb890e4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -48,6 +48,7 @@ #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/if_arcnet.h> +#include <linux/if_infiniband.h> #include <linux/route.h> #include <linux/inetdevice.h> #include <linux/init.h> @@ -1095,6 +1096,12 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) memset(eui, 0, 7); eui[7] = *(u8*)dev->dev_addr; return 0; + case ARPHRD_INFINIBAND: + if (dev->addr_len != INFINIBAND_ALEN) + return -1; + memcpy(eui, dev->dev_addr + 12, 8); + eui[0] |= 2; + return 0; } return -1; } @@ -1794,7 +1801,8 @@ static void addrconf_dev_config(struct net_device *dev) if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_IEEE802_TR) && - (dev->type != ARPHRD_ARCNET)) { + (dev->type != ARPHRD_ARCNET) && + (dev->type != ARPHRD_INFINIBAND)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index fac7d9cdf238..5ca3af45cfeb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -114,10 +114,9 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) static int inet6_create(struct socket *sock, int protocol) { - struct inet_opt *inet; + struct inet_sock *inet; struct ipv6_pinfo *np; struct sock *sk; - struct tcp6_sock* tcp6sk; struct list_head *p; struct inet_protosw *answer; struct proto *answer_prot; @@ -196,8 +195,7 @@ static int inet6_create(struct socket *sock, int protocol) sk->sk_backlog_rcv = answer->prot->backlog_rcv; - tcp6sk = (struct tcp6_sock *)sk; - tcp6sk->pinet6 = np = inet6_sk_generic(sk); + inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = -1; np->mc_loop = 1; @@ -252,7 +250,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); __u32 v4addr = 0; unsigned short snum; @@ -410,7 +408,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); sin->sin6_family = AF_INET6; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b077cd19f576..65b9375df57d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -36,7 +36,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct 
inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr, *final_p = NULL, final; struct dst_entry *dst; @@ -335,7 +335,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); ipv6_addr_set(&sin->sin6_addr, 0, 0, htonl(0xffff), diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 96e2925f4661..aec81a63b126 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -809,7 +809,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, unsigned int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; unsigned int maxfraglen, fragheaderlen; @@ -1087,7 +1087,7 @@ int ip6_push_pending_frames(struct sock *sk) struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; @@ -1165,7 +1165,7 @@ error: void ip6_flush_pending_frames(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d4eaaa8245a4..55110651a3f2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -260,6 +260,9 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d case ARPHRD_ARCNET: ipv6_arcnet_mc_map(addr, buf); return 0; + case ARPHRD_INFINIBAND: + ipv6_ib_mc_map(addr, buf); + return 0; default: if (dir) { memcpy(buf, dev->broadcast, dev->addr_len); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1e7ed8dcf1cb..0f28463ac350 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -178,7 +178,7 @@ out: /* This cleans up af_inet6 a bit. 
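The addrconf and ndisc hunks are the IPv6 side of the new IPoIB driver: ARPHRD_INFINIBAND joins the link types eligible for stateless autoconfiguration, and the interface identifier is derived from the 20-byte hardware address (4 bytes of QPN followed by the 16-byte port GID) by taking the low 8 bytes of the GID and setting the universal/local bit. The EUI-64 step, lifted almost verbatim from the hunk into a standalone helper:

    #include <linux/if_infiniband.h>        /* INFINIBAND_ALEN == 20 */
    #include <linux/netdevice.h>
    #include <linux/string.h>

    static int example_ipoib_eui64(u8 *eui, struct net_device *dev)
    {
            if (dev->addr_len != INFINIBAND_ALEN)
                    return -1;
            memcpy(eui, dev->dev_addr + 12, 8);     /* low 8 GID bytes */
            eui[0] |= 2;                            /* universal/local bit */
            return 0;
    }

Multicast address mapping is handled the same way the Ethernet and ARCnet cases are, through the new ipv6_ib_mc_map() helper in ndisc_mc_map().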
-DaveM */ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; __u32 v4addr = 0; @@ -253,7 +253,7 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, u32 info) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -314,7 +314,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) */ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct raw6_opt *raw_opt = raw6_sk(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { @@ -505,7 +505,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct flowi *fl, struct rt6_info *rt, unsigned int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; unsigned int hh_len; @@ -607,7 +607,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; struct in6_addr *daddr, *final_p = NULL, final; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct raw6_opt *raw_opt = raw6_sk(sk); struct ipv6_txoptions *opt = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0ae13d6b19db..97fe88e10597 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -89,7 +89,7 @@ static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *laddr = &np->rcv_saddr; struct in6_addr *faddr = &np->daddr; @@ -443,7 +443,7 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) static int tcp_v6_check_established(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr = &np->rcv_saddr; struct in6_addr *saddr = &np->daddr; @@ -549,7 +549,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p = NULL, final; @@ -785,7 +785,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct flowi fl; /* BUGGG_FUTURE: Again, it is not clear how @@ -1281,7 +1281,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, { struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; - struct inet_opt *newinet; + struct inet_sock *newinet; struct tcp_opt *newtp; struct sock *newsk; struct ipv6_txoptions *opt; @@ -1297,7 +1297,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return NULL; newtcp6sk = (struct tcp6_sock *)newsk; 
- newtcp6sk->pinet6 = &newtcp6sk->inet6; + newtcp6sk->inet.pinet6 = &newtcp6sk->inet6; newinet = inet_sk(newsk); newnp = inet6_sk(newsk); @@ -1390,7 +1390,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newtcp6sk = (struct tcp6_sock *)newsk; - newtcp6sk->pinet6 = &newtcp6sk->inet6; + newtcp6sk->inet.pinet6 = &newtcp6sk->inet6; newtp = tcp_sk(newsk); newinet = inet_sk(newsk); @@ -1754,7 +1754,7 @@ static int tcp_v6_rebuild_header(struct sock *sk) dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p = NULL, final; struct flowi fl; @@ -1800,7 +1800,7 @@ static int tcp_v6_rebuild_header(struct sock *sk) static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) { struct sock *sk = skb->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; struct dst_entry *dst; @@ -2006,7 +2006,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) __u16 destp, srcp; int timer_active; unsigned long timer_expires; - struct inet_opt *inet = inet_sk(sp); + struct inet_sock *inet = inet_sk(sp); struct tcp_opt *tp = tcp_sk(sp); struct ipv6_pinfo *np = inet6_sk(sp); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index dc0bbfb4c1c9..019318679817 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -160,7 +160,7 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, read_lock(&udp_hash_lock); sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -269,7 +269,7 @@ try_again: sin6->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IP)) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); ipv6_addr_set(&sin6->sin6_addr, 0, 0, htonl(0xffff), skb->nh.iph->saddr); @@ -386,7 +386,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, unsigned short num = ntohs(loc_port); sk_for_each_from(s, node) { - struct inet_opt *inet = inet_sk(s); + struct inet_sock *inet = inet_sk(s); if (inet->num == num && s->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(s); @@ -566,7 +566,7 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_opt *up) { struct sk_buff *skb; struct udphdr *uh; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; int err = 0; @@ -624,7 +624,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, { struct ipv6_txoptions opt_space; struct udp_opt *up = udp_sk(sk); - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; struct in6_addr *daddr, *final_p = NULL, final; @@ -970,7 +970,7 @@ static struct inet6_protocol udpv6_protocol = { static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) { - struct inet_opt *inet = inet_sk(sp); + struct inet_sock *inet = inet_sk(sp); struct ipv6_pinfo *np = inet6_sk(sp); struct in6_addr *dest, *src; __u16 destp, srcp; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index e88d37fe262c..6da62172f674 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -291,7 +291,7 @@ found: } #endif -void __ipxitf_down(struct ipx_interface 
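The pinet6 rewiring above is the visible effect of the structure consolidation behind the inet_sock rename: the pointer to the IPv6 per-socket state now lives in the common inet part, so generic code can reach it as inet_sk(sk)->pinet6 without knowing the concrete socket type, and inet6_create() no longer needs its tcp6_sock cast. A toy model of the pointer/storage relationship (the real layouts are in this tree's headers; the names here are stand-ins):

    struct example_pinfo {
            int hop_limit;
    };

    struct example_inet {
            /* ... IPv4 per-socket state ... */
            struct example_pinfo *pinet6;   /* common access path */
    };

    struct example_tcp6 {
            struct example_inet inet;       /* must come first */
            struct example_pinfo inet6;     /* the actual storage */
    };

    static void example_create(struct example_tcp6 *sk)
    {
            /* mirrors: newtcp6sk->inet.pinet6 = &newtcp6sk->inet6; */
            sk->inet.pinet6 = &sk->inet6;
    }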
*intrfc) +static void __ipxitf_down(struct ipx_interface *intrfc) { struct sock *s; struct hlist_node *node, *t; @@ -335,6 +335,12 @@ void ipxitf_down(struct ipx_interface *intrfc) spin_unlock_bh(&ipx_interfaces_lock); } +static __inline__ void __ipxitf_put(struct ipx_interface *intrfc) +{ + if (atomic_dec_and_test(&intrfc->refcnt)) + __ipxitf_down(intrfc); +} + static int ipxitf_device_event(struct notifier_block *notifier, unsigned long event, void *ptr) { @@ -1629,7 +1635,7 @@ out: return rc; } -int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { /* NULL here for pt means the packet was looped back */ struct ipx_interface *intrfc; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index c2febe44d1f2..6b3cb469fc96 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -287,21 +287,21 @@ out: return 0; } -struct seq_operations ipx_seq_interface_ops = { +static struct seq_operations ipx_seq_interface_ops = { .start = ipx_seq_interface_start, .next = ipx_seq_interface_next, .stop = ipx_seq_interface_stop, .show = ipx_seq_interface_show, }; -struct seq_operations ipx_seq_route_ops = { +static struct seq_operations ipx_seq_route_ops = { .start = ipx_seq_route_start, .next = ipx_seq_route_next, .stop = ipx_seq_route_stop, .show = ipx_seq_route_show, }; -struct seq_operations ipx_seq_socket_ops = { +static struct seq_operations ipx_seq_socket_ops = { .start = ipx_seq_socket_start, .next = ipx_seq_socket_next, .stop = ipx_seq_interface_stop, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index bc6aa5db3bb5..5dd00fe097b6 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -298,7 +298,7 @@ static void irda_connect_indication(void *instance, void *sap, * Accept incoming connection * */ -void irda_connect_response(struct irda_sock *self) +static void irda_connect_response(struct irda_sock *self) { struct sk_buff *skb; @@ -1155,7 +1155,7 @@ static int irda_create(struct socket *sock, int protocol) * Destroy socket * */ -void irda_destroy_socket(struct irda_sock *self) +static void irda_destroy_socket(struct irda_sock *self) { IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self); diff --git a/net/irda/discovery.c b/net/irda/discovery.c index 645c86915d54..d16ef81552cf 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -315,41 +315,6 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, return(buffer); } -/* - * Function irlmp_find_device (name, saddr) - * - * Look through the discovery log at each of the links and try to find - * the device with the given name. Return daddr and saddr. If saddr is - * specified, that look at that particular link only (not impl). 
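__ipxitf_put(), added above alongside the now-static __ipxitf_down(), is the classic put side of a reference count: atomic_dec_and_test() returns true only on the transition to zero, so exactly one dropper runs the teardown. The idiom in isolation, with a toy object and kfree() as the stand-in teardown:

    #include <linux/slab.h>
    #include <asm/atomic.h>

    struct example_obj {
            atomic_t refcnt;
            /* ... payload ... */
    };

    static void example_destroy(struct example_obj *obj)
    {
            kfree(obj);
    }

    static inline void example_put(struct example_obj *obj)
    {
            if (atomic_dec_and_test(&obj->refcnt))
                    example_destroy(obj);
    }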
- */ -__u32 irlmp_find_device(hashbin_t *cachelog, char *name, __u32 *saddr) -{ - unsigned long flags; - discovery_t *d; - - spin_lock_irqsave(&cachelog->hb_spinlock, flags); - - /* Look at all discoveries for that link */ - d = (discovery_t *) hashbin_get_first(cachelog); - while (d != NULL) { - IRDA_DEBUG(1, "Discovery:\n"); - IRDA_DEBUG(1, " daddr=%08x\n", d->data.daddr); - IRDA_DEBUG(1, " nickname=%s\n", d->data.info); - - if (strcmp(name, d->data.info) == 0) { - *saddr = d->data.saddr; - - spin_unlock_irqrestore(&cachelog->hb_spinlock, flags); - return d->data.daddr; - } - d = (discovery_t *) hashbin_get_next(cachelog); - } - - spin_unlock_irqrestore(&cachelog->hb_spinlock, flags); - - return 0; -} - #ifdef CONFIG_PROC_FS static inline discovery_t *discovery_seq_idx(loff_t pos) diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 010d2b5764c1..36b75261d0ca 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -68,7 +68,7 @@ static struct file_operations ircomm_proc_fops = { hashbin_t *ircomm = NULL; -int __init ircomm_init(void) +static int __init ircomm_init(void) { ircomm = hashbin_new(HB_LOCK); if (ircomm == NULL) { @@ -89,7 +89,7 @@ int __init ircomm_init(void) return 0; } -void __exit ircomm_cleanup(void) +static void __exit ircomm_cleanup(void) { IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c index 3ad421cbea83..606b4e2a0a0f 100644 --- a/net/irda/ircomm/ircomm_event.c +++ b/net/irda/ircomm/ircomm_event.c @@ -57,7 +57,7 @@ char *ircomm_state[] = { "IRCOMM_CONN", }; -char *ircomm_event[] = { +static char *ircomm_event[] = { "IRCOMM_CONNECT_REQUEST", "IRCOMM_CONNECT_RESPONSE", "IRCOMM_TTP_CONNECT_INDICATION", diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index a773b35c66cb..3a0dc925a4aa 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -41,44 +41,6 @@ #include <net/irda/ircomm_event.h> #include <net/irda/ircomm_lmp.h> -/* - * Function ircomm_open_lsap (self) - * - * Open LSAP. 
This function will only be used when using "raw" services - * - */ -int ircomm_open_lsap(struct ircomm_cb *self) -{ - notify_t notify; - - IRDA_DEBUG(0, "%s()\n", __FUNCTION__ ); - - /* Register callbacks */ - irda_notify_init(¬ify); - notify.data_indication = ircomm_lmp_data_indication; - notify.connect_confirm = ircomm_lmp_connect_confirm; - notify.connect_indication = ircomm_lmp_connect_indication; - notify.disconnect_indication = ircomm_lmp_disconnect_indication; - notify.instance = self; - strlcpy(notify.name, "IrCOMM", sizeof(notify.name)); - - self->lsap = irlmp_open_lsap(LSAP_ANY, ¬ify, 0); - if (!self->lsap) { - IRDA_DEBUG(0,"%sfailed to allocate tsap\n", __FUNCTION__ ); - return -1; - } - self->slsap_sel = self->lsap->slsap_sel; - - /* - * Initialize the call-table for issuing commands - */ - self->issue.data_request = ircomm_lmp_data_request; - self->issue.connect_request = ircomm_lmp_connect_request; - self->issue.connect_response = ircomm_lmp_connect_response; - self->issue.disconnect_request = ircomm_lmp_disconnect_request; - - return 0; -} /* * Function ircomm_lmp_connect_request (self, userdata) @@ -86,9 +48,9 @@ int ircomm_open_lsap(struct ircomm_cb *self) * * */ -int ircomm_lmp_connect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info) +static int ircomm_lmp_connect_request(struct ircomm_cb *self, + struct sk_buff *userdata, + struct ircomm_info *info) { int ret = 0; @@ -109,7 +71,8 @@ int ircomm_lmp_connect_request(struct ircomm_cb *self, * * */ -int ircomm_lmp_connect_response(struct ircomm_cb *self, struct sk_buff *userdata) +static int ircomm_lmp_connect_response(struct ircomm_cb *self, + struct sk_buff *userdata) { struct sk_buff *tx_skb; int ret; @@ -141,9 +104,9 @@ int ircomm_lmp_connect_response(struct ircomm_cb *self, struct sk_buff *userdata return 0; } -int ircomm_lmp_disconnect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info) +static int ircomm_lmp_disconnect_request(struct ircomm_cb *self, + struct sk_buff *userdata, + struct ircomm_info *info) { struct sk_buff *tx_skb; int ret; @@ -175,7 +138,7 @@ int ircomm_lmp_disconnect_request(struct ircomm_cb *self, * been deallocated. We do this to make sure we don't flood IrLAP with * frames, since we are not using the IrTTP flow control mechanism */ -void ircomm_lmp_flow_control(struct sk_buff *skb) +static void ircomm_lmp_flow_control(struct sk_buff *skb) { struct irda_skb_cb *cb; struct ircomm_cb *self; @@ -215,8 +178,9 @@ void ircomm_lmp_flow_control(struct sk_buff *skb) * Send data frame to peer device * */ -int ircomm_lmp_data_request(struct ircomm_cb *self, struct sk_buff *skb, - int not_used) +static int ircomm_lmp_data_request(struct ircomm_cb *self, + struct sk_buff *skb, + int not_used) { struct irda_skb_cb *cb; int ret; @@ -256,8 +220,8 @@ int ircomm_lmp_data_request(struct ircomm_cb *self, struct sk_buff *skb, * Incoming data which we must deliver to the state machine, to check * we are still connected. 
*/ -int ircomm_lmp_data_indication(void *instance, void *sap, - struct sk_buff *skb) +static int ircomm_lmp_data_indication(void *instance, void *sap, + struct sk_buff *skb) { struct ircomm_cb *self = (struct ircomm_cb *) instance; @@ -282,11 +246,11 @@ int ircomm_lmp_data_indication(void *instance, void *sap, * Connection has been confirmed by peer device * */ -void ircomm_lmp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_seg_size, - __u8 max_header_size, - struct sk_buff *skb) +static void ircomm_lmp_connect_confirm(void *instance, void *sap, + struct qos_info *qos, + __u32 max_seg_size, + __u8 max_header_size, + struct sk_buff *skb) { struct ircomm_cb *self = (struct ircomm_cb *) instance; struct ircomm_info info; @@ -315,11 +279,11 @@ void ircomm_lmp_connect_confirm(void *instance, void *sap, * Peer device wants to make a connection with us * */ -void ircomm_lmp_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_seg_size, - __u8 max_header_size, - struct sk_buff *skb) +static void ircomm_lmp_connect_indication(void *instance, void *sap, + struct qos_info *qos, + __u32 max_seg_size, + __u8 max_header_size, + struct sk_buff *skb) { struct ircomm_cb *self = (struct ircomm_cb *)instance; struct ircomm_info info; @@ -347,9 +311,9 @@ void ircomm_lmp_connect_indication(void *instance, void *sap, * Peer device has closed the connection, or the link went down for some * other reason */ -void ircomm_lmp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb) +static void ircomm_lmp_disconnect_indication(void *instance, void *sap, + LM_REASON reason, + struct sk_buff *skb) { struct ircomm_cb *self = (struct ircomm_cb *) instance; struct ircomm_info info; @@ -367,3 +331,41 @@ void ircomm_lmp_disconnect_indication(void *instance, void *sap, if(skb) dev_kfree_skb(skb); } +/* + * Function ircomm_open_lsap (self) + * + * Open LSAP. 
This function will only be used when using "raw" services + * + */ +int ircomm_open_lsap(struct ircomm_cb *self) +{ + notify_t notify; + + IRDA_DEBUG(0, "%s()\n", __FUNCTION__ ); + + /* Register callbacks */ + irda_notify_init(¬ify); + notify.data_indication = ircomm_lmp_data_indication; + notify.connect_confirm = ircomm_lmp_connect_confirm; + notify.connect_indication = ircomm_lmp_connect_indication; + notify.disconnect_indication = ircomm_lmp_disconnect_indication; + notify.instance = self; + strlcpy(notify.name, "IrCOMM", sizeof(notify.name)); + + self->lsap = irlmp_open_lsap(LSAP_ANY, ¬ify, 0); + if (!self->lsap) { + IRDA_DEBUG(0,"%sfailed to allocate tsap\n", __FUNCTION__ ); + return -1; + } + self->slsap_sel = self->lsap->slsap_sel; + + /* + * Initialize the call-table for issuing commands + */ + self->issue.data_request = ircomm_lmp_data_request; + self->issue.connect_request = ircomm_lmp_connect_request; + self->issue.connect_response = ircomm_lmp_connect_response; + self->issue.disconnect_request = ircomm_lmp_disconnect_request; + + return 0; +} diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index dee0c8917c18..703e8aa31c75 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -92,23 +92,6 @@ static pi_major_info_t pi_major_call_table[] = { pi_param_info_t ircomm_param_info = { pi_major_call_table, 3, 0x0f, 4 }; /* - * Function ircomm_param_flush (self) - * - * Flush (send) out all queued parameters - * - */ -int ircomm_param_flush(struct ircomm_tty_cb *self) -{ - /* we should lock here, but I guess this function is unused... - * Jean II */ - if (self->ctrl_skb) { - ircomm_control_request(self->ircomm, self->ctrl_skb); - self->ctrl_skb = NULL; - } - return 0; -} - -/* * Function ircomm_param_request (self, pi, flush) * * Queue a parameter for the control channel diff --git a/net/irda/ircomm/ircomm_ttp.c b/net/irda/ircomm/ircomm_ttp.c index fc453c3ad55e..6dafe0b0f628 100644 --- a/net/irda/ircomm/ircomm_ttp.c +++ b/net/irda/ircomm/ircomm_ttp.c @@ -40,6 +40,35 @@ #include <net/irda/ircomm_event.h> #include <net/irda/ircomm_ttp.h> +static int ircomm_ttp_data_indication(void *instance, void *sap, + struct sk_buff *skb); +static void ircomm_ttp_connect_confirm(void *instance, void *sap, + struct qos_info *qos, + __u32 max_sdu_size, + __u8 max_header_size, + struct sk_buff *skb); +static void ircomm_ttp_connect_indication(void *instance, void *sap, + struct qos_info *qos, + __u32 max_sdu_size, + __u8 max_header_size, + struct sk_buff *skb); +static void ircomm_ttp_flow_indication(void *instance, void *sap, + LOCAL_FLOW cmd); +static void ircomm_ttp_disconnect_indication(void *instance, void *sap, + LM_REASON reason, + struct sk_buff *skb); +static int ircomm_ttp_data_request(struct ircomm_cb *self, + struct sk_buff *skb, + int clen); +static int ircomm_ttp_connect_request(struct ircomm_cb *self, + struct sk_buff *userdata, + struct ircomm_info *info); +static int ircomm_ttp_connect_response(struct ircomm_cb *self, + struct sk_buff *userdata); +static int ircomm_ttp_disconnect_request(struct ircomm_cb *self, + struct sk_buff *userdata, + struct ircomm_info *info); + /* * Function ircomm_open_tsap (self) * @@ -87,9 +116,9 @@ int ircomm_open_tsap(struct ircomm_cb *self) * * */ -int ircomm_ttp_connect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info) +static int ircomm_ttp_connect_request(struct ircomm_cb *self, + struct sk_buff *userdata, + struct ircomm_info *info) { int ret = 0; @@ -112,8 
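The two IrCOMM files solve the same ordering problem in different ways once their callbacks go static: ircomm_lmp.c moves the public ircomm_open_lsap() below the callbacks it wires up, while ircomm_ttp.c keeps its layout and instead adds the block of static forward declarations visible above, the same fix tcp_minisocks.c used for tcp_tw_schedule(). Both compile to the same thing; the declaration block scales better when many callbacks are referenced before their definitions. The minimal shape of the relocation variant:

    /* file-local callback, defined first ... */
    static int example_data_indication(void *instance, void *sap)
    {
            return 0;
    }

    struct example_notify {
            int (*data_indication)(void *instance, void *sap);
    };

    /* ... so the public entry point, now placed last, fills in the
     * callback table with no forward declarations needed. */
    int example_open(struct example_notify *notify)
    {
            notify->data_indication = example_data_indication;
            return 0;
    }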
+141,8 @@ int ircomm_ttp_connect_request(struct ircomm_cb *self, * * */ -int ircomm_ttp_connect_response(struct ircomm_cb *self, - struct sk_buff *userdata) +static int ircomm_ttp_connect_response(struct ircomm_cb *self, + struct sk_buff *userdata) { int ret; @@ -137,9 +166,9 @@ int ircomm_ttp_connect_response(struct ircomm_cb *self, * some of them are sent after connection establishment, so this can * increase the latency a bit. */ -int ircomm_ttp_data_request(struct ircomm_cb *self, - struct sk_buff *skb, - int clen) +static int ircomm_ttp_data_request(struct ircomm_cb *self, + struct sk_buff *skb, + int clen) { int ret; @@ -175,8 +204,8 @@ int ircomm_ttp_data_request(struct ircomm_cb *self, * Incoming data * */ -int ircomm_ttp_data_indication(void *instance, void *sap, - struct sk_buff *skb) +static int ircomm_ttp_data_indication(void *instance, void *sap, + struct sk_buff *skb) { struct ircomm_cb *self = (struct ircomm_cb *) instance; @@ -194,11 +223,11 @@ int ircomm_ttp_data_indication(void *instance, void *sap, return 0; } -void ircomm_ttp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) +static void ircomm_ttp_connect_confirm(void *instance, void *sap, + struct qos_info *qos, + __u32 max_sdu_size, + __u8 max_header_size, + struct sk_buff *skb) { struct ircomm_cb *self = (struct ircomm_cb *) instance; struct ircomm_info info; @@ -234,11 +263,11 @@ out: * * */ -void ircomm_ttp_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) +static void ircomm_ttp_connect_indication(void *instance, void *sap, + struct qos_info *qos, + __u32 max_sdu_size, + __u8 max_header_size, + struct sk_buff *skb) { struct ircomm_cb *self = (struct ircomm_cb *)instance; struct ircomm_info info; @@ -273,9 +302,9 @@ out: * * */ -int ircomm_ttp_disconnect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info) +static int ircomm_ttp_disconnect_request(struct ircomm_cb *self, + struct sk_buff *userdata, + struct ircomm_info *info) { int ret; @@ -294,9 +323,9 @@ int ircomm_ttp_disconnect_request(struct ircomm_cb *self, * * */ -void ircomm_ttp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb) +static void ircomm_ttp_disconnect_indication(void *instance, void *sap, + LM_REASON reason, + struct sk_buff *skb) { struct ircomm_cb *self = (struct ircomm_cb *) instance; struct ircomm_info info; @@ -321,7 +350,8 @@ void ircomm_ttp_disconnect_indication(void *instance, void *sap, * Layer below is telling us to start or stop the flow of data * */ -void ircomm_ttp_flow_indication(void *instance, void *sap, LOCAL_FLOW cmd) +static void ircomm_ttp_flow_indication(void *instance, void *sap, + LOCAL_FLOW cmd) { struct ircomm_cb *self = (struct ircomm_cb *) instance; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index eb430e81d02f..f75949d277bb 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -64,6 +64,7 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout); static void ircomm_tty_hangup(struct tty_struct *tty); static void ircomm_tty_do_softint(void *private_); static void ircomm_tty_shutdown(struct ircomm_tty_cb *self); +static void ircomm_tty_stop(struct tty_struct *tty); static int ircomm_tty_data_indication(void *instance, void *sap, struct sk_buff *skb); @@ -108,7 +109,7 @@ static struct tty_operations 
ops = { * Init IrCOMM TTY layer/driver * */ -int __init ircomm_tty_init(void) +static int __init ircomm_tty_init(void) { driver = alloc_tty_driver(IRCOMM_TTY_PORTS); if (!driver) @@ -159,7 +160,7 @@ static void __exit __ircomm_tty_cleanup(struct ircomm_tty_cb *self) * Remove IrCOMM TTY layer/driver * */ -void __exit ircomm_tty_cleanup(void) +static void __exit ircomm_tty_cleanup(void) { int ret; @@ -1064,7 +1065,7 @@ void ircomm_tty_start(struct tty_struct *tty) * This routine notifies the tty driver that it should stop outputting * characters to the tty device. */ -void ircomm_tty_stop(struct tty_struct *tty) +static void ircomm_tty_stop(struct tty_struct *tty) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index fb9991390fff..41a6aa415040 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -52,8 +52,9 @@ static void ircomm_tty_discovery_indication(discinfo_t *discovery, void *priv); static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id, struct ias_value *value, void *priv); -void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self, int timeout); -void ircomm_tty_watchdog_timer_expired(void *data); +static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self, + int timeout); +static void ircomm_tty_watchdog_timer_expired(void *data); static int ircomm_tty_state_idle(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, @@ -90,7 +91,7 @@ char *ircomm_tty_state[] = { "*** ERROR *** ", }; -char *ircomm_tty_event[] = { +static char *ircomm_tty_event[] = { "IRCOMM_TTY_ATTACH_CABLE", "IRCOMM_TTY_DETACH_CABLE", "IRCOMM_TTY_DATA_REQUEST", @@ -594,7 +595,8 @@ void ircomm_tty_link_established(struct ircomm_tty_cb *self) * connection attempt is successful, and if not, we will retry after * the timeout */ -void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self, int timeout) +static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self, + int timeout) { ASSERT(self != NULL, return;); ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); @@ -609,7 +611,7 @@ void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self, int timeout) * Called when the connect procedure have taken to much time. * */ -void ircomm_tty_watchdog_timer_expired(void *data) +static void ircomm_tty_watchdog_timer_expired(void *data) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) data; diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index 40965158a75f..001611d3302e 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -53,7 +53,7 @@ * Change speed of the driver. If the remote device is a DCE, then this * should make it change the speed of its serial port */ -void ircomm_tty_change_speed(struct ircomm_tty_cb *self) +static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) { unsigned cflag, cval; int baud; diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 80a6648a4043..cf5a849bf4c0 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -143,47 +143,6 @@ void irda_device_set_media_busy(struct net_device *dev, int status) EXPORT_SYMBOL(irda_device_set_media_busy); -int irda_device_set_dtr_rts(struct net_device *dev, int dtr, int rts) -{ - struct if_irda_req req; - int ret; - - IRDA_DEBUG(2, "%s()\n", __FUNCTION__); - - if (!dev->do_ioctl) { - ERROR("%s: do_ioctl not impl. 
by device driver\n", - __FUNCTION__); - return -1; - } - - req.ifr_dtr = dtr; - req.ifr_rts = rts; - - ret = dev->do_ioctl(dev, (struct ifreq *) &req, SIOCSDTRRTS); - - return ret; -} - -int irda_device_change_speed(struct net_device *dev, __u32 speed) -{ - struct if_irda_req req; - int ret; - - IRDA_DEBUG(2, "%s()\n", __FUNCTION__); - - if (!dev->do_ioctl) { - ERROR("%s: do_ioctl not impl. by device driver\n", - __FUNCTION__); - return -1; - } - - req.ifr_baudrate = speed; - - ret = dev->do_ioctl(dev, (struct ifreq *) &req, SIOCSBANDWIDTH); - - return ret; -} - /* * Function irda_device_is_receiving (dev) * @@ -372,7 +331,7 @@ static void irda_task_timer_expired(void *data) * This function should be used by low level device drivers in a similar way * as ether_setup() is used by normal network device drivers */ -void irda_device_setup(struct net_device *dev) +static void irda_device_setup(struct net_device *dev) { dev->hard_header_len = 0; dev->addr_len = 0; @@ -502,33 +461,6 @@ void irda_device_unregister_dongle(struct dongle_reg *dongle) } EXPORT_SYMBOL(irda_device_unregister_dongle); -/* - * Function irda_device_set_mode (self, mode) - * - * Set the Infrared device driver into mode where it sends and receives - * data without using IrLAP framing. Check out the particular device - * driver to find out which modes it support. - */ -int irda_device_set_mode(struct net_device* dev, int mode) -{ - struct if_irda_req req; - int ret; - - IRDA_DEBUG(0, "%s()\n", __FUNCTION__); - - if (!dev->do_ioctl) { - ERROR("%s: set_raw_mode not impl. by device driver\n", - __FUNCTION__); - return -1; - } - - req.ifr_mode = mode; - - ret = dev->do_ioctl(dev, (struct ifreq *) &req, SIOCSMODE); - - return ret; -} - #ifdef CONFIG_ISA /* * Function setup_dma (idev, buffer, count, mode) diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 7810c6da23d6..f22993c24ea8 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -77,6 +77,15 @@ static void iriap_connect_confirm(void *instance, void *sap, static int iriap_data_indication(void *instance, void *sap, struct sk_buff *skb); +static void iriap_watchdog_timer_expired(void *data); + +static inline void iriap_start_watchdog_timer(struct iriap_cb *self, + int timeout) +{ + irda_start_timer(&self->watchdog_timer, timeout, self, + iriap_watchdog_timer_expired); +} + /* * Function iriap_init (void) * @@ -328,7 +337,7 @@ static void iriap_disconnect_indication(void *instance, void *sap, /* * Function iriap_disconnect_request (handle) */ -void iriap_disconnect_request(struct iriap_cb *self) +static void iriap_disconnect_request(struct iriap_cb *self) { struct sk_buff *tx_skb; @@ -352,31 +361,6 @@ void iriap_disconnect_request(struct iriap_cb *self) irlmp_disconnect_request(self->lsap, tx_skb); } -void iriap_getinfobasedetails_request(void) -{ - IRDA_DEBUG(0, "%s(), Not implemented!\n", __FUNCTION__); -} - -void iriap_getinfobasedetails_confirm(void) -{ - IRDA_DEBUG(0, "%s(), Not implemented!\n", __FUNCTION__); -} - -void iriap_getobjects_request(void) -{ - IRDA_DEBUG(0, "%s(), Not implemented!\n", __FUNCTION__); -} - -void iriap_getobjects_confirm(void) -{ - IRDA_DEBUG(0, "%s(), Not implemented!\n", __FUNCTION__); -} - -void iriap_getvalue(void) -{ - IRDA_DEBUG(0, "%s(), Not implemented!\n", __FUNCTION__); -} - /* * Function iriap_getvaluebyclass (addr, name, attr) * @@ -445,7 +429,8 @@ EXPORT_SYMBOL(iriap_getvaluebyclass_request); * to service user. 
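
The iriap.c hunk above replaces an exported watchdog helper with a static
expiry callback plus a static inline wrapper around irda_start_timer()
(argument order: timer, timeout, context pointer, expiry callback). A
stand-alone sketch of that pattern, using stub types and a hypothetical
start_timer() in place of the kernel API:

    #include <stdio.h>

    struct timer {                      /* stand-in for struct timer_list */
        void (*fn)(void *data);
        void *data;
        int timeout;
    };

    /* stand-in for irda_start_timer(timer, timeout, instance, callback) */
    static void start_timer(struct timer *t, int timeout, void *data,
                            void (*fn)(void *data))
    {
        t->timeout = timeout;
        t->data = data;
        t->fn = fn;                     /* a real timer would fire later */
    }

    struct iriap_cb_sketch {
        struct timer watchdog_timer;
    };

    /* the expiry callback stays private to the file... */
    static void watchdog_timer_expired(void *data)
    {
        printf("query timed out, aborting (cb=%p)\n", data);
    }

    /* ...and the helper is a static inline wrapper around the generic API */
    static inline void start_watchdog_timer(struct iriap_cb_sketch *self,
                                            int timeout)
    {
        start_timer(&self->watchdog_timer, timeout, self,
                    watchdog_timer_expired);
    }

    int main(void)
    {
        struct iriap_cb_sketch cb;

        start_watchdog_timer(&cb, 10);
        cb.watchdog_timer.fn(cb.watchdog_timer.data);  /* simulate expiry */
        return 0;
    }

The point of the inline is that callers never see the callback symbol;
only the helper remains visible, and only inside this file.
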
* */ -void iriap_getvaluebyclass_confirm(struct iriap_cb *self, struct sk_buff *skb) +static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, + struct sk_buff *skb) { struct ias_value *value; int charset; @@ -552,8 +537,10 @@ void iriap_getvaluebyclass_confirm(struct iriap_cb *self, struct sk_buff *skb) * Send answer back to remote LM-IAS * */ -void iriap_getvaluebyclass_response(struct iriap_cb *self, __u16 obj_id, - __u8 ret_code, struct ias_value *value) +static void iriap_getvaluebyclass_response(struct iriap_cb *self, + __u16 obj_id, + __u8 ret_code, + struct ias_value *value) { struct sk_buff *tx_skb; int n; @@ -641,8 +628,8 @@ void iriap_getvaluebyclass_response(struct iriap_cb *self, __u16 obj_id, * getvaluebyclass is requested from peer LM-IAS * */ -void iriap_getvaluebyclass_indication(struct iriap_cb *self, - struct sk_buff *skb) +static void iriap_getvaluebyclass_indication(struct iriap_cb *self, + struct sk_buff *skb) { struct ias_object *obj; struct ias_attrib *attrib; @@ -962,7 +949,7 @@ void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb) * Query has taken too long time, so abort * */ -void iriap_watchdog_timer_expired(void *data) +static void iriap_watchdog_timer_expired(void *data) { struct iriap_cb *self = (struct iriap_cb *) data; diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index bc8d74df1563..98c9e1517a2f 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -116,7 +116,7 @@ EXPORT_SYMBOL(irias_new_object); * Delete given attribute and deallocate all its memory * */ -void __irias_delete_attrib(struct ias_attrib *attrib) +static void __irias_delete_attrib(struct ias_attrib *attrib) { ASSERT(attrib != NULL, return;); ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;); @@ -267,8 +267,8 @@ EXPORT_SYMBOL(irias_find_attrib); * Add attribute to object * */ -void irias_add_attrib( struct ias_object *obj, struct ias_attrib *attrib, - int owner) +static void irias_add_attrib(struct ias_object *obj, struct ias_attrib *attrib, + int owner) { ASSERT(obj != NULL, return;); ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index 90c8221654f9..92dbe620340e 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -66,6 +66,7 @@ static void irlan_client_ctrl_connect_confirm(void *instance, void *sap, struct sk_buff *); static void irlan_check_response_param(struct irlan_cb *self, char *param, char *value, int val_len); +static void irlan_client_open_ctrl_tsap(struct irlan_cb *self); static void irlan_client_kick_timer_expired(void *data) { @@ -88,7 +89,7 @@ static void irlan_client_kick_timer_expired(void *data) } } -void irlan_client_start_kick_timer(struct irlan_cb *self, int timeout) +static void irlan_client_start_kick_timer(struct irlan_cb *self, int timeout) { IRDA_DEBUG(4, "%s()\n", __FUNCTION__ ); @@ -248,7 +249,7 @@ static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap, * Initialize callbacks and open IrTTP TSAPs * */ -void irlan_client_open_ctrl_tsap(struct irlan_cb *self) +static void irlan_client_open_ctrl_tsap(struct irlan_cb *self) { struct tsap_cb *tsap; notify_t notify; @@ -309,42 +310,6 @@ static void irlan_client_ctrl_connect_confirm(void *instance, void *sap, } /* - * Function irlan_client_reconnect_data_channel (self) - * - * Try to reconnect data channel (currently not used) - * - */ -void irlan_client_reconnect_data_channel(struct irlan_cb *self) -{ - struct sk_buff *skb; - __u8 
*frame; - - IRDA_DEBUG(4, "%s()\n", __FUNCTION__ ); - - ASSERT(self != NULL, return;); - ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = dev_alloc_skb(128); - if (!skb) - return; - - /* Reserve space for TTP, LMP, and LAP header */ - skb_reserve(skb, self->max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - frame[0] = CMD_RECONNECT_DATA_CHAN; - frame[1] = 0x01; - irlan_insert_array_param(skb, "RECONNECT_KEY", - self->client.reconnect_key, - self->client.key_len); - - irttp_data_request(self->client.tsap_ctrl, skb); -} - - -/* * Function print_ret_code (code) * * Print return code of request to peer IrLAN layer. diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 032ab40f913a..fd9f3043119b 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -105,10 +105,13 @@ static struct file_operations irlan_fops = { extern struct proc_dir_entry *proc_irda; #endif /* CONFIG_PROC_FS */ +static struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr); static void __irlan_close(struct irlan_cb *self); static int __irlan_insert_param(struct sk_buff *skb, char *param, int type, __u8 value_byte, __u16 value_short, __u8 *value_array, __u16 value_len); +static void irlan_open_unicast_addr(struct irlan_cb *self); +static void irlan_get_unicast_addr(struct irlan_cb *self); void irlan_close_tsaps(struct irlan_cb *self); /* @@ -185,7 +188,7 @@ static void __exit irlan_cleanup(void) * Open new instance of a client/provider, we should only register the * network device if this instance is ment for a particular client/provider */ -struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr) +static struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr) { struct net_device *dev; struct irlan_cb *self; @@ -294,9 +297,11 @@ struct irlan_cb *irlan_get_any(void) * Here we receive the connect indication for the data channel * */ -void irlan_connect_indication(void *instance, void *sap, struct qos_info *qos, - __u32 max_sdu_size, __u8 max_header_size, - struct sk_buff *skb) +static void irlan_connect_indication(void *instance, void *sap, + struct qos_info *qos, + __u32 max_sdu_size, + __u8 max_header_size, + struct sk_buff *skb) { struct irlan_cb *self; struct tsap_cb *tsap; @@ -339,9 +344,11 @@ void irlan_connect_indication(void *instance, void *sap, struct qos_info *qos, netif_start_queue(self->dev); /* Clear reason */ } -void irlan_connect_confirm(void *instance, void *sap, struct qos_info *qos, - __u32 max_sdu_size, __u8 max_header_size, - struct sk_buff *skb) +static void irlan_connect_confirm(void *instance, void *sap, + struct qos_info *qos, + __u32 max_sdu_size, + __u8 max_header_size, + struct sk_buff *skb) { struct irlan_cb *self; @@ -384,8 +391,9 @@ void irlan_connect_confirm(void *instance, void *sap, struct qos_info *qos, * Callback function for the IrTTP layer. 
Indicates a disconnection of * the specified connection (handle) */ -void irlan_disconnect_indication(void *instance, void *sap, LM_REASON reason, - struct sk_buff *userdata) +static void irlan_disconnect_indication(void *instance, + void *sap, LM_REASON reason, + struct sk_buff *userdata) { struct irlan_cb *self; struct tsap_cb *tsap; @@ -602,7 +610,7 @@ int irlan_run_ctrl_tx_queue(struct irlan_cb *self) * This function makes sure that commands on the control channel is being * sent in a command/response fashion */ -void irlan_ctrl_data_request(struct irlan_cb *self, struct sk_buff *skb) +static void irlan_ctrl_data_request(struct irlan_cb *self, struct sk_buff *skb) { IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); @@ -722,7 +730,7 @@ void irlan_close_data_channel(struct irlan_cb *self) * address. * */ -void irlan_open_unicast_addr(struct irlan_cb *self) +static void irlan_open_unicast_addr(struct irlan_cb *self) { struct sk_buff *skb; __u8 *frame; @@ -839,7 +847,7 @@ void irlan_set_multicast_filter(struct irlan_cb *self, int status) * can construct its packets. * */ -void irlan_get_unicast_addr(struct irlan_cb *self) +static void irlan_get_unicast_addr(struct irlan_cb *self) { struct sk_buff *skb; __u8 *frame; diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c index 2793c54f3462..406d78a5f659 100644 --- a/net/irda/irlan/irlan_provider.c +++ b/net/irda/irlan/irlan_provider.c @@ -175,9 +175,9 @@ void irlan_provider_connect_response(struct irlan_cb *self, irttp_connect_response(tsap, IRLAN_MTU, NULL); } -void irlan_provider_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *userdata) +static void irlan_provider_disconnect_indication(void *instance, void *sap, + LM_REASON reason, + struct sk_buff *userdata) { struct irlan_cb *self; struct tsap_cb *tsap; diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 4466b69182a7..44fd6aa43171 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -60,6 +60,8 @@ int sysctl_warn_noreply_time = 3; extern void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb); static void __irlap_close(struct irlap_cb *self); +static void irlap_init_qos_capabilities(struct irlap_cb *self, + struct qos_info *qos_user); #ifdef CONFIG_IRDA_DEBUG static char *lap_reasons[] = { @@ -867,7 +869,7 @@ void irlap_flush_all_queues(struct irlap_cb *self) * Change the speed of the IrDA port * */ -void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now) +static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now) { struct sk_buff *skb; @@ -894,8 +896,8 @@ void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now) * IrLAP itself. Normally, IrLAP will not specify any values, but it can * be used to restrict certain values. */ -void irlap_init_qos_capabilities(struct irlap_cb *self, - struct qos_info *qos_user) +static void irlap_init_qos_capabilities(struct irlap_cb *self, + struct qos_info *qos_user) { ASSERT(self != NULL, return;); ASSERT(self->magic == LAP_MAGIC, return;); diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index 97ed757b844f..570a296c8530 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -181,7 +181,7 @@ static void irlap_poll_timer_expired(void *data) * Make sure that state is XMIT_P/XMIT_S when calling this function * (and that nobody messed up with the state). 
- Jean II */ -void irlap_start_poll_timer(struct irlap_cb *self, int timeout) +static void irlap_start_poll_timer(struct irlap_cb *self, int timeout) { ASSERT(self != NULL, return;); ASSERT(self->magic == LAP_MAGIC, return;); diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 388f4ea83e1a..346c95eb46dd 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -43,6 +43,9 @@ #include <net/irda/irlap_frame.h> #include <net/irda/qos.h> +static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb, + int command); + /* * Function irlap_insert_info (self, skb) * @@ -629,34 +632,6 @@ static inline void irlap_recv_rr_frame(struct irlap_cb *self, irlap_do_event(self, RECV_RR_RSP, skb, info); } -void irlap_send_frmr_frame( struct irlap_cb *self, int command) -{ - struct sk_buff *tx_skb = NULL; - __u8 *frame; - - ASSERT( self != NULL, return;); - ASSERT( self->magic == LAP_MAGIC, return;); - - tx_skb = dev_alloc_skb( 32); - if (!tx_skb) - return; - - frame = skb_put(tx_skb, 2); - - frame[0] = self->caddr; - frame[0] |= (command) ? CMD_FRAME : 0; - - frame[1] = (self->vs << 1); - frame[1] |= PF_BIT; - frame[1] |= (self->vr << 5); - - frame[2] = 0; - - IRDA_DEBUG(4, "%s(), vr=%d, %ld\n", __FUNCTION__, self->vr, jiffies); - - irlap_queue_xmit(self, tx_skb); -} - /* * Function irlap_recv_rnr_frame (self, skb, info) * @@ -1129,8 +1104,8 @@ void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb, * * Contruct and transmit Information (I) frame */ -void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb, - int command) +static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb, + int command) { /* Insert connection address */ skb->data[0] = self->caddr; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index f0adafd6fc3b..26b22f49076b 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -44,6 +44,9 @@ #include <net/irda/irlmp.h> #include <net/irda/irlmp_frame.h> +static __u8 irlmp_find_free_slsap(void); +static int irlmp_slsap_inuse(__u8 slsap_sel); + /* Master structure */ struct irlmp_cb *irlmp = NULL; @@ -1278,11 +1281,6 @@ void irlmp_connless_data_indication(struct lsap_cb *self, struct sk_buff *skb) } #endif /* CONFIG_IRDA_ULTRA */ -void irlmp_status_request(void) -{ - IRDA_DEBUG(0, "%s(), Not implemented\n", __FUNCTION__); -} - /* * Propagate status indication from LAP to LSAPs (via LMP) * This don't trigger any change of state in lap_cb, lmp_cb or lsap_cb, @@ -1656,7 +1654,7 @@ EXPORT_SYMBOL(irlmp_unregister_client); * of the allocated LSAP, but I'm not sure the complexity is worth it. * Jean II */ -int irlmp_slsap_inuse(__u8 slsap_sel) +static int irlmp_slsap_inuse(__u8 slsap_sel) { struct lsap_cb *self; struct lap_cb *lap; @@ -1756,7 +1754,7 @@ erruncon: * Find a free source LSAP to use. This function is called if the service * user has requested a source LSAP equal to LM_ANY */ -__u8 irlmp_find_free_slsap(void) +static __u8 irlmp_find_free_slsap(void) { __u8 lsap_sel; int wrapped = 0; diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 7ecc67c0a9ae..6ffaed4544e9 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -100,7 +100,7 @@ EXPORT_SYMBOL(irda_notify_init); * Protocol stack initialisation entry point. * Initialise the various components of the IrDA stack */ -int __init irda_init(void) +static int __init irda_init(void) { IRDA_DEBUG(0, "%s()\n", __FUNCTION__); @@ -136,7 +136,7 @@ int __init irda_init(void) * Protocol stack cleanup/removal entry point. 
* Cleanup the various components of the IrDA stack */ -void __exit irda_cleanup(void) +static void __exit irda_cleanup(void) { /* Remove External APIs */ #ifdef CONFIG_SYSCTL diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index 0ed25a123047..9004f7349a76 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -363,13 +363,13 @@ /* Exit a function with debug */ #define DRETURN(ret, dbg, args...) \ {DEXIT(dbg, ": " args);\ - return(ret); } + return ret; } /* Exit a function on failed condition */ #define DABORT(cond, ret, dbg, args...) \ {if(cond) {\ DERROR(dbg, args);\ - return(ret); }} + return ret; }} /* Invalid assertion, print out an error and exit... */ #define DASSERT(cond, ret, dbg, args...) \ @@ -520,8 +520,6 @@ extern void /* ---------------------------- MODULE ---------------------------- */ extern int irnet_init(void); /* Initialise IrNET module */ -extern void - irnet_cleanup(void); /* Teardown IrNET module */ /**************************** VARIABLES ****************************/ diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 24748b614f3f..123f92fb63a6 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -16,6 +16,12 @@ #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ +/* Generic PPP callbacks (to call us) */ +static struct ppp_channel_ops irnet_ppp_ops = { + .start_xmit = ppp_irnet_send, + .ioctl = ppp_irnet_ioctl +}; + /************************* CONTROL CHANNEL *************************/ /* * When a pppd instance is not active on /dev/irnet, it acts as a control @@ -1095,7 +1101,7 @@ irnet_init(void) /* * Module exit */ -void __exit +static void __exit irnet_cleanup(void) { irda_irnet_cleanup(); diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h index 8b4e06c55d5a..d2beb7df8f7f 100644 --- a/net/irda/irnet/irnet_ppp.h +++ b/net/irda/irnet/irnet_ppp.h @@ -116,11 +116,4 @@ static struct miscdevice irnet_misc_device = &irnet_device_fops }; -/* Generic PPP callbacks (to call us) */ -struct ppp_channel_ops irnet_ppp_ops = -{ - ppp_irnet_send, - ppp_irnet_ioctl -}; - #endif /* IRNET_PPP_H */ diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index cdcb320b56d1..1b1c4193359a 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -43,7 +43,6 @@ extern int sysctl_discovery_slots; extern int sysctl_discovery_timeout; extern int sysctl_slot_timeout; extern int sysctl_fast_poll_increase; -int sysctl_compression = 0; extern char sysctl_devname[]; extern int sysctl_max_baud_rate; extern int sysctl_min_tx_turn_time; diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 093c362e0029..1a8d3b17aee2 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -64,6 +64,10 @@ static void irttp_todo_expired(unsigned long data); static int irttp_param_max_sdu_size(void *instance, irda_param_t *param, int get); +static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow); +static void irttp_status_indication(void *instance, + LINK_STATUS link, LOCK_STATUS lock); + /* Information for parsing parameters in IrTTP */ static pi_minor_info_t pi_minor_call_table[] = { { NULL, 0 }, /* 0x00 */ @@ -961,8 +965,8 @@ static int irttp_data_indication(void *instance, void *sap, * Status_indication, just pass to the higher layer... 
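
The irttp.c change above (like the matching ones in ircomm_ttp.c and
ircomm_lmp.c earlier in this patch) follows one recipe: forward-declare
the callbacks as static near the top of the file, hand them out through
the notify structure, and drop the global definitions. A minimal
compilable sketch, with a stub struct standing in for notify_t and
simplified callback signatures:

    #include <stdio.h>

    /* callbacks are forward-declared as static up front... */
    static void flow_indication(void *instance, int flow);
    static void status_indication(void *instance, int link, int lock);

    struct notify_sketch {              /* stand-in for notify_t */
        void (*flow_indication)(void *instance, int flow);
        void (*status_indication)(void *instance, int link, int lock);
        void *instance;
    };

    /* ...so registration can happen before the definitions appear */
    static void register_callbacks(struct notify_sketch *n, void *instance)
    {
        n->flow_indication = flow_indication;
        n->status_indication = status_indication;
        n->instance = instance;
    }

    static void flow_indication(void *instance, int flow)
    {
        printf("flow %d for %p\n", flow, instance);
    }

    static void status_indication(void *instance, int link, int lock)
    {
        printf("status link=%d lock=%d for %p\n", link, lock, instance);
    }

    int main(void)
    {
        struct notify_sketch n;
        int dummy;

        register_callbacks(&n, &dummy);
        n.flow_indication(n.instance, 1);
        return 0;
    }

Nothing outside the file can name the callbacks any more; the only way
in is through the registered function pointers.
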
* */ -void irttp_status_indication(void *instance, - LINK_STATUS link, LOCK_STATUS lock) +static void irttp_status_indication(void *instance, + LINK_STATUS link, LOCK_STATUS lock) { struct tsap_cb *self; @@ -993,7 +997,7 @@ void irttp_status_indication(void *instance, * Flow_indication : IrLAP tells us to send more data. * */ -void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) +static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) { struct tsap_cb *self; @@ -1613,7 +1617,7 @@ void irttp_disconnect_indication(void *instance, void *sap, LM_REASON reason, * for some reason should fail. We mark rx sdu as busy to apply back * pressure is necessary. */ -void irttp_do_data_indication(struct tsap_cb *self, struct sk_buff *skb) +static void irttp_do_data_indication(struct tsap_cb *self, struct sk_buff *skb) { int err; diff --git a/net/irda/parameters.c b/net/irda/parameters.c index 2136b4441913..9d26cba2c3eb 100644 --- a/net/irda/parameters.c +++ b/net/irda/parameters.c @@ -51,6 +51,8 @@ static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi, static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi, PV_TYPE type, PI_HANDLER func); +static int irda_param_unpack(__u8 *buf, char *fmt, ...); + /* Parameter value call table. Must match PV_TYPE */ static PV_HANDLER pv_extract_table[] = { irda_extract_integer, /* Handler for any length integers */ @@ -400,7 +402,7 @@ EXPORT_SYMBOL(irda_param_pack); /* * Function irda_param_unpack (skb, fmt, ...) */ -int irda_param_unpack(__u8 *buf, char *fmt, ...) +static int irda_param_unpack(__u8 *buf, char *fmt, ...) { irda_pv_t arg; va_list args; @@ -440,7 +442,6 @@ int irda_param_unpack(__u8 *buf, char *fmt, ...) return 0; } -EXPORT_SYMBOL(irda_param_unpack); /* * Function irda_param_insert (self, pi, buf, len, info) @@ -496,13 +497,14 @@ int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len, EXPORT_SYMBOL(irda_param_insert); /* - * Function irda_param_extract_all (self, buf, len, info) + * Function irda_param_extract (self, buf, len, info) * * Parse all parameters. If len is correct, then everything should be * safe. 
Returns the number of bytes that was parsed * */ -int irda_param_extract(void *self, __u8 *buf, int len, pi_param_info_t *info) +static int irda_param_extract(void *self, __u8 *buf, int len, + pi_param_info_t *info) { pi_minor_info_t *pi_minor_info; __u8 pi_minor; @@ -549,7 +551,6 @@ int irda_param_extract(void *self, __u8 *buf, int len, pi_param_info_t *info) type, pi_minor_info->func); return ret; } -EXPORT_SYMBOL(irda_param_extract); /* * Function irda_param_extract_all (self, buf, len, info) diff --git a/net/irda/qos.c b/net/irda/qos.c index d37dd2548325..288d468a8fca 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -96,6 +96,10 @@ static int irlap_param_additional_bofs(void *instance, irda_param_t *parm, static int irlap_param_min_turn_time(void *instance, irda_param_t *param, int get); +#ifndef CONFIG_IRDA_DYNAMIC_WINDOW +static __u32 irlap_requested_line_capacity(struct qos_info *qos); +#endif + static __u32 min_turn_times[] = { 10000, 5000, 1000, 500, 100, 50, 10, 0 }; /* us */ static __u32 baud_rates[] = { 2400, 9600, 19200, 38400, 57600, 115200, 576000, 1152000, 4000000, 16000000 }; /* bps */ @@ -333,7 +337,7 @@ EXPORT_SYMBOL(irda_init_max_qos_capabilies); * Adjust QoS settings in case some values are not possible to use because * of other settings */ -void irlap_adjust_qos_settings(struct qos_info *qos) +static void irlap_adjust_qos_settings(struct qos_info *qos) { __u32 line_capacity; int index; @@ -723,19 +727,22 @@ __u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time) return line_capacity; } -__u32 irlap_requested_line_capacity(struct qos_info *qos) -{ __u32 line_capacity; - - line_capacity = qos->window_size.value * +#ifndef CONFIG_IRDA_DYNAMIC_WINDOW +static __u32 irlap_requested_line_capacity(struct qos_info *qos) +{ + __u32 line_capacity; + + line_capacity = qos->window_size.value * (qos->data_size.value + 6 + qos->additional_bofs.value) + - irlap_min_turn_time_in_bytes(qos->baud_rate.value, + irlap_min_turn_time_in_bytes(qos->baud_rate.value, qos->min_turn_time.value); - + IRDA_DEBUG(2, "%s(), requested line capacity=%d\n", __FUNCTION__, line_capacity); - - return line_capacity; + + return line_capacity; } +#endif void irda_qos_bits_to_value(struct qos_info *qos) { diff --git a/net/key/af_key.c b/net/key/af_key.c index ed9d9bebdd35..cc4a9c8c038c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -35,7 +35,7 @@ /* List of all pfkey sockets. 
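
Just below, the global pfkey_table becomes a static HLIST_HEAD, removing
the symbol from the kernel's global namespace without changing behaviour.
A rough user-space sketch of what the declaration amounts to (simplified:
the real hlist_node also keeps a pprev back-pointer, omitted here):

    #include <stdio.h>

    struct hlist_head { struct hlist_node *first; };
    struct hlist_node { struct hlist_node *next; };

    /* rough expansion of the kernel's HLIST_HEAD() macro */
    #define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }

    /* 'static' keeps the table file-local; only this file can touch it */
    static HLIST_HEAD(pfkey_table_sketch);

    struct pfkey_sock_sketch {
        int id;
        struct hlist_node node;
    };

    static void add_sock(struct pfkey_sock_sketch *s)
    {
        s->node.next = pfkey_table_sketch.first;   /* push at head */
        pfkey_table_sketch.first = &s->node;
    }

    int main(void)
    {
        struct pfkey_sock_sketch a = { .id = 1 }, b = { .id = 2 };

        add_sock(&a);
        add_sock(&b);
        for (struct hlist_node *n = pfkey_table_sketch.first; n; n = n->next)
            printf("entry %p\n", (void *)n);
        return 0;
    }
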
*/ -HLIST_HEAD(pfkey_table); +static HLIST_HEAD(pfkey_table); static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait); static rwlock_t pfkey_table_lock = RW_LOCK_UNLOCKED; static atomic_t pfkey_table_users = ATOMIC_INIT(0); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index f04f96fe5080..6d1eefaa10da 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -33,6 +33,13 @@ static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb); static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb); static int llc_conn_ac_data_confirm(struct sock *sk, struct sk_buff *ev); +static int llc_conn_ac_inc_npta_value(struct sock *sk, struct sk_buff *skb); + +static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, + struct sk_buff *skb); + +static int llc_conn_ac_set_p_flag_1(struct sock *sk, struct sk_buff *skb); + #define INCORRECT 0 int llc_conn_ac_clear_remote_busy(struct sock *sk, struct sk_buff *skb) @@ -185,11 +192,6 @@ int llc_conn_ac_rst_confirm(struct sock *sk, struct sk_buff *skb) return 0; } -int llc_conn_ac_report_status(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - int llc_conn_ac_clear_remote_busy_if_f_eq_1(struct sock *sk, struct sk_buff *skb) { @@ -291,32 +293,6 @@ free: goto out; } -int llc_conn_ac_send_dm_rsp_f_set_f_flag(struct sock *sk, struct sk_buff *skb) -{ - int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); - - if (nskb) { - struct llc_opt *llc = llc_sk(sk); - struct llc_sap *sap = llc->sap; - u8 f_bit = llc->f_flag; - - nskb->dev = llc->dev; - llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, - llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_dm_rsp(nskb, f_bit); - rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) - goto free; - llc_conn_send_pdu(sk, nskb); - } -out: - return rc; -free: - kfree_skb(nskb); - goto out; -} - int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) { u8 f_bit; @@ -426,7 +402,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) return rc; } -int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) +static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) { int rc; struct llc_opt *llc = llc_sk(sk); @@ -443,27 +419,6 @@ int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) return rc; } -int llc_conn_ac_resend_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) -{ - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 nr = LLC_I_GET_NR(pdu); - - llc_conn_resend_i_pdu_as_cmd(sk, nr, 1); - return 0; -} - -int llc_conn_ac_resend_i_cmd_p_set_1_or_send_rr(struct sock *sk, - struct sk_buff *skb) -{ - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 nr = LLC_I_GET_NR(pdu); - int rc = llc_conn_ac_send_rr_cmd_p_set_1(sk, skb); - - if (!rc) - llc_conn_resend_i_pdu_as_cmd(sk, nr, 0); - return rc; -} - int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc; @@ -745,31 +700,6 @@ free: goto out; } -int llc_conn_ac_send_ack_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) -{ - int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); - - if (nskb) { - struct llc_opt *llc = llc_sk(sk); - struct llc_sap *sap = llc->sap; - - nskb->dev = llc->dev; - llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, - llc->daddr.lsap, LLC_PDU_CMD); - llc_pdu_init_as_rr_cmd(nskb, 1, llc->vR); - rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) - goto free; - llc_conn_send_pdu(sk, nskb); - } -out: - return rc; -free: - kfree_skb(nskb); 
- goto out; -} - int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; @@ -911,31 +841,6 @@ free: goto out; } -int llc_conn_ac_send_ua_rsp_f_set_f_flag(struct sock *sk, struct sk_buff *skb) -{ - int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); - - if (nskb) { - struct llc_opt *llc = llc_sk(sk); - struct llc_sap *sap = llc->sap; - - nskb->dev = llc->dev; - llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, - llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_ua_rsp(nskb, llc->f_flag); - rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) - goto free; - llc_conn_send_pdu(sk, nskb); - } -out: - return rc; -free: - kfree_skb(nskb); - goto out; -} - int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { u8 f_bit; @@ -1041,7 +946,8 @@ int llc_conn_ac_rst_sendack_flag(struct sock *sk, struct sk_buff *skb) * set to one if one PDU with p-bit set to one is received. Returns 0 for * success, 1 otherwise. */ -int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk, struct sk_buff *skb) +static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk, + struct sk_buff *skb) { int rc; struct llc_opt *llc = llc_sk(sk); @@ -1091,7 +997,8 @@ int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb) * if there is any. ack_pf flag indicates if a PDU has been received with * p-bit set to one. Returns 0 for success, 1 otherwise. */ -int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, struct sk_buff *skb) +static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, + struct sk_buff *skb) { int rc = -ENOBUFS; struct sk_buff *nskb = llc_alloc_frame(); @@ -1126,7 +1033,7 @@ free: * acknowledgements decreases by increasing of "npta". Returns 0 for * success, 1 otherwise. */ -int llc_conn_ac_inc_npta_value(struct sock *sk, struct sk_buff *skb) +static int llc_conn_ac_inc_npta_value(struct sock *sk, struct sk_buff *skb) { struct llc_opt *llc = llc_sk(sk); @@ -1387,7 +1294,7 @@ int llc_conn_ac_set_p_flag_0(struct sock *sk, struct sk_buff *skb) return 0; } -int llc_conn_ac_set_p_flag_1(struct sock *sk, struct sk_buff *skb) +static int llc_conn_ac_set_p_flag_1(struct sock *sk, struct sk_buff *skb) { llc_conn_set_p_flag(sk, 1); return 0; @@ -1453,12 +1360,6 @@ int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) return 0; } -int llc_conn_ac_set_f_flag_p(struct sock *sk, struct sk_buff *skb) -{ - llc_pdu_decode_pf_bit(skb, &llc_sk(sk)->f_flag); - return 0; -} - void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) { struct sock *sk = (struct sock *)timeout_data; diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c index 77d630de2186..c7ed6c76cef3 100644 --- a/net/llc/llc_c_ev.c +++ b/net/llc/llc_c_ev.c @@ -105,14 +105,6 @@ int llc_conn_ev_conn_req(struct sock *sk, struct sk_buff *skb) ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; } -int llc_conn_ev_conn_resp(struct sock *sk, struct sk_buff *skb) -{ - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - return ev->prim == LLC_CONN_PRIM && - ev->prim_type == LLC_PRIM_TYPE_RESP ? 0 : 1; -} - int llc_conn_ev_data_req(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -137,14 +129,6 @@ int llc_conn_ev_rst_req(struct sock *sk, struct sk_buff *skb) ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; } -int llc_conn_ev_rst_resp(struct sock *sk, struct sk_buff *skb) -{ - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - return ev->prim == LLC_RESET_PRIM && - ev->prim_type == LLC_PRIM_TYPE_RESP ? 
0 : 1; -} - int llc_conn_ev_local_busy_detected(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -474,27 +458,6 @@ int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) return rc; } -int llc_conn_ev_rx_xxx_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) -{ - u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - - if (LLC_PDU_IS_CMD(pdu)) { - if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) { - if (LLC_I_PF_IS_0(pdu)) - rc = 0; - } else if (LLC_PDU_TYPE_IS_U(pdu)) - switch (LLC_U_PDU_CMD(pdu)) { - case LLC_2_PDU_CMD_SABME: - case LLC_2_PDU_CMD_DISC: - if (LLC_U_PF_IS_0(pdu)) - rc = 0; - break; - } - } - return rc; -} - int llc_conn_ev_rx_xxx_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; @@ -557,26 +520,6 @@ int llc_conn_ev_rx_xxx_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) return rc; } -int llc_conn_ev_rx_xxx_yyy(struct sock *sk, struct sk_buff *skb) -{ - u16 rc = 1; - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - - if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) - rc = 0; - else if (LLC_PDU_TYPE_IS_U(pdu)) - switch (LLC_U_PDU_CMD(pdu)) { - case LLC_2_PDU_CMD_SABME: - case LLC_2_PDU_CMD_DISC: - case LLC_2_PDU_RSP_UA: - case LLC_2_PDU_RSP_DM: - case LLC_2_PDU_RSP_FRMR: - rc = 0; - break; - } - return rc; -} - int llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr(struct sock *sk, struct sk_buff *skb) { @@ -646,16 +589,6 @@ int llc_conn_ev_busy_tmr_exp(struct sock *sk, struct sk_buff *skb) return ev->type != LLC_CONN_EV_TYPE_BUSY_TMR; } -int llc_conn_ev_any_tmr_exp(struct sock *sk, struct sk_buff *skb) -{ - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - return ev->type == LLC_CONN_EV_TYPE_P_TMR || - ev->type == LLC_CONN_EV_TYPE_ACK_TMR || - ev->type == LLC_CONN_EV_TYPE_REJ_TMR || - ev->type == LLC_CONN_EV_TYPE_BUSY_TMR ? 0 : 1; -} - int llc_conn_ev_init_p_f_cycle(struct sock *sk, struct sk_buff *skb) { return 1; @@ -778,11 +711,6 @@ int llc_conn_ev_qlfy_cause_flag_eq_0(struct sock *sk, struct sk_buff *skb) return llc_sk(sk)->cause_flag; } -int llc_conn_ev_qlfy_init_p_f_cycle(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - int llc_conn_ev_qlfy_set_status_conn(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -799,14 +727,6 @@ int llc_conn_ev_qlfy_set_status_disc(struct sock *sk, struct sk_buff *skb) return 0; } -int llc_conn_ev_qlfy_set_status_impossible(struct sock *sk, struct sk_buff *skb) -{ - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - ev->status = LLC_STATUS_IMPOSSIBLE; - return 0; -} - int llc_conn_ev_qlfy_set_status_failed(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -824,14 +744,6 @@ int llc_conn_ev_qlfy_set_status_remote_busy(struct sock *sk, return 0; } -int llc_conn_ev_qlfy_set_status_received(struct sock *sk, struct sk_buff *skb) -{ - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - ev->status = LLC_STATUS_RECEIVED; - return 0; -} - int llc_conn_ev_qlfy_set_status_refuse(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 075364466ac1..9dd0f26a1534 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -503,7 +503,8 @@ found: * local mac, and local sap. Returns pointer for parent socket found, * %NULL otherwise. 
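
Just below, llc_lookup_listener() becomes static as well. Its job,
finding the listening socket whose local MAC and local SAP match the
incoming frame, has roughly this shape; the structs and the list walk
are stand-ins rather than the kernel's sk/hlist machinery:

    #include <stdio.h>
    #include <string.h>

    struct llc_addr_sketch {
        unsigned char mac[6];
        unsigned char lsap;
    };

    struct listener_sketch {
        struct llc_addr_sketch laddr;
        struct listener_sketch *next;
    };

    /* walk the SAP's socket list and match on local MAC + local SAP */
    static struct listener_sketch *
    lookup_listener(struct listener_sketch *head,
                    const struct llc_addr_sketch *laddr)
    {
        for (struct listener_sketch *s = head; s; s = s->next)
            if (s->laddr.lsap == laddr->lsap &&
                !memcmp(s->laddr.mac, laddr->mac, sizeof(laddr->mac)))
                return s;
        return NULL;        /* %NULL when no parent socket matches */
    }

    int main(void)
    {
        struct listener_sketch a = {
            .laddr = { .mac = {1, 2, 3, 4, 5, 6}, .lsap = 0x42 },
        };
        struct llc_addr_sketch want = {
            .mac = {1, 2, 3, 4, 5, 6}, .lsap = 0x42,
        };

        printf("%s\n", lookup_listener(&a, &want) ? "found" : "not found");
        return 0;
    }
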
*/ -struct sock *llc_lookup_listener(struct llc_sap *sap, struct llc_addr *laddr) +static struct sock *llc_lookup_listener(struct llc_sap *sap, + struct llc_addr *laddr) { struct sock *rc; struct hlist_node *node; @@ -546,7 +547,7 @@ u8 llc_data_accept_state(u8 state) * Finds offset of next category of transitions in transition table. * Returns the start index of next category. */ -u16 find_next_offset(struct llc_conn_state *state, u16 offset) +static u16 find_next_offset(struct llc_conn_state *state, u16 offset) { u16 cnt = 0; struct llc_conn_state_trans **next_trans; @@ -785,7 +786,7 @@ out_kfree_skb: * * Initializes a socket with default llc values. */ -int llc_sk_init(struct sock* sk) +static int llc_sk_init(struct sock* sk) { struct llc_opt *llc = kmalloc(sizeof(*llc), GFP_ATOMIC); int rc = -ENOMEM; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index e1140375e665..0b737bd77062 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -31,7 +31,7 @@ unsigned char llc_station_mac_sa[ETH_ALEN]; * * Allocates and initializes sap. */ -struct llc_sap *llc_sap_alloc(void) +static struct llc_sap *llc_sap_alloc(void) { struct llc_sap *sap = kmalloc(sizeof(*sap), GFP_ATOMIC); @@ -50,7 +50,7 @@ struct llc_sap *llc_sap_alloc(void) * * Adds a sap to the LLC's station sap list. */ -void llc_add_sap(struct llc_sap *sap) +static void llc_add_sap(struct llc_sap *sap) { write_lock_bh(&llc_sap_list_lock); list_add_tail(&sap->node, &llc_sap_list); @@ -63,7 +63,7 @@ void llc_add_sap(struct llc_sap *sap) * * Removes a sap to the LLC's station sap list. */ -void llc_del_sap(struct llc_sap *sap) +static void llc_del_sap(struct llc_sap *sap) { write_lock_bh(&llc_sap_list_lock); list_del(&sap->node); diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 31253d2a3bc9..5949147018af 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -155,27 +155,3 @@ out: return rc; } -/** - * llc_build_and_send_reset_pkt - Resets an established LLC connection - * @prim: pointer to structure that contains service parameters. - * - * Called when upper layer wants to reset an established LLC connection - * with a remote machine. This function packages a proper event and sends - * it to connection component state machine. Returns 0 for success, 1 - * otherwise. - */ -int llc_build_and_send_reset_pkt(struct sock *sk) -{ - int rc = 1; - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - if (skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - ev->type = LLC_CONN_EV_TYPE_PRIM; - ev->prim = LLC_RESET_PRIM; - ev->prim_type = LLC_PRIM_TYPE_REQ; - rc = llc_conn_state_process(sk, skb); - } - return rc; -} diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c index 66344fabd84d..a28ce525d201 100644 --- a/net/llc/llc_pdu.c +++ b/net/llc/llc_pdu.c @@ -80,19 +80,6 @@ void llc_pdu_decode_pf_bit(struct sk_buff *skb, u8 *pf_bit) } /** - * llc_pdu_decode_cr_bit - extracts command response bit from LLC header - * @skb: input skb that c/r bit must be extracted from it. - * @cr_bit: command/response bit (0 or 1). - * - * This function extracts command/response bit from LLC header. this bit - * is right bit of source SAP. 
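
The llc_pdu_decode_cr_bit() helper removed just below was a one-liner:
in an 802.2 LLC PDU the command/response flag is the least significant
bit of the SSAP octet (assuming LLC_PDU_CMD_RSP_MASK is 0x01, as in the
kernel's llc_pdu.h). Any remaining caller can open-code the mask:

    #include <stdio.h>

    #define LLC_PDU_CMD_RSP_MASK 0x01   /* low bit of the SSAP octet */

    /* the source-SAP octet carries the C/R flag in its low bit */
    static unsigned char decode_cr_bit(unsigned char ssap)
    {
        return ssap & LLC_PDU_CMD_RSP_MASK;
    }

    int main(void)
    {
        printf("ssap 0xAA -> C/R %u\n", decode_cr_bit(0xAA)); /* 0: command  */
        printf("ssap 0xAB -> C/R %u\n", decode_cr_bit(0xAB)); /* 1: response */
        return 0;
    }
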
- */ -void llc_pdu_decode_cr_bit(struct sk_buff *skb, u8 *cr_bit) -{ - *cr_bit = llc_pdu_un_hdr(skb)->ssap & LLC_PDU_CMD_RSP_MASK; -} - -/** * llc_pdu_init_as_disc_cmd - Builds DISC PDU * @skb: Address of the skb to build * @p_bit: The P bit to set in the PDU diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index a991727b366a..c7de46e5cea4 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -185,14 +185,14 @@ out: return 0; } -struct seq_operations llc_seq_socket_ops = { +static struct seq_operations llc_seq_socket_ops = { .start = llc_seq_start, .next = llc_seq_next, .stop = llc_seq_stop, .show = llc_seq_socket_show, }; -struct seq_operations llc_seq_core_ops = { +static struct seq_operations llc_seq_core_ops = { .start = llc_seq_start, .next = llc_seq_next, .stop = llc_seq_stop, diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 0d42af0f6352..89ca222cfba5 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -173,7 +173,7 @@ out: * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. */ -void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) +static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); @@ -275,7 +275,8 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb) * Search socket list of the SAP and finds connection using the local * mac, and local sap. Returns pointer for socket found, %NULL otherwise. */ -struct sock *llc_lookup_dgram(struct llc_sap *sap, struct llc_addr *laddr) +static struct sock *llc_lookup_dgram(struct llc_sap *sap, + struct llc_addr *laddr) { struct sock *rc; struct hlist_node *node; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 76177cdcf591..8fe48a24bad5 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -642,7 +642,7 @@ static void llc_station_service_events(void) * Queues an event (on the station event queue) for handling by the * station state machine and attempts to process any queued-up events. */ -void llc_station_state_process(struct sk_buff *skb) +static void llc_station_state_process(struct sk_buff *skb) { spin_lock_bh(&llc_main_station.ev_q.lock); skb_queue_tail(&llc_main_station.ev_q.list, skb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 360913944ae0..755a3b10f9e1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -546,7 +546,7 @@ static void netlink_overrun(struct sock *sk) } } -struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) +static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { int protocol = ssk->sk_protocol; struct sock *sock; @@ -1210,30 +1210,6 @@ static rwlock_t nl_emu_lock = RW_LOCK_UNLOCKED; * Backward compatibility. 
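
The netlink_attach()/netlink_detach() pair removed just below was a
backward-compatibility shim: create a kernel-side netlink socket, then
remember it in a unit-indexed table under a lock. Stripped of the socket
and locking details, the shim reduces to the sketch here (names are
hypothetical; new code is expected to call netlink_kernel_create()
directly, as the removed function itself did):

    #include <stdio.h>

    #define MAX_LINKS 32

    typedef int (*nl_handler)(int unit);

    /* unit-indexed table, as in the old emulation layer */
    static nl_handler netlink_handlers[MAX_LINKS];

    /* shape of the removed netlink_attach(): remember the handler
     * under its protocol unit */
    static int attach_sketch(int unit, nl_handler fn)
    {
        if (unit < 0 || unit >= MAX_LINKS || netlink_handlers[unit])
            return -1;
        netlink_handlers[unit] = fn;
        return 0;
    }

    static int detach_sketch(int unit)
    {
        if (unit < 0 || unit >= MAX_LINKS || !netlink_handlers[unit])
            return -1;
        netlink_handlers[unit] = NULL;
        return 0;
    }

    static int demo_handler(int unit)
    {
        printf("message on unit %d\n", unit);
        return 0;
    }

    int main(void)
    {
        attach_sketch(7, demo_handler);
        if (netlink_handlers[7])
            netlink_handlers[7](7);
        detach_sketch(7);
        return 0;
    }
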
*/ -int netlink_attach(int unit, int (*function)(int, struct sk_buff *skb)) -{ - struct sock *sk = netlink_kernel_create(unit, NULL); - if (sk == NULL) - return -ENOBUFS; - nlk_sk(sk)->handler = function; - write_lock_bh(&nl_emu_lock); - netlink_kernel[unit] = sk->sk_socket; - write_unlock_bh(&nl_emu_lock); - return 0; -} - -void netlink_detach(int unit) -{ - struct socket *sock; - - write_lock_bh(&nl_emu_lock); - sock = netlink_kernel[unit]; - netlink_kernel[unit] = NULL; - write_unlock_bh(&nl_emu_lock); - - sock_release(sock); -} - int netlink_post(int unit, struct sk_buff *skb) { struct socket *sock; @@ -1522,7 +1498,5 @@ EXPORT_SYMBOL(netlink_unicast); EXPORT_SYMBOL(netlink_unregister_notifier); #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) -EXPORT_SYMBOL(netlink_attach); -EXPORT_SYMBOL(netlink_detach); EXPORT_SYMBOL(netlink_post); #endif diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 62ff798bf306..8576b42ce7fa 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -43,7 +43,7 @@ #include <net/arp.h> #include <linux/init.h> -int nr_ndevs = 4; +static int nr_ndevs = 4; int sysctl_netrom_default_path_quality = NR_DEFAULT_QUAL; int sysctl_netrom_obsolescence_count_initialiser = NR_DEFAULT_OBS; diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index f667a29e1a07..d39549eb2fdd 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -45,7 +45,7 @@ static spinlock_t nr_node_list_lock = SPIN_LOCK_UNLOCKED; static HLIST_HEAD(nr_neigh_list); static spinlock_t nr_neigh_list_lock = SPIN_LOCK_UNLOCKED; -struct nr_node *nr_node_get(ax25_address *callsign) +static struct nr_node *nr_node_get(ax25_address *callsign) { struct nr_node *found = NULL; struct nr_node *nr_node; @@ -62,7 +62,8 @@ struct nr_node *nr_node_get(ax25_address *callsign) return found; } -struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, struct net_device *dev) +static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, + struct net_device *dev) { struct nr_neigh *found = NULL; struct nr_neigh *nr_neigh; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 31bce98d6760..60e6d827436b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -145,10 +145,10 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it) */ /* List of all packet sockets. */ -HLIST_HEAD(packet_sklist); +static HLIST_HEAD(packet_sklist); static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED; -atomic_t packet_socks_nr; +static atomic_t packet_socks_nr; /* Private packet socket structures. 
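
Just below, af_packet.c turns its proto_ops tables from extern
declarations into static ones. The file-scope static declaration is a C
tentative definition, so earlier code can reference the table even
though its initializer only appears near the bottom of the file. A
minimal demonstration of that property, with a hypothetical ops struct:

    #include <stdio.h>

    struct proto_ops_sketch {
        int (*bind)(void);
        int (*sendmsg)(void);
    };

    /* forward declaration: 'static' here replaces the old 'extern', so
     * the table is usable above its initializer without being exported */
    static struct proto_ops_sketch packet_ops_sketch;

    static int use_ops(void)
    {
        return packet_ops_sketch.bind ? packet_ops_sketch.bind() : -1;
    }

    static int do_bind(void)    { return 0; }
    static int do_sendmsg(void) { return 0; }

    /* the actual definition appears later in the file */
    static struct proto_ops_sketch packet_ops_sketch = {
        .bind    = do_bind,
        .sendmsg = do_sendmsg,
    };

    int main(void)
    {
        printf("bind -> %d\n", use_ops());
        return 0;
    }
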
*/ @@ -215,7 +215,7 @@ static inline char *packet_lookup_frame(struct packet_opt *po, unsigned int posi #define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo) -void packet_sock_destruct(struct sock *sk) +static void packet_sock_destruct(struct sock *sk) { BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); @@ -234,10 +234,10 @@ void packet_sock_destruct(struct sock *sk) } -extern struct proto_ops packet_ops; +static struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET -extern struct proto_ops packet_ops_spkt; +static struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { @@ -1350,8 +1350,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv } } -int packet_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) +static int packet_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) { int len; struct sock *sk = sock->sk; @@ -1500,7 +1500,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, #define packet_poll datagram_poll #else -unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait) +static unsigned int packet_poll(struct file * file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct packet_opt *po = pkt_sk(sk); @@ -1747,7 +1748,7 @@ out: #ifdef CONFIG_SOCK_PACKET -struct proto_ops packet_ops_spkt = { +static struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, @@ -1769,7 +1770,7 @@ struct proto_ops packet_ops_spkt = { }; #endif -struct proto_ops packet_ops = { +static struct proto_ops packet_ops = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4f6a9fab4e54..3d26b72b2b3a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -59,7 +59,7 @@ int sysctl_rose_maximum_vcs = ROSE_DEFAULT_MAXVC; int sysctl_rose_window_size = ROSE_DEFAULT_WINDOW_SIZE; static HLIST_HEAD(rose_list); -spinlock_t rose_list_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t rose_list_lock = SPIN_LOCK_UNLOCKED; static struct proto_ops rose_proto_ops; diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index d779e071b97d..98001cc84a2c 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -37,38 +37,6 @@ #include <net/ax25.h> #include <net/rose.h> -/* - * Only allow IP over ROSE frames through if the netrom device is up. 
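
The rose_rx_ip() function removed just below shows the usual shape of
handing a link-layer frame to IP: count it in the device statistics,
stamp the protocol, spoof the receiving device, and pass it up (the
2.6-era code called ip_rcv() directly). A stand-alone sketch of the
control flow, with stub types in place of sk_buff and net_device:

    #include <stdio.h>
    #include <arpa/inet.h>

    #define ETH_P_IP_SKETCH 0x0800

    struct skb_sketch {
        unsigned short protocol;    /* network-order ethertype */
        const char *dev;            /* stand-in for skb->dev */
        int len;
    };

    struct dev_stats_sketch {
        unsigned long rx_packets, rx_bytes, rx_errors;
    };

    /* shape of the removed rose_rx_ip(): account the frame, stamp it
     * as IPv4, spoof the incoming device, then deliver upward */
    static int rx_ip_sketch(struct skb_sketch *skb, const char *dev,
                            struct dev_stats_sketch *stats, int dev_running)
    {
        if (!dev_running) {
            stats->rx_errors++;
            return 0;               /* frame dropped */
        }
        stats->rx_packets++;
        stats->rx_bytes += skb->len;
        skb->protocol = htons(ETH_P_IP_SKETCH);
        skb->dev = dev;             /* spoof incoming device */
        printf("deliver %d bytes to IP via %s\n", skb->len, skb->dev);
        return 1;
    }

    int main(void)
    {
        struct skb_sketch skb = { .len = 40 };
        struct dev_stats_sketch st = { 0 };

        rx_ip_sketch(&skb, "rose0", &st, 1);
        return 0;
    }
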
- */ - -int rose_rx_ip(struct sk_buff *skb, struct net_device *dev) -{ - struct net_device_stats *stats = (struct net_device_stats *)dev->priv; - -#ifdef CONFIG_INET - if (!netif_running(dev)) { - stats->rx_errors++; - return 0; - } - - stats->rx_packets++; - stats->rx_bytes += skb->len; - - skb->protocol = htons(ETH_P_IP); - - /* Spoof incoming device */ - skb->dev = dev; - skb->h.raw = skb->data; - skb->nh.raw = skb->data; - skb->pkt_type = PACKET_HOST; - - ip_rcv(skb, skb->dev, NULL); -#else - kfree_skb(skb); -#endif - return 1; -} - static int rose_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index 8974778d70d7..09e9e9d04d92 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -31,6 +31,9 @@ static void rose_ftimer_expiry(unsigned long); static void rose_t0timer_expiry(unsigned long); +static void rose_transmit_restart_confirmation(struct rose_neigh *neigh); +static void rose_transmit_restart_request(struct rose_neigh *neigh); + void rose_start_ftimer(struct rose_neigh *neigh) { del_timer(&neigh->ftimer); @@ -42,7 +45,7 @@ void rose_start_ftimer(struct rose_neigh *neigh) add_timer(&neigh->ftimer); } -void rose_start_t0timer(struct rose_neigh *neigh) +static void rose_start_t0timer(struct rose_neigh *neigh) { del_timer(&neigh->t0timer); @@ -68,7 +71,7 @@ int rose_ftimer_running(struct rose_neigh *neigh) return timer_pending(&neigh->ftimer); } -int rose_t0timer_running(struct rose_neigh *neigh) +static int rose_t0timer_running(struct rose_neigh *neigh) { return timer_pending(&neigh->t0timer); } @@ -165,7 +168,7 @@ void rose_link_rx_restart(struct sk_buff *skb, struct rose_neigh *neigh, unsigne /* * This routine is called when a Restart Request is needed */ -void rose_transmit_restart_request(struct rose_neigh *neigh) +static void rose_transmit_restart_request(struct rose_neigh *neigh) { struct sk_buff *skb; unsigned char *dptr; @@ -194,7 +197,7 @@ void rose_transmit_restart_request(struct rose_neigh *neigh) /* * This routine is called when a Restart Confirmation is needed */ -void rose_transmit_restart_confirmation(struct rose_neigh *neigh) +static void rose_transmit_restart_confirmation(struct rose_neigh *neigh) { struct sk_buff *skb; unsigned char *dptr; @@ -219,34 +222,6 @@ void rose_transmit_restart_confirmation(struct rose_neigh *neigh) } /* - * This routine is called when a Diagnostic is required. - */ -void rose_transmit_diagnostic(struct rose_neigh *neigh, unsigned char diag) -{ - struct sk_buff *skb; - unsigned char *dptr; - int len; - - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 2; - - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN); - - dptr = skb_put(skb, ROSE_MIN_LEN + 2); - - *dptr++ = AX25_P_ROSE; - *dptr++ = ROSE_GFI; - *dptr++ = 0x00; - *dptr++ = ROSE_DIAGNOSTIC; - *dptr++ = diag; - - if (!rose_send_frame(skb, neigh)) - kfree_skb(skb); -} - -/* * This routine is called when a Clear Request is needed outside of the context * of a connected socket. 
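
rose_transmit_diagnostic(), removed above, follows the standard
transmit-side recipe also used by the restart builders that stay:
alloc_skb(), skb_reserve() to leave room for the lower-layer AX.25
headers, then skb_put() to append the ROSE bytes. This sketch mimics the
reserve/put pointer arithmetic with a fixed buffer; the constants are
illustrative, not the real ROSE values:

    #include <stdio.h>

    /* minimal stand-in for alloc_skb()/skb_reserve()/skb_put() semantics */
    struct buf_sketch {
        unsigned char storage[64];
        unsigned char *data;        /* start of payload */
        unsigned char *tail;        /* end of payload */
    };

    static void buf_reserve(struct buf_sketch *b, int headroom)
    {
        b->data += headroom;        /* leave room for lower-layer headers */
        b->tail += headroom;
    }

    static unsigned char *buf_put(struct buf_sketch *b, int len)
    {
        unsigned char *p = b->tail; /* return old tail... */
        b->tail += len;             /* ...and extend the payload */
        return p;
    }

    int main(void)
    {
        struct buf_sketch b;
        unsigned char *dptr;

        b.data = b.storage;
        b.tail = b.storage;

        /* same order as the removed rose_link.c frame builders */
        buf_reserve(&b, 20);        /* e.g. AX.25 header room */
        dptr = buf_put(&b, 3);
        dptr[0] = 0xAA;             /* e.g. protocol id byte */
        dptr[1] = 0x30;             /* e.g. GFI */
        dptr[2] = 0x00;

        printf("payload at offset %ld, len %ld\n",
               (long)(b.data - b.storage), (long)(b.tail - b.data));
        return 0;
    }
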
*/ diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index eb0de0c2dc95..35591a0486cc 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -41,7 +41,7 @@ static unsigned int rose_neigh_no = 1; static struct rose_node *rose_node_list; static spinlock_t rose_node_list_lock = SPIN_LOCK_UNLOCKED; -struct rose_neigh *rose_neigh_list; +static struct rose_neigh *rose_neigh_list; static spinlock_t rose_neigh_list_lock = SPIN_LOCK_UNLOCKED; static struct rose_route *rose_route_list; static spinlock_t rose_route_list_lock = SPIN_LOCK_UNLOCKED; @@ -587,7 +587,7 @@ static int rose_clear_routes(void) /* * Check that the device given is a valid AX.25 interface that is "up". */ -struct net_device *rose_ax25_dev_get(char *devname) +static struct net_device *rose_ax25_dev_get(char *devname) { struct net_device *dev; diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 4c3f3c7f08a8..e2cd9d8c9b5b 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -28,6 +28,8 @@ #include <linux/interrupt.h> #include <net/rose.h> +static int rose_create_facilities(unsigned char *buffer, rose_cb *rose); + /* * This routine purges all of the queues of frames. */ @@ -394,7 +396,7 @@ int rose_parse_facilities(unsigned char *p, return 1; } -int rose_create_facilities(unsigned char *buffer, rose_cb *rose) +static int rose_create_facilities(unsigned char *buffer, rose_cb *rose) { unsigned char *p = buffer + 1; char *callsign; diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c index dfb65d87ff87..5cfd4cadee42 100644 --- a/net/rxrpc/call.c +++ b/net/rxrpc/call.c @@ -26,10 +26,10 @@ __RXACCT_DECL(atomic_t rxrpc_message_count); LIST_HEAD(rxrpc_calls); DECLARE_RWSEM(rxrpc_calls_sem); -unsigned rxrpc_call_rcv_timeout = HZ/3; -unsigned rxrpc_call_acks_timeout = HZ/3; -unsigned rxrpc_call_dfr_ack_timeout = HZ/20; -unsigned short rxrpc_call_max_resend = HZ/10; +unsigned rxrpc_call_rcv_timeout = HZ/3; +static unsigned rxrpc_call_acks_timeout = HZ/3; +static unsigned rxrpc_call_dfr_ack_timeout = HZ/20; +static unsigned short rxrpc_call_max_resend = HZ/10; const char *rxrpc_call_states[] = { "COMPLETE", @@ -58,7 +58,7 @@ const char *rxrpc_pkts[] = { "?09", "?10", "?11", "?12", "?13", "?14", "?15" }; -const char *rxrpc_acks[] = { +static const char *rxrpc_acks[] = { "---", "REQ", "DUP", "SEQ", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL", "-?-" }; @@ -79,6 +79,9 @@ static int rxrpc_call_record_ACK(struct rxrpc_call *call, struct rxrpc_message *msg, rxrpc_seq_t seq, size_t count); + +static int rxrpc_call_flush(struct rxrpc_call *call); + #define _state(call) \ _debug("[[[ state %s ]]]", rxrpc_call_states[call->app_call_state]); @@ -2079,7 +2082,7 @@ int rxrpc_call_write_data(struct rxrpc_call *call, /* * flush outstanding packets to the network */ -int rxrpc_call_flush(struct rxrpc_call *call) +static int rxrpc_call_flush(struct rxrpc_call *call) { struct rxrpc_message *msg; int ret = 0; diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c index a978007bff4d..61463c74f8cc 100644 --- a/net/rxrpc/connection.c +++ b/net/rxrpc/connection.c @@ -30,6 +30,8 @@ LIST_HEAD(rxrpc_conns); DECLARE_RWSEM(rxrpc_conns_sem); unsigned long rxrpc_conn_timeout = 60 * 60; +static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn); + static void __rxrpc_conn_timeout(rxrpc_timer_t *timer) { struct rxrpc_connection *conn = @@ -415,7 +417,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn) /* * free a connection record */ -void rxrpc_conn_do_timeout(struct rxrpc_connection *conn) +static void 
rxrpc_conn_do_timeout(struct rxrpc_connection *conn) { struct rxrpc_peer *peer; diff --git a/net/rxrpc/internal.h b/net/rxrpc/internal.h index 752b6d71d017..70e52f6b0b64 100644 --- a/net/rxrpc/internal.h +++ b/net/rxrpc/internal.h @@ -73,7 +73,6 @@ extern struct list_head rxrpc_conns; extern struct rw_semaphore rxrpc_conns_sem; extern unsigned long rxrpc_conn_timeout; -extern void rxrpc_conn_do_timeout(struct rxrpc_connection *conn); extern void rxrpc_conn_clearall(struct rxrpc_peer *peer); /* @@ -89,8 +88,6 @@ extern void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer, extern void rxrpc_peer_clearall(struct rxrpc_transport *trans); -extern void rxrpc_peer_do_timeout(struct rxrpc_peer *peer); - /* * proc.c diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c index e5c9f12d72db..ed38f5b17c1b 100644 --- a/net/rxrpc/peer.c +++ b/net/rxrpc/peer.c @@ -30,6 +30,8 @@ LIST_HEAD(rxrpc_peers); DECLARE_RWSEM(rxrpc_peers_sem); unsigned long rxrpc_peer_timeout = 12 * 60 * 60; +static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer); + static void __rxrpc_peer_timeout(rxrpc_timer_t *timer) { struct rxrpc_peer *peer = @@ -259,7 +261,7 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) * handle a peer timing out in the graveyard * - called from krxtimod */ -void rxrpc_peer_do_timeout(struct rxrpc_peer *peer) +static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer) { struct rxrpc_transport *trans = peer->trans; diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c index 76d95a3e2292..56adf16fed0c 100644 --- a/net/rxrpc/rxrpc_syms.c +++ b/net/rxrpc/rxrpc_syms.c @@ -23,7 +23,6 @@ EXPORT_SYMBOL(rxrpc_put_call); EXPORT_SYMBOL(rxrpc_call_abort); EXPORT_SYMBOL(rxrpc_call_read_data); EXPORT_SYMBOL(rxrpc_call_write_data); -EXPORT_SYMBOL(rxrpc_call_flush); /* connection.c */ EXPORT_SYMBOL(rxrpc_create_connection); diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c index 7e015ceecee5..cd40f9c8befe 100644 --- a/net/rxrpc/transport.c +++ b/net/rxrpc/transport.c @@ -150,16 +150,6 @@ int rxrpc_create_transport(unsigned short port, /*****************************************************************************/ /* - * clear the connections on a transport endpoint - */ -void rxrpc_clear_transport(struct rxrpc_transport *trans) -{ - //struct rxrpc_connection *conn; - -} /* end rxrpc_clear_transport() */ - -/*****************************************************************************/ -/* * destroy a transport endpoint */ void rxrpc_put_transport(struct rxrpc_transport *trans) diff --git a/net/sched/gact.c b/net/sched/gact.c index 1cf0b1714391..a65d2d6689f6 100644 --- a/net/sched/gact.c +++ b/net/sched/gact.c @@ -68,7 +68,7 @@ gact_determ(struct tcf_gact *p) { } -g_rand gact_rand[MAX_RAND]= { NULL,gact_net_rand, gact_determ}; +static g_rand gact_rand[MAX_RAND]= { NULL,gact_net_rand, gact_determ}; #endif static int diff --git a/net/sched/police.c b/net/sched/police.c index 96da96b628fd..b8c54c5727e5 100644 --- a/net/sched/police.c +++ b/net/sched/police.c @@ -163,7 +163,7 @@ void tcf_police_destroy(struct tcf_police *p) } #ifdef CONFIG_NET_CLS_ACT -int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,struct tc_action *a, int ovr, int bind) +static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,struct tc_action *a, int ovr, int bind) { unsigned h; int ret = 0; @@ -180,7 +180,8 @@ int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,struct tc_actio if (rtattr_parse(tb, TCA_POLICE_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta)) < 0) return -1; - if (tb[TCA_POLICE_TBF-1] == NULL) + 
if (tb[TCA_POLICE_TBF-1] == NULL || + RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) return -1; parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); @@ -220,11 +221,17 @@ override: goto failure; } } - if (tb[TCA_POLICE_RESULT-1]) - p->result = *(int*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + if (tb[TCA_POLICE_RESULT-1]) { + if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) + goto failure; + p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + } #ifdef CONFIG_NET_ESTIMATOR - if (tb[TCA_POLICE_AVRATE-1]) + if (tb[TCA_POLICE_AVRATE-1]) { + if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) + goto failure; p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + } #endif p->toks = p->burst = parm->burst; p->mtu = parm->mtu; @@ -265,7 +272,7 @@ failure: return -1; } -int tcf_act_police_cleanup(struct tc_action *a, int bind) +static int tcf_act_police_cleanup(struct tc_action *a, int bind) { struct tcf_police *p; p = PRIV(a); @@ -275,7 +282,7 @@ int tcf_act_police_cleanup(struct tc_action *a, int bind) return 0; } -int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) +static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) { psched_time_t now; struct sk_buff *skb = *pskb; @@ -338,7 +345,7 @@ int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) return p->action; } -int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; struct tc_police opt; @@ -424,7 +431,8 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) if (rtattr_parse(tb, TCA_POLICE_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta)) < 0) return NULL; - if (tb[TCA_POLICE_TBF-1] == NULL) + if (tb[TCA_POLICE_TBF-1] == NULL || + RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) return NULL; parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); @@ -449,11 +457,17 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) (p->P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1])) == NULL) goto failure; } - if (tb[TCA_POLICE_RESULT-1]) - p->result = *(int*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + if (tb[TCA_POLICE_RESULT-1]) { + if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) + goto failure; + p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + } #ifdef CONFIG_NET_ESTIMATOR - if (tb[TCA_POLICE_AVRATE-1]) + if (tb[TCA_POLICE_AVRATE-1]) { + if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) + goto failure; p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + } #endif p->toks = p->burst = parm->burst; p->mtu = parm->mtu; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fff96b79613e..a8570f361d6d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -207,7 +207,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) return NULL; } -struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) +static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; struct Qdisc *leaf; @@ -226,7 +226,7 @@ struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) /* Find queueing discipline by name */ -struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind) +static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind) { struct Qdisc_ops *q = NULL; @@ -290,7 +290,7 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) /* Allocate an unique handle from space managed by kernel */ -u32 qdisc_alloc_handle(struct net_device *dev) +static u32 qdisc_alloc_handle(struct net_device 
*dev) { int i = 0x10000; static u32 autohandle = TC_H_MAKE(0x80000000U, 0); @@ -356,8 +356,9 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) Old qdisc is not destroyed but returned in *old. */ -int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid, - struct Qdisc *new, struct Qdisc **old) +static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, + u32 classid, + struct Qdisc *new, struct Qdisc **old) { int err = 0; struct Qdisc *q = *old; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 96203ed84151..19f4e4fa7f48 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -14,6 +14,7 @@ #include <linux/rtnetlink.h> #include <net/pkt_sched.h> #include <net/dsfield.h> +#include <net/inet_ecn.h> #include <asm/byteorder.h> @@ -198,10 +199,12 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) /* FIXME: Safe with non-linear skbs? --RR */ switch (skb->protocol) { case __constant_htons(ETH_P_IP): - skb->tc_index = ipv4_get_dsfield(skb->nh.iph); + skb->tc_index = ipv4_get_dsfield(skb->nh.iph) + & ~INET_ECN_MASK; break; case __constant_htons(ETH_P_IPV6): - skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h); + skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h) + & ~INET_ECN_MASK; break; default: skb->tc_index = 0; @@ -320,7 +323,7 @@ static unsigned int dsmark_drop(struct Qdisc *sch) } -int dsmark_init(struct Qdisc *sch,struct rtattr *opt) +static int dsmark_init(struct Qdisc *sch,struct rtattr *opt) { struct dsmark_qdisc_data *p = PRIV(sch); struct rtattr *tb[TCA_DSMARK_MAX]; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1b9180944904..35e87b35a33f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -283,7 +283,7 @@ struct Qdisc noop_qdisc = { .list = LIST_HEAD_INIT(noop_qdisc.list), }; -struct Qdisc_ops noqueue_qdisc_ops = { +static struct Qdisc_ops noqueue_qdisc_ops = { .next = NULL, .cl_ops = NULL, .id = "noqueue", @@ -294,7 +294,7 @@ struct Qdisc_ops noqueue_qdisc_ops = { .owner = THIS_MODULE, }; -struct Qdisc noqueue_qdisc = { +static struct Qdisc noqueue_qdisc = { .enqueue = NULL, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 2de5a8a01481..25fa2c06f139 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -71,7 +71,7 @@ #define HTB_HSIZE 16 /* classid hash size */ #define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#define HTB_DEBUG 1 /* compile debugging support (activated by tc tool) */ +#undef HTB_DEBUG /* compile debugging support (activated by tc tool) */ #define HTB_RATECM 1 /* whether to use rate computer */ #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 7ea1d35e1cce..fd751a57fff1 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -274,7 +274,7 @@ static struct nf_hook_ops ing_ops = { #endif #endif -int ingress_init(struct Qdisc *sch,struct rtattr *opt) +static int ingress_init(struct Qdisc *sch,struct rtattr *opt) { struct ingress_qdisc_data *p = PRIV(sch); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 7804835a2d37..2f06270cad4b 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -47,7 +47,8 @@ struct prio_sched_data }; -struct Qdisc *prio_classify(struct sk_buff *skb, struct Qdisc *sch,int *r) +static struct Qdisc *prio_classify(struct sk_buff *skb, + struct Qdisc *sch, int *r) 
{ struct prio_sched_data *q = qdisc_priv(sch); u32 band = skb->priority; diff --git a/net/sctp/input.c b/net/sctp/input.c index 0268f75c41e5..cab833371a54 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -393,7 +393,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct sctp_endpoint *ep; struct sctp_association *asoc; struct sctp_transport *transport; - struct inet_opt *inet; + struct inet_sock *inet; char *saveip, *savesctp; int err; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f657c4deedd9..e142681824ca 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -580,9 +580,9 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr) struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sock *newsk; - struct inet_opt *newinet; + struct inet_sock *newinet; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; @@ -608,7 +608,7 @@ struct sock *sctp_v6_create_accept_sk(struct sock *sk, newsk->sk_shutdown = sk->sk_shutdown; newsctp6sk = (struct sctp6_sock *)newsk; - newsctp6sk->pinet6 = &newsctp6sk->inet6; + newsctp6sk->inet.pinet6 = &newsctp6sk->inet6; newinet = inet_sk(newsk); newnp = inet6_sk(newsk); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1f693e954621..a8a3f3d37efc 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -551,8 +551,8 @@ struct sock *sctp_v4_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { struct sock *newsk; - struct inet_opt *inet = inet_sk(sk); - struct inet_opt *newinet; + struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot->slab_obj_size, sk->sk_prot->slab); diff --git a/net/socket.c b/net/socket.c index 45aada9314a1..dff6156dd4f8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -736,8 +736,9 @@ ssize_t sock_sendpage(struct file *file, struct page *page, return sock->ops->sendpage(sock, page, offset, size, flags); } -int sock_readv_writev(int type, struct inode * inode, struct file * file, - const struct iovec * iov, long count, size_t size) +static int sock_readv_writev(int type, struct inode * inode, + struct file * file, const struct iovec * iov, + long count, size_t size) { struct msghdr msg; struct socket *sock; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 315b16247b8c..91d325e62953 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -25,7 +25,7 @@ static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { NULL, /* others can be loadable modules */ }; -u32 +static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor >= RPC_AUTH_MAXFLAVOR) return RPC_AUTH_GSS; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 927b72c89fb3..a6260098e836 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -532,7 +532,7 @@ gss_pipe_release(struct inode *inode) spin_unlock(&gss_auth->lock); } -void +static void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index b0951d11522a..826df44e7fca 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -233,38 +233,3 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size, EXPORT_SYMBOL(g_verify_token_header); -/* Given a buffer containing a token, returns a copy 
of the mech oid in - * the parameter mech. */ -u32 -g_get_mech_oid(struct xdr_netobj *mech, struct xdr_netobj * in_buf) -{ - unsigned char *buf = in_buf->data; - int len = in_buf->len; - int ret=0; - int seqsize; - - if ((len-=1) < 0) - return(G_BAD_TOK_HEADER); - if (*buf++ != 0x60) - return(G_BAD_TOK_HEADER); - - if ((seqsize = der_read_length(&buf, &len)) < 0) - return(G_BAD_TOK_HEADER); - - if ((len-=1) < 0) - return(G_BAD_TOK_HEADER); - if (*buf++ != 0x06) - return(G_BAD_TOK_HEADER); - - if ((len-=1) < 0) - return(G_BAD_TOK_HEADER); - mech->len = *buf++; - - if ((len-=mech->len) < 0) - return(G_BAD_TOK_HEADER); - if (!(mech->data = kmalloc(mech->len, GFP_KERNEL))) - return(G_BUFFER_ALLOC); - memcpy(mech->data, buf, mech->len); - - return ret; -} diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index b4db91868bb8..24c21f2a33a7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -132,7 +132,7 @@ out: EXPORT_SYMBOL(krb5_decrypt); -void +static void buf_to_sg(struct scatterlist *sg, char *ptr, int len) { sg->page = virt_to_page(ptr); sg->offset = offset_in_page(ptr); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0082fb5c999a..a734dd869518 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -48,9 +48,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -struct xdr_netobj gss_mech_krb5_oid = - {9, "\052\206\110\206\367\022\001\002\002"}; - static inline int get_bytes(char **ptr, const char *end, void *res, int len) { diff --git a/net/sunrpc/auth_gss/gss_pseudoflavors.c b/net/sunrpc/auth_gss/gss_pseudoflavors.c deleted file mode 100644 index e7b9aa9c9c90..000000000000 --- a/net/sunrpc/auth_gss/gss_pseudoflavors.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * linux/net/sunrpc/gss_union.c - * - * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic code - * - * Copyright (c) 2001 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - * - */ - -/* - * Copyright 1993 by OpenVision Technologies, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without fee, - * provided that the above copyright notice appears in all copies and - * that both that copyright notice and this permission notice appear in - * supporting documentation, and that the name of OpenVision not be used - * in advertising or publicity pertaining to distribution of the software - * without specific, written prior permission. OpenVision makes no - * representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied warranty. - * - * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF - * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR - * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. 
- */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/socket.h> -#include <linux/sunrpc/gss_asn1.h> -#include <linux/sunrpc/auth_gss.h> - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -static LIST_HEAD(registered_triples); -static spinlock_t registered_triples_lock = SPIN_LOCK_UNLOCKED; - -/* The following must be called with spinlock held: */ -static struct sup_sec_triple * -do_lookup_triple_by_pseudoflavor(u32 pseudoflavor) -{ - struct sup_sec_triple *pos, *triple = NULL; - - list_for_each_entry(pos, ®istered_triples, triples) { - if (pos->pseudoflavor == pseudoflavor) { - triple = pos; - break; - } - } - return triple; -} - -/* XXX Need to think about reference counting of triples and of mechs. - * Currently we do no reference counting of triples, and I think that's - * probably OK given the reference counting on mechs, but there's probably - * a better way to do all this. */ - -int -gss_register_triple(u32 pseudoflavor, struct gss_api_mech *mech, - u32 qop, u32 service) -{ - struct sup_sec_triple *triple; - - if (!(triple = kmalloc(sizeof(*triple), GFP_KERNEL))) { - printk("Alloc failed in gss_register_triple"); - goto err; - } - triple->pseudoflavor = pseudoflavor; - triple->mech = gss_mech_get_by_OID(&mech->gm_oid); - triple->qop = qop; - triple->service = service; - - spin_lock(®istered_triples_lock); - if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) { - printk(KERN_WARNING "RPC: Registered pseudoflavor %d again\n", - pseudoflavor); - goto err_unlock; - } - list_add(&triple->triples, ®istered_triples); - spin_unlock(®istered_triples_lock); - dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor); - - return 0; - -err_unlock: - kfree(triple); - spin_unlock(®istered_triples_lock); -err: - return -1; -} - -int -gss_unregister_triple(u32 pseudoflavor) -{ - struct sup_sec_triple *triple; - - spin_lock(®istered_triples_lock); - if (!(triple = do_lookup_triple_by_pseudoflavor(pseudoflavor))) { - spin_unlock(®istered_triples_lock); - printk("Can't unregister unregistered pseudoflavor %d\n", - pseudoflavor); - return -1; - } - list_del(&triple->triples); - spin_unlock(®istered_triples_lock); - gss_mech_put(triple->mech); - kfree(triple); - return 0; - -} - -void -print_sec_triple(struct xdr_netobj *oid,u32 qop,u32 service) -{ - dprintk("RPC: print_sec_triple:\n"); - dprintk(" oid_len %d\n oid :\n",oid->len); - print_hexl((u32 *)oid->data,oid->len,0); - dprintk(" qop %d\n",qop); - dprintk(" service %d\n",service); -} - -/* Function: gss_get_cmp_triples - * - * Description: search sec_triples for a matching security triple - * return pseudoflavor if match, else 0 - * (Note that 0 is a valid pseudoflavor, but not for any gss pseudoflavor - * (0 means auth_null), so this shouldn't cause confusion.) 
- */ -u32 -gss_cmp_triples(u32 oid_len, char *oid_data, u32 qop, u32 service) -{ - struct sup_sec_triple *triple; - u32 pseudoflavor = 0; - struct xdr_netobj oid; - - oid.len = oid_len; - oid.data = oid_data; - - dprintk("RPC: gss_cmp_triples\n"); - print_sec_triple(&oid,qop,service); - - spin_lock(®istered_triples_lock); - list_for_each_entry(triple, ®istered_triples, triples) { - if((g_OID_equal(&oid, &triple->mech->gm_oid)) - && (qop == triple->qop) - && (service == triple->service)) { - pseudoflavor = triple->pseudoflavor; - break; - } - } - spin_unlock(®istered_triples_lock); - dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor); - return pseudoflavor; -} - -u32 -gss_get_pseudoflavor(struct gss_ctx *ctx, u32 qop, u32 service) -{ - return gss_cmp_triples(ctx->mech_type->gm_oid.len, - ctx->mech_type->gm_oid.data, - qop, service); -} - -/* Returns nonzero iff the given pseudoflavor is in the supported list. - * (Note that without incrementing a reference count or anything, this - * doesn't give any guarantees.) */ -int -gss_pseudoflavor_supported(u32 pseudoflavor) -{ - struct sup_sec_triple *triple; - - spin_lock(®istered_triples_lock); - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); - spin_unlock(®istered_triples_lock); - return (triple ? 1 : 0); -} - -u32 -gss_pseudoflavor_to_service(u32 pseudoflavor) -{ - struct sup_sec_triple *triple; - - spin_lock(®istered_triples_lock); - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); - spin_unlock(®istered_triples_lock); - if (!triple) { - dprintk("RPC: gss_pseudoflavor_to_service called with unsupported pseudoflavor %d\n", - pseudoflavor); - return 0; - } - return triple->service; -} - -struct gss_api_mech * -gss_pseudoflavor_to_mech(u32 pseudoflavor) { - struct sup_sec_triple *triple; - struct gss_api_mech *mech = NULL; - - spin_lock(®istered_triples_lock); - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); - spin_unlock(®istered_triples_lock); - if (triple) - mech = gss_mech_get(triple->mech); - else - dprintk("RPC: gss_pseudoflavor_to_mech called with unsupported pseudoflavor %d\n", - pseudoflavor); - return mech; -} - -int -gss_pseudoflavor_to_mechOID(u32 pseudoflavor, struct xdr_netobj * oid) -{ - struct gss_api_mech *mech; - - mech = gss_pseudoflavor_to_mech(pseudoflavor); - if (!mech) { - dprintk("RPC: gss_pseudoflavor_to_mechOID called with unsupported pseudoflavor %d\n", - pseudoflavor); - return -1; - } - oid->len = mech->gm_oid.len; - if (!(oid->data = kmalloc(oid->len, GFP_KERNEL))) - return -1; - memcpy(oid->data, mech->gm_oid.data, oid->len); - gss_mech_put(mech); - return 0; -} diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 2887de1cae1c..fd213dc36093 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -49,9 +49,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -struct xdr_netobj gss_mech_spkm3_oid = - {7, "\053\006\001\005\005\001\003"}; - static inline int get_bytes(char **ptr, const char *end, void *res, int len) { @@ -206,7 +203,7 @@ out_err: return GSS_S_FAILURE; } -void +static void gss_delete_sec_context_spkm3(void *internal_ctx) { struct spkm3_ctx *sctx = internal_ctx; @@ -221,7 +218,7 @@ gss_delete_sec_context_spkm3(void *internal_ctx) { kfree(sctx); } -u32 +static u32 gss_verify_mic_spkm3(struct gss_ctx *ctx, struct xdr_buf *signbuf, struct xdr_netobj *checksum, @@ -241,7 +238,7 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx, return maj_stat; } -u32 +static u32 gss_get_mic_spkm3(struct gss_ctx *ctx, 
u32 qop, struct xdr_buf *message_buffer, diff --git a/net/sunrpc/auth_gss/sunrpcgss_syms.c b/net/sunrpc/auth_gss/sunrpcgss_syms.c deleted file mode 100644 index ef45f93e5b95..000000000000 --- a/net/sunrpc/auth_gss/sunrpcgss_syms.c +++ /dev/null @@ -1,37 +0,0 @@ -#include <linux/config.h> -#include <linux/module.h> - -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/sched.h> -#include <linux/uio.h> -#include <linux/unistd.h> - -#include <linux/sunrpc/auth_gss.h> -#include <linux/sunrpc/svcauth_gss.h> -#include <linux/sunrpc/gss_asn1.h> -#include <linux/sunrpc/gss_krb5.h> - -/* svcauth_gss.c: */ -EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor); - -/* registering gss mechanisms to the mech switching code: */ -EXPORT_SYMBOL(gss_mech_register); -EXPORT_SYMBOL(gss_mech_unregister); -EXPORT_SYMBOL(gss_mech_get); -EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor); -EXPORT_SYMBOL(gss_mech_get_by_name); -EXPORT_SYMBOL(gss_mech_put); -EXPORT_SYMBOL(gss_pseudoflavor_to_service); -EXPORT_SYMBOL(gss_service_to_auth_domain_name); - -/* generic functionality in gss code: */ -EXPORT_SYMBOL(g_make_token_header); -EXPORT_SYMBOL(g_verify_token_header); -EXPORT_SYMBOL(g_token_size); -EXPORT_SYMBOL(make_checksum); -EXPORT_SYMBOL(krb5_encrypt); -EXPORT_SYMBOL(krb5_decrypt); - -/* debug */ -EXPORT_SYMBOL(print_hexl); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index ee2e9cef94c2..43096520a604 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -448,7 +448,7 @@ static struct cache_detail rsc_cache = { static DefineSimpleCacheLookup(rsc, 0); -struct rsc * +static struct rsc * gss_svc_searchbyctx(struct xdr_netobj *handle) { struct rsc rsci; @@ -1045,7 +1045,7 @@ svcauth_gss_domain_release(struct auth_domain *dom) kfree(gd); } -struct auth_ops svcauthops_gss = { +static struct auth_ops svcauthops_gss = { .name = "rpcsec_gss", .owner = THIS_MODULE, .flavour = RPC_AUTH_GSS, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index d508a513d632..8845b9e3d735 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -33,6 +33,9 @@ #define RPCDBG_FACILITY RPCDBG_CACHE +static void cache_defer_req(struct cache_req *req, struct cache_head *item); +static void cache_revisit_request(struct cache_head *item); + void cache_init(struct cache_head *h) { time_t now = get_seconds(); @@ -256,39 +259,13 @@ int cache_unregister(struct cache_detail *cd) return 0; } -struct cache_detail *cache_find(char *name) -{ - struct list_head *l; - - spin_lock(&cache_list_lock); - list_for_each(l, &cache_list) { - struct cache_detail *cd = list_entry(l, struct cache_detail, others); - - if (strcmp(cd->name, name)==0) { - atomic_inc(&cd->inuse); - spin_unlock(&cache_list_lock); - return cd; - } - } - spin_unlock(&cache_list_lock); - return NULL; -} - -/* cache_drop must be called on any cache returned by - * cache_find, after it has been used - */ -void cache_drop(struct cache_detail *detail) -{ - atomic_dec(&detail->inuse); -} - /* clean cache tries to find something to clean * and cleans it. * It returns 1 if it cleaned something, * 0 if it didn't find anything this time * -1 if it fell off the end of the list. */ -int cache_clean(void) +static int cache_clean(void) { int rv = 0; struct list_head *next; @@ -428,12 +405,12 @@ void cache_purge(struct cache_detail *detail) #define DFR_MAX 300 /* ??? 
*/ -spinlock_t cache_defer_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t cache_defer_lock = SPIN_LOCK_UNLOCKED; static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; -void cache_defer_req(struct cache_req *req, struct cache_head *item) +static void cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; int hash = DFR_HASH(item); @@ -483,7 +460,7 @@ void cache_defer_req(struct cache_req *req, struct cache_head *item) } } -void cache_revisit_request(struct cache_head *item) +static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct list_head pending; @@ -902,7 +879,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen) *lp = len; } -void warn_no_listener(struct cache_detail *detail) +static void warn_no_listener(struct cache_detail *detail) { if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; @@ -1119,7 +1096,7 @@ static int c_show(struct seq_file *m, void *p) return cd->cache_show(m, cd, cp); } -struct seq_operations cache_content_op = { +static struct seq_operations cache_content_op = { .start = c_start, .next = c_next, .stop = c_stop, diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 61bdc9754a06..5e157d65bbe7 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -31,7 +31,7 @@ static struct rpc_procinfo pmap_procedures[]; static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); static void pmap_getport_done(struct rpc_task *); -extern struct rpc_program pmap_program; +static struct rpc_program pmap_program; static spinlock_t pmap_lock = SPIN_LOCK_UNLOCKED; /* @@ -292,7 +292,7 @@ static struct rpc_version * pmap_version[] = { static struct rpc_stat pmap_stats; -struct rpc_program pmap_program = { +static struct rpc_program pmap_program = { .name = "portmap", .number = RPC_PMAP_PROGRAM, .nrvers = ARRAY_SIZE(pmap_version), diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 11872a8a94be..7b3d9647671f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -276,12 +276,7 @@ rpc_pipe_ioctl(struct inode *ino, struct file *filp, } } -struct inode_operations rpc_pipe_iops = { - .lookup = simple_lookup, -}; - - -struct file_operations rpc_pipe_fops = { +static struct file_operations rpc_pipe_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = rpc_pipe_read, @@ -595,7 +590,7 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) return 0; } -struct dentry * +static struct dentry * rpc_lookup_negative(char *path, struct nameidata *nd) { struct dentry *dentry; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index aa2205a2c19a..c5c8d5d59a0f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -42,6 +42,9 @@ static mempool_t *rpc_buffer_mempool; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); +static void rpc_free(struct rpc_task *task); + + /* * When an asynchronous RPC task is activated within a bottom half * handler, or while executing another RPC task, it is put on @@ -837,7 +840,7 @@ rpc_malloc(struct rpc_task *task, size_t size) return task->tk_buffer; } -void +static void rpc_free(struct rpc_task *task) { if (task->tk_buffer) { diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 51f0392ae531..1876751172be 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -107,7 +107,6 @@ EXPORT_SYMBOL(auth_unix_add_addr); 
EXPORT_SYMBOL(auth_unix_forget_old); EXPORT_SYMBOL(auth_unix_lookup); EXPORT_SYMBOL(cache_check); -EXPORT_SYMBOL(cache_clean); EXPORT_SYMBOL(cache_flush); EXPORT_SYMBOL(cache_purge); EXPORT_SYMBOL(cache_fresh); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index d9f09e61d83c..8a659ac5143a 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -128,7 +128,8 @@ EXPORT_SYMBOL(svc_auth_unregister); #define DN_HASHMASK (DN_HASHMAX-1) static struct cache_head *auth_domain_table[DN_HASHMAX]; -void auth_domain_drop(struct cache_head *item, struct cache_detail *cd) + +static void auth_domain_drop(struct cache_head *item, struct cache_detail *cd) { struct auth_domain *dom = container_of(item, struct auth_domain, h); if (cache_put(item,cd)) diff --git a/net/sunrpc/svcauth_des.c b/net/sunrpc/svcauth_des.c deleted file mode 100644 index e4ac9b1f2fdc..000000000000 --- a/net/sunrpc/svcauth_des.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * linux/net/sunrpc/svcauth_des.c - * - * Server-side AUTH_DES handling. - * - * Copyright (C) 1996, 1997 Olaf Kirch <okir@monad.swb.de> - */ - -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/sunrpc/types.h> -#include <linux/sunrpc/xdr.h> -#include <linux/sunrpc/svcauth.h> -#include <linux/sunrpc/svcsock.h> - -#define RPCDBG_FACILITY RPCDBG_AUTH - -/* - * DES cedential cache. - * The cache is indexed by fullname/key to allow for multiple sessions - * by the same user from different hosts. - * It would be tempting to use the client's IP address rather than the - * conversation key as an index, but that could become problematic for - * multi-homed hosts that distribute traffic across their interfaces. - */ -struct des_cred { - struct des_cred * dc_next; - char * dc_fullname; - u32 dc_nickname; - des_cblock dc_key; /* conversation key */ - des_cblock dc_xkey; /* encrypted conv. key */ - des_key_schedule dc_keysched; -}; - -#define ADN_FULLNAME 0 -#define ADN_NICKNAME 1 - -/* - * The default slack allowed when checking for replayed credentials - * (in milliseconds). - */ -#define DES_REPLAY_SLACK 2000 - -/* - * Make sure we don't place more than one call to the key server at - * a time. - */ -static int in_keycall; - -#define FAIL(err) \ - { if (data) put_cred(data); \ - *authp = rpc_autherr_##err; \ - return; \ - } - -void -svcauth_des(struct svc_rqst *rqstp, u32 *statp, u32 *authp) -{ - struct svc_buf *argp = &rqstp->rq_argbuf; - struct svc_buf *resp = &rqstp->rq_resbuf; - struct svc_cred *cred = &rqstp->rq_cred; - struct des_cred *data = NULL; - u32 cryptkey[2]; - u32 cryptbuf[4]; - u32 *p = argp->buf; - int len = argp->len, slen, i; - - *authp = rpc_auth_ok; - - if ((argp->len -= 3) < 0) { - *statp = rpc_garbage_args; - return; - } - - p++; /* skip length field */ - namekind = ntohl(*p++); /* fullname/nickname */ - - /* Get the credentials */ - if (namekind == ADN_NICKNAME) { - /* If we can't find the cached session key, initiate a - * new session. */ - if (!(data = get_cred_bynick(*p++))) - FAIL(rejectedcred); - } else if (namekind == ADN_FULLNAME) { - p = xdr_decode_string(p, &fullname, &len, RPC_MAXNETNAMELEN); - if (p == NULL) - FAIL(badcred); - cryptkey[0] = *p++; /* get the encrypted key */ - cryptkey[1] = *p++; - cryptbuf[2] = *p++; /* get the encrypted window */ - } else { - FAIL(badcred); - } - - /* If we're just updating the key, silently discard the request. 
*/ - if (data && data->dc_locked) { - *authp = rpc_autherr_dropit; - _put_cred(data); /* release but don't unlock */ - return; - } - - /* Get the verifier flavor and length */ - if (ntohl(*p++) != RPC_AUTH_DES && ntohl(*p++) != 12) - FAIL(badverf); - - cryptbuf[0] = *p++; /* encrypted time stamp */ - cryptbuf[1] = *p++; - cryptbuf[3] = *p++; /* 0 or window - 1 */ - - if (namekind == ADN_NICKNAME) { - status = des_ecb_encrypt((des_block *) cryptbuf, - (des_block *) cryptbuf, - data->dc_keysched, DES_DECRYPT); - } else { - /* We first have to decrypt the new session key and - * fill in the UNIX creds. */ - if (!(data = get_cred_byname(rqstp, authp, fullname, cryptkey))) - return; - status = des_cbc_encrypt((des_cblock *) cryptbuf, - (des_cblock *) cryptbuf, 16, - data->dc_keysched, - (des_cblock *) &ivec, - DES_DECRYPT); - } - if (status) { - printk("svcauth_des: DES decryption failed (status %d)\n", - status); - FAIL(badverf); - } - - /* Now check the whole lot */ - if (namekind == ADN_FULLNAME) { - unsigned long winverf; - - data->dc_window = ntohl(cryptbuf[2]); - winverf = ntohl(cryptbuf[2]); - if (window != winverf - 1) { - printk("svcauth_des: bad window verifier!\n"); - FAIL(badverf); - } - } - - /* XDR the decrypted timestamp */ - cryptbuf[0] = ntohl(cryptbuf[0]); - cryptbuf[1] = ntohl(cryptbuf[1]); - if (cryptbuf[1] > 1000000) { - dprintk("svcauth_des: bad usec value %u\n", cryptbuf[1]); - if (namekind == ADN_NICKNAME) - FAIL(rejectedverf); - FAIL(badverf); - } - - /* - * Check for replayed credentials. We must allow for reordering - * of requests by the network, and the OS scheduler, hence we - * cannot expect timestamps to be increasing monotonically. - * This opens a small security hole, therefore the replay_slack - * value shouldn't be too large. - */ - if ((delta = cryptbuf[0] - data->dc_timestamp[0]) <= 0) { - switch (delta) { - case -1: - delta = -1000000; - case 0: - delta += cryptbuf[1] - data->dc_timestamp[1]; - break; - default: - delta = -1000000; - } - if (delta < DES_REPLAY_SLACK) - FAIL(rejectedverf); -#ifdef STRICT_REPLAY_CHECKS - /* TODO: compare time stamp to last five timestamps cached - * and reject (drop?) request if a match is found. */ -#endif - } - - now = xtime; - now.tv_secs -= data->dc_window; - if (now.tv_secs < cryptbuf[0] || - (now.tv_secs == cryptbuf[0] && now.tv_usec < cryptbuf[1])) - FAIL(rejectedverf); - - /* Okay, we're done. Update the lot */ - if (namekind == ADN_FULLNAME) - data->dc_valid = 1; - data->dc_timestamp[0] = cryptbuf[0]; - data->dc_timestamp[1] = cryptbuf[1]; - - put_cred(data); - return; -garbage: - *statp = rpc_garbage_args; - return; -} - -/* - * Call the keyserver to obtain the decrypted conversation key and - * UNIX creds. We use a Linux-specific keycall extension that does - * both things in one go. 
- */ -static struct des_cred * -get_cred_byname(struct svc_rqst *rqstp, u32 *authp, char *fullname, u32 *cryptkey) -{ - static int in_keycall; - struct des_cred *cred; - - if (in_keycall) { - *authp = rpc_autherr_dropit; - return NULL; - } - in_keycall = 1; - in_keycall = 0; - return cred; -} diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2832312483ed..043eb46c7fba 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -97,7 +97,7 @@ struct ip_map { }; static struct cache_head *ip_table[IP_HASHMAX]; -void ip_map_put(struct cache_head *item, struct cache_detail *cd) +static void ip_map_put(struct cache_head *item, struct cache_detail *cd) { struct ip_map *im = container_of(item, struct ip_map,h); if (cache_put(item, cd)) { @@ -417,7 +417,7 @@ struct auth_ops svcauth_null = { }; -int +static int svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; @@ -497,7 +497,7 @@ badcred: return SVC_DENIED; } -int +static int svcauth_unix_release(struct svc_rqst *rqstp) { /* Verifier (such as it is) is already in place. diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index a6630a1f2025..0b42059378cb 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -33,15 +33,6 @@ xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj) } u32 * -xdr_decode_netobj_fixed(u32 *p, void *obj, unsigned int len) -{ - if (ntohl(*p++) != len) - return NULL; - memcpy(obj, p, len); - return p + XDR_QUADLEN(len); -} - -u32 * xdr_decode_netobj(u32 *p, struct xdr_netobj *obj) { unsigned int len; @@ -185,124 +176,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, xdr->buflen += len; } -/* - * Realign the kvec if the server missed out some reply elements - * (such as post-op attributes,...) - * Note: This is a simple implementation that assumes that - * len <= iov->iov_len !!! - * The RPC header (assumed to be the 1st element in the iov array) - * is not shifted. - */ -void xdr_shift_iovec(struct kvec *iov, int nr, size_t len) -{ - struct kvec *pvec; - - for (pvec = iov + nr - 1; nr > 1; nr--, pvec--) { - struct kvec *svec = pvec - 1; - - if (len > pvec->iov_len) { - printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n"); - return; - } - memmove((char *)pvec->iov_base + len, pvec->iov_base, - pvec->iov_len - len); - - if (len > svec->iov_len) { - printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n"); - return; - } - memcpy(pvec->iov_base, - (char *)svec->iov_base + svec->iov_len - len, len); - } -} - -/* - * Map a struct xdr_buf into an kvec array. 
- */ -int xdr_kmap(struct kvec *iov_base, struct xdr_buf *xdr, size_t base) -{ - struct kvec *iov = iov_base; - struct page **ppage = xdr->pages; - unsigned int len, pglen = xdr->page_len; - - len = xdr->head[0].iov_len; - if (base < len) { - iov->iov_len = len - base; - iov->iov_base = (char *)xdr->head[0].iov_base + base; - iov++; - base = 0; - } else - base -= len; - - if (pglen == 0) - goto map_tail; - if (base >= pglen) { - base -= pglen; - goto map_tail; - } - if (base || xdr->page_base) { - pglen -= base; - base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - base &= ~PAGE_CACHE_MASK; - } - do { - len = PAGE_CACHE_SIZE; - iov->iov_base = kmap(*ppage); - if (base) { - iov->iov_base += base; - len -= base; - base = 0; - } - if (pglen < len) - len = pglen; - iov->iov_len = len; - iov++; - ppage++; - } while ((pglen -= len) != 0); -map_tail: - if (xdr->tail[0].iov_len) { - iov->iov_len = xdr->tail[0].iov_len - base; - iov->iov_base = (char *)xdr->tail[0].iov_base + base; - iov++; - } - return (iov - iov_base); -} - -void xdr_kunmap(struct xdr_buf *xdr, size_t base) -{ - struct page **ppage = xdr->pages; - unsigned int pglen = xdr->page_len; - - if (!pglen) - return; - if (base > xdr->head[0].iov_len) - base -= xdr->head[0].iov_len; - else - base = 0; - - if (base >= pglen) - return; - if (base || xdr->page_base) { - pglen -= base; - base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - /* Note: The offset means that the length of the first - * page is really (PAGE_CACHE_SIZE - (base & ~PAGE_CACHE_MASK)). - * In order to avoid an extra test inside the loop, - * we bump pglen here, and just subtract PAGE_CACHE_SIZE... */ - pglen += base & ~PAGE_CACHE_MASK; - } - for (;;) { - flush_dcache_page(*ppage); - kunmap(*ppage); - if (pglen <= PAGE_CACHE_SIZE) - break; - pglen -= PAGE_CACHE_SIZE; - ppage++; - } -} - void xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, @@ -572,7 +445,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) * Copies data into an arbitrary memory location from an array of pages * The copy is assumed to be non-overlapping. */ -void +static void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) { struct page **pgfrom; @@ -610,7 +483,7 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) * 'len' bytes. The extra data is not lost, but is instead * moved into the inlined pages and/or the tail. */ -void +static void xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) { struct kvec *head, *tail; @@ -683,7 +556,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) * 'len' bytes. The extra data is not lost, but is instead * moved into the tail. 
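[Editorial aside, not part of the patch: the xdr_shrink_bufhead()/xdr_shrink_pagelen() comments above make a point worth unpacking — "shrinking" an XDR buffer segment discards nothing; the excess bytes are relocated into the following segment (pages or tail). A deliberately simplified userspace illustration of that move-don't-truncate idea, using two flat buffers in place of the kernel's kvec/page machinery:

    #include <stdio.h>
    #include <string.h>

    /* Shrink 'head' by 'len' bytes, sliding the displaced bytes
     * into the front of 'tail' (which must have room for them). */
    static void shrink_head(char *head, size_t *headlen,
                            char *tail, size_t *taillen, size_t len)
    {
            memmove(tail + len, tail, *taillen);        /* make room  */
            memcpy(tail, head + *headlen - len, len);   /* move bytes */
            *headlen -= len;
            *taillen += len;
    }

    int main(void)
    {
            char head[16] = "rpchdrPAYLOAD";
            char tail[16] = "";
            size_t headlen = 13, taillen = 0;

            shrink_head(head, &headlen, tail, &taillen, 7);
            printf("head=%.*s tail=%.*s\n",
                   (int)headlen, head, (int)taillen, tail);
            /* prints: head=rpchdr tail=PAYLOAD */
            return 0;
    }

The real helpers walk a kvec array and a page list rather than flat buffers, but the invariant is the same: total buffer contents are preserved across the shrink.]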
*/ -void +static void xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) { struct kvec *tail; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index de4bccc1c25d..92ece31702fb 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -90,6 +90,8 @@ static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); static void xprt_bind_socket(struct rpc_xprt *, struct socket *); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static int xprt_clear_backlog(struct rpc_xprt *xprt); + #ifdef RPC_DEBUG_DATA /* * Print the buffer contents (first 128 bytes only--just enough for @@ -1397,7 +1399,7 @@ xprt_release(struct rpc_task *task) /* * Set default timeout parameters */ -void +static void xprt_default_timeout(struct rpc_timeout *to, int proto) { if (proto == IPPROTO_UDP) @@ -1633,7 +1635,7 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) /* * Prepare for transport shutdown. */ -void +static void xprt_shutdown(struct rpc_xprt *xprt) { xprt->shutdown = 1; @@ -1648,7 +1650,7 @@ xprt_shutdown(struct rpc_xprt *xprt) /* * Clear the xprt backlog queue */ -int +static int xprt_clear_backlog(struct rpc_xprt *xprt) { rpc_wake_up_next(&xprt->backlog); wake_up(&xprt->cong_wait); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8902c4aa900f..eb30bc7bf946 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -121,7 +121,7 @@ int sysctl_unix_max_dgram_qlen = 10; -kmem_cache_t *unix_sk_cachep; +static kmem_cache_t *unix_sk_cachep; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; rwlock_t unix_table_lock = RW_LOCK_UNLOCKED; diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index af1d297017a5..c974dac4580a 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -14,7 +14,7 @@ extern int sysctl_unix_max_dgram_qlen; -ctl_table unix_table[] = { +static ctl_table unix_table[] = { { .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, .procname = "max_dgram_qlen", diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 02d8235d5a55..07171faaad2a 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -261,7 +261,7 @@ found: /* * Find a connected X.25 socket given my LCI and neighbour. */ -struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb) +static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb) { struct sock *s; struct hlist_node *node; @@ -289,7 +289,7 @@ struct sock *x25_find_socket(unsigned int lci, struct x25_neigh *nb) /* * Find a unique LCI for a given device. 
*/ -unsigned int x25_new_lci(struct x25_neigh *nb) +static unsigned int x25_new_lci(struct x25_neigh *nb) { unsigned int lci = 1; struct sock *sk; @@ -1336,7 +1336,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return rc; } -struct net_proto_family x25_family_ops = { +static struct net_proto_family x25_family_ops = { .family = AF_X25, .create = x25_create, .owner = THIS_MODULE, @@ -1371,7 +1371,7 @@ static struct packet_type x25_packet_type = { .func = x25_lapb_receive_frame, }; -struct notifier_block x25_dev_notifier = { +static struct notifier_block x25_dev_notifier = { .notifier_call = x25_device_event, }; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 88e704e70043..36fc3bf6d882 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -123,29 +123,6 @@ out: return 0; } -int x25_llc_receive_frame(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) -{ - struct x25_neigh *nb; - int rc = 0; - - skb->sk = NULL; - - /* - * Packet received from unrecognised device, throw it away. - */ - nb = x25_get_neigh(dev); - if (!nb) { - printk(KERN_DEBUG "X.25: unknown_neighbour - %s\n", dev->name); - kfree_skb(skb); - } else { - rc = x25_receive_data(skb, nb); - x25_neigh_put(nb); - } - - return rc; -} - void x25_establish_link(struct x25_neigh *nb) { struct sk_buff *skb; diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index a2b9b71d4659..3ca204cfb26a 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -35,6 +35,9 @@ static rwlock_t x25_neigh_list_lock = RW_LOCK_UNLOCKED; static void x25_t20timer_expiry(unsigned long); +static void x25_transmit_restart_confirmation(struct x25_neigh *nb); +static void x25_transmit_restart_request(struct x25_neigh *nb); + /* * Linux set/reset timer routines */ @@ -106,7 +109,7 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb, /* * This routine is called when a Restart Request is needed */ -void x25_transmit_restart_request(struct x25_neigh *nb) +static void x25_transmit_restart_request(struct x25_neigh *nb) { unsigned char *dptr; int len = X25_MAX_L2_LEN + X25_STD_MIN_LEN + 2; @@ -133,7 +136,7 @@ void x25_transmit_restart_request(struct x25_neigh *nb) /* * This routine is called when a Restart Confirmation is needed */ -void x25_transmit_restart_confirmation(struct x25_neigh *nb) +static void x25_transmit_restart_confirmation(struct x25_neigh *nb) { unsigned char *dptr; int len = X25_MAX_L2_LEN + X25_STD_MIN_LEN; @@ -156,32 +159,6 @@ void x25_transmit_restart_confirmation(struct x25_neigh *nb) } /* - * This routine is called when a Diagnostic is required. - */ -void x25_transmit_diagnostic(struct x25_neigh *nb, unsigned char diag) -{ - unsigned char *dptr; - int len = X25_MAX_L2_LEN + X25_STD_MIN_LEN + 1; - struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC); - - if (!skb) - return; - - skb_reserve(skb, X25_MAX_L2_LEN); - - dptr = skb_put(skb, X25_STD_MIN_LEN + 1); - - *dptr++ = nb->extended ? X25_GFI_EXTSEQ : X25_GFI_STDSEQ; - *dptr++ = 0x00; - *dptr++ = X25_DIAGNOSTIC; - *dptr++ = diag; - - skb->sk = NULL; - - x25_send_frame(skb, nb); -} - -/* * This routine is called when a Clear Request is needed outside of the context * of a connected socket. 
*/ diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 64fff05abf8a..37c9447e589b 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -166,14 +166,14 @@ out: return 0; } -struct seq_operations x25_seq_route_ops = { +static struct seq_operations x25_seq_route_ops = { .start = x25_seq_route_start, .next = x25_seq_route_next, .stop = x25_seq_route_stop, .show = x25_seq_route_show, }; -struct seq_operations x25_seq_socket_ops = { +static struct seq_operations x25_seq_socket_ops = { .start = x25_seq_socket_start, .next = x25_seq_socket_next, .stop = x25_seq_socket_stop, diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 0612ecbc2764..8947e63ca788 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -416,14 +416,6 @@ struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx) return &ealg_list[idx]; } -struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx) -{ - if (idx >= calg_entries()) - return NULL; - - return &calg_list[idx]; -} - /* * Probe for the availability of crypto algorithms, and set the available * flag for any algorithms found on the system. This is typically called by diff --git a/net/xfrm/xfrm_export.c b/net/xfrm/xfrm_export.c index fd34836cdf7f..7ca0e889e54a 100644 --- a/net/xfrm/xfrm_export.c +++ b/net/xfrm/xfrm_export.c @@ -53,7 +53,6 @@ EXPORT_SYMBOL_GPL(xfrm_count_auth_supported); EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); EXPORT_SYMBOL_GPL(xfrm_aalg_get_byidx); EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx); -EXPORT_SYMBOL_GPL(xfrm_calg_get_byidx); EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid); EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid); EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 70ba1fc61d9f..c6ffe5413ca1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -33,7 +33,7 @@ struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED; static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; -kmem_cache_t *xfrm_dst_cache; +static kmem_cache_t *xfrm_dst_cache; static struct work_struct xfrm_policy_gc_work; static struct list_head xfrm_policy_gc_list = @@ -498,7 +498,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, *obj_refp = &pol->refcnt; } -struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) { struct xfrm_policy *pol; @@ -1222,13 +1222,13 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void return NOTIFY_DONE; } -struct notifier_block xfrm_dev_notifier = { +static struct notifier_block xfrm_dev_notifier = { xfrm_dev_event, NULL, 0 }; -void __init xfrm_policy_init(void) +static void __init xfrm_policy_init(void) { xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a3ef46c5d187..e82cb815c791 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -51,6 +51,9 @@ static void __xfrm_state_delete(struct xfrm_state *x); static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); +static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); +static void km_state_expired(struct xfrm_state *x, int hard); + static void xfrm_state_gc_destroy(struct xfrm_state *x) { if (del_timer(&x->timer)) @@ -746,7 +749,7 @@ void 
xfrm_replay_advance(struct xfrm_state *x, u32 seq) static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); static rwlock_t xfrm_km_lock = RW_LOCK_UNLOCKED; -void km_state_expired(struct xfrm_state *x, int hard) +static void km_state_expired(struct xfrm_state *x, int hard) { struct xfrm_mgr *km; @@ -764,7 +767,7 @@ void km_state_expired(struct xfrm_state *x, int hard) wake_up(&km_waitq); } -int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) +static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) { int err = -EINVAL; struct xfrm_mgr *km; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 502c30d097b9..b1e1b29dc519 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1128,8 +1128,8 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ -struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, - u8 *data, int len, int *dir) +static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, + u8 *data, int len, int *dir) { struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 7afe09eed6ac..49af7bd9e40b 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -566,7 +566,7 @@ void avc_audit(u32 ssid, u32 tsid, switch (sk->sk_family) { case AF_INET: { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); avc_print_ipv4_addr(ab, inet->rcv_saddr, inet->sport, @@ -577,7 +577,7 @@ void avc_audit(u32 ssid, u32 tsid, break; } case AF_INET6: { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *inet6 = inet6_sk(sk); avc_print_ipv6_addr(ab, &inet6->rcv_saddr, |
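[Editorial closing note, not part of the patch: amid the symbol-visibility cleanups, two behavioural fixes stand out. net/sched/sch_dsmark.c now masks the ECN bits out of the DS field before using it as tc_index, and net/sched/police.c adds a length check before every RTA_DATA() dereference, so a malformed netlink request carrying a truncated TCA_POLICE_TBF, TCA_POLICE_RESULT or TCA_POLICE_AVRATE attribute can no longer be read past the end of its payload. The guarded-access idiom, as a userspace sketch against the rtnetlink macros (the helper name and config struct are invented for illustration):

    #include <linux/rtnetlink.h>
    #include <string.h>

    struct police_cfg {             /* stand-in for struct tc_police */
            unsigned int burst;
            unsigned int mtu;
    };

    /* Refuse the attribute unless its payload is exactly the size of
     * the structure about to be overlaid on it; only then is the
     * RTA_DATA() cast safe. */
    static int get_police_cfg(struct rtattr *rta, struct police_cfg *out)
    {
            if (rta == NULL || RTA_PAYLOAD(rta) != sizeof(*out))
                    return -1;      /* absent, short, or oversized */
            memcpy(out, RTA_DATA(rta), sizeof(*out));
            return 0;
    }

The patch performs the same test inline (returning an error or jumping to its failure label) rather than through a helper, but the rule is identical: validate rta_len before trusting the cast.]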
