From ba3e3dba0eb04168649220368795a7a6abf0ea82 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Sun, 12 Jan 2003 21:20:22 -0800 Subject: [PATCH] IPMI (Intelligent Platform Management Interface) driver --- include/linux/ipmi.h | 516 +++++++++++++++++++++++++++++++++++++++++++ include/linux/ipmi_msgdefs.h | 58 +++++ include/linux/ipmi_smi.h | 144 ++++++++++++ 3 files changed, 718 insertions(+) create mode 100644 include/linux/ipmi.h create mode 100644 include/linux/ipmi_msgdefs.h create mode 100644 include/linux/ipmi_smi.h (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h new file mode 100644 index 000000000000..e14d3f10872e --- /dev/null +++ b/include/linux/ipmi.h @@ -0,0 +1,516 @@ +/* + * ipmi.h + * + * MontaVista IPMI interface + * + * Author: MontaVista Software, Inc. + * Corey Minyard + * source@mvista.com + * + * Copyright 2002 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __LINUX_IPMI_H +#define __LINUX_IPMI_H + +#include + +/* + * This file describes an interface to an IPMI driver. You have to + * have a fairly good understanding of IPMI to use this, so go read + * the specs first before actually trying to do anything. + * + * With that said, this driver provides a multi-user interface to the + * IPMI driver, and it allows multiple IPMI physical interfaces below + * the driver. The physical interfaces bind as a lower layer on the + * driver. They appear as interfaces to the application using this + * interface. + * + * Multi-user means that multiple applications may use the driver, + * send commands, receive responses, etc. The driver keeps track of + * commands the user sends and tracks the responses. The responses + * will go back to the application that send the command. If the + * response doesn't come back in time, the driver will return a + * timeout error response to the application. Asynchronous events + * from the BMC event queue will go to all users bound to the driver. + * The incoming event queue in the BMC will automatically be flushed + * if it becomes full and it is queried once a second to see if + * anything is in it. Incoming commands to the driver will get + * delivered as commands. + * + * This driver provides two main interfaces: one for in-kernel + * applications and another for userland applications. 
The + * capabilities are basically the same for both interface, although + * the interfaces are somewhat different. The stuff in the + * #ifdef KERNEL below is the in-kernel interface. The userland + * interface is defined later in the file. */ + + + +/* + * This is an overlay for all the address types, so it's easy to + * determine the actual address type. This is kind of like addresses + * work for sockets. + */ +#define IPMI_MAX_ADDR_SIZE 32 +struct ipmi_addr +{ + /* Try to take these from the "Channel Medium Type" table + in section 6.5 of the IPMI 1.5 manual. */ + int addr_type; + short channel; + char data[IPMI_MAX_ADDR_SIZE]; +}; + +/* + * When the address is not used, the type will be set to this value. + * The channel is the BMC's channel number for the channel (usually + * 0), or IPMC_BMC_CHANNEL if communicating directly with the BMC. + */ +#define IPMI_SYSTEM_INTERFACE_ADDR_TYPE 0x0c +struct ipmi_system_interface_addr +{ + int addr_type; + short channel; + unsigned char lun; +}; + +/* An IPMB Address. */ +#define IPMI_IPMB_ADDR_TYPE 0x01 +/* Used for broadcast get device id as described in section 17.9 of the + IPMI 1.5 manual. */ +#define IPMI_IPMB_BROADCAST_ADDR_TYPE 0x41 +struct ipmi_ipmb_addr +{ + int addr_type; + short channel; + unsigned char slave_addr; + unsigned char lun; +}; + + +/* + * Channel for talking directly with the BMC. When using this + * channel, This is for the system interface address type only. FIXME + * - is this right, or should we use -1? + */ +#define IPMI_BMC_CHANNEL 0xf +#define IPMI_NUM_CHANNELS 0x10 + + +/* + * A raw IPMI message without any addressing. This covers both + * commands and responses. The completion code is always the first + * byte of data in the response (as the spec shows the messages laid + * out). + */ +struct ipmi_msg +{ + unsigned char netfn; + unsigned char cmd; + unsigned short data_len; + unsigned char *data; +}; + +/* + * Various defines that are useful for IPMI applications. + */ +#define IPMI_INVALID_CMD_COMPLETION_CODE 0xC1 +#define IPMI_TIMEOUT_COMPLETION_CODE 0xC3 +#define IPMI_UNKNOWN_ERR_COMPLETION_CODE 0xff + + +/* + * Receive types for messages coming from the receive interface. This + * is used for the receive in-kernel interface and in the receive + * IOCTL. + */ +#define IPMI_RESPONSE_RECV_TYPE 1 /* A response to a command */ +#define IPMI_ASYNC_EVENT_RECV_TYPE 2 /* Something from the event queue */ +#define IPMI_CMD_RECV_TYPE 3 /* A command from somewhere else */ +/* Note that async events and received commands do not have a completion + code as the first byte of the incoming data, unlike a response. */ + + + +#ifdef __KERNEL__ + +/* + * The in-kernel interface. + */ +#include + +/* Opaque type for a IPMI message user. One of these is needed to + send and receive messages. */ +typedef struct ipmi_user *ipmi_user_t; + +/* + * Stuff coming from the recieve interface comes as one of these. + * They are allocated, the receiver must free them with + * ipmi_free_recv_msg() when done with the message. The link is not + * used after the message is delivered, so the upper layer may use the + * link to build a linked list, if it likes. + */ +struct ipmi_recv_msg +{ + struct list_head link; + + /* The type of message as defined in the "Receive Types" + defines above. */ + int recv_type; + + ipmi_user_t user; + struct ipmi_addr addr; + long msgid; + struct ipmi_msg msg; + + /* Call this when done with the message. It will presumably free + the message and do any other necessary cleanup. 
*/ + void (*done)(struct ipmi_recv_msg *msg); + + /* Place-holder for the data, don't make any assumptions about + the size or existance of this, since it may change. */ + unsigned char msg_data[IPMI_MAX_MSG_LENGTH]; +}; + +/* Allocate and free the receive message. */ +static inline void ipmi_free_recv_msg(struct ipmi_recv_msg *msg) +{ + msg->done(msg); +} +struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); + +struct ipmi_user_hndl +{ + /* Routine type to call when a message needs to be routed to + the upper layer. This will be called with some locks held, + the only IPMI routines that can be called are ipmi_request + and the alloc/free operations. */ + void (*ipmi_recv_hndl)(struct ipmi_recv_msg *msg, + void *handler_data); + + /* Called when the interface detects a watchdog pre-timeout. If + this is NULL, it will be ignored for the user. */ + void (*ipmi_watchdog_pretimeout)(void *handler_data); +}; + +/* Create a new user of the IPMI layer on the given interface number. */ +int ipmi_create_user(unsigned int if_num, + struct ipmi_user_hndl *handler, + void *handler_data, + ipmi_user_t *user); + +/* Destroy the given user of the IPMI layer. */ +int ipmi_destroy_user(ipmi_user_t user); + +/* Get the IPMI version of the BMC we are talking to. */ +void ipmi_get_version(ipmi_user_t user, + unsigned char *major, + unsigned char *minor); + +/* Set and get the slave address and LUN that we will use for our + source messages. Note that this affects the interface, not just + this user, so it will affect all users of this interface. This is + so some initialization code can come in and do the OEM-specific + things it takes to determine your address (if not the BMC) and set + it for everyone else. */ +void ipmi_set_my_address(ipmi_user_t user, + unsigned char address); +unsigned char ipmi_get_my_address(ipmi_user_t user); +void ipmi_set_my_LUN(ipmi_user_t user, + unsigned char LUN); +unsigned char ipmi_get_my_LUN(ipmi_user_t user); + +/* + * Send a command request from the given user. The address is the + * proper address for the channel type. If this is a command, then + * the message response comes back, the receive handler for this user + * will be called with the given msgid value in the recv msg. If this + * is a response to a command, then the msgid will be used as the + * sequence number for the response (truncated if necessary), so when + * sending a response you should use the sequence number you received + * in the msgid field of the received command. If the priority is > + * 0, the message will go into a high-priority queue and be sent + * first. Otherwise, it goes into a normal-priority queue. + */ +int ipmi_request(ipmi_user_t user, + struct ipmi_addr *addr, + long msgid, + struct ipmi_msg *msg, + int priority); + +/* + * Like ipmi_request, but lets you specify the slave return address. + */ +int ipmi_request_with_source(ipmi_user_t user, + struct ipmi_addr *addr, + long msgid, + struct ipmi_msg *msg, + int priority, + unsigned char source_address, + unsigned char source_lun); + +/* + * Like ipmi_request, but with messages supplied. This will not + * allocate any memory, and the messages may be statically allocated + * (just make sure to do the "done" handling on them). Note that this + * is primarily for the watchdog timer, since it should be able to + * send messages even if no memory is available. This is subject to + * change as the system changes, so don't use it unless you REALLY + * have to. 
+ */ +int ipmi_request_supply_msgs(ipmi_user_t user, + struct ipmi_addr *addr, + long msgid, + struct ipmi_msg *msg, + void *supplied_smi, + struct ipmi_recv_msg *supplied_recv, + int priority); + +/* + * When commands come in to the SMS, the user can register to receive + * them. Only one user can be listening on a specific netfn/cmd pair + * at a time, you will get an EBUSY error if the command is already + * registered. If a command is received that does not have a user + * registered, the driver will automatically return the proper + * error. + */ +int ipmi_register_for_cmd(ipmi_user_t user, + unsigned char netfn, + unsigned char cmd); +int ipmi_unregister_for_cmd(ipmi_user_t user, + unsigned char netfn, + unsigned char cmd); + +/* + * When the user is created, it will not receive IPMI events by + * default. The user must set this to TRUE to get incoming events. + * The first user that sets this to TRUE will receive all events that + * have been queued while no one was waiting for events. + */ +int ipmi_set_gets_events(ipmi_user_t user, int val); + +/* + * Register the given user to handle all received IPMI commands. This + * will fail if anyone is registered as a command receiver or if + * another is already registered to receive all commands. NOTE THAT + * THIS IS FOR EMULATION USERS ONLY, DO NOT USER THIS FOR NORMAL + * STUFF. + */ +int ipmi_register_all_cmd_rcvr(ipmi_user_t user); +int ipmi_unregister_all_cmd_rcvr(ipmi_user_t user); + + +/* + * Called when a new SMI is registered. This will also be called on + * every existing interface when a new watcher is registered with + * ipmi_smi_watcher_register(). + */ +struct ipmi_smi_watcher +{ + struct list_head link; + + /* These two are called with read locks held for the interface + the watcher list. So you can add and remove users from the + IPMI interface, send messages, etc., but you cannot add + or remove SMI watchers or SMI interfaces. */ + void (*new_smi)(int if_num); + void (*smi_gone)(int if_num); +}; + +int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher); +int ipmi_smi_watcher_unregister(struct ipmi_smi_watcher *watcher); + +/* The following are various helper functions for dealing with IPMI + addresses. */ + +/* Return the maximum length of an IPMI address given it's type. */ +unsigned int ipmi_addr_length(int addr_type); + +/* Validate that the given IPMI address is valid. */ +int ipmi_validate_addr(struct ipmi_addr *addr, int len); + +/* Return 1 if the given addresses are equal, 0 if not. */ +int ipmi_addr_equal(struct ipmi_addr *addr1, struct ipmi_addr *addr2); + +#endif /* __KERNEL__ */ + + +/* + * The userland interface + */ + +/* + * The userland interface for the IPMI driver is a standard character + * device, with each instance of an interface registered as a minor + * number under the major character device. + * + * The read and write calls do not work, to get messages in and out + * requires ioctl calls because of the complexity of the data. select + * and poll do work, so you can wait for input using the file + * descriptor, you just can use read to get it. + * + * In general, you send a command down to the interface and receive + * responses back. You can use the msgid value to correlate commands + * and responses, the driver will take care of figuring out which + * incoming messages are for which command and find the proper msgid + * value to report. You will only receive reponses for commands you + * send. 
Asynchronous events, however, go to all open users, so you + * must be ready to handle these (or ignore them if you don't care). + * + * The address type depends upon the channel type. When talking + * directly to the BMC (IPMC_BMC_CHANNEL), the address is ignored + * (IPMI_UNUSED_ADDR_TYPE). When talking to an IPMB channel, you must + * supply a valid IPMB address with the addr_type set properly. + * + * When talking to normal channels, the driver takes care of the + * details of formatting and sending messages on that channel. You do + * not, for instance, have to format a send command, you just send + * whatever command you want to the channel, the driver will create + * the send command, automatically issue receive command and get even + * commands, and pass those up to the proper user. + */ + + +/* The magic IOCTL value for this interface. */ +#define IPMI_IOC_MAGIC 'i' + + +/* Messages sent to the interface are this format. */ +struct ipmi_req +{ + unsigned char *addr; /* Address to send the message to. */ + unsigned int addr_len; + + long msgid; /* The sequence number for the message. This + exact value will be reported back in the + response to this request if it is a command. + If it is a response, this will be used as + the sequence value for the response. */ + + struct ipmi_msg msg; +}; +/* + * Send a message to the interfaces. error values are: + * - EFAULT - an address supplied was invalid. + * - EINVAL - The address supplied was not valid, or the command + * was not allowed. + * - EMSGSIZE - The message to was too large. + * - ENOMEM - Buffers could not be allocated for the command. + */ +#define IPMICTL_SEND_COMMAND _IOR(IPMI_IOC_MAGIC, 13, \ + struct ipmi_req) + +/* Messages received from the interface are this format. */ +struct ipmi_recv +{ + int recv_type; /* Is this a command, response or an + asyncronous event. */ + + unsigned char *addr; /* Address the message was from is put + here. The caller must supply the + memory. */ + unsigned int addr_len; /* The size of the address buffer. + The caller supplies the full buffer + length, this value is updated to + the actual message length when the + message is received. */ + + long msgid; /* The sequence number specified in the request + if this is a response. If this is a command, + this will be the sequence number from the + command. */ + + struct ipmi_msg msg; /* The data field must point to a buffer. + The data_size field must be set to the + size of the message buffer. The + caller supplies the full buffer + length, this value is updated to the + actual message length when the message + is received. */ +}; + +/* + * Receive a message. error values: + * - EAGAIN - no messages in the queue. + * - EFAULT - an address supplied was invalid. + * - EINVAL - The address supplied was not valid. + * - EMSGSIZE - The message to was too large to fit into the message buffer, + * the message will be left in the buffer. */ +#define IPMICTL_RECEIVE_MSG _IOWR(IPMI_IOC_MAGIC, 12, \ + struct ipmi_recv) + +/* + * Like RECEIVE_MSG, but if the message won't fit in the buffer, it + * will truncate the contents instead of leaving the data in the + * buffer. + */ +#define IPMICTL_RECEIVE_MSG_TRUNC _IOWR(IPMI_IOC_MAGIC, 11, \ + struct ipmi_recv) + +/* Register to get commands from other entities on this interface. */ +struct ipmi_cmdspec +{ + unsigned char netfn; + unsigned char cmd; +}; + +/* + * Register to receive a specific command. error values: + * - EFAULT - an address supplied was invalid. 
+ * - EBUSY - The netfn/cmd supplied was already in use. + * - ENOMEM - could not allocate memory for the entry. + */ +#define IPMICTL_REGISTER_FOR_CMD _IOR(IPMI_IOC_MAGIC, 14, \ + struct ipmi_cmdspec) +/* + * Unregister a regsitered command. error values: + * - EFAULT - an address supplied was invalid. + * - ENOENT - The netfn/cmd was not found registered for this user. + */ +#define IPMICTL_UNREGISTER_FOR_CMD _IOR(IPMI_IOC_MAGIC, 15, \ + struct ipmi_cmdspec) + +/* + * Set whether this interface receives events. Note that the first + * user registered for events will get all pending events for the + * interface. error values: + * - EFAULT - an address supplied was invalid. + */ +#define IPMICTL_SET_GETS_EVENTS_CMD _IOR(IPMI_IOC_MAGIC, 16, int) + +/* + * Set and get the slave address and LUN that we will use for our + * source messages. Note that this affects the interface, not just + * this user, so it will affect all users of this interface. This is + * so some initialization code can come in and do the OEM-specific + * things it takes to determine your address (if not the BMC) and set + * it for everyone else. You should probably leave the LUN alone. + */ +#define IPMICTL_SET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 17, unsigned int) +#define IPMICTL_GET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 18, unsigned int) +#define IPMICTL_SET_MY_LUN_CMD _IOR(IPMI_IOC_MAGIC, 19, unsigned int) +#define IPMICTL_GET_MY_LUN_CMD _IOR(IPMI_IOC_MAGIC, 20, unsigned int) + +#endif /* __LINUX_IPMI_H */ diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h new file mode 100644 index 000000000000..12588de3c45e --- /dev/null +++ b/include/linux/ipmi_msgdefs.h @@ -0,0 +1,58 @@ +/* + * ipmi_smi.h + * + * MontaVista IPMI system management interface + * + * Author: MontaVista Software, Inc. + * Corey Minyard + * source@mvista.com + * + * Copyright 2002 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __LINUX_IPMI_MSGDEFS_H +#define __LINUX_IPMI_MSGDEFS_H + +/* Various definitions for IPMI messages used by almost everything in + the IPMI stack. 
*/ + +#define IPMI_NETFN_APP_REQUEST 0x06 +#define IPMI_NETFN_APP_RESPONSE 0x07 + +#define IPMI_BMC_SLAVE_ADDR 0x20 + +#define IPMI_GET_DEVICE_ID_CMD 0x01 + +#define IPMI_CLEAR_MSG_FLAGS_CMD 0x30 +#define IPMI_GET_MSG_FLAGS_CMD 0x31 +#define IPMI_SEND_MSG_CMD 0x34 +#define IPMI_GET_MSG_CMD 0x33 + +#define IPMI_SET_BMC_GLOBAL_ENABLES_CMD 0x2e +#define IPMI_GET_BMC_GLOBAL_ENABLES_CMD 0x2f +#define IPMI_READ_EVENT_MSG_BUFFER_CMD 0x35 + +#define IPMI_MAX_MSG_LENGTH 80 + +#endif /* __LINUX_IPMI_MSGDEFS_H */ diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h new file mode 100644 index 000000000000..5916dea748e3 --- /dev/null +++ b/include/linux/ipmi_smi.h @@ -0,0 +1,144 @@ +/* + * ipmi_smi.h + * + * MontaVista IPMI system management interface + * + * Author: MontaVista Software, Inc. + * Corey Minyard + * source@mvista.com + * + * Copyright 2002 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __LINUX_IPMI_SMI_H +#define __LINUX_IPMI_SMI_H + +#include + +/* This files describes the interface for IPMI system management interface + drivers to bind into the IPMI message handler. */ + +/* Structure for the low-level drivers. */ +typedef struct ipmi_smi *ipmi_smi_t; + +/* + * Messages to/from the lower layer. The smi interface will take one + * of these to send. After the send has occurred and a response has + * been received, it will report this same data structure back up to + * the upper layer. If an error occurs, it should fill in the + * response with an error code in the completion code location. When + * asyncronous data is received, one of these is allocated, the + * data_size is set to zero and the response holds the data from the + * get message or get event command that the interface initiated. + * Note that it is the interfaces responsibility to detect + * asynchronous data and messages and request them from the + * interface. + */ +struct ipmi_smi_msg +{ + struct list_head link; + + long msgid; + void *user_data; + + /* If 0, add to the end of the queue. If 1, add to the beginning. */ + int prio; + + int data_size; + unsigned char data[IPMI_MAX_MSG_LENGTH]; + + int rsp_size; + unsigned char rsp[IPMI_MAX_MSG_LENGTH]; + + /* Will be called when the system is done with the message + (presumably to free it). 
*/ + void (*done)(struct ipmi_smi_msg *msg); +}; + +struct ipmi_smi_handlers +{ + /* Called to enqueue an SMI message to be sent. This + operation is not allowed to fail. If an error occurs, it + should report back the error in a received message. It may + do this in the current call context, since no write locks + are held when this is run. If the priority is > 0, the + message will go into a high-priority queue and be sent + first. Otherwise, it goes into a normal-priority queue. */ + void (*sender)(void *send_info, + struct ipmi_smi_msg *msg, + int priority); + + /* Called by the upper layer to request that we try to get + events from the BMC we are attached to. */ + void (*request_events)(void *send_info); + + /* Called when someone is using the interface, so the module can + adjust it's use count. Return zero if successful, or an + errno if not. */ + int (*new_user)(void *send_info); + + /* Called when someone is no longer using the interface, so the + module can adjust it's use count. */ + void (*user_left)(void *send_info); + + /* Called when the interface should go into "run to + completion" mode. If this call sets the value to true, the + interface should make sure that all messages are flushed + out and that none are pending, and any new requests are run + to completion immediately. */ + void (*set_run_to_completion)(void *send_info, int run_to_completion); +}; + +/* Add a low-level interface to the IPMI driver. */ +int ipmi_register_smi(struct ipmi_smi_handlers *handlers, + void *send_info, + unsigned char version_major, + unsigned char version_minor, + ipmi_smi_t *intf); + +/* + * Remove a low-level interface from the IPMI driver. This will + * return an error if the interface is still in use by a user. + */ +int ipmi_unregister_smi(ipmi_smi_t intf); + +/* + * The lower layer reports received messages through this interface. + * The data_size should be zero if this is an asyncronous message. If + * the lower layer gets an error sending a message, it should format + * an error response in the message response. + */ +void ipmi_smi_msg_received(ipmi_smi_t intf, + struct ipmi_smi_msg *msg); + +/* The lower layer received a watchdog pre-timeout on interface. */ +void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf); + +struct ipmi_smi_msg *ipmi_alloc_smi_msg(void); +static inline void ipmi_free_smi_msg(struct ipmi_smi_msg *msg) +{ + msg->done(msg); +} + +#endif /* __LINUX_IPMI_SMI_H */ -- cgit v1.2.3 From 74e7a24b86f6c614db1fa1d2251de0a2bd9dae6a Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 12 Jan 2003 21:23:07 -0800 Subject: [PATCH] cpufreq: add sysfs interface This patch adds a sysfs interface to the cpufreq core, and marks the previous /proc/cpufreq interface as deprecated. As in drivers/base/cpu.c a "CPU driver" is registered, cpufreq acts as "interface" to this, offering the following files for each CPU (in /system/devices/sys/cpu.../) where CPUfreq support is present cpuinfo_min_freq (ro) - minimum frequency (in kHz) the CPU supports cpuinfo_max_freq (ro) - maximum frequency (in kHz) the CPU supports scaling_min_freq (rw) - minimum frequency (in kHz) cpufreq may scale the CPU core to scaling_max_freq (rw) - maximum frequency (in kHz) cpufreq may scale the CPU core to scaling_governor (rw) - governor == "A feedback device on a machine or engine that is used to provide automatic control, as of speed, pressure, or temperature" [1, as noted by David Kimdon]. Decides what frequency is used. 
Currently, only "performance" and "powersave" are supported, more may be added later. (In future, a file scaling_driver (ro) which shows what CPUfreq driver is used (arm-sa1100, gx-suspmod, speedstep, longrun, powernow-k6, ...) might be added, and this driver will be allowed to add files scaling_driver_* for driver-specific settings like "prefer fast FSB". And scaling_governor_* files might offer settings for the governor.) To implement this sysfs interface, the driver model "interface" code is used. Unfortunately, it has a non-trivial locking bug in drivers/base/intf.c: there's a down_write call for cls->subsys.rwsem in add_intf(), which then calls add(), which may call intf->add_device(), which may call interface_add_data(), which calls kobject_register(), which calls kobject_add(), which then tries to down_write cls->subsys.rwsem. Remember, that was already locked writable in add_intf(). Because of that, interface_add_data() is commented out; this means that no link in /system/class/cpu/cpufreq is added, and that the dev-removal code isn't called. This shouldn't be a problem yet, though; as no cpufreq driver I know of is capable of CPU hotplugging. Dominik [1] http://dictionary.reference.com/search?q=governor --- arch/i386/Kconfig | 18 ++- include/linux/cpufreq.h | 2 + kernel/cpufreq.c | 308 +++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 282 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 2d38729a5b36..d9d687f5458d 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -949,15 +949,27 @@ config CPU_FREQ If in doubt, say N. +config CPU_FREQ_PROC_INTF + bool "/proc/cpufreq interface (DEPRECATED)" + depends on CPU_FREQ && PROC_FS + help + This enables the /proc/cpufreq interface for controlling + CPUFreq. Please note that it is recommended to use the sysfs + interface instead (which is built automatically). + + For details, take a look at linux/Documentation/cpufreq. + + If in doubt, say N. + config CPU_FREQ_24_API bool "/proc/sys/cpu/ interface (2.4. / OLD)" depends on CPU_FREQ help This enables the /proc/sys/cpu/ sysctl interface for controlling CPUFreq, as known from the 2.4.-kernel patches for CPUFreq. 2.5 - uses /proc/cpufreq instead. Please note that some drivers do not - work well with the 2.4. /proc/sys/cpu sysctl interface, so if in - doubt, say N here. + uses a sysfs interface instead. Please note that some drivers do + not work well with the 2.4. /proc/sys/cpu sysctl interface, + so if in doubt, say N here. For details, take a look at linux/Documentation/cpufreq. 
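The commit message above lists the per-CPU sysfs files this patch creates but does not show how they are consumed. Below is a minimal userspace sketch of that usage, assuming the attributes land directly under the CPU device directory (the message's "/system/devices/sys/cpu.../" presumably means /sys/devices/system/cpu/...); the CPU0_DIR path and the read_attr()/write_attr() helpers are illustrative assumptions, not part of the patch.

/*
 * Hypothetical userspace sketch: read the current governor and the
 * hardware maximum frequency for CPU0, then request "powersave".
 * The path below is an assumption; adjust to wherever sysfs exposes
 * the files on a given system.
 */
#include <stdio.h>
#include <string.h>

#define CPU0_DIR "/sys/devices/system/cpu/cpu0"   /* assumed location */

static int read_attr(const char *name, char *buf, int len)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", CPU0_DIR, name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, len, f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	buf[strcspn(buf, "\n")] = '\0';	/* strip trailing newline */
	return 0;
}

static int write_attr(const char *name, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", CPU0_DIR, name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	fclose(f);
	return 0;
}

int main(void)
{
	char gov[32], freq[32];

	if (read_attr("scaling_governor", gov, sizeof(gov)) == 0)
		printf("current governor: %s\n", gov);
	if (read_attr("cpuinfo_max_freq", freq, sizeof(freq)) == 0)
		printf("hardware max (kHz): %s\n", freq);

	/* only "performance" and "powersave" exist in this version */
	if (write_attr("scaling_governor", "powersave") != 0)
		perror("write scaling_governor");

	return 0;
}

Note that this version of the code only parses "performance" and "powersave", so writing any other governor string should be rejected with -EINVAL.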
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0e008ba3618c..12c59ed7d695 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -17,6 +17,7 @@ #include #include #include +#include /********************************************************************* @@ -57,6 +58,7 @@ struct cpufreq_policy { unsigned int max; /* in kHz */ unsigned int policy; /* see above */ struct cpufreq_cpuinfo cpuinfo; /* see above */ + struct intf_data intf; /* interface data */ }; #define CPUFREQ_ADJUST (0) diff --git a/kernel/cpufreq.c b/kernel/cpufreq.c index d5b4003ef3cd..f2981df44f65 100644 --- a/kernel/cpufreq.c +++ b/kernel/cpufreq.c @@ -20,12 +20,16 @@ #include #include #include +#include + +#ifdef CONFIG_CPU_FREQ_PROC_INTF #include #include - #include +#endif #ifdef CONFIG_CPU_FREQ_24_API +#include #include #endif @@ -75,11 +79,255 @@ static unsigned int cpu_cur_freq[NR_CPUS]; #endif +/********************************************************************* + * SYSFS INTERFACE * + *********************************************************************/ + +/** + * cpufreq_parse_governor - parse a governor string + */ +static int cpufreq_parse_governor (char *str_governor, unsigned int *governor) +{ + if (!strnicmp(str_governor, "performance", 11)) { + *governor = CPUFREQ_POLICY_PERFORMANCE; + return 0; + } else if (!strnicmp(str_governor, "powersave", 9)) { + *governor = CPUFREQ_POLICY_POWERSAVE; + return 0; + } else + return -EINVAL; +} + + +/* forward declarations */ +static int cpufreq_add_dev (struct device * dev); +static int cpufreq_remove_dev (struct intf_data * dev); + +/* drivers/base/cpu.c */ +extern struct device_class cpu_devclass; + +static struct device_interface cpufreq_interface = { + .name = "cpufreq", + .devclass = &cpu_devclass, + .add_device = &cpufreq_add_dev, + .remove_device = &cpufreq_remove_dev, + .kset = { .subsys = &cpu_devclass.subsys, }, + .devnum = 0, +}; + +static inline int to_cpu_nr (struct device *dev) +{ + struct sys_device * cpu_sys_dev = container_of(dev, struct sys_device, dev); + return (cpu_sys_dev->id); +} + + +/** + * cpufreq_per_cpu_attr_read() / show_##file_name() - print out cpufreq information + * + * Write out information from cpufreq_driver->policy[cpu]; object must be + * "unsigned int". 
+ */ + +#define cpufreq_per_cpu_attr_read(file_name, object) \ +static ssize_t show_##file_name \ +(struct device *dev, char *buf) \ +{ \ + unsigned int value = 0; \ + \ + if (!dev) \ + return 0; \ + \ + down(&cpufreq_driver_sem); \ + if (cpufreq_driver) \ + value = cpufreq_driver->policy[to_cpu_nr(dev)].object; \ + up(&cpufreq_driver_sem); \ + \ + return sprintf (buf, "%u\n", value); \ +} + + +/** + * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access + */ +#define cpufreq_per_cpu_attr_write(file_name, object) \ +static ssize_t store_##file_name \ +(struct device *dev, const char *buf) \ +{ \ + unsigned int ret = -EINVAL; \ + struct cpufreq_policy policy; \ + \ + if (!dev) \ + return 0; \ + \ + ret = cpufreq_get_policy(&policy, to_cpu_nr(dev)); \ + if (ret) \ + return ret; \ + \ + ret = sscanf (buf, "%u", &policy.object); \ + if (ret != 1) \ + return -EINVAL; \ + \ + ret = cpufreq_set_policy(&policy); \ + if (ret) \ + return ret; \ + \ + return strlen(buf); \ +} + + +/** + * show_scaling_governor - show the current policy for the specified CPU + */ +static ssize_t show_scaling_governor (struct device *dev, char *buf) +{ + unsigned int value = 0; + + if (!dev) + return 0; + + down(&cpufreq_driver_sem); + if (cpufreq_driver) + value = cpufreq_driver->policy[to_cpu_nr(dev)].policy; + up(&cpufreq_driver_sem); + + switch (value) { + case CPUFREQ_POLICY_POWERSAVE: + return sprintf(buf, "powersave\n"); + case CPUFREQ_POLICY_PERFORMANCE: + return sprintf(buf, "performance\n"); + } + + return -EINVAL; +} + + +/** + * store_scaling_governor - store policy for the specified CPU + */ +static ssize_t store_scaling_governor (struct device *dev, const char *buf) +{ + unsigned int ret = -EINVAL; + char str_governor[16]; + struct cpufreq_policy policy; + + if (!dev) + return 0; + + ret = cpufreq_get_policy(&policy, to_cpu_nr(dev)); + if (ret) + return ret; + + ret = sscanf (buf, "%15s", str_governor); + if (ret != 1) + return -EINVAL; + + if (cpufreq_parse_governor(str_governor, &policy.policy)) + return -EINVAL; + + ret = cpufreq_set_policy(&policy); + if (ret) + return ret; + + return strlen(buf); +} + + +/** + * cpufreq_per_cpu_attr_ro - read-only cpufreq per-CPU file + */ +#define cpufreq_per_cpu_attr_ro(file_name, object) \ +cpufreq_per_cpu_attr_read(file_name, object) \ +static DEVICE_ATTR(file_name, S_IRUGO, show_##file_name, NULL); + + +/** + * cpufreq_per_cpu_attr_rw - read-write cpufreq per-CPU file + */ +#define cpufreq_per_cpu_attr_rw(file_name, object) \ +cpufreq_per_cpu_attr_read(file_name, object) \ +cpufreq_per_cpu_attr_write(file_name, object) \ +static DEVICE_ATTR(file_name, (S_IRUGO | S_IWUSR), show_##file_name, store_##file_name); + + +/* create the file functions */ +cpufreq_per_cpu_attr_ro(cpuinfo_min_freq, cpuinfo.min_freq); +cpufreq_per_cpu_attr_ro(cpuinfo_max_freq, cpuinfo.max_freq); +cpufreq_per_cpu_attr_rw(scaling_min_freq, min); +cpufreq_per_cpu_attr_rw(scaling_max_freq, max); + +static DEVICE_ATTR(scaling_governor, (S_IRUGO | S_IWUSR), show_scaling_governor, store_scaling_governor); + + +/** + * cpufreq_add_dev - add a CPU device + * + * Adds the cpufreq interface for a CPU device. 
+ */ +static int cpufreq_add_dev (struct device * dev) +{ + unsigned int cpu = to_cpu_nr(dev); + int ret = 0; + + down(&cpufreq_driver_sem); + if (!cpufreq_driver) { + up(&cpufreq_driver_sem); + return -EINVAL; + } + + /* prepare interface data */ + cpufreq_driver->policy[cpu].intf.dev = dev; + cpufreq_driver->policy[cpu].intf.intf = &cpufreq_interface; + strncpy(cpufreq_driver->policy[cpu].intf.kobj.name, cpufreq_interface.name, KOBJ_NAME_LEN); + cpufreq_driver->policy[cpu].intf.kobj.parent = &(dev->kobj); + cpufreq_driver->policy[cpu].intf.kobj.kset = &(cpufreq_interface.kset); + + /* add interface */ + /* currently commented out due to deadlock */ + //ret = interface_add_data(&(cpufreq_driver->policy[cpu].intf)); + if (ret) { + up(&cpufreq_driver_sem); + return ret; + } + + /* create files */ + device_create_file (dev, &dev_attr_cpuinfo_min_freq); + device_create_file (dev, &dev_attr_cpuinfo_max_freq); + device_create_file (dev, &dev_attr_scaling_min_freq); + device_create_file (dev, &dev_attr_scaling_max_freq); + device_create_file (dev, &dev_attr_scaling_governor); + + up(&cpufreq_driver_sem); + return ret; +} + + +/** + * cpufreq_remove_dev - remove a CPU device + * + * Removes the cpufreq interface for a CPU device. Is called with + * cpufreq_driver_sem locked. + */ +static int cpufreq_remove_dev (struct intf_data *intf) +{ + struct device * dev = intf->dev; + + device_remove_file (dev, &dev_attr_cpuinfo_min_freq); + device_remove_file (dev, &dev_attr_cpuinfo_max_freq); + device_remove_file (dev, &dev_attr_scaling_min_freq); + device_remove_file (dev, &dev_attr_scaling_max_freq); + device_remove_file (dev, &dev_attr_scaling_governor); + + return 0; +} + /********************************************************************* - * 2.6. API * + * /proc/cpufreq INTERFACE * *********************************************************************/ +#ifdef CONFIG_CPU_FREQ_PROC_INTF + /** * cpufreq_parse_policy - parse a policy string * @input_string: the string to parse. 
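The per-CPU attribute macros a little further up can be hard to read at a glance, so here is a hand-expanded sketch of what cpufreq_per_cpu_attr_rw(scaling_min_freq, min) produces, pieced together from the cpufreq_per_cpu_attr_read/_write and DEVICE_ATTR fragments shown in this patch. It is illustrative only, not the literal preprocessor output and not an additional hunk.

/* hand expansion (illustrative) of cpufreq_per_cpu_attr_rw(scaling_min_freq, min) */
static ssize_t show_scaling_min_freq (struct device *dev, char *buf)
{
	unsigned int value = 0;

	if (!dev)
		return 0;

	/* copy one integer out of the driver's policy under the semaphore */
	down(&cpufreq_driver_sem);
	if (cpufreq_driver)
		value = cpufreq_driver->policy[to_cpu_nr(dev)].min;
	up(&cpufreq_driver_sem);

	return sprintf (buf, "%u\n", value);
}

static ssize_t store_scaling_min_freq (struct device *dev, const char *buf)
{
	unsigned int ret = -EINVAL;
	struct cpufreq_policy policy;

	if (!dev)
		return 0;

	/* start from the current policy, change one field, validate via set_policy */
	ret = cpufreq_get_policy(&policy, to_cpu_nr(dev));
	if (ret)
		return ret;

	ret = sscanf (buf, "%u", &policy.min);
	if (ret != 1)
		return -EINVAL;

	ret = cpufreq_set_policy(&policy);
	if (ret)
		return ret;

	return strlen(buf);
}

/* S_IRUGO | S_IWUSR: world-readable, root-writable */
static DEVICE_ATTR(scaling_min_freq, (S_IRUGO | S_IWUSR),
		   show_scaling_min_freq, store_scaling_min_freq);

The read side holds cpufreq_driver_sem only long enough to copy one integer, while the write side goes through cpufreq_get_policy()/cpufreq_set_policy(), so the normal policy verification and notifier chain apply to sysfs writes as well.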
@@ -95,10 +343,9 @@ static int cpufreq_parse_policy(char input_string[42], struct cpufreq_policy *po unsigned int min = 0; unsigned int max = 0; unsigned int cpu = 0; - char policy_string[42] = {'\0'}; + char str_governor[16]; struct cpufreq_policy current_policy; unsigned int result = -EFAULT; - unsigned int i = 0; if (!policy) return -EINVAL; @@ -108,7 +355,7 @@ static int cpufreq_parse_policy(char input_string[42], struct cpufreq_policy *po policy->policy = 0; policy->cpu = CPUFREQ_ALL_CPUS; - if (sscanf(input_string, "%d:%d:%d:%s", &cpu, &min, &max, policy_string) == 4) + if (sscanf(input_string, "%d:%d:%d:%15s", &cpu, &min, &max, str_governor) == 4) { policy->min = min; policy->max = max; @@ -116,7 +363,7 @@ static int cpufreq_parse_policy(char input_string[42], struct cpufreq_policy *po result = 0; goto scan_policy; } - if (sscanf(input_string, "%d%%%d%%%d%%%s", &cpu, &min, &max, policy_string) == 4) + if (sscanf(input_string, "%d%%%d%%%d%%%15s", &cpu, &min, &max, str_governor) == 4) { if (!cpufreq_get_policy(¤t_policy, cpu)) { policy->min = (min * current_policy.cpuinfo.max_freq) / 100; @@ -127,7 +374,7 @@ static int cpufreq_parse_policy(char input_string[42], struct cpufreq_policy *po } } - if (sscanf(input_string, "%d:%d:%s", &min, &max, policy_string) == 3) + if (sscanf(input_string, "%d:%d:%15s", &min, &max, str_governor) == 3) { policy->min = min; policy->max = max; @@ -135,7 +382,7 @@ static int cpufreq_parse_policy(char input_string[42], struct cpufreq_policy *po goto scan_policy; } - if (sscanf(input_string, "%d%%%d%%%s", &min, &max, policy_string) == 3) + if (sscanf(input_string, "%d%%%d%%%15s", &min, &max, str_governor) == 3) { if (!cpufreq_get_policy(¤t_policy, cpu)) { policy->min = (min * current_policy.cpuinfo.max_freq) / 100; @@ -148,36 +395,7 @@ static int cpufreq_parse_policy(char input_string[42], struct cpufreq_policy *po return -EINVAL; scan_policy: - - for (i=0;ipolicy = CPUFREQ_POLICY_POWERSAVE; - } - else if (!strncmp(policy_string, "performance",6) || - !strncmp(policy_string, "high",4) || - !strncmp(policy_string, "full",4)) - { - result = 0; - policy->policy = CPUFREQ_POLICY_PERFORMANCE; - } - else if (!cpufreq_get_policy(¤t_policy, policy->cpu)) - { - policy->policy = current_policy.policy; - } - else - { - policy->policy = 0; - } + result = cpufreq_parse_governor(str_governor, &policy->policy); return result; } @@ -197,8 +415,6 @@ static int __init cpufreq_setup(char *str) __setup("cpufreq=", cpufreq_setup); -#ifdef CONFIG_PROC_FS - /** * cpufreq_proc_read - read /proc/cpufreq * @@ -345,12 +561,15 @@ static void cpufreq_proc_exit (void) remove_proc_entry("cpufreq", &proc_root); return; } -#endif /* CONFIG_PROC_FS */ +#else +#define cpufreq_proc_init() do {} while(0) +#define cpufreq_proc_exit() do {} while(0) +#endif /* CONFIG_CPU_FREQ_PROC_INTF */ /********************************************************************* - * 2.4. 
COMPATIBLE API * + * /proc/sys/cpu/ INTERFACE * *********************************************************************/ #ifdef CONFIG_CPU_FREQ_24_API @@ -1055,7 +1274,9 @@ int cpufreq_register(struct cpufreq_driver *driver_data) cpufreq_sysctl_init(); #endif - return 0; + ret = interface_register(&cpufreq_interface); + + return ret; } EXPORT_SYMBOL_GPL(cpufreq_register); @@ -1077,6 +1298,7 @@ int cpufreq_unregister(void) return -EINVAL; } + interface_unregister(&cpufreq_interface); cpufreq_driver = NULL; up(&cpufreq_driver_sem); -- cgit v1.2.3 From 68fc0a7808166f84a382a50639f26c1dd9dcd354 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 12 Jan 2003 21:32:38 -0800 Subject: [PATCH] rbtree core for io scheduler This patch has a bunch of io scheduler goodies that are, by now, well tested in -mm and by self and Nick Piggin. In order of interest: - Use rbtree data structure for sorting of requests. Even with the default queue lengths that are fairly short, this cuts a lot of run time for io scheduler intensive work loads. If we go to longer queue lengths, it very quickly becomes a necessity. - Add sysfs interface for the tunables. At the same time, finally kill the BLKELVGET/BLKELVSET completely. I made these return -ENOTTY in 2.5.1, but there are left-overs around the kernel. This old interface was never any good, it was centered around just one io scheduler. The io scheduler core itself has received count less hours of tuning by myself and Nick, should be in pretty good shape. Please apply. Andrew, I made some sysfs changes to the version from 2.5.56-mm1. It didn't even compile without warnings (or work, for that matter), as the sysfs store/show procedures needed updating. Hmm? --- arch/mips64/kernel/ioctl32.c | 2 - arch/parisc/kernel/ioctl32.c | 3 - arch/ppc64/kernel/ioctl32.c | 15 - arch/s390x/kernel/ioctl32.c | 3 - arch/sparc64/kernel/ioctl32.c | 15 - arch/x86_64/ia32/ia32_ioctl.c | 15 - drivers/block/deadline-iosched.c | 811 +++++++++++++++++++++++++++------------ drivers/block/elevator.c | 67 +++- drivers/block/genhd.c | 2 + drivers/block/ioctl.c | 4 - drivers/block/ll_rw_blk.c | 53 +-- include/linux/elevator.h | 25 +- 12 files changed, 667 insertions(+), 348 deletions(-) (limited to 'include/linux') diff --git a/arch/mips64/kernel/ioctl32.c b/arch/mips64/kernel/ioctl32.c index f569194c5e48..1636b8c70a30 100644 --- a/arch/mips64/kernel/ioctl32.c +++ b/arch/mips64/kernel/ioctl32.c @@ -755,8 +755,6 @@ static struct ioctl32_list ioctl32_handler_table[] = { IOCTL32_HANDLER(BLKSECTGET, w_long), IOCTL32_DEFAULT(BLKSSZGET), IOCTL32_HANDLER(BLKPG, blkpg_ioctl_trans), - IOCTL32_DEFAULT(BLKELVGET), - IOCTL32_DEFAULT(BLKELVSET), IOCTL32_DEFAULT(BLKBSZGET), IOCTL32_DEFAULT(BLKBSZSET), diff --git a/arch/parisc/kernel/ioctl32.c b/arch/parisc/kernel/ioctl32.c index 5a6441ec4644..9d936fc06883 100644 --- a/arch/parisc/kernel/ioctl32.c +++ b/arch/parisc/kernel/ioctl32.c @@ -3464,9 +3464,6 @@ COMPATIBLE_IOCTL(DRM_IOCTL_LOCK) COMPATIBLE_IOCTL(DRM_IOCTL_UNLOCK) COMPATIBLE_IOCTL(DRM_IOCTL_FINISH) #endif /* DRM */ -/* elevator */ -COMPATIBLE_IOCTL(BLKELVGET) -COMPATIBLE_IOCTL(BLKELVSET) /* Big R */ COMPATIBLE_IOCTL(RNDGETENTCNT) COMPATIBLE_IOCTL(RNDADDTOENTCNT) diff --git a/arch/ppc64/kernel/ioctl32.c b/arch/ppc64/kernel/ioctl32.c index 6d04f959b1ed..7ac8afc17dca 100644 --- a/arch/ppc64/kernel/ioctl32.c +++ b/arch/ppc64/kernel/ioctl32.c @@ -3613,22 +3613,10 @@ mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) } /* Fix sizeof(sizeof()) breakage */ -#define BLKELVGET_32 _IOR(0x12,106,int) 
-#define BLKELVSET_32 _IOW(0x12,107,int) #define BLKBSZGET_32 _IOR(0x12,112,int) #define BLKBSZSET_32 _IOW(0x12,113,int) #define BLKGETSIZE64_32 _IOR(0x12,114,int) -static int do_blkelvget(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - return sys_ioctl(fd, BLKELVGET, arg); -} - -static int do_blkelvset(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - return sys_ioctl(fd, BLKELVSET, arg); -} - static int do_blkbszget(unsigned int fd, unsigned int cmd, unsigned long arg) { return sys_ioctl(fd, BLKBSZGET, arg); @@ -4459,9 +4447,6 @@ HANDLE_IOCTL(USBDEVFS_REAPURB32, do_usbdevfs_reapurb), HANDLE_IOCTL(USBDEVFS_REAPURBNDELAY32, do_usbdevfs_reapurb), HANDLE_IOCTL(USBDEVFS_DISCSIGNAL32, do_usbdevfs_discsignal), /* take care of sizeof(sizeof()) breakage */ -/* elevator */ -HANDLE_IOCTL(BLKELVGET_32, do_blkelvget), -HANDLE_IOCTL(BLKELVSET_32, do_blkelvset), /* block stuff */ HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget), HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset), diff --git a/arch/s390x/kernel/ioctl32.c b/arch/s390x/kernel/ioctl32.c index ff6066c2cd88..0dfef815cd09 100644 --- a/arch/s390x/kernel/ioctl32.c +++ b/arch/s390x/kernel/ioctl32.c @@ -798,9 +798,6 @@ static struct ioctl32_list ioctl32_handler_table[] = { IOCTL32_DEFAULT(BLKBSZGET), IOCTL32_DEFAULT(BLKGETSIZE64), - IOCTL32_DEFAULT(BLKELVGET), - IOCTL32_DEFAULT(BLKELVSET), - IOCTL32_HANDLER(HDIO_GETGEO, hd_geometry_ioctl), IOCTL32_DEFAULT(TCGETA), diff --git a/arch/sparc64/kernel/ioctl32.c b/arch/sparc64/kernel/ioctl32.c index 035c7bf4232a..cd3af811408d 100644 --- a/arch/sparc64/kernel/ioctl32.c +++ b/arch/sparc64/kernel/ioctl32.c @@ -4244,22 +4244,10 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) } /* Fix sizeof(sizeof()) breakage */ -#define BLKELVGET_32 _IOR(0x12,106,int) -#define BLKELVSET_32 _IOW(0x12,107,int) #define BLKBSZGET_32 _IOR(0x12,112,int) #define BLKBSZSET_32 _IOW(0x12,113,int) #define BLKGETSIZE64_32 _IOR(0x12,114,int) -static int do_blkelvget(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - return sys_ioctl(fd, BLKELVGET, arg); -} - -static int do_blkelvset(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - return sys_ioctl(fd, BLKELVSET, arg); -} - static int do_blkbszget(unsigned int fd, unsigned int cmd, unsigned long arg) { return sys_ioctl(fd, BLKBSZGET, arg); @@ -5203,9 +5191,6 @@ HANDLE_IOCTL(USBDEVFS_REAPURB32, do_usbdevfs_reapurb) HANDLE_IOCTL(USBDEVFS_REAPURBNDELAY32, do_usbdevfs_reapurb) HANDLE_IOCTL(USBDEVFS_DISCSIGNAL32, do_usbdevfs_discsignal) /* take care of sizeof(sizeof()) breakage */ -/* elevator */ -HANDLE_IOCTL(BLKELVGET_32, do_blkelvget) -HANDLE_IOCTL(BLKELVSET_32, do_blkelvset) /* block stuff */ HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget) HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset) diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c index 567d070cd48f..73ca3c4d615e 100644 --- a/arch/x86_64/ia32/ia32_ioctl.c +++ b/arch/x86_64/ia32/ia32_ioctl.c @@ -3025,22 +3025,10 @@ static int rtc32_ioctl(unsigned fd, unsigned cmd, unsigned long arg) } /* Fix sizeof(sizeof()) breakage */ -#define BLKELVGET_32 _IOR(0x12,106,int) -#define BLKELVSET_32 _IOW(0x12,107,int) #define BLKBSZGET_32 _IOR(0x12,112,int) #define BLKBSZSET_32 _IOW(0x12,113,int) #define BLKGETSIZE64_32 _IOR(0x12,114,int) -static int do_blkelvget(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - return sys_ioctl(fd, BLKELVGET, arg); -} - -static int do_blkelvset(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - return sys_ioctl(fd, BLKELVSET, arg); -} - 
static int do_blkbszget(unsigned int fd, unsigned int cmd, unsigned long arg) { return sys_ioctl(fd, BLKBSZGET, arg); @@ -4427,9 +4415,6 @@ HANDLE_IOCTL(USBDEVFS_REAPURB32, do_usbdevfs_reapurb) HANDLE_IOCTL(USBDEVFS_REAPURBNDELAY32, do_usbdevfs_reapurb) HANDLE_IOCTL(USBDEVFS_DISCSIGNAL32, do_usbdevfs_discsignal) /* take care of sizeof(sizeof()) breakage */ -/* elevator */ -HANDLE_IOCTL(BLKELVGET_32, do_blkelvget) -HANDLE_IOCTL(BLKELVSET_32, do_blkelvset) /* block stuff */ HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget) HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset) diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c index 975e2c70e205..15c2cc1c3126 100644 --- a/drivers/block/deadline-iosched.c +++ b/drivers/block/deadline-iosched.c @@ -17,26 +17,24 @@ #include #include #include +#include /* - * feel free to try other values :-). read_expire value is the timeout for - * reads, our goal is to start a request "around" the time when it expires. - * fifo_batch is how many steps along the sorted list we will take when the - * front fifo request expires. + * See Documentation/deadline-iosched.txt */ -static int read_expire = HZ / 2; /* 500ms start timeout */ -static int fifo_batch = 16; -static int seek_cost = 16; /* seek is 16 times more expensive */ +static int read_expire = HZ / 2; /* max time before a read is submitted. */ +static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ +static int writes_starved = 2; /* max times reads can starve a write */ +static int fifo_batch = 16; /* # of sequential requests treated as one + by the above parameters. For throughput. */ -/* - * how many times reads are allowed to starve writes - */ -static int writes_starved = 2; - -static const int deadline_hash_shift = 8; +static const int deadline_hash_shift = 10; #define DL_HASH_BLOCK(sec) ((sec) >> 3) #define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift)) #define DL_HASH_ENTRIES (1 << deadline_hash_shift) +#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) +#define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash) +#define ON_HASH(drq) (drq)->hash_valid_count #define DL_INVALIDATE_HASH(dd) \ do { \ @@ -48,31 +46,55 @@ struct deadline_data { /* * run time data */ - struct list_head sort_list[2]; /* sorted listed */ - struct list_head read_fifo; /* fifo list */ + + /* + * requests (deadline_rq s) are present on both sort_list and fifo_list + */ + struct rb_root sort_list[2]; + struct list_head fifo_list[2]; + + /* + * next in sort order. read, write or both are NULL + */ + struct deadline_rq *next_drq[2]; struct list_head *dispatch; /* driver dispatch queue */ struct list_head *hash; /* request hash */ - sector_t last_sector; /* last sector sent to drive */ unsigned long hash_valid_count; /* barrier hash count */ - unsigned int starved; /* writes starved */ + unsigned int batching; /* number of sequential requests made */ + sector_t last_sector; /* head position */ + unsigned int starved; /* times reads have starved writes */ /* * settings that change how the i/o scheduler behaves */ - unsigned int fifo_batch; - unsigned long read_expire; - unsigned int seek_cost; - unsigned int writes_starved; + int fifo_expire[2]; + int fifo_batch; + int writes_starved; + int front_merges; }; /* * pre-request data. 
*/ struct deadline_rq { - struct list_head fifo; + /* + * rbtree index, key is the starting offset + */ + struct rb_node rb_node; + sector_t rb_key; + + struct request *request; + + /* + * request hash, key is the ending offset (for back merge lookup) + */ struct list_head hash; unsigned long hash_valid_count; - struct request *request; + + /* + * expire fifo + */ + struct list_head fifo; unsigned long expires; }; @@ -81,159 +103,257 @@ static kmem_cache_t *drq_pool; #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private) /* - * rq hash + * the back merge hash support functions */ -static inline void __deadline_del_rq_hash(struct deadline_rq *drq) +static inline void __deadline_del_drq_hash(struct deadline_rq *drq) { drq->hash_valid_count = 0; list_del_init(&drq->hash); } -#define ON_HASH(drq) (drq)->hash_valid_count -static inline void deadline_del_rq_hash(struct deadline_rq *drq) +static inline void deadline_del_drq_hash(struct deadline_rq *drq) { if (ON_HASH(drq)) - __deadline_del_rq_hash(drq); + __deadline_del_drq_hash(drq); } static inline void -deadline_add_rq_hash(struct deadline_data *dd, struct deadline_rq *drq) +deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) { struct request *rq = drq->request; BUG_ON(ON_HASH(drq)); drq->hash_valid_count = dd->hash_valid_count; - list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq->sector +rq->nr_sectors)]); + list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]); } -#define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash) static struct request * -deadline_find_hash(struct deadline_data *dd, sector_t offset) +deadline_find_drq_hash(struct deadline_data *dd, sector_t offset) { struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)]; struct list_head *entry, *next = hash_list->next; - struct deadline_rq *drq; - struct request *rq = NULL; while ((entry = next) != hash_list) { + struct deadline_rq *drq = list_entry_hash(entry); + struct request *__rq = drq->request; + next = entry->next; - drq = list_entry_hash(entry); - - BUG_ON(!drq->hash_valid_count); + BUG_ON(!ON_HASH(drq)); - if (!rq_mergeable(drq->request) + if (!rq_mergeable(__rq) || drq->hash_valid_count != dd->hash_valid_count) { - __deadline_del_rq_hash(drq); + __deadline_del_drq_hash(drq); continue; } - if (drq->request->sector + drq->request->nr_sectors == offset) { - rq = drq->request; - break; - } + if (rq_hash_key(__rq) == offset) + return __rq; } - return rq; + return NULL; } -static sector_t deadline_get_last_sector(struct deadline_data *dd) +/* + * rb tree support functions + */ +#define RB_NONE (2) +#define RB_EMPTY(root) ((root)->rb_node == NULL) +#define ON_RB(node) ((node)->rb_color != RB_NONE) +#define RB_CLEAR(node) ((node)->rb_color = RB_NONE) +#define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node) +#define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)]) +#define rq_rb_key(rq) (rq)->sector + +static struct deadline_rq * +__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) { - sector_t last_sec = dd->last_sector; + struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node; + struct rb_node *parent = NULL; + struct deadline_rq *__drq; + + while (*p) { + parent = *p; + __drq = rb_entry_drq(parent); + + if (drq->rb_key < __drq->rb_key) + p = &(*p)->rb_left; + else if (drq->rb_key > __drq->rb_key) + p = &(*p)->rb_right; + else + return __drq; + } + + rb_link_node(&drq->rb_node, parent, p); + return 0; +} + +static int +deadline_add_drq_rb(struct deadline_data *dd, struct 
deadline_rq *drq) +{ + struct deadline_rq *__alias; + + drq->rb_key = rq_rb_key(drq->request); + + __alias = __deadline_add_drq_rb(dd, drq); + if (!__alias) { + rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); + return 0; + } /* - * if dispatch is non-empty, disregard last_sector and check last one + * this should not typically happen, but if it does simply chain + * the two requests. then they will be moved to the dispatch list + * at the same time */ - if (!list_empty(dd->dispatch)) { - struct request *__rq = list_entry_rq(dd->dispatch->prev); + list_add(&drq->request->queuelist, &__alias->request->queuelist); + return 1; +} + +static inline void +deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) +{ + const int data_dir = rq_data_dir(drq->request); + + if (dd->next_drq[data_dir] == drq) { + struct rb_node *rbnext = rb_next(&drq->rb_node); + + dd->next_drq[data_dir] = NULL; + if (rbnext) + dd->next_drq[data_dir] = rb_entry_drq(rbnext); + } + + if (ON_RB(&drq->rb_node)) { + rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); + RB_CLEAR(&drq->rb_node); + } +} - last_sec = __rq->sector + __rq->nr_sectors; +static struct request * +deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir) +{ + struct rb_node *n = dd->sort_list[data_dir].rb_node; + struct deadline_rq *drq; + + while (n) { + drq = rb_entry_drq(n); + + if (sector < drq->rb_key) + n = n->rb_left; + else if (sector > drq->rb_key) + n = n->rb_right; + else + return drq->request; } - return last_sec; + return NULL; +} + +/* + * deadline_find_first_drq finds the first (lowest sector numbered) request + * for the specified data_dir. Used to sweep back to the start of the disk + * (1-way elevator) after we process the last (highest sector) request. + */ +static struct deadline_rq * +deadline_find_first_drq(struct deadline_data *dd, int data_dir) +{ + struct rb_node *n = dd->sort_list[data_dir].rb_node; + + for (;;) { + if (n->rb_left == NULL) + return rb_entry_drq(n); + + n = n->rb_left; + } +} + +/* + * add drq to rbtree and fifo + */ +static inline void +deadline_add_request(struct deadline_data *dd, struct deadline_rq *drq) +{ + const int data_dir = rq_data_dir(drq->request); + + if (!deadline_add_drq_rb(dd, drq)) { + /* + * set expire time (only used for reads) and add to fifo list + */ + drq->expires = jiffies + dd->fifo_expire[data_dir]; + list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); + } +} + +/* + * remove rq from rbtree, fifo, and hash + */ +static void deadline_remove_request(request_queue_t *q, struct request *rq) +{ + struct deadline_rq *drq = RQ_DATA(rq); + + if (drq) { + struct deadline_data *dd = q->elevator.elevator_data; + + list_del_init(&drq->fifo); + deadline_del_drq_hash(drq); + deadline_del_drq_rb(dd, drq); + } + + list_del_init(&rq->queuelist); } static int deadline_merge(request_queue_t *q, struct list_head **insert, struct bio *bio) { struct deadline_data *dd = q->elevator.elevator_data; - const int data_dir = bio_data_dir(bio); - struct list_head *entry, *sort_list; struct request *__rq; - int ret = ELEVATOR_NO_MERGE; + int ret; /* * try last_merge to avoid going to hash */ ret = elv_try_last_merge(q, bio); if (ret != ELEVATOR_NO_MERGE) { - *insert = q->last_merge; - goto out; + __rq = list_entry_rq(q->last_merge); + goto out_insert; } /* * see if the merge hash can satisfy a back merge */ - if ((__rq = deadline_find_hash(dd, bio->bi_sector))) { + __rq = deadline_find_drq_hash(dd, bio->bi_sector); + if (__rq) { BUG_ON(__rq->sector + __rq->nr_sectors != 
bio->bi_sector); if (elv_rq_merge_ok(__rq, bio)) { - *insert = &__rq->queuelist; ret = ELEVATOR_BACK_MERGE; goto out; } } /* - * scan list from back to find insertion point. + * check for front merge */ - entry = sort_list = &dd->sort_list[data_dir]; - while ((entry = entry->prev) != sort_list) { - __rq = list_entry_rq(entry); + if (dd->front_merges) { + sector_t rb_key = bio->bi_sector + bio_sectors(bio); - BUG_ON(__rq->flags & REQ_STARTED); + __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); + if (__rq) { + BUG_ON(rb_key != rq_rb_key(__rq)); - if (!(__rq->flags & REQ_CMD)) - continue; - - /* - * it's not necessary to break here, and in fact it could make - * us loose a front merge. emperical evidence shows this to - * be a big waste of cycles though, so quit scanning - */ - if (!*insert && bio_rq_in_between(bio, __rq, sort_list)) { - *insert = &__rq->queuelist; - break; - } - - if (__rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) - break; - - /* - * checking for a front merge, hash will miss those - */ - if (__rq->sector - bio_sectors(bio) == bio->bi_sector) { - ret = elv_try_merge(__rq, bio); - if (ret != ELEVATOR_NO_MERGE) { - *insert = &__rq->queuelist; - break; + if (elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_FRONT_MERGE; + goto out; } } } - /* - * no insertion point found, check the very front - */ - if (!*insert && !list_empty(sort_list)) { - __rq = list_entry_rq(sort_list->next); - - if (bio->bi_sector + bio_sectors(bio) < __rq->sector && - bio->bi_sector > deadline_get_last_sector(dd)) - *insert = sort_list; - } - + return ELEVATOR_NO_MERGE; out: + q->last_merge = &__rq->queuelist; +out_insert: + *insert = &__rq->queuelist; return ret; } @@ -242,14 +362,26 @@ static void deadline_merged_request(request_queue_t *q, struct request *req) struct deadline_data *dd = q->elevator.elevator_data; struct deadline_rq *drq = RQ_DATA(req); - deadline_del_rq_hash(drq); - deadline_add_rq_hash(dd, drq); + /* + * hash always needs to be repositioned, key is end sector + */ + deadline_del_drq_hash(drq); + deadline_add_drq_hash(dd, drq); + + /* + * if the merge was a front merge, we need to reposition request + */ + if (rq_rb_key(req) != drq->rb_key) { + deadline_del_drq_rb(dd, drq); + deadline_add_drq_rb(dd, drq); + } q->last_merge = &req->queuelist; } static void -deadline_merge_request(request_queue_t *q, struct request *req, struct request *next) +deadline_merged_requests(request_queue_t *q, struct request *req, + struct request *next) { struct deadline_data *dd = q->elevator.elevator_data; struct deadline_rq *drq = RQ_DATA(req); @@ -258,11 +390,20 @@ deadline_merge_request(request_queue_t *q, struct request *req, struct request * BUG_ON(!drq); BUG_ON(!dnext); - deadline_del_rq_hash(drq); - deadline_add_rq_hash(dd, drq); + /* + * reposition drq (this is the merged request) in hash, and in rbtree + * in case of a front merge + */ + deadline_del_drq_hash(drq); + deadline_add_drq_hash(dd, drq); + + if (rq_rb_key(req) != drq->rb_key) { + deadline_del_drq_rb(dd, drq); + deadline_add_drq_rb(dd, drq); + } /* - * if dnext expires before drq, assign it's expire time to drq + * if dnext expires before drq, assign its expire time to drq * and move into dnext position (dnext will be deleted) in fifo */ if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) { @@ -271,209 +412,255 @@ deadline_merge_request(request_queue_t *q, struct request *req, struct request * drq->expires = dnext->expires; } } + + /* + * kill knowledge of next, this one is a goner + */ + deadline_remove_request(q, 
next); } /* - * move request from sort list to dispatch queue. maybe remove from rq hash - * here too? + * move request from sort list to dispatch queue. */ static inline void -deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq) +deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) { - struct deadline_rq *drq = RQ_DATA(rq); - - list_move_tail(&rq->queuelist, dd->dispatch); + deadline_del_drq_rb(dd, drq); list_del_init(&drq->fifo); + list_add_tail(&drq->request->queuelist, dd->dispatch); } /* - * move along sort list and move entries to dispatch queue, starting from rq + * move an entry to dispatch queue */ -static void deadline_move_requests(struct deadline_data *dd, struct request *rq) +static void +deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) { - struct list_head *sort_head = &dd->sort_list[rq_data_dir(rq)]; - sector_t last_sec = deadline_get_last_sector(dd); - int batch_count = dd->fifo_batch; - - do { - struct list_head *nxt = rq->queuelist.next; - int this_rq_cost; - - /* - * take it off the sort and fifo list, move - * to dispatch queue - */ - deadline_move_to_dispatch(dd, rq); - - /* - * if this is the last entry, don't bother doing accounting - */ - if (nxt == sort_head) - break; + const int data_dir = rq_data_dir(drq->request); + struct rb_node *rbnext = rb_next(&drq->rb_node); - this_rq_cost = dd->seek_cost; - if (rq->sector == last_sec) - this_rq_cost = (rq->nr_sectors + 255) >> 8; + dd->next_drq[READ] = NULL; + dd->next_drq[WRITE] = NULL; - batch_count -= this_rq_cost; - if (batch_count <= 0) - break; + if (rbnext) + dd->next_drq[data_dir] = rb_entry_drq(rbnext); + + dd->last_sector = drq->request->sector + drq->request->nr_sectors; - last_sec = rq->sector + rq->nr_sectors; - rq = list_entry_rq(nxt); - } while (1); + /* + * take it off the sort and fifo list, move + * to dispatch queue + */ + deadline_move_to_dispatch(dd, drq); } +#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo) + /* - * returns 0 if there are no expired reads on the fifo, 1 otherwise + * deadline_check_fifo returns 0 if there are no expired reads on the fifo, + * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) */ -#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo) -static inline int deadline_check_fifo(struct deadline_data *dd) +static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) { - if (!list_empty(&dd->read_fifo)) { - struct deadline_rq *drq = list_entry_fifo(dd->read_fifo.next); + struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); - /* - * drq is expired! - */ - if (time_after(jiffies, drq->expires)) - return 1; - } + /* + * drq is expired! 
+ */ + if (time_after(jiffies, drq->expires)) + return 1; return 0; } -static struct request *deadline_next_request(request_queue_t *q) +/* + * deadline_dispatch_requests selects the best request according to + * read/write expire, fifo_batch, etc + */ +static int deadline_dispatch_requests(struct deadline_data *dd) { - struct deadline_data *dd = q->elevator.elevator_data; + const int reads = !list_empty(&dd->fifo_list[READ]); + const int writes = !list_empty(&dd->fifo_list[WRITE]); struct deadline_rq *drq; - struct list_head *nxt; - struct request *rq; - int writes; + int data_dir, other_dir; /* - * if still requests on the dispatch queue, just grab the first one + * batches are currently reads XOR writes */ - if (!list_empty(&q->queue_head)) { -dispatch: - rq = list_entry_rq(q->queue_head.next); - dd->last_sector = rq->sector + rq->nr_sectors; - return rq; - } + drq = NULL; + + if (dd->next_drq[READ]) + drq = dd->next_drq[READ]; - writes = !list_empty(&dd->sort_list[WRITE]); + if (dd->next_drq[WRITE]) + drq = dd->next_drq[WRITE]; + + if (drq) { + /* we have a "next request" */ + + if (dd->last_sector != drq->request->sector) + /* end the batch on a non sequential request */ + dd->batching += dd->fifo_batch; + + if (dd->batching < dd->fifo_batch) + /* we are still entitled to batch */ + goto dispatch_request; + } /* - * if we have expired entries on the fifo list, move some to dispatch + * at this point we are not running a batch. select the appropriate + * data direction (read / write) */ - if (deadline_check_fifo(dd)) { - if (writes && (dd->starved++ >= dd->writes_starved)) - goto dispatch_writes; - nxt = dd->read_fifo.next; - drq = list_entry_fifo(nxt); - deadline_move_requests(dd, drq->request); - goto dispatch; - } + if (reads) { + BUG_ON(RB_EMPTY(&dd->sort_list[READ])); - if (!list_empty(&dd->sort_list[READ])) { if (writes && (dd->starved++ >= dd->writes_starved)) goto dispatch_writes; - nxt = dd->sort_list[READ].next; - deadline_move_requests(dd, list_entry_rq(nxt)); - goto dispatch; + data_dir = READ; + other_dir = WRITE; + + goto dispatch_find_request; } /* - * either there are no reads expired or on sort list, or the reads - * have starved writes for too long. dispatch some writes + * there are either no reads or writes have been starved */ + if (writes) { dispatch_writes: - nxt = dd->sort_list[WRITE].next; - deadline_move_requests(dd, list_entry_rq(nxt)); + BUG_ON(RB_EMPTY(&dd->sort_list[WRITE])); + dd->starved = 0; - goto dispatch; + + data_dir = WRITE; + other_dir = READ; + + goto dispatch_find_request; } - BUG_ON(!list_empty(&dd->sort_list[READ])); - BUG_ON(writes); + return 0; + +dispatch_find_request: + /* + * we are not running a batch, find best request for selected data_dir + */ + if (deadline_check_fifo(dd, data_dir)) { + /* An expired request exists - satisfy it */ + dd->batching = 0; + drq = list_entry_fifo(dd->fifo_list[data_dir].next); + + } else if (dd->next_drq[data_dir]) { + /* + * The last req was the same dir and we have a next request in + * sort order. No expired requests so continue on from here. + */ + drq = dd->next_drq[data_dir]; + } else { + /* + * The last req was the other direction or we have run out of + * higher-sectored requests. Go back to the lowest sectored + * request (1 way elevator) and start a new batch. + */ + dd->batching = 0; + drq = deadline_find_first_drq(dd, data_dir); + } + +dispatch_request: + /* + * drq is the selected appropriate request. 
+ */ + dd->batching++; + deadline_move_request(dd, drq); + + return 1; +} + +static struct request *deadline_next_request(request_queue_t *q) +{ + struct deadline_data *dd = q->elevator.elevator_data; + struct request *rq; + + /* + * if there are still requests on the dispatch queue, grab the first one + */ + if (!list_empty(dd->dispatch)) { +dispatch: + rq = list_entry_rq(dd->dispatch->next); + return rq; + } + + if (deadline_dispatch_requests(dd)) + goto dispatch; + return NULL; } static void -deadline_add_request(request_queue_t *q, struct request *rq, struct list_head *insert_here) +deadline_insert_request(request_queue_t *q, struct request *rq, + struct list_head *insert_here) { struct deadline_data *dd = q->elevator.elevator_data; struct deadline_rq *drq = RQ_DATA(rq); - const int data_dir = rq_data_dir(rq); - /* - * flush hash on barrier insert, as not to allow merges before a - * barrier. - */ if (unlikely(rq->flags & REQ_HARDBARRIER)) { DL_INVALIDATE_HASH(dd); q->last_merge = NULL; } - /* - * add to sort list - */ - if (!insert_here) - insert_here = dd->sort_list[data_dir].prev; + if (unlikely(!blk_fs_request(rq))) { + if (!insert_here) + insert_here = dd->dispatch->prev; - list_add(&rq->queuelist, insert_here); - - if (unlikely(!(rq->flags & REQ_CMD))) + list_add(&rq->queuelist, insert_here); return; + } if (rq_mergeable(rq)) { - deadline_add_rq_hash(dd, drq); + deadline_add_drq_hash(dd, drq); if (!q->last_merge) q->last_merge = &rq->queuelist; } - if (data_dir == READ) { - /* - * set expire time and add to fifo list - */ - drq->expires = jiffies + dd->read_expire; - list_add_tail(&drq->fifo, &dd->read_fifo); - } -} - -static void deadline_remove_request(request_queue_t *q, struct request *rq) -{ - struct deadline_rq *drq = RQ_DATA(rq); - - if (drq) { - list_del_init(&drq->fifo); - deadline_del_rq_hash(drq); - } + deadline_add_request(dd, drq); } static int deadline_queue_empty(request_queue_t *q) { struct deadline_data *dd = q->elevator.elevator_data; - if (!list_empty(&dd->sort_list[WRITE]) || - !list_empty(&dd->sort_list[READ]) || - !list_empty(&q->queue_head)) + if (!list_empty(&dd->fifo_list[WRITE]) + || !list_empty(&dd->fifo_list[READ]) + || !list_empty(dd->dispatch)) return 0; - BUG_ON(!list_empty(&dd->read_fifo)); return 1; } -static struct list_head * -deadline_get_sort_head(request_queue_t *q, struct request *rq) +static struct request * +deadline_former_request(request_queue_t *q, struct request *rq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_rq *drq = RQ_DATA(rq); + struct rb_node *rbprev = rb_prev(&drq->rb_node); + + if (rbprev) + return rb_entry_drq(rbprev)->request; + + return NULL; +} + +static struct request * +deadline_latter_request(request_queue_t *q, struct request *rq) +{ + struct deadline_rq *drq = RQ_DATA(rq); + struct rb_node *rbnext = rb_next(&drq->rb_node); + + if (rbnext) + return rb_entry_drq(rbnext)->request; - return &dd->sort_list[rq_data_dir(rq)]; + return NULL; } static void deadline_exit(request_queue_t *q, elevator_t *e) @@ -483,18 +670,14 @@ static void deadline_exit(request_queue_t *q, elevator_t *e) struct request *rq; int i; - BUG_ON(!list_empty(&dd->read_fifo)); - BUG_ON(!list_empty(&dd->sort_list[READ])); - BUG_ON(!list_empty(&dd->sort_list[WRITE])); + BUG_ON(!list_empty(&dd->fifo_list[READ])); + BUG_ON(!list_empty(&dd->fifo_list[WRITE])); for (i = READ; i <= WRITE; i++) { struct request_list *rl = &q->rq[i]; - struct list_head *entry = &rl->free; + struct list_head *entry; - if (list_empty(&rl->free)) - 
continue; - - while ((entry = entry->next) != &rl->free) { + list_for_each(entry, &rl->free) { rq = list_entry_rq(entry); if ((drq = RQ_DATA(rq)) == NULL) @@ -537,25 +720,24 @@ static int deadline_init(request_queue_t *q, elevator_t *e) for (i = 0; i < DL_HASH_ENTRIES; i++) INIT_LIST_HEAD(&dd->hash[i]); - INIT_LIST_HEAD(&dd->read_fifo); - INIT_LIST_HEAD(&dd->sort_list[READ]); - INIT_LIST_HEAD(&dd->sort_list[WRITE]); + INIT_LIST_HEAD(&dd->fifo_list[READ]); + INIT_LIST_HEAD(&dd->fifo_list[WRITE]); + dd->sort_list[READ] = RB_ROOT; + dd->sort_list[WRITE] = RB_ROOT; dd->dispatch = &q->queue_head; - dd->fifo_batch = fifo_batch; - dd->read_expire = read_expire; - dd->seek_cost = seek_cost; + dd->fifo_expire[READ] = read_expire; + dd->fifo_expire[WRITE] = write_expire; dd->hash_valid_count = 1; dd->writes_starved = writes_starved; + dd->front_merges = 1; + dd->fifo_batch = fifo_batch; e->elevator_data = dd; for (i = READ; i <= WRITE; i++) { struct request_list *rl = &q->rq[i]; - struct list_head *entry = &rl->free; + struct list_head *entry; - if (list_empty(&rl->free)) - continue; - - while ((entry = entry->next) != &rl->free) { + list_for_each(entry, &rl->free) { rq = list_entry_rq(entry); drq = kmem_cache_alloc(drq_pool, GFP_KERNEL); @@ -567,6 +749,7 @@ static int deadline_init(request_queue_t *q, elevator_t *e) memset(drq, 0, sizeof(*drq)); INIT_LIST_HEAD(&drq->fifo); INIT_LIST_HEAD(&drq->hash); + RB_CLEAR(&drq->rb_node); drq->request = rq; rq->elevator_private = drq; } @@ -578,10 +761,141 @@ static int deadline_init(request_queue_t *q, elevator_t *e) return ret; } +/* + * sysfs parts below + */ +struct deadline_fs_entry { + struct attribute attr; + ssize_t (*show)(struct deadline_data *, char *); + ssize_t (*store)(struct deadline_data *, const char *); +}; + +static ssize_t +deadline_var_show(unsigned int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t +deadline_var_store(unsigned int *var, const char *page) +{ + char *p = (char *) page; + int ret = strlen(p); + + *var = simple_strtoul(p, &p, 10); + return ret; +} + +#define SHOW_FUNCTION(__FUNC, __VAR) \ +static ssize_t __FUNC(struct deadline_data *dd, char *page) \ +{ \ + return deadline_var_show(__VAR, (page)); \ +} +SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ]); +SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE]); +SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved); +SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges); +SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ +static ssize_t __FUNC(struct deadline_data *dd, const char *page) \ +{ \ + int ret = deadline_var_store(__PTR, (page)); \ + if (*(__PTR) < (MIN)) \ + *(__PTR) = (MIN); \ + else if (*(__PTR) > (MAX)) \ + *(__PTR) = (MAX); \ + return ret; \ +} +STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX); +STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX); +STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX); +STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1); +STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX); +#undef STORE_FUNCTION + +static struct deadline_fs_entry deadline_readexpire_entry = { + .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, + .show = deadline_readexpire_show, + .store = deadline_readexpire_store, +}; +static struct deadline_fs_entry deadline_writeexpire_entry = { + 
.attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR }, + .show = deadline_writeexpire_show, + .store = deadline_writeexpire_store, +}; +static struct deadline_fs_entry deadline_writesstarved_entry = { + .attr = {.name = "writes_starved", .mode = S_IRUGO | S_IWUSR }, + .show = deadline_writesstarved_show, + .store = deadline_writesstarved_store, +}; +static struct deadline_fs_entry deadline_frontmerges_entry = { + .attr = {.name = "front_merges", .mode = S_IRUGO | S_IWUSR }, + .show = deadline_frontmerges_show, + .store = deadline_frontmerges_store, +}; +static struct deadline_fs_entry deadline_fifobatch_entry = { + .attr = {.name = "fifo_batch", .mode = S_IRUGO | S_IWUSR }, + .show = deadline_fifobatch_show, + .store = deadline_fifobatch_store, +}; + +static struct attribute *default_attrs[] = { + &deadline_readexpire_entry.attr, + &deadline_writeexpire_entry.attr, + &deadline_writesstarved_entry.attr, + &deadline_frontmerges_entry.attr, + &deadline_fifobatch_entry.attr, + NULL, +}; + +#define to_deadline(atr) container_of((atr), struct deadline_fs_entry, attr) + +static ssize_t +deadline_attr_show(struct kobject *kobj, struct attribute *attr, char *page, + size_t length, loff_t offset) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + struct deadline_fs_entry *entry = to_deadline(attr); + + if (!entry->show) + return 0; + if (offset) + return 0; + + return entry->show(e->elevator_data, page); +} + +static ssize_t +deadline_attr_store(struct kobject *kobj, struct attribute *attr, + const char *page, size_t length, loff_t offset) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + struct deadline_fs_entry *entry = to_deadline(attr); + + if (!entry->store) + return -EINVAL; + if (offset) + return 0; + + return entry->store(e->elevator_data, page); +} + +static struct sysfs_ops deadline_sysfs_ops = { + .show = deadline_attr_show, + .store = deadline_attr_store, +}; + +struct kobj_type deadline_ktype = { + .sysfs_ops = &deadline_sysfs_ops, + .default_attrs = default_attrs, +}; + static int __init deadline_slab_setup(void) { drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, 0, NULL, NULL); if (!drq_pool) panic("deadline: can't init slab pool\n"); @@ -594,14 +908,17 @@ subsys_initcall(deadline_slab_setup); elevator_t iosched_deadline = { .elevator_merge_fn = deadline_merge, .elevator_merged_fn = deadline_merged_request, - .elevator_merge_req_fn = deadline_merge_request, + .elevator_merge_req_fn = deadline_merged_requests, .elevator_next_req_fn = deadline_next_request, - .elevator_add_req_fn = deadline_add_request, + .elevator_add_req_fn = deadline_insert_request, .elevator_remove_req_fn = deadline_remove_request, .elevator_queue_empty_fn = deadline_queue_empty, - .elevator_get_sort_head_fn = deadline_get_sort_head, + .elevator_former_req_fn = deadline_former_request, + .elevator_latter_req_fn = deadline_latter_request, .elevator_init_fn = deadline_init, .elevator_exit_fn = deadline_exit, + + .elevator_ktype = &deadline_ktype, }; EXPORT_SYMBOL(iosched_deadline); diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index e4f10b6f9949..f24653c019c4 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -194,6 +194,12 @@ int elevator_noop_merge(request_queue_t *q, struct list_head **insert, return ELEVATOR_NO_MERGE; } +void elevator_noop_merge_requests(request_queue_t *q, struct request *req, + struct request *next) +{ + list_del_init(&next->queuelist); +} + void 
elevator_noop_add_request(request_queue_t *q, struct request *rq, struct list_head *insert_here) { @@ -370,19 +376,70 @@ int elv_queue_empty(request_queue_t *q) return list_empty(&q->queue_head); } -inline struct list_head *elv_get_sort_head(request_queue_t *q, - struct request *rq) +struct request *elv_latter_request(request_queue_t *q, struct request *rq) { + struct list_head *next; + + elevator_t *e = &q->elevator; + + if (e->elevator_latter_req_fn) + return e->elevator_latter_req_fn(q, rq); + + next = rq->queuelist.next; + if (next != &q->queue_head && next != &rq->queuelist) + return list_entry_rq(next); + + return NULL; +} + +struct request *elv_former_request(request_queue_t *q, struct request *rq) +{ + struct list_head *prev; + elevator_t *e = &q->elevator; - if (e->elevator_get_sort_head_fn) - return e->elevator_get_sort_head_fn(q, rq); + if (e->elevator_former_req_fn) + return e->elevator_latter_req_fn(q, rq); + + prev = rq->queuelist.prev; + if (prev != &q->queue_head && prev != &rq->queuelist) + return list_entry_rq(prev); + + return NULL; +} + +int elv_register_queue(struct gendisk *disk) +{ + request_queue_t *q = disk->queue; + elevator_t *e; + + if (!q) + return -ENXIO; + + e = &q->elevator; + + e->kobj.parent = kobject_get(&disk->kobj); + if (!e->kobj.parent) + return -EBUSY; + + snprintf(e->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); + e->kobj.ktype = e->elevator_ktype; + + return kobject_register(&e->kobj); +} + +void elv_unregister_queue(struct gendisk *disk) +{ + request_queue_t *q = disk->queue; + elevator_t *e = &q->elevator; - return &q->queue_head; + kobject_unregister(&e->kobj); + kobject_put(&disk->kobj); } elevator_t elevator_noop = { .elevator_merge_fn = elevator_noop_merge, + .elevator_merge_req_fn = elevator_noop_merge_requests, .elevator_next_req_fn = elevator_noop_next_request, .elevator_add_req_fn = elevator_noop_add_request, }; diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 8d42f19811b4..43af853a617a 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -112,6 +112,7 @@ void add_disk(struct gendisk *disk) blk_register_region(MKDEV(disk->major, disk->first_minor), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); + elv_register_queue(disk); } EXPORT_SYMBOL(add_disk); @@ -119,6 +120,7 @@ EXPORT_SYMBOL(del_gendisk); void unlink_gendisk(struct gendisk *disk) { + elv_unregister_queue(disk); blk_unregister_region(MKDEV(disk->major, disk->first_minor), disk->minors); } diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c index f07a40c447d0..538c8a04a2d3 100644 --- a/drivers/block/ioctl.c +++ b/drivers/block/ioctl.c @@ -128,10 +128,6 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, int ret, n; switch (cmd) { - case BLKELVGET: - case BLKELVSET: - /* deprecated, use the /proc/iosched interface instead */ - return -ENOTTY; case BLKRAGET: case BLKFRAGET: if (!arg) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 8d8616c13eca..36436a7aa57f 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -68,7 +68,7 @@ static inline int queue_congestion_on_threshold(void) { int ret; - ret = queue_nr_requests / 4 - 1; + ret = queue_nr_requests / 8 - 1; if (ret < 0) ret = 1; return ret; @@ -81,7 +81,7 @@ static inline int queue_congestion_off_threshold(void) { int ret; - ret = queue_nr_requests / 4 + 1; + ret = queue_nr_requests / 8 + 1; if (ret > queue_nr_requests) ret = queue_nr_requests; return ret; @@ -1159,6 +1159,8 @@ void 
blk_cleanup_queue(request_queue_t * q) { int count = (queue_nr_requests*2); + elevator_exit(q); + count -= __blk_cleanup_queue(&q->rq[READ]); count -= __blk_cleanup_queue(&q->rq[WRITE]); @@ -1168,8 +1170,6 @@ void blk_cleanup_queue(request_queue_t * q) if (blk_queue_tagged(q)) blk_queue_free_tags(q); - elevator_exit(q); - memset(q, 0, sizeof(*q)); } @@ -1576,22 +1576,22 @@ void blk_congestion_wait(int rw, long timeout) /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t *q, struct request *req, +static int attempt_merge(request_queue_t *q, struct request *req, struct request *next) { if (!rq_mergeable(req) || !rq_mergeable(next)) - return; + return 0; /* * not contigious */ if (req->sector + req->nr_sectors != next->sector) - return; + return 0; if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk || next->waiting || next->special) - return; + return 0; /* * If we are allowed to merge, then append bio list @@ -1612,27 +1612,31 @@ static void attempt_merge(request_queue_t *q, struct request *req, req->rq_disk->in_flight--; } - blkdev_dequeue_request(next); __blk_put_request(q, next); + return 1; } + + return 0; } -static inline void attempt_back_merge(request_queue_t *q, struct request *rq) +static inline int attempt_back_merge(request_queue_t *q, struct request *rq) { - struct list_head *next = rq->queuelist.next; - struct list_head *sort_head = elv_get_sort_head(q, rq); + struct request *next = elv_latter_request(q, rq); - if (next != sort_head) - attempt_merge(q, rq, list_entry_rq(next)); + if (next) + return attempt_merge(q, rq, next); + + return 0; } -static inline void attempt_front_merge(request_queue_t *q, struct request *rq) +static inline int attempt_front_merge(request_queue_t *q, struct request *rq) { - struct list_head *prev = rq->queuelist.prev; - struct list_head *sort_head = elv_get_sort_head(q, rq); + struct request *prev = elv_former_request(q, rq); - if (prev != sort_head) - attempt_merge(q, list_entry_rq(prev), rq); + if (prev) + return attempt_merge(q, prev, rq); + + return 0; } /** @@ -1715,8 +1719,8 @@ again: req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += nr_sectors; drive_stat_acct(req, nr_sectors, 0); - elv_merged_request(q, req); - attempt_back_merge(q, req); + if (!attempt_back_merge(q, req)) + elv_merged_request(q, req); goto out; case ELEVATOR_FRONT_MERGE: @@ -1742,8 +1746,8 @@ again: req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += nr_sectors; drive_stat_acct(req, nr_sectors, 0); - elv_merged_request(q, req); - attempt_front_merge(q, req); + if (!attempt_front_merge(q, req)) + elv_merged_request(q, req); goto out; /* @@ -2169,8 +2173,7 @@ int __init blk_dev_init(void) int i; request_cachep = kmem_cache_create("blkdev_requests", - sizeof(struct request), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + sizeof(struct request), 0, 0, NULL, NULL); if (!request_cachep) panic("Can't create request pool slab cache\n"); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 62092e8ed2ad..8a9c22d3b16b 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -13,6 +13,7 @@ typedef struct request *(elevator_next_req_fn) (request_queue_t *); typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, struct list_head *); typedef int (elevator_queue_empty_fn) (request_queue_t *); typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *); +typedef struct request *(elevator_request_list_fn) 
(request_queue_t *, struct request *); typedef struct list_head *(elevator_get_sort_head_fn) (request_queue_t *, struct request *); typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); @@ -29,12 +30,17 @@ struct elevator_s elevator_remove_req_fn *elevator_remove_req_fn; elevator_queue_empty_fn *elevator_queue_empty_fn; - elevator_get_sort_head_fn *elevator_get_sort_head_fn; + + elevator_request_list_fn *elevator_former_req_fn; + elevator_request_list_fn *elevator_latter_req_fn; elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; void *elevator_data; + + struct kobject kobj; + struct kobj_type *elevator_ktype; }; /* @@ -48,7 +54,10 @@ extern void elv_merge_requests(request_queue_t *, struct request *, extern void elv_merged_request(request_queue_t *, struct request *); extern void elv_remove_request(request_queue_t *, struct request *); extern int elv_queue_empty(request_queue_t *); -extern inline struct list_head *elv_get_sort_head(request_queue_t *, struct request *); +extern struct request *elv_former_request(request_queue_t *, struct request *); +extern struct request *elv_latter_request(request_queue_t *, struct request *); +extern int elv_register_queue(struct gendisk *); +extern void elv_unregister_queue(struct gendisk *); #define __elv_add_request_pos(q, rq, pos) \ (q)->elevator.elevator_add_req_fn((q), (rq), (pos)) @@ -64,18 +73,6 @@ extern elevator_t elevator_noop; */ extern elevator_t iosched_deadline; -/* - * use the /proc/iosched interface, all the below is history -> - */ -typedef struct blkelv_ioctl_arg_s { - int queue_ID; - int read_latency; - int write_latency; - int max_bomb_segments; -} blkelv_ioctl_arg_t; -#define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t)) -#define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t)) - extern int elevator_init(request_queue_t *, elevator_t *); extern void elevator_exit(request_queue_t *); extern inline int bio_rq_in_between(struct bio *, struct request *, struct list_head *); -- cgit v1.2.3 From 756a93f720ecf1f695691e06712be128bad02be0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Jan 2003 21:40:55 -0800 Subject: [PATCH] Clean up RPC client credcache lookups [1/6] Clean up RPC client credcache lookups. - Remove the limitation whereby the RPC client may only look up credentials for the current task. The ability to lookup arbitrary credentials is needed in order to allow a user daemon to set the RPCSEC_GSS private information once it has finished negotiating the RPCSEC user context with the server. 
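As a rough sketch of the new calling convention (illustration only, not part of this patch): a caller that needs credentials for an arbitrary principal now describes that principal in a struct auth_cred and hands it to the credcache, rather than the cache reading current->uid behind its back. The helper name below and the empty group list are hypothetical; rpcauth_lookup_credcache() itself stays static in net/sunrpc/auth.c, so a real caller outside that file would go through a wrapper along the lines of rpcauth_lookupcred().

#include <linux/sunrpc/auth.h>

/*
 * Sketch: look up (or create) a cred for an arbitrary uid/gid pair,
 * e.g. on behalf of a user daemon that has just negotiated a security
 * context for that user.  Uses the auth_cred layout and the
 * crmatch()/crcreate() signatures introduced by this patch; the
 * function name is made up for illustration.
 */
static struct rpc_cred *
example_lookup_cred(struct rpc_auth *auth, uid_t uid, gid_t gid, int taskflags)
{
	struct auth_cred acred = {
		.uid		= uid,
		.gid		= gid,
		.ngroups	= 0,
		.groups		= NULL,
	};

	return rpcauth_lookup_credcache(auth, &acred, taskflags);
}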
--- include/linux/sunrpc/auth.h | 12 +++++++++-- net/sunrpc/auth.c | 25 ++++++++++++++++------ net/sunrpc/auth_null.c | 6 +++--- net/sunrpc/auth_unix.c | 52 +++++++++++++++++++++++---------------------- 4 files changed, 59 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 5e481026fc7e..13d7f1aec49c 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -19,6 +19,14 @@ /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 +/* Work around the lack of a VFS credential */ +struct auth_cred { + uid_t uid; + gid_t gid; + int ngroups; + gid_t *groups; +}; + /* * Client user credentials */ @@ -74,13 +82,13 @@ struct rpc_authops { struct rpc_auth * (*create)(struct rpc_clnt *); void (*destroy)(struct rpc_auth *); - struct rpc_cred * (*crcreate)(int); + struct rpc_cred * (*crcreate)(struct auth_cred *, int); }; struct rpc_credops { void (*crdestroy)(struct rpc_cred *); - int (*crmatch)(struct rpc_cred *, int); + int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); u32 * (*crmarshal)(struct rpc_task *, u32 *, int); int (*crrefresh)(struct rpc_task *); u32 * (*crvalidate)(struct rpc_task *, u32 *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a45ab766376d..c930689b1982 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -174,7 +174,8 @@ rpcauth_gc_credcache(struct rpc_auth *auth, struct list_head *free) * Look up a process' credentials in the authentication cache */ static struct rpc_cred * -rpcauth_lookup_credcache(struct rpc_auth *auth, int taskflags) +rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, + int taskflags) { LIST_HEAD(free); struct list_head *pos, *next; @@ -183,7 +184,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, int taskflags) int nr = 0; if (!(taskflags & RPC_TASK_ROOTCREDS)) - nr = current->uid & RPC_CREDCACHE_MASK; + nr = acred->uid & RPC_CREDCACHE_MASK; retry: spin_lock(&rpc_credcache_lock); if (time_before(auth->au_nextgc, jiffies)) @@ -195,7 +196,7 @@ retry: continue; if (rpcauth_prune_expired(entry, &free)) continue; - if (entry->cr_ops->crmatch(entry, taskflags)) { + if (entry->cr_ops->crmatch(acred, entry, taskflags)) { list_del(&entry->cr_hash); cred = entry; break; @@ -217,7 +218,7 @@ retry: rpcauth_destroy_credlist(&free); if (!cred) { - new = auth->au_ops->crcreate(taskflags); + new = auth->au_ops->crcreate(acred, taskflags); if (new) { #ifdef RPC_DEBUG new->cr_magic = RPCAUTH_CRED_MAGIC; @@ -232,19 +233,31 @@ retry: struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *auth, int taskflags) { + struct auth_cred acred = { + .uid = current->fsuid, + .gid = current->fsgid, + .ngroups = current->ngroups, + .groups = current->groups, + }; dprintk("RPC: looking up %s cred\n", auth->au_ops->au_name); - return rpcauth_lookup_credcache(auth, taskflags); + return rpcauth_lookup_credcache(auth, &acred, taskflags); } struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) { struct rpc_auth *auth = task->tk_auth; + struct auth_cred acred = { + .uid = current->fsuid, + .gid = current->fsgid, + .ngroups = current->ngroups, + .groups = current->groups, + }; dprintk("RPC: %4d looking up %s cred\n", task->tk_pid, task->tk_auth->au_ops->au_name); - task->tk_msg.rpc_cred = rpcauth_lookup_credcache(auth, task->tk_flags); + task->tk_msg.rpc_cred = rpcauth_lookup_credcache(auth, &acred, task->tk_flags); if (task->tk_msg.rpc_cred == 0) task->tk_status = -ENOMEM; return task->tk_msg.rpc_cred; diff --git 
a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index d4ad52a1b1de..c716c7415f30 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -48,7 +48,7 @@ nul_destroy(struct rpc_auth *auth) * Create NULL creds for current process */ static struct rpc_cred * -nul_create_cred(int flags) +nul_create_cred(struct auth_cred *acred, int flags) { struct rpc_cred *cred; @@ -56,7 +56,7 @@ nul_create_cred(int flags) return NULL; atomic_set(&cred->cr_count, 0); cred->cr_flags = RPCAUTH_CRED_UPTODATE; - cred->cr_uid = current->uid; + cred->cr_uid = acred->uid; cred->cr_ops = &null_credops; return cred; @@ -75,7 +75,7 @@ nul_destroy_cred(struct rpc_cred *cred) * Match cred handle against current process */ static int -nul_match(struct rpc_cred *cred, int taskflags) +nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags) { return 1; } diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 95cd36580a95..96ce30c1269a 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -14,10 +14,12 @@ #include #define NFS_NGROUPS 16 + struct unx_cred { struct rpc_cred uc_base; - uid_t uc_fsuid; - gid_t uc_gid, uc_fsgid; + gid_t uc_gid; + uid_t uc_puid; /* process uid */ + gid_t uc_pgid; /* process gid */ gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -62,13 +64,13 @@ unx_destroy(struct rpc_auth *auth) } static struct rpc_cred * -unx_create_cred(int flags) +unx_create_cred(struct auth_cred *acred, int flags) { struct unx_cred *cred; int i; dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", - current->uid, current->gid); + acred->uid, acred->gid); if (!(cred = (struct unx_cred *) kmalloc(sizeof(*cred), GFP_KERNEL))) return NULL; @@ -76,20 +78,20 @@ unx_create_cred(int flags) atomic_set(&cred->uc_count, 0); cred->uc_flags = RPCAUTH_CRED_UPTODATE; if (flags & RPC_TASK_ROOTCREDS) { - cred->uc_uid = cred->uc_fsuid = 0; - cred->uc_gid = cred->uc_fsgid = 0; + cred->uc_uid = cred->uc_puid = 0; + cred->uc_gid = cred->uc_pgid = 0; cred->uc_gids[0] = NOGROUP; } else { - int groups = current->ngroups; + int groups = acred->ngroups; if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; - cred->uc_uid = current->uid; - cred->uc_gid = current->gid; - cred->uc_fsuid = current->fsuid; - cred->uc_fsgid = current->fsgid; + cred->uc_uid = acred->uid; + cred->uc_gid = acred->gid; + cred->uc_puid = current->uid; + cred->uc_pgid = current->gid; for (i = 0; i < groups; i++) - cred->uc_gids[i] = (gid_t) current->groups[i]; + cred->uc_gids[i] = (gid_t) acred->groups[i]; if (i < NFS_NGROUPS) cred->uc_gids[i] = NOGROUP; } @@ -110,7 +112,7 @@ unx_destroy_cred(struct rpc_cred *cred) * request root creds (e.g. for NFS swapping). 
*/ static int -unx_match(struct rpc_cred *rcred, int taskflags) +unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int taskflags) { struct unx_cred *cred = (struct unx_cred *) rcred; int i; @@ -118,22 +120,22 @@ unx_match(struct rpc_cred *rcred, int taskflags) if (!(taskflags & RPC_TASK_ROOTCREDS)) { int groups; - if (cred->uc_uid != current->uid - || cred->uc_gid != current->gid - || cred->uc_fsuid != current->fsuid - || cred->uc_fsgid != current->fsgid) + if (cred->uc_uid != acred->uid + || cred->uc_gid != acred->gid + || cred->uc_puid != current->uid + || cred->uc_pgid != current->gid) return 0; - groups = current->ngroups; + groups = acred->ngroups; if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; for (i = 0; i < groups ; i++) - if (cred->uc_gids[i] != (gid_t) current->groups[i]) + if (cred->uc_gids[i] != (gid_t) acred->groups[i]) return 0; return 1; } - return (cred->uc_uid == 0 && cred->uc_fsuid == 0 - && cred->uc_gid == 0 && cred->uc_fsgid == 0 + return (cred->uc_uid == 0 && cred->uc_puid == 0 + && cred->uc_gid == 0 && cred->uc_pgid == 0 && cred->uc_gids[0] == (gid_t) NOGROUP); } @@ -162,12 +164,12 @@ unx_marshal(struct rpc_task *task, u32 *p, int ruid) p += (n + 3) >> 2; /* Note: we don't use real uid if it involves raising priviledge */ - if (ruid && cred->uc_uid != 0 && cred->uc_gid != 0) { + if (ruid && cred->uc_puid != 0 && cred->uc_pgid != 0) { + *p++ = htonl((u32) cred->uc_puid); + *p++ = htonl((u32) cred->uc_pgid); + } else { *p++ = htonl((u32) cred->uc_uid); *p++ = htonl((u32) cred->uc_gid); - } else { - *p++ = htonl((u32) cred->uc_fsuid); - *p++ = htonl((u32) cred->uc_fsgid); } hold = p++; for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) -- cgit v1.2.3 From 6d52fdcbcd3ad58fef0cdef7ce4fccde01fc14c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Jan 2003 21:41:07 -0800 Subject: [PATCH] XDR 'encode' phase move [2/6] The RPCSEC_GSS user context defines a 'sequence number' in the AUTH header fields in order to provide protection against replay attacks. This number needs to lie within a given 'window', and is required to be updated even when retransmitting dropped requests. In order to allow this update to occur, move the XDR 'encode' phase so that it is done immediately before writing the data to the socket. 
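To see why the encode has to happen this late, consider a hedged sketch of what an RPCSEC_GSS credential marshalling routine might look like (illustrative only; struct gss_cred and its gc_seq_send field are hypothetical and not part of this series). Because the credential is marshalled as part of the encode phase, which after this patch runs from inside call_transmit(), the sequence number below is chosen at (re)transmission time, so a retransmitted request carries a fresh number inside the server's window instead of replaying the stale one picked when the request was first built.

#include <linux/sunrpc/auth.h>

/* Hypothetical GSS credential, for illustration only. */
struct gss_cred {
	struct rpc_cred	gc_base;
	u32		gc_seq_send;	/* next sequence number to emit */
};

/* Same shape as the ->crmarshal() hook in struct rpc_credops. */
static u32 *
gss_marshal(struct rpc_task *task, u32 *p, int ruid)
{
	struct gss_cred *cred = (struct gss_cred *) task->tk_msg.rpc_cred;

	/* bumped on every encode, i.e. on every (re)transmission */
	*p++ = htonl(cred->gc_seq_send++);

	/* ... emit the rest of the RPCSEC_GSS cred and verifier ... */
	return p;
}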
--- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 24 +++++++++++++++-------- net/sunrpc/xprt.c | 46 ++++++++++++++++++++++----------------------- 3 files changed, 39 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 034aa2ac05e9..5e01355e19f6 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -187,6 +187,7 @@ void xprt_set_timeout(struct rpc_timeout *, unsigned int, unsigned long); void xprt_reserve(struct rpc_task *); +int xprt_prepare_transmit(struct rpc_task *); void xprt_transmit(struct rpc_task *); void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_timeout *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 569e860d27ed..3a93fffc9056 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -470,7 +470,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %4d call_allocate (status %d)\n", task->tk_pid, task->tk_status); - task->tk_action = call_encode; + task->tk_action = call_bind; if (task->tk_buffer) return; @@ -510,8 +510,6 @@ call_encode(struct rpc_task *task) dprintk("RPC: %4d call_encode (status %d)\n", task->tk_pid, task->tk_status); - task->tk_action = call_bind; - /* Default buffer setup */ bufsiz = task->tk_bufsize >> 1; sndbuf->head[0].iov_base = (void *)task->tk_buffer; @@ -533,7 +531,8 @@ call_encode(struct rpc_task *task) if (!(p = call_header(task))) { printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); rpc_exit(task, -EIO); - } else + return; + } if (encode && (status = encode(req, p, task->tk_msg.rpc_argp)) < 0) { printk(KERN_WARNING "%s: can't encode arguments: %d\n", clnt->cl_protname, -status); @@ -615,10 +614,19 @@ call_transmit(struct rpc_task *task) task->tk_pid, task->tk_status); task->tk_action = call_status; + if (task->tk_status < 0) + return; + task->tk_status = xprt_prepare_transmit(task); + if (task->tk_status < 0) + return; + /* Encode here so that rpcsec_gss can use correct sequence number. 
*/ + call_encode(task); if (task->tk_status < 0) return; xprt_transmit(task); - if (!task->tk_msg.rpc_proc->p_decode && task->tk_status >= 0) { + if (task->tk_status < 0) + return; + if (!task->tk_msg.rpc_proc->p_decode) { task->tk_action = NULL; rpc_wake_up_task(task); } @@ -758,7 +766,7 @@ call_decode(struct rpc_task *task) if (RPC_IS_SETUID(task) && task->tk_suid_retry) { dprintk("RPC: %4d retry squashed uid\n", task->tk_pid); task->tk_flags ^= RPC_CALL_REALUID; - task->tk_action = call_encode; + task->tk_action = call_bind; task->tk_suid_retry--; return; } @@ -864,7 +872,7 @@ call_verify(struct rpc_task *task) task->tk_garb_retry--; dprintk("RPC: %4d call_verify: retry garbled creds\n", task->tk_pid); - task->tk_action = call_encode; + task->tk_action = call_bind; return NULL; case RPC_AUTH_TOOWEAK: printk(KERN_NOTICE "call_verify: server requires stronger " @@ -899,7 +907,7 @@ garbage: if (task->tk_garb_retry) { task->tk_garb_retry--; dprintk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid); - task->tk_action = call_encode; + task->tk_action = call_bind; return NULL; } printk(KERN_WARNING "RPC: garbage, exit EIO\n"); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f2630a93ef50..5ff838e376c2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -83,7 +83,6 @@ * Local functions */ static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); -static void do_xprt_transmit(struct rpc_task *); static inline void do_xprt_reserve(struct rpc_task *); static void xprt_disconnect(struct rpc_xprt *); static void xprt_conn_status(struct rpc_task *task); @@ -1091,51 +1090,40 @@ out_unlock: * Place the actual RPC call. * We have to copy the iovec because sendmsg fiddles with its contents. */ -void -xprt_transmit(struct rpc_task *task) +int +xprt_prepare_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + int err = 0; - dprintk("RPC: %4d xprt_transmit(%x)\n", task->tk_pid, - *(u32 *)(req->rq_svec[0].iov_base)); + dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid); if (xprt->shutdown) - task->tk_status = -EIO; + return -EIO; if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - - if (task->tk_status < 0) - return; + return -ENOTCONN; if (task->tk_rpcwait) rpc_remove_wait_queue(task); - /* set up everything as needed. */ - /* Write the record marker */ - if (xprt->stream) { - u32 *marker = req->rq_svec[0].iov_base; - - *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); - } - spin_lock_bh(&xprt->sock_lock); if (!__xprt_lock_write(xprt, task)) { - spin_unlock_bh(&xprt->sock_lock); - return; + err = -EAGAIN; + goto out_unlock; } if (list_empty(&req->rq_list)) { list_add_tail(&req->rq_list, &xprt->recv); req->rq_received = 0; } +out_unlock: spin_unlock_bh(&xprt->sock_lock); - - do_xprt_transmit(task); + return err; } -static void -do_xprt_transmit(struct rpc_task *task) +void +xprt_transmit(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; @@ -1143,6 +1131,16 @@ do_xprt_transmit(struct rpc_task *task) int status, retry = 0; + dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); + + /* set up everything as needed. */ + /* Write the record marker */ + if (xprt->stream) { + u32 *marker = req->rq_svec[0].iov_base; + + *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); + } + /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have * called xprt_sendmsg(). 
-- cgit v1.2.3 From af2f003391786fb632889c02142c941b212ba4ff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Jan 2003 21:41:19 -0800 Subject: [PATCH] RPCSEC upcall mechanism [3/6] This patch provides the upcall mechanism that will be used for communicating with the RPCSEC client user daemons. It sets up a 'ramfs' style filesystem (rpc_pipefs) that is populated with named pipes. Each time the kernel initializes a new NFS, lockd, statd or portmapper client, a directory automatically gets set up in this fs. The directory is initially only populated with a single file "info" that provides information such as the server IP address, the port number and the RPC service for the benefit of the user daemon. When an RPCSEC_GSS mechanism needs to communicate with the daemon, it is provided with a toolkit for setting up a named pipe in the same directory. It can then perform upcalls/downcalls in order to talk to the daemon in much the same way as is done by CODA. The NFSv4 client will also need to use this same filesystem to communicate with its user daemon in order to do name-to-uid/name-from-uid and name-to-gid/name-from-gid translation. --- include/linux/sunrpc/clnt.h | 4 + include/linux/sunrpc/rpc_pipe_fs.h | 47 +++ net/sunrpc/Makefile | 2 +- net/sunrpc/clnt.c | 37 +- net/sunrpc/rpc_pipe.c | 817 +++++++++++++++++++++++++++++++++++++ net/sunrpc/sunrpc_syms.c | 22 +- 6 files changed, 918 insertions(+), 11 deletions(-) create mode 100644 include/linux/sunrpc/rpc_pipe_fs.h create mode 100644 net/sunrpc/rpc_pipe.c (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4267b59764fc..26a73231ad16 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -28,6 +28,8 @@ struct rpc_portmap { __u16 pm_port; }; +struct rpc_inode; + /* * The high-level client handle */ @@ -58,6 +60,8 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; + char cl_pathname[30];/* Path in rpc_pipe_fs */ + struct dentry * cl_dentry; /* inode */ }; #define cl_timeout cl_xprt->timeout #define cl_prog cl_pmap.pm_prog diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h new file mode 100644 index 000000000000..b6c2c0fabc4d --- /dev/null +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -0,0 +1,47 @@ +#ifndef _LINUX_SUNRPC_RPC_PIPE_FS_H +#define _LINUX_SUNRPC_RPC_PIPE_FS_H + +#ifdef __KERNEL__ + +struct rpc_pipe_msg { + struct list_head list; + void *data; + size_t len; + size_t copied; + int errno; +}; + +struct rpc_pipe_ops { + ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char *, size_t); + ssize_t (*downcall)(struct file *, const char *, size_t); + void (*destroy_msg)(struct rpc_pipe_msg *); +}; + +struct rpc_inode { + struct inode vfs_inode; + void *private; + struct list_head pipe; + int pipelen; + int nreaders; + wait_queue_head_t waitq; + struct rpc_pipe_ops *ops; +}; + +static inline struct rpc_inode * +RPC_I(struct inode *inode) +{ + return container_of(inode, struct rpc_inode, vfs_inode); +} + +extern void rpc_inode_setowner(struct inode *, void *); +extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); + +extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); +extern int rpc_rmdir(char *); +extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *); +extern int rpc_unlink(char *); + +void __rpc_purge_current_upcall(struct file *); + +#endif +#endif diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index cc30a0343464..e83bedaf58f7 
100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -10,6 +10,6 @@ sunrpc-y := clnt.o xprt.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ pmap_clnt.o timer.o xdr.o \ - sunrpc_syms.o cache.o + sunrpc_syms.o cache.o rpc_pipe.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3a93fffc9056..c75a0a33974d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -30,6 +30,7 @@ #include #include +#include #include @@ -108,8 +109,19 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname, rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval); - if (!rpcauth_create(flavor, clnt)) + snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), + "/%s/clnt%p", clnt->cl_protname, clnt); + clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); + if (IS_ERR(clnt->cl_dentry)) { + printk(KERN_INFO "RPC: Couldn't create pipefs entry %s\n", + clnt->cl_pathname); + goto out_no_path; + } + if (!rpcauth_create(flavor, clnt)) { + printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", + flavor); goto out_no_auth; + } /* save the nodename */ clnt->cl_nodelen = strlen(system_utsname.nodename); @@ -123,8 +135,8 @@ out_no_clnt: printk(KERN_INFO "RPC: out of memory in rpc_create_client\n"); goto out; out_no_auth: - printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", - flavor); + rpc_rmdir(clnt->cl_pathname); +out_no_path: kfree(clnt); clnt = NULL; goto out; @@ -176,6 +188,7 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = NULL; } + rpc_rmdir(clnt->cl_pathname); if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; @@ -801,13 +814,23 @@ call_refresh(struct rpc_task *task) static void call_refreshresult(struct rpc_task *task) { + int status = task->tk_status; dprintk("RPC: %4d call_refreshresult (status %d)\n", task->tk_pid, task->tk_status); - if (task->tk_status < 0) - rpc_exit(task, -EACCES); - else - task->tk_action = call_reserve; + task->tk_status = 0; + task->tk_action = call_reserve; + if (status >= 0) + return; + switch (status) { + case -EPIPE: + rpc_delay(task, 3*HZ); + case -ETIMEDOUT: + task->tk_action = call_refresh; + break; + default: + rpc_exit(task, -EACCES); + } } /* diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c new file mode 100644 index 000000000000..79ddba53eafc --- /dev/null +++ b/net/sunrpc/rpc_pipe.c @@ -0,0 +1,817 @@ +/* + * net/sunrpc/rpc_pipe.c + * + * Userland/kernel interface for rpcauth_gss. 
+ * Code shamelessly plagiarized from fs/nfsd/nfsctl.c + * and fs/driverfs/inode.c + * + * Copyright (c) 2002, Trond Myklebust + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +static struct vfsmount *rpc_mount; +static spinlock_t rpc_mount_lock = SPIN_LOCK_UNLOCKED; +static int rpc_mount_count; + +static struct file_system_type rpc_pipe_fs_type; + + +static kmem_cache_t *rpc_inode_cachep; + +static void +__rpc_purge_upcall(struct inode *inode, int err) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe_msg *msg; + + while (!list_empty(&rpci->pipe)) { + msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list); + list_del_init(&msg->list); + msg->errno = err; + rpci->ops->destroy_msg(msg); + } + rpci->pipelen = 0; + wake_up(&rpci->waitq); +} + +void +rpc_purge_upcall(struct inode *inode, int err) +{ + down(&inode->i_sem); + __rpc_purge_upcall(inode, err); + up(&inode->i_sem); +} + +/* + * XXX should only be called in ->downcall + */ +void +__rpc_purge_current_upcall(struct file *filp) +{ + struct rpc_pipe_msg *msg; + + msg = filp->private_data; + filp->private_data = NULL; + + if (msg != NULL) + msg->errno = 0; +} + +int +rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) +{ + struct rpc_inode *rpci = RPC_I(inode); + int res = 0; + + down(&inode->i_sem); + if (rpci->nreaders) { + list_add_tail(&msg->list, &rpci->pipe); + rpci->pipelen += msg->len; + } else + res = -EPIPE; + up(&inode->i_sem); + wake_up(&rpci->waitq); + return res; +} + +void +rpc_inode_setowner(struct inode *inode, void *private) +{ + struct rpc_inode *rpci = RPC_I(inode); + down(&inode->i_sem); + rpci->private = private; + if (!private) + __rpc_purge_upcall(inode, -EPIPE); + up(&inode->i_sem); +} + +static struct inode * +rpc_alloc_inode(struct super_block *sb) +{ + struct rpc_inode *rpci; + rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL); + if (!rpci) + return NULL; + return &rpci->vfs_inode; +} + +static void +rpc_destroy_inode(struct inode *inode) +{ + kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); +} + +static int +rpc_pipe_open(struct inode *inode, struct file *filp) +{ + struct rpc_inode *rpci = RPC_I(inode); + int res = -ENXIO; + + down(&inode->i_sem); + if (rpci->private != NULL) { + if (filp->f_mode & FMODE_READ) + rpci->nreaders ++; + res = 0; + } + up(&inode->i_sem); + return res; +} + +static int +rpc_pipe_release(struct inode *inode, struct file *filp) +{ + struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + struct rpc_pipe_msg *msg; + + msg = (struct rpc_pipe_msg *)filp->private_data; + if (msg != NULL) { + msg->errno = -EPIPE; + rpci->ops->destroy_msg(msg); + } + down(&inode->i_sem); + if (filp->f_mode & FMODE_READ) + rpci->nreaders --; + if (!rpci->nreaders) + __rpc_purge_upcall(inode, -EPIPE); + up(&inode->i_sem); + return 0; +} + +static ssize_t +rpc_pipe_read(struct file *filp, char *buf, size_t len, loff_t *offset) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + struct rpc_pipe_msg *msg; + int res = 0; + + down(&inode->i_sem); + if (!rpci->private) { + res = -EPIPE; + goto out_unlock; + } + msg = filp->private_data; + if (msg == NULL) { + if (!list_empty(&rpci->pipe)) { + msg = list_entry(rpci->pipe.next, + struct rpc_pipe_msg, + list); + list_del_init(&msg->list); + rpci->pipelen -= msg->len; + filp->private_data = msg; + } + if (msg == NULL) + goto 
out_unlock; + } + res = rpci->ops->upcall(filp, msg, buf, len); + if (res < 0 || msg->len == msg->copied) { + filp->private_data = NULL; + msg->errno = 0; + rpci->ops->destroy_msg(msg); + } +out_unlock: + up(&inode->i_sem); + return res; +} + +static ssize_t +rpc_pipe_write(struct file *filp, const char *buf, size_t len, loff_t *offset) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + int res; + + down(&inode->i_sem); + res = -EPIPE; + if (rpci->private != NULL) + res = rpci->ops->downcall(filp, buf, len); + up(&inode->i_sem); + return res; +} + +static unsigned int +rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct rpc_inode *rpci; + unsigned int mask = 0; + + rpci = RPC_I(filp->f_dentry->d_inode); + poll_wait(filp, &rpci->waitq, wait); + + mask = POLLOUT | POLLWRNORM; + if (rpci->private == NULL) + mask |= POLLERR | POLLHUP; + if (!list_empty(&rpci->pipe)) + mask |= POLLIN | POLLRDNORM; + return mask; +} + +static int +rpc_pipe_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + int len; + + switch (cmd) { + case FIONREAD: + if (!rpci->private) + return -EPIPE; + len = rpci->pipelen; + if (filp->private_data) { + struct rpc_pipe_msg *msg; + msg = (struct rpc_pipe_msg *)filp->private_data; + len += msg->len - msg->copied; + } + return put_user(len, (int *)arg); + default: + return -EINVAL; + } +} + +struct inode_operations rpc_pipe_iops = { + .lookup = simple_lookup, +}; + + +struct file_operations rpc_pipe_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = rpc_pipe_read, + .write = rpc_pipe_write, + .poll = rpc_pipe_poll, + .ioctl = rpc_pipe_ioctl, + .open = rpc_pipe_open, + .release = rpc_pipe_release, +}; + +static int +rpc_show_info(struct seq_file *m, void *v) +{ + struct rpc_clnt *clnt = m->private; + + seq_printf(m, "RPC server: %s\n", clnt->cl_server); + seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, + clnt->cl_prog, clnt->cl_vers); + seq_printf(m, "address: %u.%u.%u.%u\n", + NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); + return 0; +} + +static int +rpc_info_open(struct inode *inode, struct file *file) +{ + struct rpc_clnt *clnt; + int ret = single_open(file, rpc_show_info, NULL); + + if (!ret) { + struct seq_file *m = file->private_data; + down(&inode->i_sem); + clnt = RPC_I(inode)->private; + if (clnt) { + atomic_inc(&clnt->cl_users); + m->private = clnt; + } else { + single_release(inode, file); + ret = -EINVAL; + } + up(&inode->i_sem); + } + return ret; +} + +static int +rpc_info_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct rpc_clnt *clnt = (struct rpc_clnt *)m->private; + + if (clnt) + rpc_release_client(clnt); + return single_release(inode, file); +} + +static struct file_operations rpc_info_operations = { + .open = rpc_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = rpc_info_release, +}; + + +/* + * We have a single directory with 1 node in it. + */ +enum { + RPCAUTH_Root = 1, + RPCAUTH_lockd, + RPCAUTH_nfs, + RPCAUTH_portmap, + RPCAUTH_statd, + RPCAUTH_RootEOF +}; + +/* + * Description of fs contents. 
+ */ +struct rpc_filelist { + char *name; + struct file_operations *i_fop; + int mode; +}; + +static struct rpc_filelist files[] = { + [RPCAUTH_lockd] = { + .name = "lockd", + .mode = S_IFDIR | S_IRUSR | S_IXUSR, + }, + [RPCAUTH_nfs] = { + .name = "nfs", + .mode = S_IFDIR | S_IRUSR | S_IXUSR, + }, + [RPCAUTH_portmap] = { + .name = "portmap", + .mode = S_IFDIR | S_IRUSR | S_IXUSR, + }, + [RPCAUTH_statd] = { + .name = "statd", + .mode = S_IFDIR | S_IRUSR | S_IXUSR, + }, +}; + +enum { + RPCAUTH_info = 2, + RPCAUTH_EOF +}; + +static struct rpc_filelist authfiles[] = { + [RPCAUTH_info] = { + .name = "info", + .i_fop = &rpc_info_operations, + .mode = S_IFREG | S_IRUSR, + }, +}; + +static int +rpc_get_mount(void) +{ + struct vfsmount * mnt = NULL; + + spin_lock(&rpc_mount_lock); + if (rpc_mount) + goto out_get; + spin_unlock(&rpc_mount_lock); + mnt = kern_mount(&rpc_pipe_fs_type); + if (IS_ERR(mnt)) + return -ENODEV; + spin_lock(&rpc_mount_lock); + if (!rpc_mount) { + rpc_mount = mnt; + mnt = NULL; + goto out_dontget; + } +out_get: + mntget(rpc_mount); +out_dontget: + ++rpc_mount_count; + spin_unlock(&rpc_mount_lock); + if (mnt) + mntput(mnt); + return 0; +} + +static void +rpc_put_mount(void) +{ + struct vfsmount *mnt; + + spin_lock(&rpc_mount_lock); + mnt = rpc_mount; + --rpc_mount_count; + if (rpc_mount_count == 0) + rpc_mount = NULL; + else + mnt = NULL; + spin_unlock(&rpc_mount_lock); + if (mnt) + mntput(mnt); +} + +static int +rpc_lookup_path(char *path, struct nameidata *nd, int flags) +{ + if (rpc_get_mount()) { + printk(KERN_WARNING "%s: %s failed to mount " + "pseudofilesystem \n", __FILE__, __FUNCTION__); + return -ENODEV; + } + nd->mnt = mntget(rpc_mount); + nd->dentry = dget(rpc_mount->mnt_sb->s_root); + nd->last_type = LAST_ROOT; + nd->flags = flags; + + if (path_walk(path, nd)) { + printk(KERN_WARNING "%s: %s failed to find path %s\n", + __FILE__, __FUNCTION__, path); + rpc_put_mount(); + return -ENOENT; + } + return 0; +} + +static void +rpc_release_path(struct nameidata *nd) +{ + path_release(nd); + rpc_put_mount(); +} + +static struct inode * +rpc_get_inode(struct super_block *sb, int mode) +{ + struct inode *inode = new_inode(sb); + if (!inode) + return NULL; + inode->i_mode = mode; + inode->i_uid = inode->i_gid = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch(mode & S_IFMT) { + case S_IFDIR: + inode->i_fop = &simple_dir_operations; + inode->i_op = &simple_dir_inode_operations; + inode->i_nlink++; + default: + break; + } + return inode; +} + +/* + * FIXME: This probably has races. 
+ */ +static void +rpc_depopulate(struct dentry *dir) +{ + LIST_HEAD(head); + struct list_head *pos, *next; + struct dentry *dentry; + + down(&dir->d_inode->i_sem); + spin_lock(&dcache_lock); + list_for_each_safe(pos, next, &dir->d_subdirs) { + dentry = list_entry(pos, struct dentry, d_child); + if (!d_unhashed(dentry)) { + dget_locked(dentry); + list_del(&dentry->d_hash); + list_add(&dentry->d_hash, &head); + } + } + spin_unlock(&dcache_lock); + while (!list_empty(&head)) { + dentry = list_entry(head.next, struct dentry, d_hash); + list_del_init(&dentry->d_hash); + if (dentry->d_inode) { + rpc_inode_setowner(dentry->d_inode, NULL); + simple_unlink(dir->d_inode, dentry); + } + dput(dentry); + } + up(&dir->d_inode->i_sem); +} + +static int +rpc_populate(struct dentry *dir, + struct rpc_filelist *files, + int start, int eof) +{ + void *private = RPC_I(dir->d_inode)->private; + struct qstr name; + struct dentry *dentry; + struct inode *inode; + int mode, i; + for (i = start; i < eof; i++) { + name.name = files[i].name; + name.len = strlen(name.name); + name.hash = full_name_hash(name.name, name.len); + dentry = d_alloc(dir, &name); + if (!dentry) + goto out_bad; + mode = files[i].mode; + inode = rpc_get_inode(dir->d_inode->i_sb, mode); + if (!inode) { + dput(dentry); + goto out_bad; + } + inode->i_ino = i; + if (files[i].i_fop) + inode->i_fop = files[i].i_fop; + if (private) + rpc_inode_setowner(inode, private); + if (S_ISDIR(mode)) + dir->d_inode->i_nlink++; + d_add(dentry, inode); + } + return 0; +out_bad: + printk(KERN_WARNING "%s: %s failed to populate directory %s\n", + __FILE__, __FUNCTION__, dir->d_name.name); + return -ENOMEM; +} + +static int +__rpc_mkdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode; + + inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR); + if (!inode) + goto out_err; + inode->i_ino = iunique(dir->i_sb, 100); + d_instantiate(dentry, inode); + dir->i_nlink++; + inode_dir_notify(dir, DN_CREATE); + rpc_get_mount(); + return 0; +out_err: + printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", + __FILE__, __FUNCTION__, dentry->d_name.name); + return -ENOMEM; +} + +static int +__rpc_rmdir(struct inode *dir, struct dentry *dentry) +{ + int error; + + rpc_inode_setowner(dentry->d_inode, NULL); + if ((error = simple_rmdir(dir, dentry)) != 0) + return error; + if (!error) { + inode_dir_notify(dir, DN_DELETE); + d_drop(dentry); + rpc_put_mount(); + } + return 0; +} + +struct dentry * +rpc_lookup_negative(char *path, struct nameidata *nd) +{ + struct dentry *dentry; + struct inode *dir; + int error; + + if ((error = rpc_lookup_path(path, nd, LOOKUP_PARENT)) != 0) + return ERR_PTR(error); + dir = nd->dentry->d_inode; + down(&dir->i_sem); + dentry = lookup_hash(&nd->last, nd->dentry); + if (IS_ERR(dentry)) + goto out_err; + if (dentry->d_inode) { + dput(dentry); + dentry = ERR_PTR(-EEXIST); + goto out_err; + } + return dentry; +out_err: + up(&dir->i_sem); + rpc_release_path(nd); + return dentry; +} + + +struct dentry * +rpc_mkdir(char *path, struct rpc_clnt *rpc_client) +{ + struct nameidata nd; + struct dentry *dentry; + struct inode *dir; + int error; + + dentry = rpc_lookup_negative(path, &nd); + if (IS_ERR(dentry)) + return dentry; + dir = nd.dentry->d_inode; + if ((error = __rpc_mkdir(dir, dentry)) != 0) + goto err_dput; + RPC_I(dentry->d_inode)->private = rpc_client; + error = rpc_populate(dentry, authfiles, + RPCAUTH_info, RPCAUTH_EOF); + if (error) + goto err_depopulate; +out: + up(&dir->i_sem); + rpc_release_path(&nd); + 
return dentry; +err_depopulate: + rpc_depopulate(dentry); + __rpc_rmdir(dir, dentry); +err_dput: + dput(dentry); + printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n", + __FILE__, __FUNCTION__, path, error); + dentry = ERR_PTR(error); + goto out; +} + +int +rpc_rmdir(char *path) +{ + struct nameidata nd; + struct dentry *dentry; + struct inode *dir; + int error; + + if ((error = rpc_lookup_path(path, &nd, LOOKUP_PARENT)) != 0) + return error; + dir = nd.dentry->d_inode; + down(&dir->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto out_release; + } + rpc_depopulate(dentry); + error = __rpc_rmdir(dir, dentry); + dput(dentry); +out_release: + up(&dir->i_sem); + rpc_release_path(&nd); + return error; +} + +struct dentry * +rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops) +{ + struct nameidata nd; + struct dentry *dentry; + struct inode *dir, *inode; + struct rpc_inode *rpci; + + dentry = rpc_lookup_negative(path, &nd); + if (IS_ERR(dentry)) + return dentry; + dir = nd.dentry->d_inode; + inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IXUSR); + if (!inode) + goto err_dput; + inode->i_ino = iunique(dir->i_sb, 100); + inode->i_fop = &rpc_pipe_fops; + d_instantiate(dentry, inode); + rpci = RPC_I(inode); + rpci->private = private; + rpci->ops = ops; + inode_dir_notify(dir, DN_CREATE); +out: + up(&dir->i_sem); + rpc_release_path(&nd); + return dentry; +err_dput: + dput(dentry); + dentry = ERR_PTR(-ENOMEM); + printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", + __FILE__, __FUNCTION__, path, -ENOMEM); + goto out; +} + +int +rpc_unlink(char *path) +{ + struct nameidata nd; + struct dentry *dentry; + struct inode *dir; + int error; + + if ((error = rpc_lookup_path(path, &nd, LOOKUP_PARENT)) != 0) + return error; + dir = nd.dentry->d_inode; + down(&dir->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto out_release; + } + d_drop(dentry); + if (dentry->d_inode) { + rpc_inode_setowner(dentry->d_inode, NULL); + error = simple_unlink(dir, dentry); + } + dput(dentry); + inode_dir_notify(dir, DN_DELETE); +out_release: + up(&dir->i_sem); + rpc_release_path(&nd); + return error; +} + +/* + * populate the filesystem + */ +static struct super_operations s_ops = { + .alloc_inode = rpc_alloc_inode, + .destroy_inode = rpc_destroy_inode, + .statfs = simple_statfs, +}; + +#define RPCAUTH_GSSMAGIC 0x67596969 + +static int +rpc_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *inode; + struct dentry *root; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = RPCAUTH_GSSMAGIC; + sb->s_op = &s_ops; + + inode = rpc_get_inode(sb, S_IFDIR | 0755); + if (!inode) + return -ENOMEM; + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + if (rpc_populate(root, files, RPCAUTH_Root + 1, RPCAUTH_RootEOF)) + goto out; + sb->s_root = root; + return 0; +out: + d_genocide(root); + dput(root); + return -ENOMEM; +} + +static struct super_block * +rpc_get_sb(struct file_system_type *fs_type, + int flags, char *dev_name, void *data) +{ + return get_sb_single(fs_type, flags, data, rpc_fill_super); +} + +static struct file_system_type rpc_pipe_fs_type = { + .owner = THIS_MODULE, + .name = "rpc_pipefs", + .get_sb = rpc_get_sb, + .kill_sb = kill_litter_super, +}; + +static void +init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +{ + struct 
rpc_inode *rpci = (struct rpc_inode *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + inode_init_once(&rpci->vfs_inode); + rpci->private = NULL; + rpci->nreaders = 0; + INIT_LIST_HEAD(&rpci->pipe); + rpci->pipelen = 0; + init_waitqueue_head(&rpci->waitq); + rpci->ops = NULL; + } +} + +int register_rpc_pipefs(void) +{ + rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", + sizeof(struct rpc_inode), + 0, SLAB_HWCACHE_ALIGN, + init_once, NULL); + if (!rpc_inode_cachep) + return -ENOMEM; + register_filesystem(&rpc_pipe_fs_type); + return 0; +} + +void unregister_rpc_pipefs(void) +{ + if (kmem_cache_destroy(rpc_inode_cachep)) + printk(KERN_WARNING "RPC: unable to free inode cache\n"); + unregister_filesystem(&rpc_pipe_fs_type); +} diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index dc56b0ea3748..b499ab61af09 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -22,6 +22,7 @@ #include #include #include +#include /* RPC scheduler */ @@ -42,6 +43,7 @@ EXPORT_SYMBOL(rpc_release_task); EXPORT_SYMBOL(rpc_create_client); EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); +EXPORT_SYMBOL(rpc_release_client); EXPORT_SYMBOL(rpc_killall_tasks); EXPORT_SYMBOL(rpc_call_sync); EXPORT_SYMBOL(rpc_call_async); @@ -51,6 +53,11 @@ EXPORT_SYMBOL(rpc_clnt_sigunmask); EXPORT_SYMBOL(rpc_delay); EXPORT_SYMBOL(rpc_restart_call); EXPORT_SYMBOL(rpc_setbufsize); +EXPORT_SYMBOL(rpc_unlink); +EXPORT_SYMBOL(rpc_wake_up); +EXPORT_SYMBOL(rpc_queue_upcall); +EXPORT_SYMBOL(rpc_mkpipe); +EXPORT_SYMBOL(__rpc_purge_current_upcall); /* Client transport */ EXPORT_SYMBOL(xprt_create_proto); @@ -126,11 +133,18 @@ EXPORT_SYMBOL(nfsd_debug); EXPORT_SYMBOL(nlm_debug); #endif +extern int register_rpc_pipefs(void); +extern void unregister_rpc_pipefs(void); + static int __init init_sunrpc(void) { - if (rpc_init_mempool() != 0) - return -ENOMEM; + int err = register_rpc_pipefs(); + if (err) + goto out; + err = rpc_init_mempool() != 0; + if (err) + goto out; #ifdef RPC_DEBUG rpc_register_sysctl(); #endif @@ -139,12 +153,14 @@ init_sunrpc(void) #endif cache_register(&auth_domain_cache); cache_register(&ip_map_cache); - return 0; +out: + return err; } static void __exit cleanup_sunrpc(void) { + unregister_rpc_pipefs(); rpc_destroy_mempool(); cache_unregister(&auth_domain_cache); cache_unregister(&ip_map_cache); -- cgit v1.2.3 From 8d188768b6a1b5692994c02539c801aad888b555 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Jan 2003 21:41:33 -0800 Subject: [PATCH] RPCSEC_GSS authentication framework [4/6] This patch provides the basic framework for RPCSEC_GSS authentication in the RPC client. The protocol is fully described in RFC-2203. Sun has supported it in their commercial NFSv3 and v2 implementations for quite some time, and it has been specified in RFC3010 as being mandatory for NFSv4. - Update the mount_data struct for NFSv2 and v3 in order to allow them to pass an RPCSEC_GSS security flavour. Compatibility with existing versions of the 'mount' program is ensured by requiring that RPCSEC support be enabled using the new flag NFS_MOUNT_SECFLAVOUR. - Provide secure authentication, and later data encryption on a per-user basis. A later patch will an provide an implementation of the Kerberos 5 security mechanism. SPKM and LIPKEY are still being planned. - Security context negotiation and initialization are all assumed to be done in userland. A later patch will provide the actual upcall mechanisms to allow for this. 
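The upcall mechanism referred to above is the rpc_pipefs interface added by the
previous patch. As a rough illustration of how a security mechanism might plug
into it (a sketch only, not part of either patch: the demo_* names, the pipe
path, the rpc_pipe_fs.h header name and the msg->data field are assumptions;
rpc_mkpipe(), rpc_queue_upcall() and the upcall/downcall/destroy_msg hooks are
the interfaces actually introduced above):

	/* Sketch of a hypothetical rpc_pipefs upcall user. */
	#include <linux/sunrpc/rpc_pipe_fs.h>	/* header name assumed */
	#include <linux/slab.h>
	#include <linux/err.h>
	#include <asm/uaccess.h>

	/* read(2) on the pipe: hand the pending request to the userland daemon */
	static ssize_t demo_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
					char *dst, size_t buflen)
	{
		char *data = (char *)msg->data + msg->copied;	/* ->data assumed */
		ssize_t mlen = msg->len - msg->copied;

		if (mlen > buflen)
			mlen = buflen;
		if (copy_to_user(dst, data, mlen))
			return -EFAULT;
		msg->copied += mlen;
		/* rpc_pipe_read() destroys the message once copied == len */
		return mlen;
	}

	/* write(2) on the pipe: parse the daemon's reply */
	static ssize_t demo_pipe_downcall(struct file *filp, const char *src,
					  size_t mlen)
	{
		/* a real mechanism would decode a context handle from src here */
		return mlen;
	}

	static void demo_pipe_destroy_msg(struct rpc_pipe_msg *msg)
	{
		/* assumes msg was kmalloc()ed by this mechanism; a real caller
		 * would also fail or wake up the task waiting on it */
		kfree(msg);
	}

	static struct rpc_pipe_ops demo_upcall_ops = {
		.upcall		= demo_pipe_upcall,
		.downcall	= demo_pipe_downcall,
		.destroy_msg	= demo_pipe_destroy_msg,
	};

	static struct dentry *demo_pipe;

	static int demo_init(void *client)
	{
		demo_pipe = rpc_mkpipe("/demo/pipe", client, &demo_upcall_ops);
		return IS_ERR(demo_pipe) ? PTR_ERR(demo_pipe) : 0;
	}

	static int demo_request(struct rpc_pipe_msg *msg)
	{
		/* returns -EPIPE if no daemon has the pipe open for reading */
		return rpc_queue_upcall(demo_pipe->d_inode, msg);
	}
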
--- fs/Kconfig | 11 + fs/nfs/inode.c | 9 +- include/linux/nfs_mount.h | 3 +- include/linux/sunrpc/auth.h | 15 +- include/linux/sunrpc/auth_gss.h | 97 ++++++++ include/linux/sunrpc/gss_api.h | 132 +++++++++++ include/linux/sunrpc/gss_asn1.h | 85 +++++++ include/linux/sunrpc/gss_err.h | 177 +++++++++++++++ include/linux/sunrpc/msg_prot.h | 16 +- include/linux/sunrpc/sched.h | 3 + include/linux/sunrpc/xdr.h | 4 +- net/sunrpc/Makefile | 2 + net/sunrpc/auth.c | 14 +- net/sunrpc/auth_gss/Makefile | 11 + net/sunrpc/auth_gss/auth_gss.c | 378 ++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_generic_token.c | 269 +++++++++++++++++++++++ net/sunrpc/auth_gss/gss_mech_switch.c | 243 ++++++++++++++++++++ net/sunrpc/auth_gss/gss_pseudoflavors.c | 235 ++++++++++++++++++++ net/sunrpc/auth_gss/sunrpcgss_syms.c | 34 +++ net/sunrpc/auth_null.c | 4 +- net/sunrpc/auth_unix.c | 6 +- net/sunrpc/clnt.c | 2 +- net/sunrpc/sunrpc_syms.c | 2 + 23 files changed, 1736 insertions(+), 16 deletions(-) create mode 100644 include/linux/sunrpc/auth_gss.h create mode 100644 include/linux/sunrpc/gss_api.h create mode 100644 include/linux/sunrpc/gss_asn1.h create mode 100644 include/linux/sunrpc/gss_err.h create mode 100644 net/sunrpc/auth_gss/Makefile create mode 100644 net/sunrpc/auth_gss/auth_gss.c create mode 100644 net/sunrpc/auth_gss/gss_generic_token.c create mode 100644 net/sunrpc/auth_gss/gss_mech_switch.c create mode 100644 net/sunrpc/auth_gss/gss_pseudoflavors.c create mode 100644 net/sunrpc/auth_gss/sunrpcgss_syms.c (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index 5f95ac068fb8..755f57a1f77d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1339,6 +1339,17 @@ config SUNRPC default m if NFS_FS!=y && NFSD!=y && (NFS_FS=m || NFSD=m) default y if NFS_FS=y || NFSD=y +config SUNRPC_GSS + tristate "Provide RPCSEC_GSS authentication (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + default SUNRPC if NFS_V4=y + help + Provides cryptographic authentication for NFS rpc requests. To + make this useful, you also need support for a gss-api mechanism + (such as Kerberos). + Note: You should always select this option if you wish to use + NFSv4. 
+ config LOCKD tristate default m if NFS_FS!=y && NFSD!=y && (NFS_FS=m || NFSD=m) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 261a801edb1e..78d1f895740f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -346,6 +346,7 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile struct rpc_clnt *clnt = NULL; struct rpc_timeout timeparms; int tcp, err = -EIO; + u32 authflavor; server = NFS_SB(sb); sb->s_blocksize_bits = 0; @@ -408,8 +409,14 @@ int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int sile printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); goto out_fail; } + + if (data->flags & NFS_MOUNT_SECFLAVOUR) + authflavor = data->pseudoflavor; + else + authflavor = RPC_AUTH_UNIX; + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, RPC_AUTH_UNIX); + server->rpc_ops->version, authflavor); if (clnt == NULL) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); xprt_destroy(xprt); diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 223ed3462064..c2268ca1ccd3 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -40,6 +40,7 @@ struct nfs_mount_data { int namlen; /* 2 */ unsigned int bsize; /* 3 */ struct nfs3_fh root; /* 4 */ + int pseudoflavor; /* 4 */ }; /* bits in the flags field */ @@ -55,10 +56,8 @@ struct nfs_mount_data { #define NFS_MOUNT_KERBEROS 0x0100 /* 3 */ #define NFS_MOUNT_NONLM 0x0200 /* 3 */ #define NFS_MOUNT_BROKEN_SUID 0x0400 /* 4 */ -#if 0 #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* reserved */ -#endif #define NFS_MOUNT_FLAGMASK 0xFFFF #endif diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 13d7f1aec49c..4d466e9a4c64 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -13,12 +13,17 @@ #include #include +#include +#include #include /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 +/* Maximum size (in bytes) of an rpc credential or verifier */ +#define RPC_MAX_AUTH_SIZE (400) + /* Work around the lack of a VFS credential */ struct auth_cred { uid_t uid; @@ -64,6 +69,10 @@ struct rpc_auth { unsigned int au_rslack; /* reply verf size guess */ unsigned int au_flags; /* various flags */ struct rpc_authops * au_ops; /* operations */ + rpc_authflavor_t au_flavor; /* pseudoflavor (note may + * differ from the flavor in + * au_ops->au_flavor in gss + * case) */ /* per-flavor data */ }; @@ -79,10 +88,10 @@ struct rpc_authops { #ifdef RPC_DEBUG char * au_name; #endif - struct rpc_auth * (*create)(struct rpc_clnt *); + struct rpc_auth * (*create)(struct rpc_clnt *, rpc_authflavor_t); void (*destroy)(struct rpc_auth *); - struct rpc_cred * (*crcreate)(struct auth_cred *, int); + struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); }; struct rpc_credops { @@ -100,6 +109,8 @@ extern struct rpc_authops authnull_ops; extern struct rpc_authops authdes_ops; #endif +u32 pseudoflavor_to_flavor(rpc_authflavor_t); + int rpcauth_register(struct rpc_authops *); int rpcauth_unregister(struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h new file mode 100644 index 000000000000..6f23b9d9ce07 --- /dev/null +++ b/include/linux/sunrpc/auth_gss.h @@ -0,0 +1,97 @@ +/* + * linux/include/linux/auth_gss.h + * + * Declarations for RPCSEC_GSS + * + * Dug Song + * Andy Adamson + * Bruce Fields + * Copyright (c) 
2000 The Regents of the University of Michigan + * + * $Id$ + */ + +#ifndef _LINUX_SUNRPC_AUTH_GSS_H +#define _LINUX_SUNRPC_AUTH_GSS_H + +#ifdef __KERNEL__ +#ifdef __linux__ +#include +#include +#include +#endif + +#define RPC_GSS_VERSION 1 + +#define MAXSEQ 0x80000000 /* maximum legal sequence number, from rfc 2203 */ + +enum rpc_gss_proc { + RPC_GSS_PROC_DATA = 0, + RPC_GSS_PROC_INIT = 1, + RPC_GSS_PROC_CONTINUE_INIT = 2, + RPC_GSS_PROC_DESTROY = 3 +}; + +enum rpc_gss_svc { + RPC_GSS_SVC_NONE = 1, + RPC_GSS_SVC_INTEGRITY = 2, + RPC_GSS_SVC_PRIVACY = 3 +}; + +/* on-the-wire gss cred: */ +struct rpc_gss_wire_cred { + u32 gc_v; /* version */ + u32 gc_proc; /* control procedure */ + u32 gc_seq; /* sequence number */ + u32 gc_svc; /* service */ + struct xdr_netobj gc_ctx; /* context handle */ +}; + +/* on-the-wire gss verifier: */ +struct rpc_gss_wire_verf { + u32 gv_flavor; + struct xdr_netobj gv_verf; +}; + +/* return from gss NULL PROC init sec context */ +struct rpc_gss_init_res { + struct xdr_netobj gr_ctx; /* context handle */ + u32 gr_major; /* major status */ + u32 gr_minor; /* minor status */ + u32 gr_win; /* sequence window */ + struct xdr_netobj gr_token; /* token */ +}; + +#define GSS_SEQ_WIN 5 + +/* The gss_cl_ctx struct holds all the information the rpcsec_gss client + * code needs to know about a single security context. In particular, + * gc_gss_ctx is the context handle that is used to do gss-api calls, while + * gc_wire_ctx is the context handle that is used to identify the context on + * the wire when communicating with a server. */ + +struct gss_cl_ctx { + u32 gc_proc; + u32 gc_seq; + spinlock_t gc_seq_lock; + struct gss_ctx *gc_gss_ctx; + struct xdr_netobj gc_wire_ctx; + u32 gc_win; +}; + +struct gss_cred { + struct rpc_cred gc_base; + u32 gc_flavor; + struct gss_cl_ctx *gc_ctx; +}; + +#define gc_uid gc_base.cr_uid +#define gc_count gc_base.cr_count +#define gc_flags gc_base.cr_flags +#define gc_expire gc_base.cr_expire + +void print_hexl(u32 *p, u_int length, u_int offset); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_AUTH_GSS_H */ + diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h new file mode 100644 index 000000000000..a41ef0ab5da8 --- /dev/null +++ b/include/linux/sunrpc/gss_api.h @@ -0,0 +1,132 @@ +/* + * linux/include/linux/gss_api.h + * + * Somewhat simplified version of the gss api. + * + * Dug Song + * Andy Adamson + * Bruce Fields + * Copyright (c) 2000 The Regents of the University of Michigan + * + * $Id$ + */ + +#ifndef _LINUX_SUNRPC_GSS_API_H +#define _LINUX_SUNRPC_GSS_API_H + +#ifdef __KERNEL__ +#include + +/* The mechanism-independent gss-api context: */ +struct gss_ctx { + struct gss_api_mech *mech_type; + void *internal_ctx_id; +}; + +#define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) +#define GSS_C_NO_CONTEXT ((struct gss_ctx *) 0) +#define GSS_C_NULL_OID ((struct xdr_netobj) 0) + +/*XXX arbitrary length - is this set somewhere? */ +#define GSS_OID_MAX_LEN 32 + +/* gss-api prototypes; note that these are somewhat simplified versions of + * the prototypes specified in RFC 2744. 
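+ * In this client the typical sequence is: import a context that was
+ * negotiated in userland with gss_import_sec_context(), call gss_get_mic()
+ * while marshalling a request, gss_verify_mic() while validating the reply,
+ * and gss_delete_sec_context() when the context is torn down.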
*/ +u32 gss_import_sec_context( + struct xdr_netobj *input_token, + struct gss_api_mech *mech, + struct gss_ctx **ctx_id); +u32 gss_get_mic( + struct gss_ctx *ctx_id, + u32 qop, + struct xdr_netobj *message_buffer, + struct xdr_netobj *message_token); +u32 gss_verify_mic( + struct gss_ctx *ctx_id, + struct xdr_netobj *signbuf, + struct xdr_netobj *checksum, + u32 *qstate); +u32 gss_delete_sec_context( + struct gss_ctx **ctx_id); + +/* We maintain a list of the pseudoflavors (equivalently, mechanism-qop-service + * triples) that we currently support: */ + +struct sup_sec_triple { + struct list_head triples; + u32 pseudoflavor; + struct gss_api_mech *mech; + u32 qop; + u32 service; +}; + +int gss_register_triple(u32 pseudoflavor, struct gss_api_mech *mech, u32 qop, + u32 service); +int gss_unregister_triple(u32 pseudoflavor); +int gss_pseudoflavor_supported(u32 pseudoflavor); +u32 gss_cmp_triples(u32 oid_len, char *oid_data, u32 qop, u32 service); +u32 gss_get_pseudoflavor(struct gss_ctx *ctx_id, u32 qop, u32 service); +u32 gss_pseudoflavor_to_service(u32 pseudoflavor); +/* Both return NULL on failure: */ +struct gss_api_mech * gss_pseudoflavor_to_mech(u32 pseudoflavor); +int gss_pseudoflavor_to_mechOID(u32 pseudoflavor, struct xdr_netobj *mech); + +/* Different mechanisms (e.g., krb5 or spkm3) may implement gss-api, and + * mechanisms may be dynamically registered or unregistered by modules. + * Our only built-in mechanism is a trivial debugging mechanism that provides + * no actual security; the following function registers that mechanism: */ + +void gss_mech_register_debug(void); + +/* Each mechanism is described by the following struct: */ +struct gss_api_mech { + struct xdr_netobj gm_oid; + struct list_head gm_list; + atomic_t gm_count; + struct gss_api_ops *gm_ops; +}; + +/* and must provide the following operations: */ +struct gss_api_ops { + char *name; + u32 (*gss_import_sec_context)( + struct xdr_netobj *input_token, + struct gss_ctx *ctx_id); + u32 (*gss_get_mic)( + struct gss_ctx *ctx_id, + u32 qop, + struct xdr_netobj *message_buffer, + struct xdr_netobj *message_token); + u32 (*gss_verify_mic)( + struct gss_ctx *ctx_id, + struct xdr_netobj *signbuf, + struct xdr_netobj *checksum, + u32 *qstate); + void (*gss_delete_sec_context)( + void *internal_ctx_id); +}; + +/* Returns nonzero on failure. */ +int gss_mech_register(struct xdr_netobj *, struct gss_api_ops *); + +/* Returns nonzero iff someone still has a reference to this mech. */ +int gss_mech_unregister(struct gss_api_mech *); + +/* Returns nonzer iff someone still has a reference to some mech. */ +int gss_mech_unregister_all(void); + +/* returns a mechanism descriptor given an OID, an increments the mechanism's + * reference count. */ +struct gss_api_mech * gss_mech_get_by_OID(struct xdr_netobj *); + +/* Just increments the mechanism's reference count and returns its input: */ +struct gss_api_mech * gss_mech_get(struct gss_api_mech *); + +/* Returns nonzero iff you've released the last reference to this mech. 
+ * Note that for every succesful gss_get_mech call there must be exactly + * one corresponding call to gss_mech_put.*/ +int gss_mech_put(struct gss_api_mech *); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_GSS_API_H */ + diff --git a/include/linux/sunrpc/gss_asn1.h b/include/linux/sunrpc/gss_asn1.h new file mode 100644 index 000000000000..bb5f84adcb5b --- /dev/null +++ b/include/linux/sunrpc/gss_asn1.h @@ -0,0 +1,85 @@ +/* + * linux/include/linux/sunrpc/gss_asn1.h + * + * minimal asn1 for generic encoding/decoding of gss tokens + * + * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, + * lib/gssapi/krb5/gssapiP_krb5.h, and others + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + */ + +/* + * Copyright 1995 by the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. 
+ * + */ + + +#include + +#define SIZEOF_INT 4 + +/* from gssapi_err_generic.h */ +#define G_BAD_SERVICE_NAME (-2045022976L) +#define G_BAD_STRING_UID (-2045022975L) +#define G_NOUSER (-2045022974L) +#define G_VALIDATE_FAILED (-2045022973L) +#define G_BUFFER_ALLOC (-2045022972L) +#define G_BAD_MSG_CTX (-2045022971L) +#define G_WRONG_SIZE (-2045022970L) +#define G_BAD_USAGE (-2045022969L) +#define G_UNKNOWN_QOP (-2045022968L) +#define G_NO_HOSTNAME (-2045022967L) +#define G_BAD_HOSTNAME (-2045022966L) +#define G_WRONG_MECH (-2045022965L) +#define G_BAD_TOK_HEADER (-2045022964L) +#define G_BAD_DIRECTION (-2045022963L) +#define G_TOK_TRUNC (-2045022962L) +#define G_REFLECT (-2045022961L) +#define G_WRONG_TOKID (-2045022960L) + +#define g_OID_equal(o1,o2) \ + (((o1)->len == (o2)->len) && \ + (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0)) + +u32 g_verify_token_header( + struct xdr_netobj *mech, + int *body_size, + unsigned char **buf_in, + int tok_type, + int toksize); + +u32 g_get_mech_oid(struct xdr_netobj *mech, struct xdr_netobj * in_buf); + +int g_token_size( + struct xdr_netobj *mech, + unsigned int body_size); + +void g_make_token_header( + struct xdr_netobj *mech, + int body_size, + unsigned char **buf, + int tok_type); diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h new file mode 100644 index 000000000000..92608a2e574c --- /dev/null +++ b/include/linux/sunrpc/gss_err.h @@ -0,0 +1,177 @@ +/* + * linux/include/sunrpc/gss_err.h + * + * Adapted from MIT Kerberos 5-1.2.1 include/gssapi/gssapi.h + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _LINUX_SUNRPC_GSS_ERR_H +#define _LINUX_SUNRPC_GSS_ERR_H + +#ifdef __KERNEL__ + +typedef unsigned int OM_uint32; + +/* + * Flag bits for context-level services. 
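+ * (For example, a context established with GSS_C_MUTUAL_FLAG and
+ * GSS_C_INTEG_FLAG provides mutual authentication and per-message
+ * integrity protection.)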
+ */ +#define GSS_C_DELEG_FLAG 1 +#define GSS_C_MUTUAL_FLAG 2 +#define GSS_C_REPLAY_FLAG 4 +#define GSS_C_SEQUENCE_FLAG 8 +#define GSS_C_CONF_FLAG 16 +#define GSS_C_INTEG_FLAG 32 +#define GSS_C_ANON_FLAG 64 +#define GSS_C_PROT_READY_FLAG 128 +#define GSS_C_TRANS_FLAG 256 + +/* + * Credential usage options + */ +#define GSS_C_BOTH 0 +#define GSS_C_INITIATE 1 +#define GSS_C_ACCEPT 2 + +/* + * Status code types for gss_display_status + */ +#define GSS_C_GSS_CODE 1 +#define GSS_C_MECH_CODE 2 + + +/* + * Define the default Quality of Protection for per-message services. Note + * that an implementation that offers multiple levels of QOP may either reserve + * a value (for example zero, as assumed here) to mean "default protection", or + * alternatively may simply equate GSS_C_QOP_DEFAULT to a specific explicit + * QOP value. However a value of 0 should always be interpreted by a GSSAPI + * implementation as a request for the default protection level. + */ +#define GSS_C_QOP_DEFAULT 0 + +/* + * Expiration time of 2^32-1 seconds means infinite lifetime for a + * credential or security context + */ +#define GSS_C_INDEFINITE ((OM_uint32) 0xfffffffful) + + +/* Major status codes */ + +#define GSS_S_COMPLETE 0 + +/* + * Some "helper" definitions to make the status code macros obvious. + */ +#define GSS_C_CALLING_ERROR_OFFSET 24 +#define GSS_C_ROUTINE_ERROR_OFFSET 16 +#define GSS_C_SUPPLEMENTARY_OFFSET 0 +#define GSS_C_CALLING_ERROR_MASK ((OM_uint32) 0377ul) +#define GSS_C_ROUTINE_ERROR_MASK ((OM_uint32) 0377ul) +#define GSS_C_SUPPLEMENTARY_MASK ((OM_uint32) 0177777ul) + +/* + * The macros that test status codes for error conditions. Note that the + * GSS_ERROR() macro has changed slightly from the V1 GSSAPI so that it now + * evaluates its argument only once. + */ +#define GSS_CALLING_ERROR(x) \ + ((x) & (GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET)) +#define GSS_ROUTINE_ERROR(x) \ + ((x) & (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET)) +#define GSS_SUPPLEMENTARY_INFO(x) \ + ((x) & (GSS_C_SUPPLEMENTARY_MASK << GSS_C_SUPPLEMENTARY_OFFSET)) +#define GSS_ERROR(x) \ + ((x) & ((GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET) | \ + (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET))) + +/* + * Now the actual status code definitions + */ + +/* + * Calling errors: + */ +#define GSS_S_CALL_INACCESSIBLE_READ \ + (((OM_uint32) 1ul) << GSS_C_CALLING_ERROR_OFFSET) +#define GSS_S_CALL_INACCESSIBLE_WRITE \ + (((OM_uint32) 2ul) << GSS_C_CALLING_ERROR_OFFSET) +#define GSS_S_CALL_BAD_STRUCTURE \ + (((OM_uint32) 3ul) << GSS_C_CALLING_ERROR_OFFSET) + +/* + * Routine errors: + */ +#define GSS_S_BAD_MECH (((OM_uint32) 1ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_NAME (((OM_uint32) 2ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_NAMETYPE (((OM_uint32) 3ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_BINDINGS (((OM_uint32) 4ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_STATUS (((OM_uint32) 5ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_SIG (((OM_uint32) 6ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NO_CRED (((OM_uint32) 7ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NO_CONTEXT (((OM_uint32) 8ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DEFECTIVE_TOKEN (((OM_uint32) 9ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DEFECTIVE_CREDENTIAL \ + (((OM_uint32) 10ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_CREDENTIALS_EXPIRED \ + (((OM_uint32) 11ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_CONTEXT_EXPIRED \ + (((OM_uint32) 12ul) << 
GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_FAILURE (((OM_uint32) 13ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_QOP (((OM_uint32) 14ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_UNAUTHORIZED (((OM_uint32) 15ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_UNAVAILABLE (((OM_uint32) 16ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DUPLICATE_ELEMENT \ + (((OM_uint32) 17ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NAME_NOT_MN \ + (((OM_uint32) 18ul) << GSS_C_ROUTINE_ERROR_OFFSET) + +/* + * Supplementary info bits: + */ +#define GSS_S_CONTINUE_NEEDED (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 0)) +#define GSS_S_DUPLICATE_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 1)) +#define GSS_S_OLD_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 2)) +#define GSS_S_UNSEQ_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 3)) +#define GSS_S_GAP_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 4)) + +/* XXXX these are not part of the GSSAPI C bindings! (but should be) */ + +#define GSS_CALLING_ERROR_FIELD(x) \ + (((x) >> GSS_C_CALLING_ERROR_OFFSET) & GSS_C_CALLING_ERROR_MASK) +#define GSS_ROUTINE_ERROR_FIELD(x) \ + (((x) >> GSS_C_ROUTINE_ERROR_OFFSET) & GSS_C_ROUTINE_ERROR_MASK) +#define GSS_SUPPLEMENTARY_INFO_FIELD(x) \ + (((x) >> GSS_C_SUPPLEMENTARY_OFFSET) & GSS_C_SUPPLEMENTARY_MASK) + +/* XXXX This is a necessary evil until the spec is fixed */ +#define GSS_S_CRED_UNAVAIL GSS_S_FAILURE + +#endif /* __KERNEL__ */ +#endif /* __LINUX_SUNRPC_GSS_ERR_H */ diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 7ec27ed3767b..15f115332389 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -20,7 +20,18 @@ enum rpc_auth_flavors { RPC_AUTH_SHORT = 2, RPC_AUTH_DES = 3, RPC_AUTH_KRB = 4, + RPC_AUTH_GSS = 6, RPC_AUTH_MAXFLAVOR = 8, + /* pseudoflavors: */ + RPC_AUTH_GSS_KRB5 = 390003, + RPC_AUTH_GSS_KRB5I = 390004, + RPC_AUTH_GSS_KRB5P = 390005, + RPC_AUTH_GSS_LKEY = 390006, + RPC_AUTH_GSS_LKEYI = 390007, + RPC_AUTH_GSS_LKEYP = 390008, + RPC_AUTH_GSS_SPKM = 390009, + RPC_AUTH_GSS_SPKMI = 390010, + RPC_AUTH_GSS_SPKMP = 390011, }; enum rpc_msg_type { @@ -53,7 +64,10 @@ enum rpc_auth_stat { RPC_AUTH_REJECTEDCRED = 2, RPC_AUTH_BADVERF = 3, RPC_AUTH_REJECTEDVERF = 4, - RPC_AUTH_TOOWEAK = 5 + RPC_AUTH_TOOWEAK = 5, + /* RPCSEC_GSS errors */ + RPCSEC_GSS_CREDPROBLEM = 13, + RPCSEC_GSS_CTXPROBLEM = 14 }; #define RPC_PMAP_PROGRAM 100000 diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index dc77707c649d..89fc9268b339 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -12,6 +12,7 @@ #include #include #include +#include /* * This is the actual RPC procedure call info. 
@@ -47,6 +48,8 @@ struct rpc_task { __u8 tk_garb_retry, tk_cred_retry, tk_suid_retry; + u32 tk_gss_seqno; /* rpcsec_gss sequence number + used on this request */ /* * timeout_fn to be executed by timer bottom half diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index fd871e87f7f4..92934030ce92 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -80,7 +80,9 @@ struct xdr_buf { #define rpc_autherr_badverf __constant_htonl(RPC_AUTH_BADVERF) #define rpc_autherr_rejectedverf __constant_htonl(RPC_AUTH_REJECTEDVERF) #define rpc_autherr_tooweak __constant_htonl(RPC_AUTH_TOOWEAK) - +#define rpcsec_gsserr_credproblem __constant_htonl(RPCSEC_GSS_CREDPROBLEM) +#define rpcsec_gsserr_ctxproblem __constant_htonl(RPCSEC_GSS_CTXPROBLEM) +#define rpc_autherr_oldseqnum __constant_htonl(101) /* * Miscellaneous XDR helper functions diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index e83bedaf58f7..ae33217a21fa 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -2,6 +2,8 @@ # Makefile for Linux kernel SUN RPC # +obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ + obj-$(CONFIG_SUNRPC) += sunrpc.o export-objs := sunrpc_syms.o diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index c930689b1982..56c0ee0163e1 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -24,6 +24,13 @@ static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { NULL, /* others can be loadable modules */ }; +u32 +pseudoflavor_to_flavor(u32 flavor) { + if (flavor >= RPC_AUTH_MAXFLAVOR) + return RPC_AUTH_GSS; + return flavor; +} + int rpcauth_register(struct rpc_authops *ops) { @@ -51,13 +58,14 @@ rpcauth_unregister(struct rpc_authops *ops) } struct rpc_auth * -rpcauth_create(rpc_authflavor_t flavor, struct rpc_clnt *clnt) +rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { struct rpc_authops *ops; + u32 flavor = pseudoflavor_to_flavor(pseudoflavor); if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor])) return NULL; - clnt->cl_auth = ops->create(clnt); + clnt->cl_auth = ops->create(clnt, pseudoflavor); return clnt->cl_auth; } @@ -218,7 +226,7 @@ retry: rpcauth_destroy_credlist(&free); if (!cred) { - new = auth->au_ops->crcreate(acred, taskflags); + new = auth->au_ops->crcreate(auth, acred, taskflags); if (new) { #ifdef RPC_DEBUG new->cr_magic = RPCAUTH_CRED_MAGIC; diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile new file mode 100644 index 000000000000..a621ec40ac04 --- /dev/null +++ b/net/sunrpc/auth_gss/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for Linux kernel rpcsec_gss implementation +# + +obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o + +export-objs := sunrpcgss_syms.o + +auth_rpcgss-objs := auth_gss.o gss_pseudoflavors.o gss_generic_token.o \ + sunrpcgss_syms.o gss_mech_switch.o + diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c new file mode 100644 index 000000000000..1959a7b21a4e --- /dev/null +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -0,0 +1,378 @@ +/* + * linux/net/sunrpc/auth_gss.c + * + * RPCSEC_GSS client authentication. + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Dug Song + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $Id$ + */ + + +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct rpc_authops authgss_ops; + +static struct rpc_credops gss_credops; + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +#define NFS_NGROUPS 16 + +#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */ +#define GSS_CRED_SLACK 1024 /* XXX: unused */ +#define GSS_VERF_SLACK 48 /* length of a krb5 verifier.*/ + +/* XXX this define must match the gssd define +* as it is passed to gssd to signal the use of +* machine creds should be part of the shared rpc interface */ + +#define CA_RUN_AS_MACHINE 0x00000200 + +/* dump the buffer in `emacs-hexl' style */ +#define isprint(c) ((c > 0x1f) && (c < 0x7f)) + +void +print_hexl(u32 *p, u_int length, u_int offset) +{ + u_int i, j, jm; + u8 c, *cp; + + dprintk("RPC: print_hexl: length %d\n",length); + dprintk("\n"); + cp = (u8 *) p; + + for (i = 0; i < length; i += 0x10) { + dprintk(" %04x: ", (u_int)(i + offset)); + jm = length - i; + jm = jm > 16 ? 16 : jm; + + for (j = 0; j < jm; j++) { + if ((j % 2) == 1) + dprintk("%02x ", (u_int)cp[i+j]); + else + dprintk("%02x", (u_int)cp[i+j]); + } + for (; j < 16; j++) { + if ((j % 2) == 1) + dprintk(" "); + else + dprintk(" "); + } + dprintk(" "); + + for (j = 0; j < jm; j++) { + c = cp[i+j]; + c = isprint(c) ? 
c : '.'; + dprintk("%c", c); + } + dprintk("\n"); + } +} + + +/* + * NOTE: we have the opportunity to use different + * parameters based on the input flavor (which must be a pseudoflavor) + */ +static struct rpc_auth * +gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) +{ + struct rpc_auth * auth; + + dprintk("RPC: creating GSS authenticator for client %p\n",clnt); + if (!try_module_get(THIS_MODULE)) + return NULL; + if (!(auth = kmalloc(sizeof(*auth), GFP_KERNEL))) + goto out_dec; + auth->au_cslack = GSS_CRED_SLACK >> 2; + auth->au_rslack = GSS_VERF_SLACK >> 2; + auth->au_expire = GSS_CRED_EXPIRE; + auth->au_ops = &authgss_ops; + auth->au_flavor = flavor; + + rpcauth_init_credcache(auth); + + return auth; +out_dec: + module_put(THIS_MODULE); + return NULL; +} + +static void +gss_destroy(struct rpc_auth *auth) +{ + dprintk("RPC: destroying GSS authenticator %p flavor %d\n", + auth, auth->au_flavor); + + rpcauth_free_credcache(auth); + + kfree(auth); + module_put(THIS_MODULE); +} + +/* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure + * to create a new cred or context, so they check that things have been + * allocated before freeing them. */ +void +gss_destroy_ctx(struct gss_cl_ctx *ctx) +{ + + dprintk("RPC: gss_destroy_ctx\n"); + + if (ctx->gc_gss_ctx) + gss_delete_sec_context(&ctx->gc_gss_ctx); + + if (ctx->gc_wire_ctx.len > 0) { + kfree(ctx->gc_wire_ctx.data); + ctx->gc_wire_ctx.len = 0; + } + + kfree(ctx); + +} + +static void +gss_destroy_cred(struct rpc_cred *rc) +{ + struct gss_cred *cred = (struct gss_cred *)rc; + + dprintk("RPC: gss_destroy_cred \n"); + + if (cred->gc_ctx) + gss_destroy_ctx(cred->gc_ctx); + kfree(cred); +} + +static struct rpc_cred * +gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags) +{ + struct gss_cred *cred = NULL; + + dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", + acred->uid, auth->au_flavor); + + if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + goto out_err; + + memset(cred, 0, sizeof(*cred)); + atomic_set(&cred->gc_count, 0); + cred->gc_uid = acred->uid; + /* + * Note: in order to force a call to call_refresh(), we deliberately + * fail to flag the credential as RPCAUTH_CRED_UPTODATE. + */ + cred->gc_flags = 0; + cred->gc_base.cr_ops = &gss_credops; + cred->gc_flavor = auth->au_flavor; + + return (struct rpc_cred *) cred; + +out_err: + dprintk("RPC: gss_create_cred failed\n"); + if (cred) gss_destroy_cred((struct rpc_cred *)cred); + return NULL; +} + +static int +gss_match(struct auth_cred *acred, struct rpc_cred *rc, int taskflags) +{ + return (rc->cr_uid == acred->uid); +} + +/* +* Marshal credentials. +* Maybe we should keep a cached credential for performance reasons. +*/ +static u32 * +gss_marshal(struct rpc_task *task, u32 *p, int ruid) +{ + struct gss_cred *cred = (struct gss_cred *) task->tk_msg.rpc_cred; + struct gss_cl_ctx *ctx = cred->gc_ctx; + u32 *cred_len; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + u32 *verfbase = req->rq_svec[0].iov_base; + u32 maj_stat = 0; + struct xdr_netobj bufin,bufout; + u32 service; + + dprintk("RPC: gss_marshal\n"); + + /* We compute the checksum for the verifier over the xdr-encoded bytes + * starting with the xid (which verfbase points to) and ending at + * the end of the credential. 
*/ + if (xprt->stream) + verfbase++; /* See clnt.c:call_header() */ + + *p++ = htonl(RPC_AUTH_GSS); + cred_len = p++; + + service = gss_pseudoflavor_to_service(cred->gc_flavor); + if (service == 0) { + dprintk("Bad pseudoflavor %d in gss_marshal\n", + cred->gc_flavor); + return NULL; + } + spin_lock(&ctx->gc_seq_lock); + task->tk_gss_seqno = ctx->gc_seq++; + spin_unlock(&ctx->gc_seq_lock); + + *p++ = htonl((u32) RPC_GSS_VERSION); + *p++ = htonl((u32) ctx->gc_proc); + *p++ = htonl((u32) task->tk_gss_seqno); + *p++ = htonl((u32) service); + p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); + *cred_len = htonl((p - (cred_len + 1)) << 2); + + /* Marshal verifier. */ + bufin.data = (u8 *)verfbase; + bufin.len = (p - verfbase) << 2; + + /* set verifier flavor*/ + *p++ = htonl(RPC_AUTH_GSS); + + maj_stat = gss_get_mic(ctx->gc_gss_ctx, + GSS_C_QOP_DEFAULT, + &bufin, &bufout); + if(maj_stat != 0){ + printk("gss_marshal: gss_get_mic FAILED (%d)\n", + maj_stat); + return(NULL); + } + p = xdr_encode_netobj(p, &bufout); + return p; +} + +/* +* Refresh credentials. XXX - finish +*/ +static int +gss_refresh(struct rpc_task *task) +{ + /* Insert upcall here ! */ + task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + return task->tk_status = -EACCES; +} + +static u32 * +gss_validate(struct rpc_task *task, u32 *p) +{ + struct gss_cred *cred = (struct gss_cred *)task->tk_msg.rpc_cred; + struct gss_cl_ctx *ctx = cred->gc_ctx; + u32 seq, qop_state; + struct xdr_netobj bufin; + struct xdr_netobj bufout; + u32 flav,len; + int code = 0; + + dprintk("RPC: gss_validate\n"); + + flav = ntohl(*p++); + if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) { + printk("RPC: giant verf size: %ld\n", (unsigned long) len); + return NULL; + } + dprintk("RPC: gss_validate: verifier flavor %d, len %d\n", flav, len); + + if (flav != RPC_AUTH_GSS) { + printk("RPC: bad verf flavor: %ld\n", (unsigned long)flav); + return NULL; + } + seq = htonl(task->tk_gss_seqno); + bufin.data = (u8 *) &seq; + bufin.len = sizeof(seq); + bufout.data = (u8 *) p; + bufout.len = len; + + if ((code = gss_verify_mic(ctx->gc_gss_ctx, + &bufin, &bufout, &qop_state) < 0)) + return NULL; + task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; + dprintk("RPC: GSS gss_validate: gss_verify_mic succeeded.\n"); + return p + XDR_QUADLEN(len); +} + +static struct rpc_authops authgss_ops = { + .au_flavor = RPC_AUTH_GSS, +#ifdef RPC_DEBUG + .au_name = "RPCSEC_GSS", +#endif + .create = gss_create, + .destroy = gss_destroy, + .crcreate = gss_create_cred +}; + +static struct rpc_credops gss_credops = { + .crdestroy = gss_destroy_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh, + .crvalidate = gss_validate, +}; + +extern void gss_svc_ctx_init(void); + +/* + * Initialize RPCSEC_GSS module + */ +static int __init init_rpcsec_gss(void) +{ + int err = 0; + + err = rpcauth_register(&authgss_ops); + return err; +} + +static void __exit exit_rpcsec_gss(void) +{ + gss_mech_unregister_all(); + rpcauth_unregister(&authgss_ops); +} + +MODULE_LICENSE("GPL"); +module_init(init_rpcsec_gss) +module_exit(exit_rpcsec_gss) diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c new file mode 100644 index 000000000000..9a948de035de --- /dev/null +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -0,0 +1,269 @@ +/* + * linux/net/sunrpc/gss_generic_token.c + * + * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. 
+ * + * Andy Adamson + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include + + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + + +/* TWRITE_STR from gssapiP_generic.h */ +#define TWRITE_STR(ptr, str, len) \ + memcpy((ptr), (char *) (str), (len)); \ + (ptr) += (len); + +/* XXXX this code currently makes the assumption that a mech oid will + never be longer than 127 bytes. This assumption is not inherent in + the interfaces, so the code can be fixed if the OSI namespace + balloons unexpectedly. */ + +/* Each token looks like this: + +0x60 tag for APPLICATION 0, SEQUENCE + (constructed, definite-length) + possible multiple bytes, need to parse/generate + 0x06 tag for OBJECT IDENTIFIER + compile-time constant string (assume 1 byte) + compile-time constant string + the ANY containing the application token + bytes 0,1 are the token type + bytes 2,n are the token data + +For the purposes of this abstraction, the token "header" consists of +the sequence tag and length octets, the mech OID DER encoding, and the +first two inner bytes, which indicate the token type. The token +"body" consists of everything else. + +*/ + +static int +der_length_size( int length) +{ + if (length < (1<<7)) + return(1); + else if (length < (1<<8)) + return(2); +#if (SIZEOF_INT == 2) + else + return(3); +#else + else if (length < (1<<16)) + return(3); + else if (length < (1<<24)) + return(4); + else + return(5); +#endif +} + +static void +der_write_length(unsigned char **buf, int length) +{ + if (length < (1<<7)) { + *(*buf)++ = (unsigned char) length; + } else { + *(*buf)++ = (unsigned char) (der_length_size(length)+127); +#if (SIZEOF_INT > 2) + if (length >= (1<<24)) + *(*buf)++ = (unsigned char) (length>>24); + if (length >= (1<<16)) + *(*buf)++ = (unsigned char) ((length>>16)&0xff); +#endif + if (length >= (1<<8)) + *(*buf)++ = (unsigned char) ((length>>8)&0xff); + *(*buf)++ = (unsigned char) (length&0xff); + } +} + +/* returns decoded length, or < 0 on failure. 
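+   (The encoding is standard DER definite length: values below 128 occupy a
+   single octet, while e.g. 300 becomes the three octets 0x82 0x01 0x2c,
+   i.e. 0x80 plus the number of length octets, then the value big-endian.)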
Advances buf and + decrements bufsize */ + +static int +der_read_length(unsigned char **buf, int *bufsize) +{ + unsigned char sf; + int ret; + + if (*bufsize < 1) + return(-1); + sf = *(*buf)++; + (*bufsize)--; + if (sf & 0x80) { + if ((sf &= 0x7f) > ((*bufsize)-1)) + return(-1); + if (sf > SIZEOF_INT) + return (-1); + ret = 0; + for (; sf; sf--) { + ret = (ret<<8) + (*(*buf)++); + (*bufsize)--; + } + } else { + ret = sf; + } + + return(ret); +} + +/* returns the length of a token, given the mech oid and the body size */ + +int +g_token_size(struct xdr_netobj *mech, unsigned int body_size) +{ + /* set body_size to sequence contents size */ + body_size += 4 + (int) mech->len; /* NEED overflow check */ + return(1 + der_length_size(body_size) + body_size); +} + +/* fills in a buffer with the token header. The buffer is assumed to + be the right size. buf is advanced past the token header */ + +void +g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf, + int tok_type) +{ + *(*buf)++ = 0x60; + der_write_length(buf, 4 + mech->len + body_size); + *(*buf)++ = 0x06; + *(*buf)++ = (unsigned char) mech->len; + TWRITE_STR(*buf, mech->data, ((int) mech->len)); + *(*buf)++ = (unsigned char) ((tok_type>>8)&0xff); + *(*buf)++ = (unsigned char) (tok_type&0xff); +} + +/* + * Given a buffer containing a token, reads and verifies the token, + * leaving buf advanced past the token header, and setting body_size + * to the number of remaining bytes. Returns 0 on success, + * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the + * mechanism in the token does not match the mech argument. buf and + * *body_size are left unmodified on error. + */ +u32 +g_verify_token_header(struct xdr_netobj *mech, int *body_size, + unsigned char **buf_in, int tok_type, int toksize) +{ + unsigned char *buf = *buf_in; + int seqsize; + struct xdr_netobj toid; + int ret = 0; + + if ((toksize-=1) < 0) + return(G_BAD_TOK_HEADER); + if (*buf++ != 0x60) + return(G_BAD_TOK_HEADER); + + if ((seqsize = der_read_length(&buf, &toksize)) < 0) + return(G_BAD_TOK_HEADER); + + if (seqsize != toksize) + return(G_BAD_TOK_HEADER); + + if ((toksize-=1) < 0) + return(G_BAD_TOK_HEADER); + if (*buf++ != 0x06) + return(G_BAD_TOK_HEADER); + + if ((toksize-=1) < 0) + return(G_BAD_TOK_HEADER); + toid.len = *buf++; + + if ((toksize-=toid.len) < 0) + return(G_BAD_TOK_HEADER); + toid.data = buf; + buf+=toid.len; + + if (! g_OID_equal(&toid, mech)) + ret = G_WRONG_MECH; + + /* G_WRONG_MECH is not returned immediately because it's more important + to return G_BAD_TOK_HEADER if the token header is in fact bad */ + + if ((toksize-=2) < 0) + return(G_BAD_TOK_HEADER); + + if (ret) + return(ret); + + if ((*buf++ != ((tok_type>>8)&0xff)) || (*buf++ != (tok_type&0xff))) + return(G_WRONG_TOKID); + + if (!ret) { + *buf_in = buf; + *body_size = toksize; + } + + return(ret); +} + +/* Given a buffer containing a token, returns a copy of the mech oid in + * the parameter mech. 
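+ * Note that mech->data is kmalloc()ed here, so the caller owns it and is
+ * responsible for freeing it.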
*/ +u32 +g_get_mech_oid(struct xdr_netobj *mech, struct xdr_netobj * in_buf) +{ + unsigned char *buf = in_buf->data; + int len = in_buf->len; + int ret=0; + int seqsize; + + if ((len-=1) < 0) + return(G_BAD_TOK_HEADER); + if (*buf++ != 0x60) + return(G_BAD_TOK_HEADER); + + if ((seqsize = der_read_length(&buf, &len)) < 0) + return(G_BAD_TOK_HEADER); + + if ((len-=1) < 0) + return(G_BAD_TOK_HEADER); + if (*buf++ != 0x06) + return(G_BAD_TOK_HEADER); + + if ((len-=1) < 0) + return(G_BAD_TOK_HEADER); + mech->len = *buf++; + + if ((len-=mech->len) < 0) + return(G_BAD_TOK_HEADER); + if (!(mech->data = kmalloc(mech->len, GFP_KERNEL))) + return(G_BUFFER_ALLOC); + memcpy(mech->data, buf, mech->len); + + return ret; +} diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c new file mode 100644 index 000000000000..429ab78a2fea --- /dev/null +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -0,0 +1,243 @@ +/* + * linux/net/sunrpc/gss_mech_switch.c + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * J. Bruce Fields + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +static LIST_HEAD(registered_mechs); +static spinlock_t registered_mechs_lock = SPIN_LOCK_UNLOCKED; + +/* Reference counting: The reference count includes the reference in the + * global registered_mechs list. That reference will never diseappear + * (so the reference count will never go below 1) until after the mech + * is removed from the list. Nothing can be removed from the list without + * first getting the registered_mechs_lock, so a gss_api_mech won't diseappear + * from underneath us while we hold the registered_mech_lock. 
*/ + +int +gss_mech_register(struct xdr_netobj * mech_type, struct gss_api_ops * ops) +{ + struct gss_api_mech *gm; + + if (!(gm = kmalloc(sizeof(*gm), GFP_KERNEL))) { + printk("Failed to allocate memory in gss_mech_register"); + return -1; + } + gm->gm_oid.len = mech_type->len; + if (!(gm->gm_oid.data = kmalloc(mech_type->len, GFP_KERNEL))) { + printk("Failed to allocate memory in gss_mech_register"); + return -1; + } + memcpy(gm->gm_oid.data, mech_type->data, mech_type->len); + /* We're counting the reference in the registered_mechs list: */ + atomic_set(&gm->gm_count, 1); + gm->gm_ops = ops; + + spin_lock(®istered_mechs_lock); + list_add(&gm->gm_list, ®istered_mechs); + spin_unlock(®istered_mechs_lock); + dprintk("RPC: gss_mech_register: registered mechanism with oid:\n"); + print_hexl((u32 *)mech_type->data, mech_type->len, 0); + return 0; +} + +/* The following must be called with spinlock held: */ +int +do_gss_mech_unregister(struct gss_api_mech *gm) +{ + + list_del(&gm->gm_list); + + dprintk("RPC: unregistered mechanism with oid:\n"); + print_hexl((u32 *)gm->gm_oid.data, gm->gm_oid.len, 0); + if (!gss_mech_put(gm)) { + dprintk("RPC: We just unregistered a gss_mechanism which" + " someone is still using.\n"); + return -1; + } else { + return 0; + } +} + +int +gss_mech_unregister(struct gss_api_mech *gm) +{ + int status; + + spin_lock(®istered_mechs_lock); + status = do_gss_mech_unregister(gm); + spin_unlock(®istered_mechs_lock); + return status; +} + +int +gss_mech_unregister_all(void) +{ + struct list_head *pos; + struct gss_api_mech *gm; + int status = 0; + + spin_lock(®istered_mechs_lock); + while (!list_empty(®istered_mechs)) { + pos = registered_mechs.next; + gm = list_entry(pos, struct gss_api_mech, gm_list); + if (do_gss_mech_unregister(gm)) + status = -1; + } + spin_unlock(®istered_mechs_lock); + return status; +} + +struct gss_api_mech * +gss_mech_get(struct gss_api_mech *gm) +{ + atomic_inc(&gm->gm_count); + return gm; +} + +struct gss_api_mech * +gss_mech_get_by_OID(struct xdr_netobj *mech_type) +{ + struct gss_api_mech *pos, *gm = NULL; + + dprintk("RPC: gss_mech_get_by_OID searching for mechanism with OID:\n"); + print_hexl((u32 *)mech_type->data, mech_type->len, 0); + spin_lock(®istered_mechs_lock); + list_for_each_entry(pos, ®istered_mechs, gm_list) { + if ((pos->gm_oid.len == mech_type->len) + && !memcmp(pos->gm_oid.data, mech_type->data, + mech_type->len)) { + gm = gss_mech_get(pos); + break; + } + } + spin_unlock(®istered_mechs_lock); + dprintk("RPC: gss_mech_get_by_OID %s it\n", gm ? "found" : "didn't find"); + return gm; +} + +int +gss_mech_put(struct gss_api_mech * gm) +{ + if (atomic_dec_and_test(&gm->gm_count)) { + if (gm->gm_oid.len >0) + kfree(gm->gm_oid.data); + kfree(gm); + return 1; + } else { + return 0; + } +} + +/* The mech could probably be determined from the token instead, but it's just + * as easy for now to pass it in. */ +u32 +gss_import_sec_context(struct xdr_netobj *input_token, + struct gss_api_mech *mech, + struct gss_ctx **ctx_id) +{ + if (!(*ctx_id = kmalloc(sizeof(**ctx_id), GFP_KERNEL))) + return GSS_S_FAILURE; + memset(*ctx_id, 0, sizeof(**ctx_id)); + (*ctx_id)->mech_type = gss_mech_get(mech); + + return mech->gm_ops + ->gss_import_sec_context(input_token, *ctx_id); +} + +/* gss_verify_mic: hash messages_buffer and return gss verify token. 
*/ + +u32 +gss_get_mic(struct gss_ctx *context_handle, + u32 qop, + struct xdr_netobj *message_buffer, + struct xdr_netobj *message_token) +{ + return context_handle->mech_type->gm_ops + ->gss_get_mic(context_handle, + qop, + message_buffer, + message_token); +} + +/* gss_verify_mic: hash messages_buffer and return gss verify token. */ + +u32 +gss_verify_mic(struct gss_ctx *context_handle, + struct xdr_netobj *signbuf, + struct xdr_netobj *checksum, + u32 *qstate) +{ + return context_handle->mech_type->gm_ops + ->gss_verify_mic(context_handle, + signbuf, + checksum, + qstate); +} + +/* gss_delete_sec_context: free all resources associated with context_handle. + * Note this differs from the RFC 2744-specified prototype in that we don't + * bother returning an output token, since it would never be used anyway. */ + +u32 +gss_delete_sec_context(struct gss_ctx **context_handle) +{ + dprintk("gss_delete_sec_context deleting %p\n",*context_handle); + + if (!*context_handle) + return(GSS_S_NO_CONTEXT); + if ((*context_handle)->internal_ctx_id != 0) + (*context_handle)->mech_type->gm_ops + ->gss_delete_sec_context((*context_handle) + ->internal_ctx_id); + if ((*context_handle)->mech_type) + gss_mech_put((*context_handle)->mech_type); + kfree(*context_handle); + *context_handle=NULL; + return GSS_S_COMPLETE; +} diff --git a/net/sunrpc/auth_gss/gss_pseudoflavors.c b/net/sunrpc/auth_gss/gss_pseudoflavors.c new file mode 100644 index 000000000000..9fa384588473 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_pseudoflavors.c @@ -0,0 +1,235 @@ +/* + * linux/net/sunrpc/gss_union.c + * + * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic code + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +static LIST_HEAD(registered_triples); +static spinlock_t registered_triples_lock = SPIN_LOCK_UNLOCKED; + +/* The following must be called with spinlock held: */ +static struct sup_sec_triple * +do_lookup_triple_by_pseudoflavor(u32 pseudoflavor) +{ + struct sup_sec_triple *pos, *triple = NULL; + + list_for_each_entry(pos, ®istered_triples, triples) { + if (pos->pseudoflavor == pseudoflavor) { + triple = pos; + break; + } + } + return triple; +} + +/* XXX Need to think about reference counting of triples and of mechs. + * Currently we do no reference counting of triples, and I think that's + * probably OK given the reference counting on mechs, but there's probably + * a better way to do all this. */ + +int +gss_register_triple(u32 pseudoflavor, struct gss_api_mech *mech, + u32 qop, u32 service) +{ + struct sup_sec_triple *triple; + + if (!(triple = kmalloc(sizeof(*triple), GFP_KERNEL))) { + printk("Alloc failed in gss_register_triple"); + goto err; + } + triple->pseudoflavor = pseudoflavor; + triple->mech = gss_mech_get_by_OID(&mech->gm_oid); + triple->qop = qop; + triple->service = service; + + spin_lock(®istered_triples_lock); + if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) { + printk("Registered pseudoflavor %d again\n", pseudoflavor); + goto err_unlock; + } + list_add(&triple->triples, ®istered_triples); + spin_unlock(®istered_triples_lock); + dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor); + + return 0; + +err_unlock: + spin_unlock(®istered_triples_lock); +err: + return -1; +} + +int +gss_unregister_triple(u32 pseudoflavor) +{ + struct sup_sec_triple *triple; + + spin_lock(®istered_triples_lock); + if (!(triple = do_lookup_triple_by_pseudoflavor(pseudoflavor))) { + spin_unlock(®istered_triples_lock); + printk("Can't unregister unregistered pseudoflavor %d\n", + pseudoflavor); + return -1; + } + list_del(&triple->triples); + spin_unlock(®istered_triples_lock); + gss_mech_put(triple->mech); + kfree(triple); + return 0; + +} + +void +print_sec_triple(struct xdr_netobj *oid,u32 qop,u32 service) +{ + dprintk("RPC: print_sec_triple:\n"); + dprintk(" oid_len %d\n oid :\n",oid->len); + print_hexl((u32 *)oid->data,oid->len,0); + dprintk(" qop %d\n",qop); + dprintk(" service %d\n",service); +} + +/* Function: gss_get_cmp_triples + * + * Description: search sec_triples for a matching security triple + * return pseudoflavor if match, else 0 + * (Note that 0 is a valid pseudoflavor, but not for any gss pseudoflavor + * (0 means auth_null), so this shouldn't cause confusion.) 
+ */ +u32 +gss_cmp_triples(u32 oid_len, char *oid_data, u32 qop, u32 service) +{ + struct sup_sec_triple *triple; + u32 pseudoflavor = 0; + struct xdr_netobj oid; + + oid.len = oid_len; + oid.data = oid_data; + + dprintk("RPC: gss_cmp_triples \n"); + print_sec_triple(&oid,qop,service); + + spin_lock(®istered_triples_lock); + list_for_each_entry(triple, ®istered_triples, triples) { + if((g_OID_equal(&oid, &triple->mech->gm_oid)) + && (qop == triple->qop) + && (service == triple->service)) { + pseudoflavor = triple->pseudoflavor; + break; + } + } + spin_unlock(®istered_triples_lock); + dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor); + return pseudoflavor; +} + +u32 +gss_get_pseudoflavor(struct gss_ctx *ctx, u32 qop, u32 service) +{ + return gss_cmp_triples(ctx->mech_type->gm_oid.len, + ctx->mech_type->gm_oid.data, + qop, service); +} + +/* Returns nonzero iff the given pseudoflavor is in the supported list. + * (Note that without incrementing a reference count or anything, this + * doesn't give any guarantees.) */ +int +gss_pseudoflavor_supported(u32 pseudoflavor) +{ + struct sup_sec_triple *triple; + + spin_lock(®istered_triples_lock); + triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); + spin_unlock(®istered_triples_lock); + return (triple ? 1 : 0); +} + +u32 +gss_pseudoflavor_to_service(u32 pseudoflavor) +{ + struct sup_sec_triple *triple; + + spin_lock(®istered_triples_lock); + triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); + spin_unlock(®istered_triples_lock); + if (!triple) { + dprintk("RPC: gss_pseudoflavor_to_service called with" + " unsupported pseudoflavor %d\n", pseudoflavor); + return 0; + } + return triple->service; +} + +struct gss_api_mech * +gss_pseudoflavor_to_mech(u32 pseudoflavor) { + struct sup_sec_triple *triple; + struct gss_api_mech *mech = NULL; + + spin_lock(®istered_triples_lock); + triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); + spin_unlock(®istered_triples_lock); + if (triple) + mech = gss_mech_get(triple->mech); + else + dprintk("RPC: gss_pseudoflavor_to_mech called with" + " unsupported pseudoflavor %d\n", pseudoflavor); + return mech; +} + +int +gss_pseudoflavor_to_mechOID(u32 pseudoflavor, struct xdr_netobj * oid) +{ + struct gss_api_mech *mech; + + mech = gss_pseudoflavor_to_mech(pseudoflavor); + if (!mech) { + dprintk("RPC: gss_pseudoflavor_to_mechOID called with" + " unsupported pseudoflavor %d\n", pseudoflavor); + return -1; + } + oid->len = mech->gm_oid.len; + if (!(oid->data = kmalloc(oid->len, GFP_KERNEL))) + return -1; + memcpy(oid->data, mech->gm_oid.data, oid->len); + gss_mech_put(mech); + return 0; +} diff --git a/net/sunrpc/auth_gss/sunrpcgss_syms.c b/net/sunrpc/auth_gss/sunrpcgss_syms.c new file mode 100644 index 000000000000..307da4aad27e --- /dev/null +++ b/net/sunrpc/auth_gss/sunrpcgss_syms.c @@ -0,0 +1,34 @@ +#define __NO_VERSION__ +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +/* sec_triples: */ +EXPORT_SYMBOL(gss_register_triple); +EXPORT_SYMBOL(gss_unregister_triple); +EXPORT_SYMBOL(gss_cmp_triples); +EXPORT_SYMBOL(gss_pseudoflavor_to_mechOID); +EXPORT_SYMBOL(gss_pseudoflavor_supported); +EXPORT_SYMBOL(gss_pseudoflavor_to_service); + +/* registering gss mechanisms to the mech switching code: */ +EXPORT_SYMBOL(gss_mech_register); +EXPORT_SYMBOL(gss_mech_get); +EXPORT_SYMBOL(gss_mech_get_by_OID); +EXPORT_SYMBOL(gss_mech_put); + +/* generic functionality in gss code: */ +EXPORT_SYMBOL(g_make_token_header); +EXPORT_SYMBOL(g_verify_token_header); 
+EXPORT_SYMBOL(g_token_size); + +/* debug */ +EXPORT_SYMBOL(print_hexl); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index c716c7415f30..6d1a0fa8e0d2 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -20,7 +20,7 @@ static struct rpc_credops null_credops; static struct rpc_auth * -nul_create(struct rpc_clnt *clnt) +nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { struct rpc_auth *auth; @@ -48,7 +48,7 @@ nul_destroy(struct rpc_auth *auth) * Create NULL creds for current process */ static struct rpc_cred * -nul_create_cred(struct auth_cred *acred, int flags) +nul_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { struct rpc_cred *cred; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 96ce30c1269a..e4a15b94f7f6 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -38,7 +38,7 @@ struct unx_cred { static struct rpc_credops unix_credops; static struct rpc_auth * -unx_create(struct rpc_clnt *clnt) +unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { struct rpc_auth *auth; @@ -64,7 +64,7 @@ unx_destroy(struct rpc_auth *auth) } static struct rpc_cred * -unx_create_cred(struct auth_cred *acred, int flags) +unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { struct unx_cred *cred; int i; @@ -208,7 +208,7 @@ unx_validate(struct rpc_task *task, u32 *p) } size = ntohl(*p++); - if (size > 400) { + if (size > RPC_MAX_AUTH_SIZE) { printk("RPC: giant verf size: %u\n", size); return NULL; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c75a0a33974d..54c2eb392856 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -35,7 +35,7 @@ #include -#define RPC_SLACK_SPACE 512 /* total overkill */ +#define RPC_SLACK_SPACE (1024) /* total overkill */ #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index b499ab61af09..32d08c06d300 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -68,6 +68,8 @@ EXPORT_SYMBOL(xprt_set_timeout); EXPORT_SYMBOL(rpcauth_register); EXPORT_SYMBOL(rpcauth_unregister); EXPORT_SYMBOL(rpcauth_lookupcred); +EXPORT_SYMBOL(rpcauth_free_credcache); +EXPORT_SYMBOL(rpcauth_init_credcache); EXPORT_SYMBOL(put_rpccred); /* RPC server stuff */ -- cgit v1.2.3 From a4419dbb2a96b112350b114019ef540763e0d120 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Jan 2003 21:41:44 -0800 Subject: [PATCH] RPCSEC_GSS client upcall user [5/6] This patches the RPCSEC_GSS client to make use of the upcall mechanism that was provided by patch [3/6]. If an RPC task presents a non-uptodate credential to call_refresh(), a user daemon is contacted by means of a dedicated rpc_pipefs pipe. The daemon is then fed the uid for which it must establish a new RPCSEC security context. While the daemon goes about its business, the RPC task is put to sleep on a wait queue in order to allow the 'rpciod' process to service other requests. If another task wants to use the same credential, it too will be put to sleep once it reaches call_refresh(). A timeout mechanism ensures that requests are retried (or that 'soft' mounts fail) if the daemon crashes / is killed. Once the daemon has established the RPCSEC context, it writes the result back to the pipe, causing the credential to be updated. Those RPC tasks that were sleeping on the context are automatically woken up, and their execution can proceed. 
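The waiting scheme described above can be illustrated with a small stand-alone user-space sketch (it is not part of the patch, and every name in it is hypothetical): the first task that needs a context for a given uid performs the daemon round trip, any later task for the same uid simply sleeps, and the daemon's reply wakes all waiters at once. The kernel code in the patch below implements the same idea with rpc_sleep_on()/rpc_wake_up() and an rpc_pipefs pipe rather than pthreads, and additionally handles the retry timeout mentioned above.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static int upcall_pending;	/* an upcall for this uid is already in flight */
static int ctx_uptodate;	/* the daemon has established the context */

static void daemon_round_trip(void)
{
	/* stands in for writing the uid to the pipe and reading the reply */
	sleep(1);
}

static void *rpc_task(void *arg)
{
	long id = (long)arg;

	pthread_mutex_lock(&lock);
	if (!ctx_uptodate && !upcall_pending) {
		/* first task for this uid: issue the (simulated) upcall */
		upcall_pending = 1;
		pthread_mutex_unlock(&lock);
		daemon_round_trip();
		pthread_mutex_lock(&lock);
		ctx_uptodate = 1;
		upcall_pending = 0;
		/* wake every task sleeping on this context */
		pthread_cond_broadcast(&done);
	}
	while (!ctx_uptodate)
		pthread_cond_wait(&done, &lock);	/* later tasks just sleep here */
	pthread_mutex_unlock(&lock);

	printf("task %ld proceeds with an established context\n", id);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	long i;

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, rpc_task, (void *)i);
	for (i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}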
--- include/linux/sunrpc/auth.h | 1 + include/linux/sunrpc/auth_gss.h | 1 + net/sunrpc/auth.c | 9 +- net/sunrpc/auth_gss/auth_gss.c | 384 ++++++++++++++++++++++++++++++++++++++-- net/sunrpc/sunrpc_syms.c | 1 + 5 files changed, 377 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 4d466e9a4c64..a1ed52af2895 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -115,6 +115,7 @@ int rpcauth_register(struct rpc_authops *); int rpcauth_unregister(struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_destroy(struct rpc_auth *); +struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_bindcred(struct rpc_task *); void rpcauth_holdcred(struct rpc_task *); diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 6f23b9d9ce07..1006c4fd6a99 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -71,6 +71,7 @@ struct rpc_gss_init_res { * the wire when communicating with a server. */ struct gss_cl_ctx { + atomic_t count; u32 gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 56c0ee0163e1..992618adbb84 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -181,7 +181,7 @@ rpcauth_gc_credcache(struct rpc_auth *auth, struct list_head *free) /* * Look up a process' credentials in the authentication cache */ -static struct rpc_cred * +struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, int taskflags) { @@ -360,10 +360,7 @@ rpcauth_invalcred(struct rpc_task *task) int rpcauth_uptodatecred(struct rpc_task *task) { - int retval; - spin_lock(&rpc_credcache_lock); - retval = !(task->tk_msg.rpc_cred) || + return !(task->tk_msg.rpc_cred) || (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); - spin_unlock(&rpc_credcache_lock); - return retval; } + diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 1959a7b21a4e..6b3c84647cb5 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -50,6 +50,8 @@ #include #include #include +#include +#include static struct rpc_authops authgss_ops; @@ -74,6 +76,20 @@ static struct rpc_credops gss_credops; /* dump the buffer in `emacs-hexl' style */ #define isprint(c) ((c > 0x1f) && (c < 0x7f)) +static rwlock_t gss_ctx_lock = RW_LOCK_UNLOCKED; + +struct gss_auth { + struct rpc_auth rpc_auth; + struct gss_api_mech *mech; + struct list_head upcalls; + struct dentry *dentry; + char path[48]; + spinlock_t lock; +}; + +static void gss_destroy_ctx(struct gss_cl_ctx *); +static struct rpc_pipe_ops gss_upcall_ops; + void print_hexl(u32 *p, u_int length, u_int offset) { @@ -112,6 +128,304 @@ print_hexl(u32 *p, u_int length, u_int offset) } } +static inline struct gss_cl_ctx * +gss_get_ctx(struct gss_cl_ctx *ctx) +{ + atomic_inc(&ctx->count); + return ctx; +} + +static inline void +gss_put_ctx(struct gss_cl_ctx *ctx) +{ + if (atomic_dec_and_test(&ctx->count)) + gss_destroy_ctx(ctx); +} + +static void +gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) +{ + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_cl_ctx *old; + write_lock(&gss_ctx_lock); + old = gss_cred->gc_ctx; + gss_cred->gc_ctx = ctx; + cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + 
write_unlock(&gss_ctx_lock); + if (old) + gss_put_ctx(old); +} + +static struct gss_cl_ctx * +gss_cred_get_ctx(struct rpc_cred *cred) +{ + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_cl_ctx *ctx = NULL; + + read_lock(&gss_ctx_lock); + if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) + ctx = gss_get_ctx(gss_cred->gc_ctx); + read_unlock(&gss_ctx_lock); + return ctx; +} + +static inline int +simple_get_bytes(char **ptr, const char *end, void *res, int len) +{ + char *p, *q; + p = *ptr; + q = p + len; + if (q > end || q < p) + return -1; + memcpy(res, p, len); + *ptr = q; + return 0; +} + +static inline int +simple_get_netobj(char **ptr, const char *end, struct xdr_netobj *res) +{ + char *p, *q; + p = *ptr; + if (simple_get_bytes(&p, end, &res->len, sizeof(res->len))) + return -1; + q = p + res->len; + if (q > end || q < p) + return -1; + res->data = p; + *ptr = q; + return 0; +} + +static int +dup_netobj(struct xdr_netobj *source, struct xdr_netobj *dest) +{ + dest->len = source->len; + if (!(dest->data = kmalloc(dest->len, GFP_KERNEL))) + return -1; + memcpy(dest->data, source->data, dest->len); + return 0; +} + +static int +gss_parse_init_downcall(struct gss_api_mech *gm, struct xdr_netobj *buf, + struct gss_cl_ctx **gc, uid_t *uid) +{ + char *end = buf->data + buf->len; + char *p = buf->data; + struct gss_cl_ctx *ctx; + struct xdr_netobj tmp_buf; + unsigned int timeout; + int err = -EIO; + + if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) { + err = -ENOMEM; + goto err; + } + ctx->gc_proc = RPC_GSS_PROC_DATA; + ctx->gc_seq = 0; + spin_lock_init(&ctx->gc_seq_lock); + atomic_set(&ctx->count,1); + + if (simple_get_bytes(&p, end, uid, sizeof(uid))) + goto err_free_ctx; + /* FIXME: discarded timeout for now */ + if (simple_get_bytes(&p, end, &timeout, sizeof(timeout))) + goto err_free_ctx; + if (simple_get_bytes(&p, end, &ctx->gc_win, sizeof(ctx->gc_win))) + goto err_free_ctx; + if (simple_get_netobj(&p, end, &tmp_buf)) + goto err_free_ctx; + if (dup_netobj(&tmp_buf, &ctx->gc_wire_ctx)) { + err = -ENOMEM; + goto err_free_ctx; + } + if (simple_get_netobj(&p, end, &tmp_buf)) + goto err_free_wire_ctx; + if (p != end) + goto err_free_wire_ctx; + if (gss_import_sec_context(&tmp_buf, gm, &ctx->gc_gss_ctx)) + goto err_free_wire_ctx; + *gc = ctx; + return 0; +err_free_wire_ctx: + kfree(ctx->gc_wire_ctx.data); +err_free_ctx: + kfree(ctx); +err: + *gc = NULL; + dprintk("RPC: gss_parse_init_downcall returning %d\n", err); + return err; +} + + +struct gss_upcall_msg { + struct rpc_pipe_msg msg; + struct list_head list; + struct rpc_wait_queue waitq; + uid_t uid; + atomic_t count; +}; + +static void +gss_release_msg(struct gss_upcall_msg *gss_msg) +{ + if (atomic_dec_and_test(&gss_msg->count)) + kfree(gss_msg); +} + +static struct gss_upcall_msg * +gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) +{ + struct gss_upcall_msg *pos; + list_for_each_entry(pos, &gss_auth->upcalls, list) { + if (pos->uid == uid) + return pos; + } + return NULL; +} + +static void +gss_release_callback(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct gss_auth *gss_auth = container_of(clnt->cl_auth, + struct gss_auth, rpc_auth); + struct gss_upcall_msg *gss_msg; + + spin_lock(&gss_auth->lock); + gss_msg = gss_find_upcall(gss_auth, task->tk_msg.rpc_cred->cr_uid); + if (gss_msg) { + rpc_wake_up(&gss_msg->waitq); + list_del(&gss_msg->list); + gss_release_msg(gss_msg); + } + spin_unlock(&gss_auth->lock); +} + +static int +gss_upcall(struct 
rpc_clnt *clnt, struct rpc_task *task, uid_t uid) +{ + struct gss_auth *gss_auth = container_of(clnt->cl_auth, + struct gss_auth, rpc_auth); + struct gss_upcall_msg *gss_msg, *gss_new = NULL; + struct rpc_pipe_msg *msg; + struct dentry *dentry = gss_auth->dentry; + int res; + +retry: + gss_msg = gss_find_upcall(gss_auth, uid); + if (gss_msg == NULL && gss_new == NULL) { + spin_unlock(&gss_auth->lock); + gss_new = kmalloc(sizeof(*gss_new), GFP_KERNEL); + spin_lock(&gss_auth->lock); + if (gss_new) + goto retry; + return -ENOMEM; + } + if (gss_msg) + goto out_sleep; + gss_msg = gss_new; + memset(gss_new, 0, sizeof(*gss_new)); + INIT_LIST_HEAD(&gss_new->list); + INIT_RPC_WAITQ(&gss_new->waitq, "RPCSEC_GSS upcall waitq"); + atomic_set(&gss_new->count, 2); + msg = &gss_new->msg; + msg->data = &gss_new->uid; + msg->len = sizeof(gss_new->uid); + gss_new->uid = uid; + list_add(&gss_new->list, &gss_auth->upcalls); + gss_new = NULL; + rpc_sleep_on(&gss_msg->waitq, task, gss_release_callback, NULL); + spin_unlock(&gss_auth->lock); + res = rpc_queue_upcall(dentry->d_inode, msg); + spin_lock(&gss_auth->lock); + if (res) + gss_release_msg(gss_msg); + return res; +out_sleep: + rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); + if (gss_new) + kfree(gss_new); + return 0; +} + +static ssize_t +gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + ssize_t mlen = msg->len - msg->copied; + ssize_t left; + + if (mlen > buflen) + mlen = buflen; + left = copy_to_user(dst, data, mlen); + msg->copied += mlen - left; + return mlen - left; +} + +static ssize_t +gss_pipe_downcall(struct file *filp, const char *src, size_t mlen) +{ + char buf[1024]; + struct xdr_netobj obj = { + .len = mlen, + .data = buf, + }; + struct inode *inode = filp->f_dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + struct rpc_clnt *clnt; + struct rpc_auth *auth; + struct gss_auth *gss_auth; + struct gss_api_mech *mech; + struct auth_cred acred = { 0 }; + struct rpc_cred *cred; + struct gss_upcall_msg *gss_msg; + struct gss_cl_ctx *ctx; + ssize_t left; + int err; + + if (mlen > sizeof(buf)) + return -ENOSPC; + left = copy_from_user(buf, src, mlen); + if (left) + return -EFAULT; + clnt = rpci->private; + atomic_inc(&clnt->cl_users); + auth = clnt->cl_auth; + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + mech = gss_auth->mech; + err = gss_parse_init_downcall(mech, &obj, &ctx, &acred.uid); + if (err) + goto err; + cred = rpcauth_lookup_credcache(auth, &acred, 0); + if (!cred) + goto err_release_ctx; + gss_cred_set_ctx(cred, ctx); + spin_lock(&gss_auth->lock); + gss_msg = gss_find_upcall(gss_auth, acred.uid); + if (gss_msg) + rpc_wake_up(&gss_msg->waitq); + spin_unlock(&gss_auth->lock); + rpc_release_client(clnt); + return mlen; +err_release_ctx: + gss_destroy_ctx(ctx); +err: + rpc_release_client(clnt); + dprintk("RPC: gss_pipe_downcall returning %d\n", err); + return err; +} + +void +gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); + + rpc_wake_up(&gss_msg->waitq); + gss_release_msg(gss_msg); +} /* * NOTE: we have the opportunity to use different @@ -120,13 +434,23 @@ print_hexl(u32 *p, u_int length, u_int offset) static struct rpc_auth * gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { + struct gss_auth *gss_auth; struct rpc_auth * auth; dprintk("RPC: creating GSS authenticator for client %p\n",clnt); if (!try_module_get(THIS_MODULE)) return 
NULL; - if (!(auth = kmalloc(sizeof(*auth), GFP_KERNEL))) + if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; + gss_auth->mech = gss_pseudoflavor_to_mech(flavor); + if (!gss_auth->mech) { + printk(KERN_WARNING "%s: Pseudoflavor %d not found!", + __FUNCTION__, flavor); + goto err_free; + } + INIT_LIST_HEAD(&gss_auth->upcalls); + spin_lock_init(&gss_auth->lock); + auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; auth->au_expire = GSS_CRED_EXPIRE; @@ -135,7 +459,16 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) rpcauth_init_credcache(auth); + snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", + clnt->cl_pathname, + gss_auth->mech->gm_ops->name); + gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops); + if (IS_ERR(gss_auth->dentry)) + goto err_free; + return auth; +err_free: + kfree(gss_auth); out_dec: module_put(THIS_MODULE); return NULL; @@ -144,9 +477,13 @@ out_dec: static void gss_destroy(struct rpc_auth *auth) { + struct gss_auth *gss_auth; dprintk("RPC: destroying GSS authenticator %p flavor %d\n", auth, auth->au_flavor); + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + rpc_unlink(gss_auth->path); + rpcauth_free_credcache(auth); kfree(auth); @@ -156,7 +493,7 @@ gss_destroy(struct rpc_auth *auth) /* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure * to create a new cred or context, so they check that things have been * allocated before freeing them. */ -void +static void gss_destroy_ctx(struct gss_cl_ctx *ctx) { @@ -182,7 +519,7 @@ gss_destroy_cred(struct rpc_cred *rc) dprintk("RPC: gss_destroy_cred \n"); if (cred->gc_ctx) - gss_destroy_ctx(cred->gc_ctx); + gss_put_ctx(cred->gc_ctx); kfree(cred); } @@ -229,8 +566,10 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int taskflags) static u32 * gss_marshal(struct rpc_task *task, u32 *p, int ruid) { - struct gss_cred *cred = (struct gss_cred *) task->tk_msg.rpc_cred; - struct gss_cl_ctx *ctx = cred->gc_ctx; + struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, + gc_base); + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); u32 *cred_len; struct rpc_rqst *req = task->tk_rqstp; struct rpc_clnt *clnt = task->tk_client; @@ -251,11 +590,11 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid) *p++ = htonl(RPC_AUTH_GSS); cred_len = p++; - service = gss_pseudoflavor_to_service(cred->gc_flavor); + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); if (service == 0) { dprintk("Bad pseudoflavor %d in gss_marshal\n", - cred->gc_flavor); - return NULL; + gss_cred->gc_flavor); + goto out_put_ctx; } spin_lock(&ctx->gc_seq_lock); task->tk_gss_seqno = ctx->gc_seq++; @@ -281,10 +620,13 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid) if(maj_stat != 0){ printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); - return(NULL); + goto out_put_ctx; } p = xdr_encode_netobj(p, &bufout); return p; +out_put_ctx: + gss_put_ctx(ctx); + return NULL; } /* @@ -293,9 +635,21 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid) static int gss_refresh(struct rpc_task *task) { - /* Insert upcall here ! 
*/ - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; - return task->tk_status = -EACCES; + struct rpc_clnt *clnt = task->tk_client; + struct gss_auth *gss_auth = container_of(clnt->cl_auth, + struct gss_auth, rpc_auth); + struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_cred *cred = task->tk_msg.rpc_cred; + int err = 0; + + task->tk_timeout = xprt->timeout.to_current; + spin_lock(&gss_auth->lock); + if (gss_cred_get_ctx(cred)) + goto out; + err = gss_upcall(clnt, task, cred->cr_uid); +out: + spin_unlock(&gss_auth->lock); + return err; } static u32 * @@ -354,7 +708,11 @@ static struct rpc_credops gss_credops = { .crvalidate = gss_validate, }; -extern void gss_svc_ctx_init(void); +static struct rpc_pipe_ops gss_upcall_ops = { + .upcall = gss_pipe_upcall, + .downcall = gss_pipe_downcall, + .destroy_msg = gss_pipe_destroy_msg, +}; /* * Initialize RPCSEC_GSS module diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 32d08c06d300..0956d2e0f207 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -68,6 +68,7 @@ EXPORT_SYMBOL(xprt_set_timeout); EXPORT_SYMBOL(rpcauth_register); EXPORT_SYMBOL(rpcauth_unregister); EXPORT_SYMBOL(rpcauth_lookupcred); +EXPORT_SYMBOL(rpcauth_lookup_credcache); EXPORT_SYMBOL(rpcauth_free_credcache); EXPORT_SYMBOL(rpcauth_init_credcache); EXPORT_SYMBOL(put_rpccred); -- cgit v1.2.3 From e0594725b51b5253237ed11b8bf3cf9ab87d9d48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Jan 2003 21:41:57 -0800 Subject: [PATCH] minimal Kerberos V5 client support [6/6] The following patch provides minimal client support for the (mandatory) Kerberos V5 authentication mechanism under RPCSEC_GSS. See RFC2623 and RFC3010 for protocol details. Only authentication is supported for the moment. Data integrity and/or data privacy (encryption) will be implemented at a later stage. --- fs/Kconfig | 17 +- include/linux/sunrpc/gss_krb5.h | 160 ++++++++++++++++++ net/sunrpc/auth_gss/Makefile | 5 + net/sunrpc/auth_gss/gss_krb5_crypto.c | 168 +++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 251 ++++++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_seal.c | 214 ++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_seqnum.c | 88 ++++++++++ net/sunrpc/auth_gss/gss_krb5_unseal.c | 306 ++++++++++++++++++++++++++++++++++ 8 files changed, 1207 insertions(+), 2 deletions(-) create mode 100644 include/linux/sunrpc/gss_krb5.h create mode 100644 net/sunrpc/auth_gss/gss_krb5_crypto.c create mode 100644 net/sunrpc/auth_gss/gss_krb5_mech.c create mode 100644 net/sunrpc/auth_gss/gss_krb5_seal.c create mode 100644 net/sunrpc/auth_gss/gss_krb5_seqnum.c create mode 100644 net/sunrpc/auth_gss/gss_krb5_unseal.c (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index 755f57a1f77d..71957da6eebb 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1345,11 +1345,24 @@ config SUNRPC_GSS default SUNRPC if NFS_V4=y help Provides cryptographic authentication for NFS rpc requests. To - make this useful, you also need support for a gss-api mechanism - (such as Kerberos). + make this useful, you must also select at least one rpcsec_gss + mechanism. Note: You should always select this option if you wish to use NFSv4. +config RPCSEC_GSS_KRB5 + tristate "Kerberos V mechanism for RPCSEC_GSS (EXPERIMENTAL)" + depends on SUNRPC_GSS && CRYPTO_DES && CRYPTO_MD5 + default SUNRPC_GSS if NFS_V4=y + help + Provides a gss-api mechanism based on Kerberos V5 (this is + mandatory for RFC3010-compliant NFSv4 implementations). 
+ Requires a userspace daemon; + see http://www.citi.umich.edu/projects/nfsv4/. + + Note: If you select this option, please ensure that you also + enable the MD5 and DES crypto ciphers. + config LOCKD tristate default m if NFS_FS!=y && NFSD!=y && (NFS_FS=m || NFSD=m) diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h new file mode 100644 index 000000000000..8db6d1e13a69 --- /dev/null +++ b/include/linux/sunrpc/gss_krb5.h @@ -0,0 +1,160 @@ +/* + * linux/include/linux/sunrpc/gss_krb5_types.h + * + * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, + * lib/gssapi/krb5/gssapiP_krb5.h, and others + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * Bruce Fields + */ + +/* + * Copyright 1995 by the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. 
+ * + */ + +#include +#include +#include + +struct krb5_ctx { + int initiate; /* 1 = initiating, 0 = accepting */ + int seed_init; + unsigned char seed[16]; + int signalg; + int sealalg; + struct crypto_tfm *enc; + struct crypto_tfm *seq; + s32 endtime; + u32 seq_send; + u32 seq_recv; + struct xdr_netobj mech_used; +}; + +#define KG_TOK_MIC_MSG 0x0101 +#define KG_TOK_WRAP_MSG 0x0201 + +enum sgn_alg { + SGN_ALG_DES_MAC_MD5 = 0x0000, + SGN_ALG_MD2_5 = 0x0001, + SGN_ALG_DES_MAC = 0x0002, + SGN_ALG_3 = 0x0003, /* not published */ + SGN_ALG_HMAC_MD5 = 0x0011, /* microsoft w2k; no support */ + SGN_ALG_HMAC_SHA1_DES3_KD = 0x0004 +}; +enum seal_alg { + SEAL_ALG_NONE = 0xffff, + SEAL_ALG_DES = 0x0000, + SEAL_ALG_1 = 0x0001, /* not published */ + SEAL_ALG_MICROSOFT_RC4 = 0x0010,/* microsoft w2k; no support */ + SEAL_ALG_DES3KD = 0x0002 +}; + +#define RSA_MD5_CKSUM_LENGTH 16 + +#define CKSUMTYPE_CRC32 0x0001 +#define CKSUMTYPE_RSA_MD4 0x0002 +#define CKSUMTYPE_RSA_MD4_DES 0x0003 +#define CKSUMTYPE_DESCBC 0x0004 +#define CKSUMTYPE_RSA_MD5 0x0007 +#define CKSUMTYPE_RSA_MD5_DES 0x0008 +#define CKSUMTYPE_NIST_SHA 0x0009 +#define CKSUMTYPE_HMAC_SHA1_DES3 0x000c + +/* from gssapi_err_krb5.h */ +#define KG_CCACHE_NOMATCH (39756032L) +#define KG_KEYTAB_NOMATCH (39756033L) +#define KG_TGT_MISSING (39756034L) +#define KG_NO_SUBKEY (39756035L) +#define KG_CONTEXT_ESTABLISHED (39756036L) +#define KG_BAD_SIGN_TYPE (39756037L) +#define KG_BAD_LENGTH (39756038L) +#define KG_CTX_INCOMPLETE (39756039L) +#define KG_CONTEXT (39756040L) +#define KG_CRED (39756041L) +#define KG_ENC_DESC (39756042L) +#define KG_BAD_SEQ (39756043L) +#define KG_EMPTY_CCACHE (39756044L) +#define KG_NO_CTYPES (39756045L) + +#define KV5M_PRINCIPAL (-1760647423L) +#define KV5M_KEYBLOCK (-1760647421L) +#define KV5M_CHECKSUM (-1760647420L) +#define KV5M_ADDRESS (-1760647390L) +#define KV5M_AUTHENTICATOR (-1760647410L) +#define KV5M_AUTH_CONTEXT (-1760647383L) +#define KV5M_AUTHDATA (-1760647414L) +#define KV5M_GSS_OID (-1760647372L) +#define KV5M_GSS_QUEUE (-1760647371L) + +/* per Kerberos v5 protocol spec crypto types from the wire. + * these get mapped to linux kernel crypto routines. + */ +#define ENCTYPE_NULL 0x0000 +#define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */ +#define ENCTYPE_DES_CBC_MD4 0x0002 /* DES cbc mode with RSA-MD4 */ +#define ENCTYPE_DES_CBC_MD5 0x0003 /* DES cbc mode with RSA-MD5 */ +#define ENCTYPE_DES_CBC_RAW 0x0004 /* DES cbc mode raw */ +/* XXX deprecated? 
*/ +#define ENCTYPE_DES3_CBC_SHA 0x0005 /* DES-3 cbc mode with NIST-SHA */ +#define ENCTYPE_DES3_CBC_RAW 0x0006 /* DES-3 cbc mode raw */ +#define ENCTYPE_DES_HMAC_SHA1 0x0008 +#define ENCTYPE_DES3_CBC_SHA1 0x0010 +#define ENCTYPE_UNKNOWN 0x01ff + +s32 +krb5_make_checksum(s32 cksumtype, + struct xdr_netobj *input, + struct xdr_netobj *cksum); + +u32 +krb5_make_token(struct krb5_ctx *context_handle, int qop_req, + struct xdr_netobj * input_message_buffer, + struct xdr_netobj * output_message_buffer, int toktype); + +u32 +krb5_read_token(struct krb5_ctx *context_handle, + struct xdr_netobj *input_token_buffer, + struct xdr_netobj *message_buffer, + int *qop_state, int toktype); + +u32 +krb5_encrypt(struct crypto_tfm * key, + void *iv, void *in, void *out, int length); + +u32 +krb5_decrypt(struct crypto_tfm * key, + void *iv, void *in, void *out, int length); + +s32 +krb5_make_seq_num(struct crypto_tfm * key, + int direction, + s32 seqnum, unsigned char *cksum, unsigned char *buf); + +s32 +krb5_get_seq_num(struct crypto_tfm * key, + unsigned char *cksum, + unsigned char *buf, int *direction, s32 * seqnum); diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index a621ec40ac04..49e465f37637 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -9,3 +9,8 @@ export-objs := sunrpcgss_syms.o auth_rpcgss-objs := auth_gss.o gss_pseudoflavors.o gss_generic_token.o \ sunrpcgss_syms.o gss_mech_switch.o +obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o + +rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ + gss_krb5_crypto.o gss_krb5_seqnum.o + diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c new file mode 100644 index 000000000000..512a4d686e5f --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -0,0 +1,168 @@ +/* + * linux/net/sunrpc/gss_krb5_crypto.c + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * Bruce Fields + */ + +/* + * Copyright (C) 1998 by the FundsXpress, INC. + * + * All rights reserved. + * + * Export of this software from the United States of America may require + * a specific license from the United States Government. It is the + * responsibility of any person or organization contemplating export to + * obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of FundsXpress. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. FundsXpress makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +u32 +krb5_encrypt( + struct crypto_tfm *tfm, + void * iv, + void * in, + void * out, + int length) +{ + u32 ret = -EINVAL; + struct scatterlist sg[1]; + u8 local_iv[16] = {0}; + + dprintk("RPC: gss_k5encrypt: TOP in %p out %p\nin data:\n", out, in); + print_hexl((u32 *)in, length, 0); + + if (length % crypto_tfm_alg_blocksize(tfm) != 0) + goto out; + + if (crypto_tfm_alg_ivsize(tfm) > 16) { + dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", + crypto_tfm_alg_ivsize(tfm)); + goto out; + } + + if (iv) + memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); + crypto_cipher_set_iv(tfm, local_iv, crypto_tfm_alg_ivsize(tfm)); + + memcpy(out, in, length); + sg[0].page = virt_to_page(out); + sg[0].offset = ((long)out & ~PAGE_MASK); + sg[0].length = length; + + ret = crypto_cipher_encrypt(tfm, sg, 1); + +out: + dprintk("gss_k5encrypt returns %d\n",ret); + return(ret); +} + +u32 +krb5_decrypt( + struct crypto_tfm *tfm, + void * iv, + void * in, + void * out, + int length) +{ + u32 ret = -EINVAL; + struct scatterlist sg[1]; + u8 local_iv[16] = {0}; + + dprintk("RPC: gss_k5decrypt: TOP in %p out %p\nin data:\n", in, out); + print_hexl((u32 *)in,length,0); + + if (length % crypto_tfm_alg_blocksize(tfm) != 0) + goto out; + + if (crypto_tfm_alg_ivsize(tfm) > 16) { + dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", + crypto_tfm_alg_ivsize(tfm)); + goto out; + } + if (iv) + memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm)); + crypto_cipher_set_iv(tfm, local_iv, crypto_tfm_alg_blocksize(tfm)); + + memcpy(out, in, length); + sg[0].page = virt_to_page(out); + sg[0].offset = ((long)out & ~PAGE_MASK); + sg[0].length = length; + + ret = crypto_cipher_decrypt(tfm, sg, 1); + +out: + dprintk("gss_k5decrypt returns %d\n",ret); + return(ret); +} + +s32 +krb5_make_checksum(s32 cksumtype, struct xdr_netobj *input, + struct xdr_netobj *cksum) +{ + s32 ret = -EINVAL; + struct scatterlist sg[1]; + char *cksumname; + struct crypto_tfm *tfm; + + switch (cksumtype) { + case CKSUMTYPE_RSA_MD5: + cksumname = "md5"; + break; + default: + dprintk("RPC: krb5_make_checksum:" + " unsupported checksum %d", cksumtype); + goto out; + } + if (!(tfm = crypto_alloc_tfm(cksumname, 0))) + goto out; + cksum->len = crypto_tfm_alg_digestsize(tfm); + + if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) { + ret = -ENOMEM; + goto out_free_tfm; + } + sg[0].page = virt_to_page(input->data); + sg[0].offset = ((long)input->data & ~PAGE_MASK); + sg[0].length = input->len; + + crypto_digest_init(tfm); + crypto_digest_update(tfm, sg, 1); + crypto_digest_final(tfm, cksum->data); + + ret = 0; + +out_free_tfm: + crypto_free_tfm(tfm); +out: + dprintk("RPC: gss_k5cksum: returning %d\n", ret); + return (ret); +} diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c new file mode 100644 index 000000000000..ce31a89684e4 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -0,0 +1,251 @@ +/* + * linux/net/sunrpc/gss_krb5_mech.c + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * J. Bruce Fields + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +struct xdr_netobj gss_mech_krb5_oid = + {9, "\052\206\110\206\367\022\001\002\002"}; + +static inline int +get_bytes(char **ptr, const char *end, void *res, int len) +{ + char *p, *q; + p = *ptr; + q = p + len; + if (q > end || q < p) + return -1; + memcpy(res, p, len); + *ptr = q; + return 0; +} + +static inline int +get_netobj(char **ptr, const char *end, struct xdr_netobj *res) +{ + char *p, *q; + p = *ptr; + if (get_bytes(&p, end, &res->len, sizeof(res->len))) + return -1; + q = p + res->len; + if (q > end || q < p) + return -1; + if (!(res->data = kmalloc(res->len, GFP_KERNEL))) + return -1; + memcpy(res->data, p, res->len); + *ptr = q; + return 0; +} + +static inline int +get_key(char **p, char *end, struct crypto_tfm **res) +{ + struct xdr_netobj key; + int alg, alg_mode; + char *alg_name; + + if (get_bytes(p, end, &alg, sizeof(alg))) + goto out_err; + if ((get_netobj(p, end, &key))) + goto out_err; + + switch (alg) { + case ENCTYPE_DES_CBC_RAW: + alg_name = "des"; + alg_mode = CRYPTO_TFM_MODE_CBC; + break; + default: + dprintk("RPC: get_key: unsupported algorithm %d", alg); + goto out_err_free_key; + } + if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) + goto out_err_free_key; + if (crypto_cipher_setkey(*res, key.data, key.len)) + goto out_err_free_tfm; + + kfree(key.data); + return 0; + +out_err_free_tfm: + crypto_free_tfm(*res); +out_err_free_key: + kfree(key.data); +out_err: + return -1; +} + +static u32 +gss_import_sec_context_kerberos(struct xdr_netobj *inbuf, + struct gss_ctx *ctx_id) +{ + char *p = inbuf->data; + char *end = inbuf->data + inbuf->len; + struct krb5_ctx *ctx; + + if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) + goto out_err; + memset(ctx, 0, sizeof(*ctx)); + + if (get_bytes(&p, end, &ctx->initiate, sizeof(ctx->initiate))) + goto out_err_free_ctx; + if (get_bytes(&p, end, &ctx->seed_init, sizeof(ctx->seed_init))) + goto out_err_free_ctx; + if (get_bytes(&p, end, ctx->seed, sizeof(ctx->seed))) + goto out_err_free_ctx; + if (get_bytes(&p, end, &ctx->signalg, sizeof(ctx->signalg))) + goto out_err_free_ctx; + if (get_bytes(&p, end, &ctx->sealalg, sizeof(ctx->sealalg))) + goto out_err_free_ctx; + if (get_bytes(&p, end, &ctx->endtime, sizeof(ctx->endtime))) + goto 
out_err_free_ctx; + if (get_bytes(&p, end, &ctx->seq_send, sizeof(ctx->seq_send))) + goto out_err_free_ctx; + if (get_netobj(&p, end, &ctx->mech_used)) + goto out_err_free_ctx; + if (get_key(&p, end, &ctx->enc)) + goto out_err_free_mech; + if (get_key(&p, end, &ctx->seq)) + goto out_err_free_key1; + if (p != end) + goto out_err_free_key2; + + ctx_id->internal_ctx_id = ctx; + dprintk("Succesfully imported new context.\n"); + return 0; + +out_err_free_key2: + crypto_free_tfm(ctx->seq); +out_err_free_key1: + crypto_free_tfm(ctx->enc); +out_err_free_mech: + kfree(ctx->mech_used.data); +out_err_free_ctx: + kfree(ctx); +out_err: + return GSS_S_FAILURE; +} + +void +gss_delete_sec_context_kerberos(void *internal_ctx) { + struct krb5_ctx *kctx = internal_ctx; + + if (kctx->seq) + crypto_free_tfm(kctx->seq); + if (kctx->enc) + crypto_free_tfm(kctx->enc); + if (kctx->mech_used.data) + kfree(kctx->mech_used.data); + kfree(kctx); +} + +u32 +gss_verify_mic_kerberos(struct gss_ctx *ctx, + struct xdr_netobj *signbuf, + struct xdr_netobj *checksum, + u32 *qstate) { + u32 maj_stat = 0; + int qop_state; + struct krb5_ctx *kctx = ctx->internal_ctx_id; + + maj_stat = krb5_read_token(kctx, checksum, signbuf, &qop_state, + KG_TOK_MIC_MSG); + if (!maj_stat && qop_state) + *qstate = qop_state; + + dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); + return maj_stat; +} + +u32 +gss_get_mic_kerberos(struct gss_ctx *ctx, + u32 qop, + struct xdr_netobj *message_buffer, + struct xdr_netobj *message_token) { + u32 err = 0; + struct krb5_ctx *kctx = ctx->internal_ctx_id; + + if (!message_buffer->data) return GSS_S_FAILURE; + + dprintk("RPC: gss_get_mic_kerberos:" + " message_buffer->len %d\n",message_buffer->len); + + err = krb5_make_token(kctx, qop, message_buffer, + message_token, KG_TOK_MIC_MSG); + + dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); + + return err; +} + +static struct gss_api_ops gss_kerberos_ops = { + .name = "krb5", + .gss_import_sec_context = gss_import_sec_context_kerberos, + .gss_get_mic = gss_get_mic_kerberos, + .gss_verify_mic = gss_verify_mic_kerberos, + .gss_delete_sec_context = gss_delete_sec_context_kerberos, +}; + +/* XXX error checking? reference counting? */ +static int __init init_kerberos_module(void) +{ + struct gss_api_mech *gm; + + if (gss_mech_register(&gss_mech_krb5_oid, &gss_kerberos_ops)) + printk("Failed to register kerberos gss mechanism!\n"); + gm = gss_mech_get_by_OID(&gss_mech_krb5_oid); + gss_register_triple(RPC_AUTH_GSS_KRB5 , gm, 0, RPC_GSS_SVC_NONE); + gss_mech_put(gm); + return 0; +} + +static void __exit cleanup_kerberos_module(void) +{ + gss_unregister_triple(RPC_AUTH_GSS_KRB5); +} + +MODULE_LICENSE("GPL"); +module_init(init_kerberos_module); +module_exit(cleanup_kerberos_module); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c new file mode 100644 index 000000000000..280d82d7c6dc --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -0,0 +1,214 @@ +/* + * linux/net/sunrpc/gss_krb5_seal.c + * + * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * J. Bruce Fields + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. 
+ * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Copyright (C) 1998 by the FundsXpress, INC. + * + * All rights reserved. + * + * Export of this software from the United States of America may require + * a specific license from the United States Government. It is the + * responsibility of any person or organization contemplating export to + * obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of FundsXpress. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. FundsXpress makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +#define CKSUM_SIZE 8 + +static inline int +gss_krb5_padding(int blocksize, int length) { + /* Most of the code is block-size independent but in practice we + * use only 8: */ + BUG_ON(blocksize != 8); + return 8 - (length & 7); +} + +/* checksum the plaintext data and the first 8 bytes of the krb5 token header, + * as specified by the rfc: */ +static u32 +compute_checksum(s32 checksum_type, char *header, char *body, int body_len, + struct xdr_netobj *md5cksum) { + char *data_ptr; + struct xdr_netobj plaind; + u32 code = GSS_S_FAILURE; + + if (!(data_ptr = kmalloc(8 + body_len, GFP_KERNEL))) + goto out; + memcpy(data_ptr, header, 8); + memcpy(data_ptr + 8, body, body_len); + plaind.len = 8 + body_len; + plaind.data = data_ptr; + code = krb5_make_checksum(checksum_type, &plaind, md5cksum); + kfree(data_ptr); + code = 0; + +out: + return code; +} + +u32 +krb5_make_token(struct krb5_ctx *ctx, int qop_req, + struct xdr_netobj * text, struct xdr_netobj * token, + int toktype) +{ + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + int blocksize = 0, tmsglen; + unsigned char *ptr, *krb5_hdr, *msg_start; + s32 now; + + dprintk("RPC: gss_krb5_seal"); + + now = jiffies; + + if (qop_req != 0) + goto out_err; + + switch (ctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_krb5_seal: ctx->signalg %d not" + " supported\n", ctx->signalg); + goto out_err; + } + if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) { + dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n", + ctx->sealalg); + goto out_err; + } + + if (toktype == KG_TOK_WRAP_MSG) { + blocksize = crypto_tfm_alg_blocksize(ctx->enc); + tmsglen = blocksize + text->len + + gss_krb5_padding(blocksize, blocksize + text->len); + } else { + tmsglen = 0; + } + + token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); + if ((token->data = kmalloc(token->len, GFP_KERNEL)) == NULL) + goto out_err; + + ptr = token->data; + g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr, toktype); + + /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ + krb5_hdr = ptr - 2; + msg_start = krb5_hdr + 24; + + *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); + memset(krb5_hdr + 4, 0xff, 4); + if (toktype == KG_TOK_WRAP_MSG) + *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); + + if (toktype == KG_TOK_WRAP_MSG) { + unsigned char pad = gss_krb5_padding(blocksize, text->len); + + get_random_bytes(msg_start, blocksize); /* "confounder" */ + memcpy(msg_start + blocksize, text->data, text->len); + + memset(msg_start + blocksize + text->len, pad, pad); + + if (compute_checksum(checksum_type, krb5_hdr, msg_start, + tmsglen, &md5cksum)) + goto out_err; + + if (krb5_encrypt(ctx->enc, NULL, msg_start, msg_start, + tmsglen)) + goto out_err; + + } else { /* Sign only. */ + if (compute_checksum(checksum_type, krb5_hdr, text->data, + text->len, &md5cksum)) + goto out_err; + } + + switch (ctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + goto out_err; + memcpy(krb5_hdr + 16, + md5cksum.data + md5cksum.len - CKSUM_SIZE, CKSUM_SIZE); + + dprintk("make_seal_token: cksum data: \n"); + print_hexl((u32 *) (krb5_hdr + 16), CKSUM_SIZE, 0); + break; + default: + BUG(); + } + + kfree(md5cksum.data); + + if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 
0 : 0xff, + ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) + goto out_err; + + ctx->seq_send++; + + return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); +out_err: + if (md5cksum.data) kfree(md5cksum.data); + if (token->data) kfree(token->data); + token->data = 0; + token->len = 0; + return GSS_S_FAILURE; +} diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c new file mode 100644 index 000000000000..3345bae2aeae --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -0,0 +1,88 @@ +/* + * linux/net/sunrpc/gss_krb5_seqnum.c + * + * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +s32 +krb5_make_seq_num(struct crypto_tfm *key, + int direction, + s32 seqnum, + unsigned char *cksum, unsigned char *buf) +{ + unsigned char plain[8]; + + plain[0] = (unsigned char) (seqnum & 0xff); + plain[1] = (unsigned char) ((seqnum >> 8) & 0xff); + plain[2] = (unsigned char) ((seqnum >> 16) & 0xff); + plain[3] = (unsigned char) ((seqnum >> 24) & 0xff); + + plain[4] = direction; + plain[5] = direction; + plain[6] = direction; + plain[7] = direction; + + return krb5_encrypt(key, cksum, plain, buf, 8); +} + +s32 +krb5_get_seq_num(struct crypto_tfm *key, + unsigned char *cksum, + unsigned char *buf, + int *direction, s32 * seqnum) +{ + s32 code; + unsigned char plain[8]; + + dprintk("krb5_get_seq_num: \n"); + + if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) + return code; + + if ((plain[4] != plain[5]) || (plain[4] != plain[6]) + || (plain[4] != plain[7])) + return (s32)KG_BAD_SEQ; + + *direction = plain[4]; + + *seqnum = ((plain[0]) | + (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); + + return (0); +} diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c new file mode 100644 index 000000000000..836c683777f2 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -0,0 +1,306 @@ +/* + * linux/net/sunrpc/gss_krb5_unseal.c + * + * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. 
+ * + * Andy Adamson + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Copyright (C) 1998 by the FundsXpress, INC. + * + * All rights reserved. + * + * Export of this software from the United States of America may require + * a specific license from the United States Government. It is the + * responsibility of any person or organization contemplating export to + * obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of FundsXpress. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. FundsXpress makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + + +/* message_buffer is an input if MIC and an output if WRAP. 
*/ + +u32 +krb5_read_token(struct krb5_ctx *ctx, + struct xdr_netobj *read_token, + struct xdr_netobj *message_buffer, + int *qop_state, int toktype) +{ + s32 code; + int tmsglen = 0; + int conflen = 0; + int signalg; + int sealalg; + struct xdr_netobj token = {.len = 0, .data = NULL}; + s32 checksum_type; + struct xdr_netobj cksum; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + struct xdr_netobj plaind; + char *data_ptr; + s32 now; + unsigned char *plain = NULL; + int cksum_len = 0; + int plainlen = 0; + int direction; + s32 seqnum; + unsigned char *ptr = (unsigned char *)read_token->data; + int bodysize; + u32 ret = GSS_S_DEFECTIVE_TOKEN; + + dprintk("RPC: krb5_read_token\n"); + + if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, + &bodysize, &ptr, toktype, + read_token->len)) + goto out; + + if (toktype == KG_TOK_WRAP_MSG) { + message_buffer->len = 0; + message_buffer->data = NULL; + } + + /* get the sign and seal algorithms */ + + signalg = ptr[0] + (ptr[1] << 8); + sealalg = ptr[2] + (ptr[3] << 8); + + /* Sanity checks */ + + if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + goto out; + + if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || + ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff))) + goto out; + + /* in the current spec, there is only one valid seal algorithm per + key type, so a simple comparison is ok */ + + if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg)) + goto out; + + /* there are several mappings of seal algorithms to sign algorithms, + but few enough that we can try them all. */ + + if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) || + (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || + (ctx->sealalg == SEAL_ALG_DES3KD && + signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) + goto out; + + /* starting with a single alg */ + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + cksum_len = 8; + break; + default: + goto out; + } + + if (toktype == KG_TOK_WRAP_MSG) + tmsglen = bodysize - (14 + cksum_len); + + /* get the token parameters */ + + /* decode the message, if WRAP */ + + if (toktype == KG_TOK_WRAP_MSG) { + dprintk("RPC: krb5_read_token KG_TOK_WRAP_MSG\n"); + + plain = kmalloc(tmsglen, GFP_KERNEL); + ret = GSS_S_FAILURE; + if (plain == NULL) + goto out; + + code = krb5_decrypt(ctx->enc, NULL, + ptr + 14 + cksum_len, plain, + tmsglen); + if (code) + goto out; + + plainlen = tmsglen; + + conflen = crypto_tfm_alg_blocksize(ctx->enc); + token.len = tmsglen - conflen - plain[tmsglen - 1]; + + if (token.len) { + token.data = kmalloc(token.len, GFP_KERNEL); + if (token.data == NULL) + goto out; + memcpy(token.data, plain + conflen, token.len); + } + + } else if (toktype == KG_TOK_MIC_MSG) { + dprintk("RPC: krb5_read_token KG_TOK_MIC_MSG\n"); + token = *message_buffer; + plain = token.data; + plainlen = token.len; + } else { + token.len = 0; + token.data = NULL; + plain = token.data; + plainlen = token.len; + } + + dprintk("RPC krb5_read_token: token.len %d plainlen %d\n", token.len, + plainlen); + + /* compute the checksum of the message */ + + /* initialize the the cksum */ + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + dprintk("RPC krb5_read_token SGN_ALG_DES_MAC_MD5\n"); + /* compute the checksum of the message. 
+ * 8 = bytes of token body to be checksummed according to spec + */ + + data_ptr = kmalloc(8 + plainlen, GFP_KERNEL); + ret = GSS_S_FAILURE; + if (!data_ptr) + goto out; + + memcpy(data_ptr, ptr - 2, 8); + memcpy(data_ptr + 8, plain, plainlen); + + plaind.len = 8 + plainlen; + plaind.data = data_ptr; + + code = krb5_make_checksum(checksum_type, + &plaind, &md5cksum); + + kfree(data_ptr); + + if (code) + goto out; + + code = krb5_encrypt(ctx->seq, NULL, md5cksum.data, + md5cksum.data, 16); + if (code) + goto out; + + if (signalg == 0) + cksum.len = 8; + else + cksum.len = 16; + cksum.data = md5cksum.data + 16 - cksum.len; + + dprintk + ("RPC: krb5_read_token: memcmp digest cksum.len %d:\n", + cksum.len); + dprintk(" md5cksum.data\n"); + print_hexl((u32 *) md5cksum.data, 16, 0); + dprintk(" cksum.data:\n"); + print_hexl((u32 *) cksum.data, cksum.len, 0); + { + u32 *p; + + (u8 *) p = ptr + 14; + dprintk(" ptr+14:\n"); + print_hexl(p, cksum.len, 0); + } + + code = memcmp(cksum.data, ptr + 14, cksum.len); + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + ret = GSS_S_BAD_SIG; + if (code) + goto out; + + /* it got through unscathed. Make sure the context is unexpired */ + + if (toktype == KG_TOK_WRAP_MSG) + *message_buffer = token; + + if (qop_state) + *qop_state = GSS_C_QOP_DEFAULT; + + now = jiffies; + + ret = GSS_S_CONTEXT_EXPIRED; + if (now > ctx->endtime) + goto out; + + /* do sequencing checks */ + + ret = GSS_S_BAD_SIG; + if ((code = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) + goto out; + + if ((ctx->initiate && direction != 0xff) || + (!ctx->initiate && direction != 0)) + goto out; + + ret = GSS_S_COMPLETE; +out: + if (md5cksum.data) kfree(md5cksum.data); + if (toktype == KG_TOK_WRAP_MSG) { + if (plain) kfree(plain); + if (ret && token.data) kfree(token.data); + } + return ret; +} -- cgit v1.2.3 From d97c33778dfa512c4aab4a72d3af49aa308c09a8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 12 Jan 2003 22:03:50 -0800 Subject: [PATCH] cpufreq: per-CPU initialization Allow for per-CPU initialization of CPUfreq. Therefore, it's not necessary any longer to kmalloc the per-CPU policy struct. To use this, cpufreq_driver->policy has to be set to NULL. Of course, cpufreq_driver->init is needed then, which is the appropriate function for CPU initialization. cpufreq_driver->exit is available for cleanup. All existing drivers continue to work without any changes, just for clarity ->init and ->exit are set to NULL, and the names accordingly. 
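To make the new interface concrete: a per-CPU driver leaves ->policy NULL, lets the core allocate the NR_CPUS policy array, and fills in its limits from ->init(). The minimal sketch below is illustrative only and not part of the patch; the example_* names and the kHz values are invented, while the ->verify/->setpolicy/->init/->exit hooks, the .policy == NULL convention, CPUFREQ_ETERNAL and cpufreq_register_driver()/cpufreq_unregister_driver() are exactly what this patch introduces (CPUFREQ_POLICY_PERFORMANCE is assumed to come from the existing cpufreq.h).

#include <linux/init.h>
#include <linux/cpufreq.h>

/* hypothetical hardware limits, in kHz, for illustration only */
#define EXAMPLE_MIN_FREQ	250000
#define EXAMPLE_MAX_FREQ	500000

static int example_verify(struct cpufreq_policy *policy)
{
	/* clamp the requested range to the hardware limits */
	if (policy->min < EXAMPLE_MIN_FREQ)
		policy->min = EXAMPLE_MIN_FREQ;
	if (policy->max > EXAMPLE_MAX_FREQ)
		policy->max = EXAMPLE_MAX_FREQ;
	return 0;
}

static int example_setpolicy(struct cpufreq_policy *policy)
{
	/* program the hardware for policy->cpu here */
	return 0;
}

static int example_init(struct cpufreq_policy *policy)
{
	/* called once per CPU from cpufreq_add_dev(); policy->cpu is
	 * already set by the core before this hook runs */
	policy->cpuinfo.min_freq = EXAMPLE_MIN_FREQ;
	policy->cpuinfo.max_freq = EXAMPLE_MAX_FREQ;
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
	policy->min = EXAMPLE_MIN_FREQ;
	policy->max = EXAMPLE_MAX_FREQ;
	policy->policy = CPUFREQ_POLICY_PERFORMANCE;	/* default policy */
	return 0;
}

static int example_exit(struct cpufreq_policy *policy)
{
	/* undo whatever example_init() set up for this CPU */
	return 0;
}

static struct cpufreq_driver example_driver = {
	.verify		= example_verify,
	.setpolicy	= example_setpolicy,
	.init		= example_init,
	.exit		= example_exit,
	.policy		= NULL,	/* NULL: core kmallocs NR_CPUS policies */
	.name		= "example",
};

static int __init example_cpufreq_init(void)
{
	return cpufreq_register_driver(&example_driver);
}

static void __exit example_cpufreq_exit(void)
{
	cpufreq_unregister_driver(&example_driver);
}

module_init(example_cpufreq_init);
module_exit(example_cpufreq_exit);
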
--- arch/arm/mach-integrator/cpu.c | 1 + arch/arm/mach-sa1100/cpu-sa1100.c | 1 + arch/arm/mach-sa1100/cpu-sa1110.c | 1 + arch/i386/kernel/cpu/cpufreq/elanfreq.c | 3 + arch/i386/kernel/cpu/cpufreq/longhaul.c | 3 + arch/i386/kernel/cpu/cpufreq/longrun.c | 3 + arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | 3 + arch/i386/kernel/cpu/cpufreq/powernow-k6.c | 3 + arch/i386/kernel/cpu/cpufreq/speedstep.c | 3 + drivers/acpi/processor.c | 3 + include/linux/cpufreq.h | 14 ++- kernel/cpufreq.c | 178 ++++++++++++++++------------- 12 files changed, 134 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-integrator/cpu.c b/arch/arm/mach-integrator/cpu.c index e59d1abf5f4f..2d71baa76351 100644 --- a/arch/arm/mach-integrator/cpu.c +++ b/arch/arm/mach-integrator/cpu.c @@ -166,6 +166,7 @@ static struct cpufreq_driver integrator_driver = { .verify = integrator_verify_speed, .setpolicy = integrator_set_policy, .policy = &integrator_policy, + .name = "integrator", }; #endif diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c index 31378afc1d3f..b4226692b902 100644 --- a/arch/arm/mach-sa1100/cpu-sa1100.c +++ b/arch/arm/mach-sa1100/cpu-sa1100.c @@ -214,6 +214,7 @@ static struct cpufreq_driver sa1100_driver = { .verify = sa11x0_verify_speed, .setpolicy = sa1100_setspeed, .policy = &sa1100_policy, + .name = "sa1100", }; static int __init sa1100_dram_init(void) diff --git a/arch/arm/mach-sa1100/cpu-sa1110.c b/arch/arm/mach-sa1100/cpu-sa1110.c index f46c17063182..2ab03f867d86 100644 --- a/arch/arm/mach-sa1100/cpu-sa1110.c +++ b/arch/arm/mach-sa1100/cpu-sa1110.c @@ -309,6 +309,7 @@ static struct cpufreq_driver sa1110_driver = { .verify = sa11x0_verify_speed, .setpolicy = sa1110_setspeed, .policy = &sa1110_policy, + .name = "sa1110", }; static int __init sa1110_clk_init(void) diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c index 1917510f0338..c1c7aa075864 100644 --- a/arch/i386/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/i386/kernel/cpu/cpufreq/elanfreq.c @@ -260,6 +260,9 @@ static int __init elanfreq_init(void) driver->verify = &elanfreq_verify; driver->setpolicy = &elanfreq_setpolicy; + driver->init = NULL; + driver->exit = NULL; + strncpy(driver->name, "elanfreq", CPUFREQ_NAME_LEN); driver->policy[0].cpu = 0; ret = cpufreq_frequency_table_cpuinfo(&driver->policy[0], &elanfreq_table[0]); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index d6f3f66d6644..b0af80343bed 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -771,6 +771,9 @@ static int __init longhaul_init (void) driver->verify = &longhaul_verify; driver->setpolicy = &longhaul_setpolicy; + driver->init = NULL; + driver->exit = NULL; + strncpy(driver->name, "longhaul", CPUFREQ_NAME_LEN); driver->policy[0].cpu = 0; driver->policy[0].min = (unsigned int) lowest_speed; diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c index 69b4fdb42751..b8a854fb0f2f 100644 --- a/arch/i386/kernel/cpu/cpufreq/longrun.c +++ b/arch/i386/kernel/cpu/cpufreq/longrun.c @@ -251,6 +251,9 @@ static int __init longrun_init(void) driver->policy[0].cpuinfo.min_freq = longrun_low_freq; driver->policy[0].cpuinfo.max_freq = longrun_high_freq; driver->policy[0].cpuinfo.transition_latency = CPUFREQ_ETERNAL; + driver->init = NULL; + driver->exit = NULL; + strncpy(driver->name, "longrun", CPUFREQ_NAME_LEN); longrun_get_policy(&driver->policy[0]); diff 
--git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index e26c7918045e..c64aa0ac9cad 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -240,6 +240,9 @@ static int __init cpufreq_p4_init(void) driver->verify = &cpufreq_p4_verify; driver->setpolicy = &cpufreq_p4_setpolicy; + driver->init = NULL; + driver->exit = NULL; + strncpy(driver->name, "p4-clockmod", CPUFREQ_NAME_LEN); for (i=0;ipolicy[i].cpu = i; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c index 88085123ee3b..af01528ce993 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c @@ -184,6 +184,9 @@ static int __init powernow_k6_init(void) driver->verify = &powernow_k6_verify; driver->setpolicy = &powernow_k6_setpolicy; + driver->init = NULL; + driver->exit = NULL; + strncpy(driver->name, "powernow-k6", CPUFREQ_NAME_LEN); /* cpuinfo and default policy values */ driver->policy[0].cpu = 0; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep.c b/arch/i386/kernel/cpu/cpufreq/speedstep.c index 97212910fcff..209938dc1021 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep.c @@ -690,6 +690,9 @@ static int __init speedstep_init(void) driver->verify = &speedstep_verify; driver->setpolicy = &speedstep_setpolicy; + driver->init = NULL; + driver->exit = NULL; + strncpy(driver->name, "speedstep", CPUFREQ_NAME_LEN); driver->policy[0].cpuinfo.transition_latency = CPUFREQ_ETERNAL; diff --git a/drivers/acpi/processor.c b/drivers/acpi/processor.c index 9c2bc028dce4..ebdd6f25a2d7 100644 --- a/drivers/acpi/processor.c +++ b/drivers/acpi/processor.c @@ -1823,6 +1823,9 @@ acpi_cpufreq_init ( driver->verify = &acpi_cpufreq_verify; driver->setpolicy = &acpi_cpufreq_setpolicy; + driver->init = NULL; + driver->exit = NULL; + strncpy(driver->name, "acpi-processor", CPUFREQ_NAME_LEN); for (i=0;ipolicy[i].cpu = pr->id; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 12c59ed7d695..9226268ef4ef 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -109,19 +109,29 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu * CPUFREQ DRIVER INTERFACE * *********************************************************************/ +#define CPUFREQ_NAME_LEN 16 + struct cpufreq_driver { /* needed by all drivers */ int (*verify) (struct cpufreq_policy *policy); int (*setpolicy) (struct cpufreq_policy *policy); struct cpufreq_policy *policy; + char name[CPUFREQ_NAME_LEN]; + /* optional, for the moment */ + int (*init) (struct cpufreq_policy *policy); + int (*exit) (struct cpufreq_policy *policy); /* 2.4. 
compatible API */ #ifdef CONFIG_CPU_FREQ_24_API unsigned int cpu_cur_freq[NR_CPUS]; #endif }; -int cpufreq_register(struct cpufreq_driver *driver_data); -int cpufreq_unregister(void); +int cpufreq_register_driver(struct cpufreq_driver *driver_data); +int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); +/* deprecated */ +#define cpufreq_register(x) cpufreq_register_driver(x) +#define cpufreq_unregister(x) cpufreq_unregister_driver(NULL) + void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state); diff --git a/kernel/cpufreq.c b/kernel/cpufreq.c index f2e15a43e39d..9f13db3b9233 100644 --- a/kernel/cpufreq.c +++ b/kernel/cpufreq.c @@ -57,18 +57,6 @@ static struct notifier_block *cpufreq_transition_notifier_list; static DECLARE_MUTEX (cpufreq_notifier_sem); -/** - * The cpufreq default policy. Can be set by a "cpufreq=..." command - * line option. - */ -static struct cpufreq_policy default_policy = { - .cpu = CPUFREQ_ALL_CPUS, - .min = 0, - .max = 0, - .policy = 0, -}; - - #ifdef CONFIG_CPU_FREQ_24_API /** * A few values needed by the 2.4.-compatible API @@ -234,6 +222,25 @@ store_scaling_governor (struct device *dev, const char *buf, size_t count) } +/** + * show_scaling_governor - show the current policy for the specified CPU + */ +static ssize_t show_scaling_driver (struct device *dev, char *buf) +{ + char value[CPUFREQ_NAME_LEN]; + + if (!dev) + return 0; + + down(&cpufreq_driver_sem); + if (cpufreq_driver) + strncpy(value, cpufreq_driver->name, CPUFREQ_NAME_LEN); + up(&cpufreq_driver_sem); + + return sprintf(buf, "%s\n", value); +} + + /** * cpufreq_per_cpu_attr_ro - read-only cpufreq per-CPU file */ @@ -258,6 +265,7 @@ cpufreq_per_cpu_attr_rw(scaling_min_freq, min); cpufreq_per_cpu_attr_rw(scaling_max_freq, max); static DEVICE_ATTR(scaling_governor, (S_IRUGO | S_IWUSR), show_scaling_governor, store_scaling_governor); +static DEVICE_ATTR(scaling_driver, S_IRUGO, show_scaling_driver, NULL); /** @@ -269,6 +277,7 @@ static int cpufreq_add_dev (struct device * dev) { unsigned int cpu = to_cpu_nr(dev); int ret = 0; + struct cpufreq_policy policy; down(&cpufreq_driver_sem); if (!cpufreq_driver) { @@ -276,6 +285,37 @@ static int cpufreq_add_dev (struct device * dev) return -EINVAL; } + /* call driver. 
From then on the cpufreq must be able + * to accept all calls to ->verify and ->setpolicy for this CPU + */ + cpufreq_driver->policy[cpu].cpu = cpu; + if (cpufreq_driver->init) { + ret = cpufreq_driver->init(&cpufreq_driver->policy[cpu]); + if (ret) { + up(&cpufreq_driver_sem); + return -ENODEV; + } + } + + /* set default policy on this CPU */ + policy.policy = cpufreq_driver->policy[cpu].policy; + policy.min = cpufreq_driver->policy[cpu].min; + policy.max = cpufreq_driver->policy[cpu].max; + policy.cpu = cpu; + + up(&cpufreq_driver_sem); + ret = cpufreq_set_policy(&policy); + if (ret) + return -EINVAL; + down(&cpufreq_driver_sem); + + /* 2.4-API init for this CPU */ +#ifdef CONFIG_CPU_FREQ_24_API + cpu_min_freq[cpu] = cpufreq_driver->policy[cpu].cpuinfo.min_freq; + cpu_max_freq[cpu] = cpufreq_driver->policy[cpu].cpuinfo.max_freq; + cpu_cur_freq[cpu] = cpufreq_driver->cpu_cur_freq[cpu]; +#endif + /* prepare interface data */ cpufreq_driver->policy[cpu].intf.dev = dev; cpufreq_driver->policy[cpu].intf.intf = &cpufreq_interface; @@ -297,6 +337,7 @@ static int cpufreq_add_dev (struct device * dev) device_create_file (dev, &dev_attr_scaling_min_freq); device_create_file (dev, &dev_attr_scaling_max_freq); device_create_file (dev, &dev_attr_scaling_governor); + device_create_file (dev, &dev_attr_scaling_driver); up(&cpufreq_driver_sem); return ret; @@ -312,12 +353,17 @@ static int cpufreq_add_dev (struct device * dev) static int cpufreq_remove_dev (struct intf_data *intf) { struct device * dev = intf->dev; + unsigned int cpu = to_cpu_nr(dev); + + if (cpufreq_driver->exit) + cpufreq_driver->exit(&cpufreq_driver->policy[cpu]); device_remove_file (dev, &dev_attr_cpuinfo_min_freq); device_remove_file (dev, &dev_attr_cpuinfo_max_freq); device_remove_file (dev, &dev_attr_scaling_min_freq); device_remove_file (dev, &dev_attr_scaling_max_freq); device_remove_file (dev, &dev_attr_scaling_governor); + device_remove_file (dev, &dev_attr_scaling_governor); return 0; } @@ -402,20 +448,6 @@ scan_policy: } -/* - * cpufreq command line parameter. Must be hard values (kHz) - * cpufreq=1000000:2000000:PERFORMANCE - * to set the default CPUFreq policy. - */ -static int __init cpufreq_setup(char *str) -{ - cpufreq_parse_policy(str, &default_policy); - default_policy.cpu = CPUFREQ_ALL_CPUS; - return 1; -} -__setup("cpufreq=", cpufreq_setup); - - /** * cpufreq_proc_read - read /proc/cpufreq * @@ -1203,19 +1235,18 @@ EXPORT_SYMBOL_GPL(cpufreq_notify_transition); *********************************************************************/ /** - * cpufreq_register - register a CPU Frequency driver - * @driver_data: A struct cpufreq_driver containing the values submitted by the CPU Frequency driver. + * cpufreq_register_driver - register a CPU Frequency driver + * @driver_data: A struct cpufreq_driver containing the values# + * submitted by the CPU Frequency driver. * * Registers a CPU Frequency driver to this core code. This code * returns zero on success, -EBUSY when another driver got here first * (and isn't unregistered in the meantime). 
* */ -int cpufreq_register(struct cpufreq_driver *driver_data) +int cpufreq_register_driver(struct cpufreq_driver *driver_data) { - unsigned int ret; - unsigned int i; - struct cpufreq_policy policy; + int ret = 0; if (cpufreq_driver) return -EBUSY; @@ -1225,53 +1256,27 @@ int cpufreq_register(struct cpufreq_driver *driver_data) return -EINVAL; down(&cpufreq_driver_sem); - cpufreq_driver = driver_data; - - /* check for a default policy - if it exists, use it on _all_ CPUs*/ - for (i=0; ipolicy[i].policy = default_policy.policy; - if (default_policy.min) - cpufreq_driver->policy[i].min = default_policy.min; - if (default_policy.max) - cpufreq_driver->policy[i].max = default_policy.max; - } - /* set default policy on all CPUs. Must be called per-CPU and not - * with CPUFREQ_ALL_CPUs as there might be no common policy for all - * CPUs (UltraSPARC etc.) - */ - for (i=0; ipolicy[i].policy; - policy.min = cpufreq_driver->policy[i].min; - policy.max = cpufreq_driver->policy[i].max; - policy.cpu = i; - up(&cpufreq_driver_sem); - ret = cpufreq_set_policy(&policy); - down(&cpufreq_driver_sem); - if (ret) { - cpufreq_driver = NULL; + cpufreq_driver = driver_data; + + if (!cpufreq_driver->policy) { + /* then we need per-CPU init */ + if (!cpufreq_driver->init) { up(&cpufreq_driver_sem); - return ret; + return -EINVAL; + } + cpufreq_driver->policy = kmalloc(NR_CPUS * sizeof(struct cpufreq_policy), GFP_KERNEL); + if (!cpufreq_driver->policy) { + up(&cpufreq_driver_sem); + return -ENOMEM; } } - + up(&cpufreq_driver_sem); cpufreq_proc_init(); #ifdef CONFIG_CPU_FREQ_24_API - down(&cpufreq_driver_sem); - for (i=0; ipolicy[i].cpuinfo.min_freq; - cpu_max_freq[i] = driver_data->policy[i].cpuinfo.max_freq; - cpu_cur_freq[i] = driver_data->cpu_cur_freq[i]; - } - up(&cpufreq_driver_sem); - cpufreq_sysctl_init(); #endif @@ -1279,40 +1284,53 @@ int cpufreq_register(struct cpufreq_driver *driver_data) return ret; } -EXPORT_SYMBOL_GPL(cpufreq_register); +EXPORT_SYMBOL_GPL(cpufreq_register_driver); /** - * cpufreq_unregister - unregister the current CPUFreq driver + * cpufreq_unregister_driver - unregister the current CPUFreq driver * * Unregister the current CPUFreq driver. Only call this if you have * the right to do so, i.e. if you have succeeded in initialising before! * Returns zero if successful, and -EINVAL if the cpufreq_driver is * currently not initialised. */ -int cpufreq_unregister(void) +int cpufreq_unregister_driver(struct cpufreq_driver *driver) { down(&cpufreq_driver_sem); - if (!cpufreq_driver) { + if (!cpufreq_driver || + ((driver != cpufreq_driver) && (driver != NULL))) { /* compat */ up(&cpufreq_driver_sem); return -EINVAL; } - interface_unregister(&cpufreq_interface); - cpufreq_driver = NULL; - - up(&cpufreq_driver_sem); - cpufreq_proc_exit(); #ifdef CONFIG_CPU_FREQ_24_API cpufreq_sysctl_exit(); #endif + /* remove this workaround as soon as interface_add_data works */ + { + unsigned int i; + for (i=0; ipolicy[i].intf); + } + } + + interface_unregister(&cpufreq_interface); + + if (driver) + kfree(cpufreq_driver->policy); + cpufreq_driver = NULL; + + up(&cpufreq_driver_sem); + return 0; } -EXPORT_SYMBOL_GPL(cpufreq_unregister); +EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); #ifdef CONFIG_PM -- cgit v1.2.3 From 9e5350557e7ba76771c66155132c117a69a0508e Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 12 Jan 2003 23:24:04 -0800 Subject: [PATCH] use for BUG() defines This patch moves BUG() and PAGE_BUG() from asm/page.h into asm/bug.h. 
We also fix up linux/dcache.h, which included asm/page.h for the sole purpose of getting the BUG() definition. Since linux/kernel.h and linux/smp.h make use of BUG(), asm/bug.h is included there as well. In addition, linux/jbd.h did not contain a clear path with which to obtain the archtecture BUG() definition, but did contain its own definition. --- include/asm-alpha/bug.h | 14 ++++++++++++++ include/asm-alpha/page.h | 8 -------- include/asm-arm/bug.h | 20 ++++++++++++++++++++ include/asm-arm/page.h | 15 --------------- include/asm-cris/bug.h | 12 ++++++++++++ include/asm-cris/page.h | 12 ------------ include/asm-i386/bug.h | 27 +++++++++++++++++++++++++++ include/asm-i386/page.h | 21 --------------------- include/asm-ia64/bug.h | 12 ++++++++++++ include/asm-ia64/page.h | 8 -------- include/asm-m68k/bug.h | 28 ++++++++++++++++++++++++++++ include/asm-m68k/page.h | 22 ---------------------- include/asm-mips/bug.h | 8 ++++++++ include/asm-mips/page.h | 3 --- include/asm-mips64/bug.h | 7 +++++++ include/asm-mips64/page.h | 3 --- include/asm-parisc/bug.h | 18 ++++++++++++++++++ include/asm-parisc/page.h | 14 -------------- include/asm-ppc/bug.h | 21 +++++++++++++++++++++ include/asm-ppc/page.h | 3 --- include/asm-ppc/processor.h | 13 ------------- include/asm-s390/bug.h | 13 +++++++++++++ include/asm-s390/page.h | 9 --------- include/asm-s390x/bug.h | 13 +++++++++++++ include/asm-s390x/page.h | 9 --------- include/asm-sh/bug.h | 16 ++++++++++++++++ include/asm-sh/page.h | 12 ------------ include/asm-sparc/bug.h | 25 +++++++++++++++++++++++++ include/asm-sparc/page.h | 20 -------------------- include/asm-sparc64/bug.h | 17 +++++++++++++++++ include/asm-sparc64/page.h | 12 ------------ include/linux/dcache.h | 2 +- include/linux/jbd.h | 9 +-------- include/linux/kernel.h | 1 + include/linux/smp.h | 2 +- 35 files changed, 255 insertions(+), 194 deletions(-) create mode 100644 include/asm-alpha/bug.h create mode 100644 include/asm-arm/bug.h create mode 100644 include/asm-cris/bug.h create mode 100644 include/asm-i386/bug.h create mode 100644 include/asm-ia64/bug.h create mode 100644 include/asm-m68k/bug.h create mode 100644 include/asm-mips/bug.h create mode 100644 include/asm-mips64/bug.h create mode 100644 include/asm-parisc/bug.h create mode 100644 include/asm-ppc/bug.h create mode 100644 include/asm-s390/bug.h create mode 100644 include/asm-s390x/bug.h create mode 100644 include/asm-sh/bug.h create mode 100644 include/asm-sparc/bug.h create mode 100644 include/asm-sparc64/bug.h (limited to 'include/linux') diff --git a/include/asm-alpha/bug.h b/include/asm-alpha/bug.h new file mode 100644 index 000000000000..0ff718cfdf53 --- /dev/null +++ b/include/asm-alpha/bug.h @@ -0,0 +1,14 @@ +#ifndef _ALPHA_BUG_H +#define _ALPHA_BUG_H + +#include + +/* ??? Would be nice to use .gprel32 here, but we can't be sure that the + function loaded the GP, so this could fail in modules. */ +#define BUG() \ + __asm__ __volatile__("call_pal %0 # bugchk\n\t"".long %1\n\t.8byte %2" \ + : : "i" (PAL_bugchk), "i"(__LINE__), "i"(__FILE__)) + +#define PAGE_BUG(page) BUG() + +#endif diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h index 43e414a33d95..2602b3685fe5 100644 --- a/include/asm-alpha/page.h +++ b/include/asm-alpha/page.h @@ -59,14 +59,6 @@ typedef unsigned long pgprot_t; #endif /* STRICT_MM_TYPECHECKS */ -/* ??? Would be nice to use .gprel32 here, but we can't be sure that the - function loaded the GP, so this could fail in modules. 
*/ -#define BUG() \ - __asm__ __volatile__("call_pal %0 # bugchk\n\t"".long %1\n\t.8byte %2" \ - : : "i" (PAL_bugchk), "i"(__LINE__), "i"(__FILE__)) - -#define PAGE_BUG(page) BUG() - /* Pure 2^n version of get_order */ extern __inline__ int get_order(unsigned long size) { diff --git a/include/asm-arm/bug.h b/include/asm-arm/bug.h new file mode 100644 index 000000000000..c9b6e7f6b317 --- /dev/null +++ b/include/asm-arm/bug.h @@ -0,0 +1,20 @@ +#ifndef _ASMARM_BUG_H +#define _ASMARM_BUG_H + +#include + +#ifdef CONFIG_DEBUG_BUGVERBOSE +extern volatile void __bug(const char *file, int line, void *data); + +/* give file/line information */ +#define BUG() __bug(__FILE__, __LINE__, NULL) +#define PAGE_BUG(page) __bug(__FILE__, __LINE__, page) + +#else + +/* these just cause an oops */ +#define BUG() (*(int *)0 = 0) +#define PAGE_BUG(page) (*(int *)0 = 0) + +#endif + diff --git a/include/asm-arm/page.h b/include/asm-arm/page.h index 0b1a26ed991d..ca379e1f9184 100644 --- a/include/asm-arm/page.h +++ b/include/asm-arm/page.h @@ -160,21 +160,6 @@ typedef unsigned long pgprot_t; #ifdef __KERNEL__ #ifndef __ASSEMBLY__ -#ifdef CONFIG_DEBUG_BUGVERBOSE -extern void __bug(const char *file, int line, void *data); - -/* give file/line information */ -#define BUG() __bug(__FILE__, __LINE__, NULL) -#define PAGE_BUG(page) __bug(__FILE__, __LINE__, page) - -#else - -/* these just cause an oops */ -#define BUG() (*(int *)0 = 0) -#define PAGE_BUG(page) (*(int *)0 = 0) - -#endif - /* Pure 2^n version of get_order */ static inline int get_order(unsigned long size) { diff --git a/include/asm-cris/bug.h b/include/asm-cris/bug.h new file mode 100644 index 000000000000..64e1d8b01f25 --- /dev/null +++ b/include/asm-cris/bug.h @@ -0,0 +1,12 @@ +#ifndef _CRIS_BUG_H +#define _CRIS_BUG_H + +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ +} while (0) + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +#endif diff --git a/include/asm-cris/page.h b/include/asm-cris/page.h index 01166c2bfd7c..47c83465c2af 100644 --- a/include/asm-cris/page.h +++ b/include/asm-cris/page.h @@ -70,18 +70,6 @@ typedef unsigned long pgprot_t; #define PAGE_OFFSET KSEG_C /* kseg_c is mapped to physical ram */ #endif -#ifndef __ASSEMBLY__ - -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ -} while (0) - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - -#endif /* __ASSEMBLY__ */ - /* macros to convert between really physical and virtual addresses * by stripping a selected bit, we can convert between KSEG_x and 0x40000000 where * the DRAM really resides diff --git a/include/asm-i386/bug.h b/include/asm-i386/bug.h new file mode 100644 index 000000000000..7a30b490e828 --- /dev/null +++ b/include/asm-i386/bug.h @@ -0,0 +1,27 @@ +#ifndef _I386_BUG_H +#define _I386_BUG_H + +#include + +/* + * Tell the user there is some problem. Beep too, so we can + * see^H^H^Hhear bugs in early bootup as well! + * The offending file and line are encoded after the "officially + * undefined" opcode for parsing in the trap handler. 
+ */ + +#if 1 /* Set to zero for a slightly smaller kernel */ +#define BUG() \ + __asm__ __volatile__( "ud2\n" \ + "\t.word %c0\n" \ + "\t.long %c1\n" \ + : : "i" (__LINE__), "i" (__FILE__)) +#else +#define BUG() __asm__ __volatile__("ud2\n") +#endif + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +#endif diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 3c73ce930a0a..ebc1bf892bd3 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -99,27 +99,6 @@ typedef struct { unsigned long pgprot; } pgprot_t; #ifndef __ASSEMBLY__ -/* - * Tell the user there is some problem. Beep too, so we can - * see^H^H^Hhear bugs in early bootup as well! - * The offending file and line are encoded after the "officially - * undefined" opcode for parsing in the trap handler. - */ - -#if 1 /* Set to zero for a slightly smaller kernel */ -#define BUG() \ - __asm__ __volatile__( "ud2\n" \ - "\t.word %c0\n" \ - "\t.long %c1\n" \ - : : "i" (__LINE__), "i" (__FILE__)) -#else -#define BUG() __asm__ __volatile__("ud2\n") -#endif - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - /* Pure 2^n version of get_order */ static __inline__ int get_order(unsigned long size) { diff --git a/include/asm-ia64/bug.h b/include/asm-ia64/bug.h new file mode 100644 index 000000000000..085cdb70c85e --- /dev/null +++ b/include/asm-ia64/bug.h @@ -0,0 +1,12 @@ +#ifndef _ASM_IA64_BUG_H +#define _ASM_IA64_BUG_H + +#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) +# define ia64_abort() __builtin_trap() +#else +# define ia64_abort() (*(volatile int *) 0 = 0) +#endif +#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) +#define PAGE_BUG(page) do { BUG(); } while (0) + +#endif diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index 4f956057d17f..2c78c70ee477 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h @@ -125,14 +125,6 @@ typedef union ia64_va { # define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #endif -#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) -# define ia64_abort() __builtin_trap() -#else -# define ia64_abort() (*(volatile int *) 0 = 0) -#endif -#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) -#define PAGE_BUG(page) do { BUG(); } while (0) - static __inline__ int get_order (unsigned long size) { diff --git a/include/asm-m68k/bug.h b/include/asm-m68k/bug.h new file mode 100644 index 000000000000..92c5cc5c3c09 --- /dev/null +++ b/include/asm-m68k/bug.h @@ -0,0 +1,28 @@ +#ifndef _M68K_BUG_H +#define _M68K_BUG_H + +#include + +#ifdef CONFIG_DEBUG_BUGVERBOSE +#ifndef CONFIG_SUN3 +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + asm volatile("illegal"); \ +} while (0) +#else +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + panic("BUG!"); \ +} while (0) +#endif +#else +#define BUG() do { \ + asm volatile("illegal"); \ +} while (0) +#endif + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +#endif diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h index 7ca3578dfa8f..6c09c4f01e07 100644 --- a/include/asm-m68k/page.h +++ b/include/asm-m68k/page.h @@ -178,28 +178,6 @@ static inline void *__va(unsigned long x) #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) -#ifdef CONFIG_DEBUG_BUGVERBOSE -#ifndef CONFIG_SUN3 -#define BUG() do { \ - 
printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - asm volatile("illegal"); \ -} while (0) -#else -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - panic("BUG!"); \ -} while (0) -#endif -#else -#define BUG() do { \ - asm volatile("illegal"); \ -} while (0) -#endif - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - #endif /* __ASSEMBLY__ */ #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ diff --git a/include/asm-mips/bug.h b/include/asm-mips/bug.h new file mode 100644 index 000000000000..a66c6f97c391 --- /dev/null +++ b/include/asm-mips/bug.h @@ -0,0 +1,8 @@ +/* $Id$ */ +#ifndef __ASM_BUG_H +#define __ASM_BUG_H + +#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; } while (0) +#define PAGE_BUG(page) do { BUG(); } while (0) + +#endif diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h index d3fb0a1e2fde..5cd2c52cb14f 100644 --- a/include/asm-mips/page.h +++ b/include/asm-mips/page.h @@ -20,9 +20,6 @@ #ifndef _LANGUAGE_ASSEMBLY -#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; } while (0) -#define PAGE_BUG(page) do { BUG(); } while (0) - extern void (*_clear_page)(void * page); extern void (*_copy_page)(void * to, void * from); diff --git a/include/asm-mips64/bug.h b/include/asm-mips64/bug.h new file mode 100644 index 000000000000..3bf0774fc204 --- /dev/null +++ b/include/asm-mips64/bug.h @@ -0,0 +1,7 @@ +#ifndef _ASM_BUG_H +#define _ASM_BUG_H + +#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; } while (0) +#define PAGE_BUG(page) do { BUG(); } while (0) + +#endif diff --git a/include/asm-mips64/page.h b/include/asm-mips64/page.h index 9046c3474d43..872855cea66b 100644 --- a/include/asm-mips64/page.h +++ b/include/asm-mips64/page.h @@ -20,9 +20,6 @@ #ifndef _LANGUAGE_ASSEMBLY -#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; } while (0) -#define PAGE_BUG(page) do { BUG(); } while (0) - extern void (*_clear_page)(void * page); extern void (*_copy_page)(void * to, void * from); diff --git a/include/asm-parisc/bug.h b/include/asm-parisc/bug.h new file mode 100644 index 000000000000..d810287bb4f6 --- /dev/null +++ b/include/asm-parisc/bug.h @@ -0,0 +1,18 @@ +#ifndef _PARISC_BUG_H +#define _PARISC_BUG_H + +/* + * Tell the user there is some problem. Beep too, so we can + * see^H^H^Hhear bugs in early bootup as well! + * + * We don't beep yet. prumpf + */ +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ +} while (0) + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +#endif diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h index f754c241f7dd..b5db7b3da584 100644 --- a/include/asm-parisc/page.h +++ b/include/asm-parisc/page.h @@ -86,20 +86,6 @@ extern int npmem_ranges; /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) -/* - * Tell the user there is some problem. Beep too, so we can - * see^H^H^Hhear bugs in early bootup as well! - * - * We don't beep yet. 
prumpf - */ -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ -} while (0) - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - #define LINUX_GATEWAY_SPACE 0 #define __PAGE_OFFSET (0x10000000) diff --git a/include/asm-ppc/bug.h b/include/asm-ppc/bug.h new file mode 100644 index 000000000000..848c69a703b5 --- /dev/null +++ b/include/asm-ppc/bug.h @@ -0,0 +1,21 @@ +#ifndef _PPC_BUG_H +#define _PPC_BUG_H + +#include +#include /* for xmon definition */ + +#ifdef CONFIG_XMON +extern void xmon(struct pt_regs *); +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + xmon(0); \ +} while (0) +#else +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + __asm__ __volatile__(".long 0x0"); \ +} while (0) +#endif +#define PAGE_BUG(page) do { BUG(); } while (0) + +#endif diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h index e0429c2b5bc7..ff0f8f9acf62 100644 --- a/include/asm-ppc/page.h +++ b/include/asm-ppc/page.h @@ -14,9 +14,6 @@ #define KERNELBASE PAGE_OFFSET #ifndef __ASSEMBLY__ -#include /* for BUG definition */ - -#define PAGE_BUG(page) do { BUG(); } while (0) #define STRICT_MM_TYPECHECKS diff --git a/include/asm-ppc/processor.h b/include/asm-ppc/processor.h index 6c7193953195..b958da9e3c13 100644 --- a/include/asm-ppc/processor.h +++ b/include/asm-ppc/processor.h @@ -718,19 +718,6 @@ extern inline void prefetchw(const void *x) #define spin_lock_prefetch(x) prefetchw(x) -#ifdef CONFIG_XMON -extern void xmon(struct pt_regs *); -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - xmon(0); \ -} while (0) -#else -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - __asm__ __volatile__(".long 0x0"); \ -} while (0) -#endif - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PPC_PROCESSOR_H */ diff --git a/include/asm-s390/bug.h b/include/asm-s390/bug.h new file mode 100644 index 000000000000..38f2940e72df --- /dev/null +++ b/include/asm-s390/bug.h @@ -0,0 +1,13 @@ +#ifndef _S390_BUG_H +#define _S390_BUG_H + +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + __asm__ __volatile__(".long 0"); \ +} while (0) + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +#endif diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index 273b1a528af1..75ce536327d9 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h @@ -62,15 +62,6 @@ static inline void copy_page(void *to, void *from) #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - __asm__ __volatile__(".long 0"); \ -} while (0) - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - /* Pure 2^n version of get_order */ extern __inline__ int get_order(unsigned long size) { diff --git a/include/asm-s390x/bug.h b/include/asm-s390x/bug.h new file mode 100644 index 000000000000..38f2940e72df --- /dev/null +++ b/include/asm-s390x/bug.h @@ -0,0 +1,13 @@ +#ifndef _S390_BUG_H +#define _S390_BUG_H + +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + __asm__ __volatile__(".long 0"); \ +} while (0) + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +#endif diff --git a/include/asm-s390x/page.h b/include/asm-s390x/page.h index ddbed4167af4..e075af90adb2 100644 --- a/include/asm-s390x/page.h +++ b/include/asm-s390x/page.h @@ -60,15 +60,6 @@ static inline void copy_page(void 
*to, void *from) #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - __asm__ __volatile__(".long 0"); \ -} while (0) - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - /* Pure 2^n version of get_order */ extern __inline__ int get_order(unsigned long size) { diff --git a/include/asm-sh/bug.h b/include/asm-sh/bug.h new file mode 100644 index 000000000000..a8624ef03788 --- /dev/null +++ b/include/asm-sh/bug.h @@ -0,0 +1,16 @@ +#ifndef __ASM_SH_BUG_H +#define __ASM_SH_BUG_H + +/* + * Tell the user there is some problem. + */ +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + asm volatile("nop"); \ +} while (0) + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +#endif diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h index b124b63e23ce..742f66dac325 100644 --- a/include/asm-sh/page.h +++ b/include/asm-sh/page.h @@ -90,18 +90,6 @@ typedef struct { unsigned long pgprot; } pgprot_t; #ifndef __ASSEMBLY__ -/* - * Tell the user there is some problem. - */ -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - asm volatile("nop"); \ -} while (0) - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - /* Pure 2^n version of get_order */ static __inline__ int get_order(unsigned long size) { diff --git a/include/asm-sparc/bug.h b/include/asm-sparc/bug.h new file mode 100644 index 000000000000..2100cc50f97c --- /dev/null +++ b/include/asm-sparc/bug.h @@ -0,0 +1,25 @@ +/* $Id$ */ +#ifndef _SPARC_BUG_H +#define _SPARC_BUG_H + +/* + * XXX I am hitting compiler bugs with __builtin_trap. This has + * hit me before and rusty was blaming his netfilter bugs on + * this so lets disable it. - Anton + */ +#if 0 +/* We need the mb()'s so we don't trigger a compiler bug - Anton */ +#define BUG() do { \ + mb(); \ + __builtin_trap(); \ + mb(); \ +} while(0) +#else +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; \ +} while (0) +#endif + +#define PAGE_BUG(page) BUG() + +#endif diff --git a/include/asm-sparc/page.h b/include/asm-sparc/page.h index 7e521e0c31f4..253f216f1e1e 100644 --- a/include/asm-sparc/page.h +++ b/include/asm-sparc/page.h @@ -29,26 +29,6 @@ #ifndef __ASSEMBLY__ -/* - * XXX I am hitting compiler bugs with __builtin_trap. This has - * hit me before and rusty was blaming his netfilter bugs on - * this so lets disable it. 
- Anton - */ -#if 0 -/* We need the mb()'s so we don't trigger a compiler bug - Anton */ -#define BUG() do { \ - mb(); \ - __builtin_trap(); \ - mb(); \ -} while(0) -#else -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); *(int *)0=0; \ -} while (0) -#endif - -#define PAGE_BUG(page) BUG() - #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) #define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) #define clear_user_page(addr, vaddr, page) clear_page(addr) diff --git a/include/asm-sparc64/bug.h b/include/asm-sparc64/bug.h new file mode 100644 index 000000000000..7acd49b93509 --- /dev/null +++ b/include/asm-sparc64/bug.h @@ -0,0 +1,17 @@ +/* $Id$ */ + +#ifndef _SPARC64_BUG_H +#define _SPARC64_BUG_H + +#ifdef CONFIG_DEBUG_BUGVERBOSE +extern void do_BUG(const char *file, int line); +#define BUG() do { \ + do_BUG(__FILE__, __LINE__); \ + __builtin_trap(); \ +} while (0) +#else +#define BUG() __builtin_trap() +#endif + + +#endif diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index 208650062ebf..2fc25cfb7be8 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -20,18 +20,6 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_DEBUG_BUGVERBOSE -extern void do_BUG(const char *file, int line); -#define BUG() do { \ - do_BUG(__FILE__, __LINE__); \ - __builtin_trap(); \ -} while (0) -#else -#define BUG() __builtin_trap() -#endif - -#define PAGE_BUG(page) BUG() - /* Sparc64 is slow at multiplication, we prefer to use some extra space. */ #define WANT_PAGE_VIRTUAL 1 diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 535ca54d1d37..53d2a111c485 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -7,7 +7,7 @@ #include #include #include -#include /* for BUG() */ +#include struct vfsmount; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 5b1657fca68a..f3e44482a298 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -194,6 +194,7 @@ typedef struct journal_superblock_s #include #include +#include #define JBD_ASSERTIONS #ifdef JBD_ASSERTIONS @@ -779,14 +780,6 @@ static inline void journal_abort_handle(handle_t *handle) handle->h_aborted = 1; } -/* Not all architectures define BUG() */ -#ifndef BUG - #define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - * ((char *) 0) = 0; \ - } while (0) -#endif /* BUG */ - #endif /* __KERNEL__ */ /* Comparison functions for transaction IDs: perform comparisons using diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7eedbb751856..5cd1d8dc77f7 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -13,6 +13,7 @@ #include #include #include +#include /* Optimization barrier */ /* The "volatile" is due to gcc bugs */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 06ac1c4f1530..ce016a544cd4 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -14,6 +14,7 @@ #include #include #include +#include /* * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc. @@ -86,7 +87,6 @@ int cpu_up(unsigned int cpu); void smp_prepare_boot_cpu(void); #else /* !SMP */ -#include /* For BUG() */ /* * These macros fold the SMP functionality into a single CPU system -- cgit v1.2.3
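
With BUG() and PAGE_BUG() now living in asm/bug.h, a port only has to supply that one header, and generic code picks the macros up through linux/kernel.h instead of asm/page.h. The sketch below is illustrative only: the architecture name and header guard are invented, and the macro bodies simply follow the printk-plus-NULL-store pattern used by the cris and mips versions added in this patch.

/* include/asm-newarch/bug.h -- hypothetical architecture */
#ifndef _NEWARCH_BUG_H
#define _NEWARCH_BUG_H

/* report the location, then force an oops with a NULL store */
#define BUG() do { \
	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
	*(int *)0 = 0; \
} while (0)

#define PAGE_BUG(page) do { \
	BUG(); \
} while (0)

#endif /* _NEWARCH_BUG_H */

/* A caller no longer includes <asm/page.h> just for BUG(); the
 * definition arrives via <linux/kernel.h> (or <asm/bug.h> directly). */
#include <linux/kernel.h>

static void check_not_null(void *p)
{
	if (!p)
		BUG();
}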