From 5dd133b719132d34e30bc3908266cac847071f1a Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Mon, 9 Jun 2003 20:48:05 -0700 Subject: PPC32: Update the OpenPIC code. This is related to bugzilla bug #791 --- arch/ppc/syslib/open_pic.c | 106 +++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 57 deletions(-) diff --git a/arch/ppc/syslib/open_pic.c b/arch/ppc/syslib/open_pic.c index 9c292607e4ad..dbbc9d6ecd00 100644 --- a/arch/ppc/syslib/open_pic.c +++ b/arch/ppc/syslib/open_pic.c @@ -33,6 +33,7 @@ void* OpenPIC_Addr; static volatile struct OpenPIC *OpenPIC = NULL; + /* * We define OpenPIC_InitSenses table thusly: * bit 0x1: sense, 0 for edge and 1 for level. @@ -261,32 +262,32 @@ static void openpic_safe_writefield_IPI(volatile u_int *addr, u_int mask, u_int } #endif /* CONFIG_SMP */ -#if defined(CONFIG_EPIC_SERIAL_MODE) || defined(CONFIG_PMAC_PBOOK) -static void openpic_reset(void) +#ifdef CONFIG_EPIC_SERIAL_MODE +static void __init openpic_eicr_set_clk(u_int clkval) { - openpic_setfield(&OpenPIC->Global.Global_Configuration0, - OPENPIC_CONFIG_RESET); - while (openpic_readfield(&OpenPIC->Global.Global_Configuration0, - OPENPIC_CONFIG_RESET)) - mb(); + openpic_writefield(&OpenPIC->Global.Global_Configuration1, + OPENPIC_EICR_S_CLK_MASK, (clkval << 28)); } -#endif -#ifdef CONFIG_EPIC_SERIAL_MODE -static void openpic_enable_sie(void) +static void __init openpic_enable_sie(void) { openpic_setfield(&OpenPIC->Global.Global_Configuration1, - OPENPIC_EICR_SIE); + OPENPIC_EICR_SIE); } +#endif -static void openpic_eicr_set_clk(u_int clkval) +#if defined(CONFIG_EPIC_SERIAL_MODE) || defined(CONFIG_PMAC_PBOOK) +static void openpic_reset(void) { - openpic_writefield(&OpenPIC->Global.Global_Configuration1, - OPENPIC_EICR_S_CLK_MASK, (clkval << 28)); + openpic_setfield(&OpenPIC->Global.Global_Configuration0, + OPENPIC_CONFIG_RESET); + while (openpic_readfield(&OpenPIC->Global.Global_Configuration0, + OPENPIC_CONFIG_RESET)) + mb(); } #endif -void 
openpic_set_sources(int first_irq, int num_irqs, void *first_ISR) +void __init openpic_set_sources(int first_irq, int num_irqs, void *first_ISR) { volatile OpenPIC_Source *src = first_ISR; int i, last_irq; @@ -300,7 +301,14 @@ void openpic_set_sources(int first_irq, int num_irqs, void *first_ISR) ISR[i] = src; } -void __init openpic_init(int linux_irq_offset) +/* + * The `offset' parameter defines where the interrupts handled by the + * OpenPIC start in the space of interrupt numbers that the kernel knows + * about. In other words, the OpenPIC's IRQ0 is numbered `offset' in the + * kernel's interrupt numbering scheme. + * We assume there is only one OpenPIC. + */ +void __init openpic_init(int offset) { u_int t, i; u_int timerfreq; @@ -349,13 +357,13 @@ void __init openpic_init(int linux_irq_offset) printk("OpenPIC timer frequency is %d.%06d MHz\n", timerfreq / 1000000, timerfreq % 1000000); - open_pic_irq_offset = linux_irq_offset; + open_pic_irq_offset = offset; /* Initialize timer interrupts */ if ( ppc_md.progress ) ppc_md.progress("openpic: timer",0x3ba); for (i = 0; i < OPENPIC_NUM_TIMERS; i++) { /* Disabled, Priority 0 */ - openpic_inittimer(i, 0, OPENPIC_VEC_TIMER+i+linux_irq_offset); + openpic_inittimer(i, 0, OPENPIC_VEC_TIMER+i+offset); /* No processor */ openpic_maptimer(i, 0); } @@ -365,12 +373,10 @@ void __init openpic_init(int linux_irq_offset) if ( ppc_md.progress ) ppc_md.progress("openpic: ipi",0x3bb); for (i = 0; i < OPENPIC_NUM_IPI; i++) { /* Disabled, Priority 10..13 */ - openpic_initipi(i, 10+i, OPENPIC_VEC_IPI+i+linux_irq_offset); + openpic_initipi(i, 10+i, OPENPIC_VEC_IPI+i+offset); /* IPIs are per-CPU */ - irq_desc[OPENPIC_VEC_IPI+i+linux_irq_offset].status |= - IRQ_PER_CPU; - irq_desc[OPENPIC_VEC_IPI+i+linux_irq_offset].handler = - &open_pic_ipi; + irq_desc[OPENPIC_VEC_IPI+i+offset].status |= IRQ_PER_CPU; + irq_desc[OPENPIC_VEC_IPI+i+offset].handler = &open_pic_ipi; } #endif @@ -387,40 +393,36 @@ void __init openpic_init(int 
linux_irq_offset) continue; /* the bootloader may have left it enabled (bad !) */ - openpic_disable_irq(i+linux_irq_offset); + openpic_disable_irq(i+offset); - /* - * We find the value from either the InitSenses table - * or assume a negative polarity level interrupt. - */ - sense = (i < OpenPIC_NumInitSenses)? OpenPIC_InitSenses[i]: 1; + sense = (i < OpenPIC_NumInitSenses)? OpenPIC_InitSenses[i]: \ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE); - if ((sense & IRQ_SENSE_MASK) == 1) - irq_desc[i+linux_irq_offset].status = IRQ_LEVEL; + if (sense & IRQ_SENSE_MASK) + irq_desc[i+offset].status = IRQ_LEVEL; /* Enabled, Priority 8 */ - openpic_initirq(i, 8, i + linux_irq_offset, - (sense & IRQ_POLARITY_MASK), + openpic_initirq(i, 8, i+offset, (sense & IRQ_POLARITY_MASK), (sense & IRQ_SENSE_MASK)); /* Processor 0 */ openpic_mapirq(i, 1<<0, 0); } /* Init descriptors */ - for (i = linux_irq_offset; i < NumSources + linux_irq_offset; i++) + for (i = offset; i < NumSources + offset; i++) irq_desc[i].handler = &open_pic; /* Initialize the spurious interrupt */ if (ppc_md.progress) ppc_md.progress("openpic: spurious",0x3bd); - openpic_set_spurious(OPENPIC_VEC_SPURIOUS+linux_irq_offset); + openpic_set_spurious(OPENPIC_VEC_SPURIOUS+offset); /* Initialize the cascade */ - if (linux_irq_offset) { - if (request_irq(linux_irq_offset, no_action, SA_INTERRUPT, + if (offset) { + if (request_irq(offset, no_action, SA_INTERRUPT, "82c59 cascade", NULL)) printk("Unable to get OpenPIC IRQ 0 for cascade\n"); } - openpic_disable_8259_pass_through(); + openpic_disable_8259_pass_through(); #ifdef CONFIG_EPIC_SERIAL_MODE openpic_eicr_set_clk(7); /* Slowest value until we know better */ openpic_enable_sie(); @@ -479,7 +481,7 @@ static u_int openpic_get_priority(void) } #endif /* notused */ -static void openpic_set_priority(u_int pri) +static void __init openpic_set_priority(u_int pri) { DECL_THIS_CPU; @@ -656,29 +658,18 @@ static void __init openpic_maptimer(u_int timer, u_int cpumask) } /* - * 
Initalize the interrupt source which will generate an NMI (and disable it). + * Initalize the interrupt source which will generate an NMI. + * This raises the interrupt's priority from 8 to 9. * * irq: The logical IRQ which generates an NMI. */ void __init openpic_init_nmi_irq(u_int irq) { - int sense; - - /* If this wasn't given, assume a level, negative polarity interrupt. */ - sense = (irq < OpenPIC_NumInitSenses) ? OpenPIC_InitSenses[irq] : - (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE); - - openpic_safe_writefield(&ISR[irq]->Vector_Priority, - OPENPIC_PRIORITY_MASK | OPENPIC_VECTOR_MASK | - OPENPIC_SENSE_MASK | OPENPIC_POLARITY_MASK, - (9 << OPENPIC_PRIORITY_SHIFT) | - (irq + open_pic_irq_offset) | - ((sense & IRQ_POLARITY_MASK) ? - OPENPIC_POLARITY_POSITIVE : - OPENPIC_POLARITY_NEGATIVE) | - ((sense & IRQ_SENSE_MASK) ? OPENPIC_SENSE_LEVEL - : OPENPIC_SENSE_EDGE)); + check_arg_irq(irq); + openpic_safe_writefield(&ISR[irq - open_pic_irq_offset]->Vector_Priority, + OPENPIC_PRIORITY_MASK, + 9 << OPENPIC_PRIORITY_SHIFT); } /* @@ -752,7 +743,8 @@ void openpic_disable_ipi(u_int irq) * pol: polarity (1 for positive, 0 for negative) * sense: 1 for level, 0 for edge */ -static void openpic_initirq(u_int irq, u_int pri, u_int vec, int pol, int sense) +static void __init +openpic_initirq(u_int irq, u_int pri, u_int vec, int pol, int sense) { openpic_safe_writefield(&ISR[irq]->Vector_Priority, OPENPIC_PRIORITY_MASK | OPENPIC_VECTOR_MASK | -- cgit v1.2.3 From 1b546146f995b81305331f3f3e70a43039e87b03 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 26 Jun 2003 03:49:32 -0700 Subject: Fix compiler warning --- fs/cifs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 19b4bd43dcab..3d78ae445e3c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -635,9 +635,9 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs) struct cifsFileInfo *open_file = NULL; FILE_BASIC_INFO time_buf; int set_time = 
FALSE; - __u64 mode = 0xFFFFFFFFFFFFFFFF; - __u64 uid = 0xFFFFFFFFFFFFFFFF; - __u64 gid = 0xFFFFFFFFFFFFFFFF; + __u64 mode = 0xFFFFFFFFFFFFFFFFULL; + __u64 uid = 0xFFFFFFFFFFFFFFFFULL; + __u64 gid = 0xFFFFFFFFFFFFFFFFULL; struct cifsInodeInfo *cifsInode; xid = GetXid(); -- cgit v1.2.3 From fd1df7849924fa8ed6363e43c51aac72343d6956 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 26 Jun 2003 10:55:28 -0700 Subject: Signing fixes part 1 --- fs/cifs/cifs_unicode.c | 2 +- fs/cifs/smbdes.c | 2 +- fs/cifs/smbencrypt.c | 119 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 94 insertions(+), 29 deletions(-) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 78425d07e5bc..e8a97a8949e6 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -34,7 +34,7 @@ void toUpper(const struct nls_table *n, char *mixed_string) { - int i; + unsigned int i; char temp; for (i = 0; i < strlen(mixed_string); i++) { diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c index 79cbdc7feef6..82e27cac1c53 100644 --- a/fs/cifs/smbdes.c +++ b/fs/cifs/smbdes.c @@ -399,7 +399,7 @@ SamOEMhash(unsigned char *data, unsigned char *key, int val) s_box[ind] = s_box[j]; s_box[j] = tc; } - for (ind = 0; ind < (val ? 516 : 16); ind++) { + for (ind = 0; ind < val; ind++) { unsigned char tc; unsigned char t; diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 2e84b92c010f..83e772647425 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -5,7 +5,8 @@ Copyright (C) Andrew Tridgell 1992-2000 Copyright (C) Luke Kenneth Casson Leighton 1996-2000 Modified by Jeremy Allison 1995. 
- Modified by Steve French (sfrench@us.ibm.com) 2002 + Copyright (C) Andrew Bartlett 2002-2003 + Modified by Steve French (sfrench@us.ibm.com) 2002-2003 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -97,13 +98,15 @@ SMBencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) E_P16(p14, p21); SMBOWFencrypt(p21, c8, p24); - + #ifdef DEBUG_PASSWORD DEBUG(100, ("SMBencrypt: lm#, challenge, response\n")); dump_data(100, (char *) p21, 16); dump_data(100, (char *) c8, 8); dump_data(100, (char *) p24, 24); #endif + memset(p14,0,15); + memset(p21,0,21); } /* Routines for Windows NT MD4 Hash functions. */ @@ -161,6 +164,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16) len = _my_wcslen(wpwd) * sizeof (__u16); mdfour(p16, (unsigned char *) wpwd, len); + memset(wpwd,0,129 * 2); } /* Does both the NT and LM owfs of a user's password */ @@ -222,7 +226,7 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n, /* push_ucs2(NULL, user_u, user_n, (user_l+1)*2, STR_UNICODE|STR_NOALIGN|STR_TERMINATE|STR_UPPER); push_ucs2(NULL, dom_u, domain_n, (domain_l+1)*2, STR_UNICODE|STR_NOALIGN|STR_TERMINATE|STR_UPPER); */ - /* do not think it is supposed to be uppercased */ + /* BB user and domain may need to be uppercased */ user_l = cifs_strtoUCS(user_u, user_n, 511, nls_codepage); domain_l = cifs_strtoUCS(dom_u, domain_n, 511, nls_codepage); @@ -297,8 +301,52 @@ SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) #endif } -int -make_oem_passwd_hash(char data[516], const char *passwd, +/* Does the md5 encryption from the NT hash for NTLMv2. 
*/ +void +SMBOWFencrypt_ntv2(const unsigned char kr[16], + const struct data_blob * srv_chal, + const struct data_blob * cli_chal, unsigned char resp_buf[16]) +{ + struct HMACMD5Context ctx; + + hmac_md5_init_limK_to_64(kr, 16, &ctx); + hmac_md5_update(srv_chal->data, srv_chal->length, &ctx); + hmac_md5_update(cli_chal->data, cli_chal->length, &ctx); + hmac_md5_final(resp_buf, &ctx); + +#ifdef DEBUG_PASSWORD + DEBUG(100, ("SMBOWFencrypt_ntv2: srv_chal, cli_chal, resp_buf\n")); + dump_data(100, srv_chal->data, srv_chal->length); + dump_data(100, cli_chal->data, cli_chal->length); + dump_data(100, resp_buf, 16); +#endif +} + +static struct data_blob LMv2_generate_response(const unsigned char ntlm_v2_hash[16], + const struct data_blob * server_chal) +{ + unsigned char lmv2_response[16]; + struct data_blob lmv2_client_data/* = data_blob(NULL, 8)*/; /* BB Fix BB */ + struct data_blob final_response /* = data_blob(NULL, 24)*/; /* BB Fix BB */ + + /* LMv2 */ + /* client-supplied random data */ + get_random_bytes(lmv2_client_data.data, lmv2_client_data.length); + /* Given that data, and the challenge from the server, generate a response */ + SMBOWFencrypt_ntv2(ntlm_v2_hash, server_chal, &lmv2_client_data, lmv2_response); + memcpy(final_response.data, lmv2_response, sizeof(lmv2_response)); + + /* after the first 16 bytes is the random data we generated above, + so the server can verify us with it */ + memcpy(final_response.data+sizeof(lmv2_response), + lmv2_client_data.data, lmv2_client_data.length); + +/* data_blob_free(&lmv2_client_data); */ /* BB fix BB */ + + return final_response; +} + +int make_oem_passwd_hash(char data[516], const char *passwd, unsigned char old_pw_hash[16], int unicode) { int new_pw_len = strlen(passwd) * (unicode ? 
2 : 1); @@ -333,32 +381,11 @@ make_oem_passwd_hash(char data[516], const char *passwd, DEBUG(100, ("make_oem_passwd_hash\n")); dump_data(100, data, 516); #endif - SamOEMhash((unsigned char *) data, (unsigned char *) old_pw_hash, TRUE); + SamOEMhash((unsigned char *) data, (unsigned char *) old_pw_hash, 516); return TRUE; } -/* Does the md5 encryption from the NT hash for NTLMv2. */ -void -SMBOWFencrypt_ntv2(const unsigned char kr[16], - const struct data_blob srv_chal, - const struct data_blob cli_chal, unsigned char resp_buf[16]) -{ - struct HMACMD5Context ctx; - - hmac_md5_init_limK_to_64(kr, 16, &ctx); - hmac_md5_update(srv_chal.data, srv_chal.length, &ctx); - hmac_md5_update(cli_chal.data, cli_chal.length, &ctx); - hmac_md5_final(resp_buf, &ctx); - -#ifdef DEBUG_PASSWORD - DEBUG(100, ("SMBOWFencrypt_ntv2: srv_chal, cli_chal, resp_buf\n")); - dump_data(100, srv_chal.data, srv_chal.length); - dump_data(100, cli_chal.data, cli_chal.length); - dump_data(100, resp_buf, 16); -#endif -} - void SMBsesskeygen_ntv2(const unsigned char kr[16], const unsigned char *nt_resp, __u8 sess_key[16]) @@ -407,6 +434,44 @@ encode_pw_buffer(char buffer[516], char *new_pw, int new_pw_length) return TRUE; } +int SMBNTLMv2encrypt(const char *user, const char *domain, const char *password, + const struct data_blob *server_chal, + const struct data_blob *names_blob, + struct data_blob *lm_response, struct data_blob *nt_response, + struct data_blob *nt_session_key,struct nls_table * nls_codepage) +{ + unsigned char nt_hash[16]; + unsigned char ntlm_v2_hash[16]; + E_md4hash(password, nt_hash); + + /* We don't use the NT# directly. Instead we use it mashed up with + the username and domain. 
+ This prevents username swapping during the auth exchange + */ + ntv2_owf_gen(nt_hash, user, domain, ntlm_v2_hash,nls_codepage); + + if (nt_response) { +/* *nt_response = NTLMv2_generate_response(ntlm_v2_hash, server_chal, + names_blob); */ /* BB fix BB */ + if (nt_session_key) { +/* *nt_session_key = data_blob(NULL, 16); */ /* BB fix BB */ + + /* The NTLMv2 calculations also provide a session key, for signing etc later */ + /* use only the first 16 bytes of nt_response for session key */ + SMBsesskeygen_ntv2(ntlm_v2_hash, nt_response->data, nt_session_key->data); + } + } + + /* LMv2 */ + + if (lm_response) { + *lm_response = LMv2_generate_response(ntlm_v2_hash, server_chal); + } + + return TRUE; +} + + /*********************************************************** SMB signing - setup the MAC key. ************************************************************/ -- cgit v1.2.3 From f2e85a0c81acb1ccbeb61702810dbb8bc4961c35 Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Sun, 29 Jun 2003 14:07:44 +1000 Subject: PPC32: Update the bootloader serial code to have stub functions. 
--- arch/ppc/boot/common/Makefile | 3 ++- arch/ppc/boot/common/ns16550.c | 5 ----- arch/ppc/boot/common/serial_stub.c | 28 ++++++++++++++++++++++++++++ arch/ppc/boot/simple/Makefile | 4 ---- arch/ppc/boot/simple/direct.S | 15 --------------- arch/ppc/boot/simple/m8260_tty.c | 5 ----- arch/ppc/boot/simple/m8xx_tty.c | 5 ----- 7 files changed, 30 insertions(+), 35 deletions(-) create mode 100644 arch/ppc/boot/common/serial_stub.c delete mode 100644 arch/ppc/boot/simple/direct.S diff --git a/arch/ppc/boot/common/Makefile b/arch/ppc/boot/common/Makefile index aefa82fcd7bc..6435b3722a13 100644 --- a/arch/ppc/boot/common/Makefile +++ b/arch/ppc/boot/common/Makefile @@ -8,7 +8,8 @@ # Tom Rini January 2001 # -lib-y := string.o util.o misc-common.o +lib-y := string.o util.o misc-common.o \ + serial_stub.o lib-$(CONFIG_PPC_PREP) += mpc10x_memory.o lib-$(CONFIG_LOPEC) += mpc10x_memory.o lib-$(CONFIG_PAL4) += cpc700_memory.o diff --git a/arch/ppc/boot/common/ns16550.c b/arch/ppc/boot/common/ns16550.c index 21cbfba89044..8c8db8d382d4 100644 --- a/arch/ppc/boot/common/ns16550.c +++ b/arch/ppc/boot/common/ns16550.c @@ -95,8 +95,3 @@ serial_tstc(unsigned long com_port) { return ((inb(com_port + (UART_LSR << shift)) & UART_LSR_DR) != 0); } - -void -serial_close(unsigned long com_port) -{ -} diff --git a/arch/ppc/boot/common/serial_stub.c b/arch/ppc/boot/common/serial_stub.c new file mode 100644 index 000000000000..2dc176ed0580 --- /dev/null +++ b/arch/ppc/boot/common/serial_stub.c @@ -0,0 +1,28 @@ +/* + * arch/ppc/boot/common/serial_stub.c + * + * These are a few stub routines to make the boot code cleaner looking when + * there is no serial port support, or the port needs no closing, for example. + * + * Author: Tom Rini + * + * 2003 (c) MontaVista, Software, Inc. This file is licensed under the terms + * of the GNU General Public License version 2. This program is licensed "as + * is" without any warranty of any kind, whether express or implied.
+ */ + +void __attribute__ ((weak)) +serial_fixups(void) +{ +} + +unsigned long __attribute__ ((weak)) +serial_init(int chan, void *ignored) +{ + return 0; +} + +void __attribute__ ((weak)) +serial_close(unsigned long com_port) +{ +} diff --git a/arch/ppc/boot/simple/Makefile b/arch/ppc/boot/simple/Makefile index 94217ae45849..7f7fc2258bb8 100644 --- a/arch/ppc/boot/simple/Makefile +++ b/arch/ppc/boot/simple/Makefile @@ -96,10 +96,6 @@ EXTRA := chrpmap.o END := pcore TFTPIMAGE := /tftpboot/zImage.$(END) endif -# The PowerPMC 250 needs the dummy serial_fixups() -ifeq ($(CONFIG_POWERPMC250),y) -EXTRA := direct.o -endif ifeq ($(CONFIG_SANDPOINT),y) EXTRA := direct.o TFTPIMAGE := /tftpboot/zImage.sandpoint diff --git a/arch/ppc/boot/simple/direct.S b/arch/ppc/boot/simple/direct.S deleted file mode 100644 index b5ced833e370..000000000000 --- a/arch/ppc/boot/simple/direct.S +++ /dev/null @@ -1,15 +0,0 @@ -/* - * arch/ppc/boot/simple/direct.S - * - * Author: Tom Rini - * - * This is an empty function for machines which use SERIAL_IO_MEM - * and don't need ISA_io set to anything but 0, or perform any other - * serial fixups. 
- */ - - .text - - .globl serial_fixups -serial_fixups: - blr diff --git a/arch/ppc/boot/simple/m8260_tty.c b/arch/ppc/boot/simple/m8260_tty.c index 120df32fe9cf..9d1d54a5e8f2 100644 --- a/arch/ppc/boot/simple/m8260_tty.c +++ b/arch/ppc/boot/simple/m8260_tty.c @@ -311,8 +311,3 @@ serial_tstc(void *ignored) return(!(rbdf->cbd_sc & BD_SC_EMPTY)); } - -void -serial_close(unsigned long com_port) -{ -} diff --git a/arch/ppc/boot/simple/m8xx_tty.c b/arch/ppc/boot/simple/m8xx_tty.c index 93b59a3bc1c7..1d2778e248c6 100644 --- a/arch/ppc/boot/simple/m8xx_tty.c +++ b/arch/ppc/boot/simple/m8xx_tty.c @@ -288,8 +288,3 @@ serial_tstc(void *ignored) return(!(rbdf->cbd_sc & BD_SC_EMPTY)); } - -void -serial_close(unsigned long com_port) -{ -} -- cgit v1.2.3 From 989ff8b261d25bb604df68e4f269ac134e63fafa Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Sun, 29 Jun 2003 15:54:34 +1000 Subject: PPC32: Remove references to a removed file. --- arch/ppc/boot/simple/Makefile | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/ppc/boot/simple/Makefile b/arch/ppc/boot/simple/Makefile index 7f7fc2258bb8..bd7bdeaca772 100644 --- a/arch/ppc/boot/simple/Makefile +++ b/arch/ppc/boot/simple/Makefile @@ -47,19 +47,17 @@ endif ifeq ($(CONFIG_EBONY),y) ZIMAGE := zImage-TREE ZIMAGEINITRD := zImage.initrd-TREE -EXTRA := direct.o END := ebony ENTRYPOINT := 0x01000000 TFTPIMAGE := /tftpboot/zImage.$(END) endif ifeq ($(CONFIG_EV64260),y) -EXTRA := direct.o misc-ev64260.o +EXTRA := misc-ev64260.o TFTPIMAGE := /tftpboot/zImage.ev64260 endif ifeq ($(CONFIG_GEMINI),y) ZIMAGE := zImage-STRIPELF ZIMAGEINITRD := zImage.initrd-STRIPELF -EXTRA := direct.o END := gemini TFTPIMAGE := /tftpboot/zImage.$(END) endif @@ -78,7 +76,6 @@ endif ifeq ($(CONFIG_MCPN765)$(CONFIG_MVME5100)$(CONFIG_PRPMC750)$(CONFIG_PRPMC800)$(CONFIG_LOPEC)$(CONFIG_PPLUS),y) ZIMAGE := zImage-PPLUS ZIMAGEINITRD := zImage.initrd-PPLUS -EXTRA := direct.o TFTPIMAGE := /tftpboot/zImage.pplus ZNETBOOT := zImage.pplus 
ZNETBOOTRD := zImage.initrd.pplus @@ -86,9 +83,6 @@ endif ifeq ($(CONFIG_PPLUS),y) EXTRA := legacy.o endif -ifeq ($(CONFIG_PAL4),y) -EXTRA := direct.o -endif ifeq ($(CONFIG_PCORE)$(CONFIG_POWERPMC250),y) ZIMAGE := zImage-STRIPELF ZIMAGEINITRD := zImage.initrd-STRIPELF @@ -97,13 +91,11 @@ END := pcore TFTPIMAGE := /tftpboot/zImage.$(END) endif ifeq ($(CONFIG_SANDPOINT),y) -EXTRA := direct.o TFTPIMAGE := /tftpboot/zImage.sandpoint endif ifeq ($(CONFIG_SPRUCE),y) ZIMAGE := zImage-TREE ZIMAGEINITRD := zImage.initrd-TREE -EXTRA := direct.o END := spruce ENTRYPOINT := 0x00800000 MISC := misc-spruce.o @@ -112,7 +104,6 @@ endif ifeq ($(CONFIG_ZX4500),y) ZIMAGE := zImage-STRIPELF ZIMAGEINITRD := zImage.initrd-STRIPELF -EXTRA := direct.o END := zx4500 TFTPIMAGE := /tftpboot/zImage.$(END) endif -- cgit v1.2.3 From 6ee508d6c40457f5d443b63fc74c52603475faab Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 29 Jun 2003 10:24:25 -0500 Subject: cifs xattr support part 1 --- fs/cifs/Makefile | 2 +- fs/cifs/cifsfs.h | 6 +++++- fs/cifs/xattr.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 fs/cifs/xattr.c diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 1295411affd2..76f667fe61af 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -3,4 +3,4 @@ # obj-$(CONFIG_CIFS) += cifs.o -cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o +cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 2776f7c0b7c2..efc50328da89 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -88,5 +88,9 @@ extern int cifs_follow_link(struct dentry *direntry, struct nameidata *nd); extern int cifs_readlink(struct 
dentry *direntry, char *buffer, int buflen); extern int cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname); - +extern int cifs_removexattr(struct dentry *, const char *); +extern int cifs_setxattr(struct dentry *, const char *, const void *, + size_t, int); +extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); +extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); #endif /* _CIFSSMB_H */ diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c new file mode 100644 index 000000000000..e377fa0247f0 --- /dev/null +++ b/fs/cifs/xattr.c @@ -0,0 +1,56 @@ +/* + * fs/cifs/xattr.c + * + * Copyright (c) International Business Machines Corp., 2003 + * Author(s): Steve French (sfrench@us.ibm.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +int cifs_removexattr(struct dentry * direntry, const char * name) +{ + int rc = -EOPNOTSUPP; + return rc; +} + +int cifs_setxattr(struct dentry * direntry, const char * name, + const void * value, size_t size, int flags) +{ + int rc = -EOPNOTSUPP; + return rc; +} + +ssize_t cifs_getxattr(struct dentry * direntry, const char * name, + void * value, size_t size) +{ + ssize_t rc = -EOPNOTSUPP; + return rc; +} + +ssize_t cifs_listxattr(struct dentry * direntry, char * ea_data, size_t ea_size) +{ + ssize_t rc = -EOPNOTSUPP; + + /* return dosattributes as pseudo xattr */ + /* return alt name if available as pseudo attr */ + + /* if proc/fs/cifs/streamstoxattr is set then + search server for EAs or streams to + returns as xattrs */ + + return rc; +} -- cgit v1.2.3 From 685bf7cbd81066f0cab63df2731bf079a2cc15ee Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Sun, 29 Jun 2003 20:10:43 -0700 Subject: PPC32: Minor KGDB updates. 
--- arch/ppc/kernel/ppc-stub.c | 10 ---------- arch/ppc/kernel/setup.c | 2 ++ 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/ppc/kernel/ppc-stub.c b/arch/ppc/kernel/ppc-stub.c index acc76c5f264b..0e8be4cb3706 100644 --- a/arch/ppc/kernel/ppc-stub.c +++ b/arch/ppc/kernel/ppc-stub.c @@ -851,14 +851,4 @@ kgdb_output_string (const char* s, unsigned int count) putpacket(buffer); return 1; - } - -#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) - -/* This is used on arches which don't have a serial driver that maps - * the ports for us */ -void -kgdb_map_scc(void) -{ } -#endif diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index a0f733b81d09..27be4e413106 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -619,6 +619,8 @@ void __init setup_arch(char **cmdline_p) if (strstr(cmd_line, "nokgdb")) printk("kgdb default breakpoint deactivated on command line\n"); else { + if (ppc_md.progress) + ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); printk("kgdb default breakpoint activated\n"); breakpoint(); } -- cgit v1.2.3 From f4e7c1c1bee32e27b27c9123d7cf5b4311150c9a Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Mon, 30 Jun 2003 18:34:14 -0700 Subject: PPC32: Add a backend for standard (ns1655x) UARTs for debuggers.
--- arch/ppc/syslib/Makefile | 5 +- arch/ppc/syslib/gen550_dbg.c | 174 ++++++++++++++++++++++++++++++++++++++++++ arch/ppc/syslib/gen550_kgdb.c | 84 ++++++++++++++++++++ 3 files changed, 262 insertions(+), 1 deletion(-) create mode 100644 arch/ppc/syslib/gen550_dbg.c create mode 100644 arch/ppc/syslib/gen550_kgdb.c diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile index 62bb56c8e6ce..eff4d506fdef 100644 --- a/arch/ppc/syslib/Makefile +++ b/arch/ppc/syslib/Makefile @@ -64,5 +64,8 @@ obj-$(CONFIG_SPRUCE) += cpc700_pic.o indirect_pci.o pci_auto.o \ obj-$(CONFIG_ZX4500) += indirect_pci.o pci_auto.o mpc10x_common.o \ i8259.o open_pic.o obj-$(CONFIG_8260) += m8260_setup.o ppc8260_pic.o +ifeq ($(CONFIG_SERIAL_8250)$(CONFIG_PPC_GEN550),yy) +obj-$(CONFIG_KGDB) += gen550_kgdb.o gen550_dbg.o +obj-$(CONFIG_SERIAL_TEXT_DEBUG) += gen550_dbg.o +endif obj-$(CONFIG_BOOTX_TEXT) += btext.o - diff --git a/arch/ppc/syslib/gen550_dbg.c b/arch/ppc/syslib/gen550_dbg.c new file mode 100644 index 000000000000..7fb0b41a241c --- /dev/null +++ b/arch/ppc/syslib/gen550_dbg.c @@ -0,0 +1,174 @@ +/* + * arch/ppc/syslib/gen550_dbg.c + * + * A library of polled 16550 serial routines. These are intended to + * be used to support progress messages, xmon, kgdb, etc. on a + * variety of platforms. + * + * Adapted from lots of code ripped from the arch/ppc/boot/ polled + * 16550 support. + * + * Author: Matt Porter + * + * 2002-2003 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. 
+ */ + +#include +#include /* For linux/serial_core.h */ +#include +#include +#include +#include +#include +#include + +#define SERIAL_BAUD 9600 + +static struct serial_state rs_table[RS_TABLE_SIZE] = { + SERIAL_PORT_DFNS /* defined in */ +}; + +static void (*serial_outb)(unsigned long, unsigned char); +static unsigned long (*serial_inb)(unsigned long); + +static int shift; + +unsigned long direct_inb(unsigned long addr) +{ + return readb(addr); +} + +void direct_outb(unsigned long addr, unsigned char val) +{ + writeb(val, addr); +} + +unsigned long io_inb(unsigned long port) +{ + return inb(port); +} + +void io_outb(unsigned long port, unsigned char val) +{ + outb(val, port); +} + +unsigned long serial_init(int chan, void *ignored) +{ + unsigned long com_port; + unsigned char lcr, dlm; + + /* We need to find out which type io we're expecting. If it's + * 'SERIAL_IO_PORT', we get an offset from the isa_io_base. + * If it's 'SERIAL_IO_MEM', we get the exact location. -- Tom */ + switch (rs_table[chan].io_type) { + case SERIAL_IO_PORT: + com_port = rs_table[chan].port; + serial_outb = io_outb; + serial_inb = io_inb; + break; + case SERIAL_IO_MEM: + com_port = (unsigned long)rs_table[chan].iomem_base; + serial_outb = direct_outb; + serial_inb = direct_inb; + break; + default: + /* We can't deal with it. */ + return -1; + } + + /* How far apart the registers are. */ + shift = rs_table[chan].iomem_reg_shift; + + /* save the LCR */ + lcr = serial_inb(com_port + (UART_LCR << shift)); + + /* Access baud rate */ + serial_outb(com_port + (UART_LCR << shift), UART_LCR_DLAB); + dlm = serial_inb(com_port + (UART_DLM << shift)); + + /* + * Test if serial port is unconfigured + * We assume that no-one uses less than 110 baud or + * less than 7 bits per character these days. + * -- paulus. + */ + if ((dlm <= 4) && (lcr & 2)) { + /* port is configured, put the old LCR back */ + serial_outb(com_port + (UART_LCR << shift), lcr); + } + else { + /* Input clock.
*/ + serial_outb(com_port + (UART_DLL << shift), + (rs_table[chan].baud_base / SERIAL_BAUD) & 0xFF); + serial_outb(com_port + (UART_DLM << shift), + (rs_table[chan].baud_base / SERIAL_BAUD) >> 8); + /* 8 data, 1 stop, no parity */ + serial_outb(com_port + (UART_LCR << shift), 0x03); + /* RTS/DTR */ + serial_outb(com_port + (UART_MCR << shift), 0x03); + + /* Clear & enable FIFOs */ + serial_outb(com_port + (UART_FCR << shift), 0x07); + } + + return (com_port); +} + +void +serial_putc(unsigned long com_port, unsigned char c) +{ + while ((serial_inb(com_port + (UART_LSR << shift)) & UART_LSR_THRE) == 0) + ; + serial_outb(com_port, c); +} + +unsigned char +serial_getc(unsigned long com_port) +{ + while ((serial_inb(com_port + (UART_LSR << shift)) & UART_LSR_DR) == 0) + ; + return serial_inb(com_port); +} + +int +serial_tstc(unsigned long com_port) +{ + return ((serial_inb(com_port + (UART_LSR << shift)) & UART_LSR_DR) != 0); +} + +void +serial_close(unsigned long com_port) +{ +} + +void +gen550_init(int i, struct uart_port *serial_req) +{ + rs_table[i].io_type = serial_req->iotype; + rs_table[i].port = serial_req->line; + rs_table[i].iomem_base = serial_req->membase; + rs_table[i].iomem_reg_shift = serial_req->regshift; +} + +#ifdef CONFIG_SERIAL_TEXT_DEBUG +void +gen550_progress(char *s, unsigned short hex) +{ + volatile unsigned int progress_debugport; + volatile char c; + + progress_debugport = serial_init(0, NULL); + + serial_putc(progress_debugport, '\r'); + + while ((c = *s++) != 0) + serial_putc(progress_debugport, c); + + serial_putc(progress_debugport, '\n'); + serial_putc(progress_debugport, '\r'); +} +#endif /* CONFIG_SERIAL_TEXT_DEBUG */ diff --git a/arch/ppc/syslib/gen550_kgdb.c b/arch/ppc/syslib/gen550_kgdb.c new file mode 100644 index 000000000000..93b374f25dc3 --- /dev/null +++ b/arch/ppc/syslib/gen550_kgdb.c @@ -0,0 +1,84 @@ +/* + * arch/ppc/syslib/gen550_kgdb.c + * + * Generic 16550 kgdb support intended to be useful on a variety + * of platforms. 
To enable this support, it is necessary to set + * the CONFIG_GEN550 option. Any virtual mapping of the serial + * port(s) to be used can be accomplished by setting + * ppc_md.early_serial_map to a platform-specific mapping function. + * + * Adapted from ppc4xx_kgdb.c. + * + * Author: Matt Porter + * + * 2002-2003 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ + +#include +#include +#include + +#include + +extern unsigned long serial_init(int, void *); +extern unsigned long serial_getc(unsigned long); +extern unsigned long serial_putc(unsigned long, unsigned char); + +#if defined(CONFIG_KGDB_TTYS0) +#define KGDB_PORT 0 +#elif defined(CONFIG_KGDB_TTYS1) +#define KGDB_PORT 1 +#elif defined(CONFIG_KGDB_TTYS2) +#define KGDB_PORT 2 +#elif defined(CONFIG_KGDB_TTYS3) +#define KGDB_PORT 3 +#else +#error "invalid kgdb_tty port" +#endif + +static volatile unsigned int kgdb_debugport; + +void putDebugChar(unsigned char c) +{ + if (kgdb_debugport == 0) + kgdb_debugport = serial_init(KGDB_PORT, NULL); + + serial_putc(kgdb_debugport, c); +} + +int getDebugChar(void) +{ + if (kgdb_debugport == 0) + kgdb_debugport = serial_init(KGDB_PORT, NULL); + + return(serial_getc(kgdb_debugport)); +} + +void kgdb_interruptible(int enable) +{ + return; +} + +void putDebugString(char* str) +{ + while (*str != '\0') { + putDebugChar(*str); + str++; + } + putDebugChar('\r'); + return; +} + +/* + * Note: gen550_init() must be called already on the port we are going + * to use. + */ +void +kgdb_map_scc(void) +{ + printk(KERN_DEBUG "kgdb init\n"); + kgdb_debugport = serial_init(KGDB_PORT, NULL); +} -- cgit v1.2.3 From 99b31e61112b5f0776f86dca3af8c4f78913816d Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Mon, 30 Jun 2003 20:56:56 -0700 Subject: PPC32: Update the Motorola Sandpoint support. 
This removes support for the X2 version of the board, and has a number of cleanups and enhancements. --- arch/ppc/Kconfig | 15 +- arch/ppc/configs/sandpoint_defconfig | 22 +- arch/ppc/platforms/Makefile | 2 +- arch/ppc/platforms/sandpoint.c | 719 ++++++++++++++++++++++++++++++++++ arch/ppc/platforms/sandpoint.h | 56 +-- arch/ppc/platforms/sandpoint_pci.c | 181 --------- arch/ppc/platforms/sandpoint_serial.h | 49 --- arch/ppc/platforms/sandpoint_setup.c | 643 ------------------------------ include/asm-ppc/serial.h | 2 +- 9 files changed, 778 insertions(+), 911 deletions(-) create mode 100644 arch/ppc/platforms/sandpoint.c delete mode 100644 arch/ppc/platforms/sandpoint_pci.c delete mode 100644 arch/ppc/platforms/sandpoint_serial.h delete mode 100644 arch/ppc/platforms/sandpoint_setup.c diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 9ade9f30eabb..a9e3f85e4a7f 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -473,6 +473,10 @@ config PRPMC800 config SANDPOINT bool "Motorola-Sandpoint" + help + Select SANDPOINT if configuring for a Motorola Sandpoint X3 + or X3(b). + X3 (any flavor). 
config ADIR bool "SBS-Adirondack" @@ -500,6 +504,11 @@ config PPC_CHRP depends on PPC_MULTIPLATFORM default y +config PPC_GEN550 + bool + depends on SANDPOINT + default y + config PPC_PMAC bool depends on PPC_MULTIPLATFORM @@ -515,10 +524,6 @@ config PPC_OF depends on PPC_PMAC || PPC_CHRP default y -config SANDPOINT_X3 - bool "Sandpoint X3" - depends on SANDPOINT - config FORCE bool depends on 6xx && !8260 && (PCORE || POWERPMC250) @@ -526,7 +531,7 @@ config FORCE config EPIC_SERIAL_MODE bool - depends on 6xx && !8260 && (LOPEC || SANDPOINT_X3) + depends on 6xx && !8260 && (LOPEC || SANDPOINT) default y config WILLOW diff --git a/arch/ppc/configs/sandpoint_defconfig b/arch/ppc/configs/sandpoint_defconfig index a7b1f4bbbf00..a7106cc0ec7b 100644 --- a/arch/ppc/configs/sandpoint_defconfig +++ b/arch/ppc/configs/sandpoint_defconfig @@ -68,7 +68,7 @@ CONFIG_SANDPOINT=y # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set # CONFIG_ZX4500 is not set -CONFIG_SANDPOINT_X3=y +CONFIG_PPC_GEN550=y CONFIG_EPIC_SERIAL_MODE=y # CONFIG_MPC10X_STORE_GATHERING is not set # CONFIG_SMP is not set @@ -84,8 +84,8 @@ CONFIG_ALTIVEC=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y CONFIG_KCORE_ELF=y -CONFIG_BINFMT_ELF=y CONFIG_KERNEL_ELF=y +CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m # CONFIG_PCI_LEGACY_PROC is not set # CONFIG_PCI_NAMES is not set @@ -117,6 +117,11 @@ CONFIG_KERNEL_START=0xc0000000 CONFIG_TASK_SIZE=0x80000000 CONFIG_BOOT_LOAD=0x00800000 +# +# Generic Driver Options +# +# CONFIG_FW_LOADER is not set + # # Memory Technology Devices (MTD) # @@ -147,7 +152,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_MD is not set # -# ATA/IDE/MFM/RLL support +# ATA/ATAPI/MFM/RLL support # CONFIG_IDE=y @@ -166,6 +171,7 @@ CONFIG_BLK_DEV_IDEDISK=y CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_TASKFILE_IO=y # # IDE chipset support/bugfixes @@ -173,7 +179,7 @@ CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDEPCI is not set # -# SCSI support +# SCSI device support 
# # CONFIG_SCSI is not set @@ -292,6 +298,7 @@ CONFIG_NET_PCI=y # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set +# CONFIG_TC35815 is not set # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y @@ -412,11 +419,6 @@ CONFIG_FONT_8x16=y # # CONFIG_LOGO is not set -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - # # Input device support # @@ -443,6 +445,7 @@ CONFIG_SERIO=y CONFIG_SERIO_I8042=y # CONFIG_SERIO_SERPORT is not set # CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set # # Input Device Drivers @@ -731,6 +734,7 @@ CONFIG_USB_SE401=m # # USB Network adaptors # +# CONFIG_USB_AX8817X is not set # CONFIG_USB_CATC is not set # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set diff --git a/arch/ppc/platforms/Makefile b/arch/ppc/platforms/Makefile index 263a40b0563a..4c2772eb44f5 100644 --- a/arch/ppc/platforms/Makefile +++ b/arch/ppc/platforms/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_POWERPMC250) += powerpmc250.o obj-$(CONFIG_PPLUS) += pplus_pci.o pplus_setup.o obj-$(CONFIG_PRPMC750) += prpmc750_setup.o prpmc750_pci.o obj-$(CONFIG_PRPMC800) += prpmc800_setup.o prpmc800_pci.o -obj-$(CONFIG_SANDPOINT) += sandpoint_setup.o sandpoint_pci.o +obj-$(CONFIG_SANDPOINT) += sandpoint.o obj-$(CONFIG_SPRUCE) += spruce_setup.o spruce_pci.o obj-$(CONFIG_ZX4500) += zx4500_setup.o zx4500_pci.o diff --git a/arch/ppc/platforms/sandpoint.c b/arch/ppc/platforms/sandpoint.c new file mode 100644 index 000000000000..5c3e87fd081e --- /dev/null +++ b/arch/ppc/platforms/sandpoint.c @@ -0,0 +1,719 @@ +/* + * arch/ppc/platforms/sandpoint_setup.c + * + * Board setup routines for the Motorola SPS Sandpoint Test Platform. + * + * Author: Mark A. Greer + * mgreer@mvista.com + * + * 2000-2003 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. 
This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ + +/* + * This file adds support for the Motorola SPS Sandpoint Test Platform. + * These boards have a PPMC slot for the processor so any combination + * of cpu and host bridge can be attached. This port is for an 8240 PPMC + * module from Motorola SPS and other closely related cpu/host bridge + * combinations (e.g., 750/755/7400 with MPC107 host bridge). + * The sandpoint itself has a Winbond 83c553 (PCI-ISA bridge, 2 DMA ctlrs, 2 + * cascaded 8259 interrupt ctlrs, 8254 Timer/Counter, and an IDE ctlr), a + * National 87308 (RTC, 2 UARTs, Keyboard & mouse ctlrs, and a floppy ctlr), + * and 4 PCI slots (only 2 of which are usable; the other 2 are keyed for 3.3V + * but are really 5V). + * + * The firmware on the sandpoint is called DINK (not my acronym :). This port + * depends on DINK to do some basic initialization (e.g., initialize the memory + * ctlr) and to ensure that the processor is using MAP B (CHRP map). + * + * The switch settings for the Sandpoint board MUST be as follows: + * S3: down + * S4: up + * S5: up + * S6: down + * + * 'down' is in the direction from the PCI slots towards the PPMC slot; + * 'up' is in the direction from the PPMC slot towards the PCI slots. + * Be careful, the way the sandpoint board is installed in XT chassis will + * make the directions reversed. + * + * Since Motorola listened to our suggestions for improvement, we now have + * the Sandpoint X3 board. All of the PCI slots are available, it uses + * the serial interrupt interface (just a hardware thing we need to + * configure properly). + * + * Use the default X3 switch settings.
The interrupts are then: + * EPIC Source + * 0 SIOINT (8259, active low) + * 1 PCI #1 + * 2 PCI #2 + * 3 PCI #3 + * 4 PCI #4 + * 7 Winbond INTC (IDE interrupt) + * 8 Winbond INTD (IDE interrupt) + * + * + * Motorola has finally released a version of DINK32 that correctly + * (seemingly) initializes the memory controller, regardless + * of the amount of memory in the system. Once a method of determining + * what version of DINK initializes the system for us, if applicable, is + * found, we can hopefully stop hardcoding 32MB of RAM. + * + * It is important to note that this code only supports the Sandpoint X3 + * (all flavors) platform, and it does not support the X2 anymore. Code + * that at one time worked on the X2 can be found at: + * ftp://source.mvista.com/pub/linuxppc/obsolete/sandpoint/ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for linux/serial_core.h */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sandpoint.h" + +extern void gen550_progress(char *, unsigned short); +extern void gen550_init(int, struct uart_port *); + +unsigned char __res[sizeof(bd_t)]; + +static void sandpoint_halt(void); + +/* + * Define all of the IRQ senses and polarities. Taken from the + * Sandpoint X3 User's manual.
+ */ +static u_char sandpoint_openpic_initsenses[] __initdata = { + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 0: SIOINT */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 2: PCI Slot 1 */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 3: PCI Slot 2 */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 4: PCI Slot 3 */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 5: PCI Slot 4 */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 8: IDE (INT C) */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE) /* 9: IDE (INT D) */ +}; + +/* + * Motorola SPS Sandpoint interrupt routing. + */ +static inline int +sandpoint_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) +{ + static char pci_irq_table[][4] = + /* + * PCI IDSEL/INTPIN->INTLINE + * A B C D + */ + { + { 16, 0, 0, 0 }, /* IDSEL 11 - i8259 on Winbond */ + { 0, 0, 0, 0 }, /* IDSEL 12 - unused */ + { 18, 21, 20, 19 }, /* IDSEL 13 - PCI slot 1 */ + { 19, 18, 21, 20 }, /* IDSEL 14 - PCI slot 2 */ + { 20, 19, 18, 21 }, /* IDSEL 15 - PCI slot 3 */ + { 21, 20, 19, 18 }, /* IDSEL 16 - PCI slot 4 */ + }; + + const long min_idsel = 11, max_idsel = 16, irqs_per_slot = 4; + return PCI_IRQ_TABLE_LOOKUP; +} + +static void __init +sandpoint_setup_winbond_83553(struct pci_controller *hose) +{ + int devfn; + + /* + * Route IDE interrupts directly to the 8259's IRQ 14 & 15. + * We can't route the IDE interrupt to PCI INTC# or INTD# because those + * would interfere with the PMC's INTC# and INTD# lines. + */ + /* + * Winbond Fcn 0 + */ + devfn = PCI_DEVFN(11,0); + + early_write_config_byte(hose, + 0, + devfn, + 0x43, /* IDE Interrupt Routing Control */ + 0xef); + early_write_config_word(hose, + 0, + devfn, + 0x44, /* PCI Interrupt Routing Control */ + 0x0000); + + /* Want ISA memory cycles to be forwarded to PCI bus */ + early_write_config_byte(hose, + 0, + devfn, + 0x48, /* ISA-to-PCI Addr Decoder Control */ + 0xf0); + + /* Enable RTC and Keyboard address locations.
*/ + early_write_config_byte(hose, + 0, + devfn, + 0x4d, /* Chip Select Control Register */ + 0x00); + + /* Enable Port 92. */ + early_write_config_byte(hose, + 0, + devfn, + 0x4e, /* AT System Control Register */ + 0x06); + /* + * Winbond Fcn 1 + */ + devfn = PCI_DEVFN(11,1); + + /* Put IDE controller into native mode. */ + early_write_config_byte(hose, + 0, + devfn, + 0x09, /* Programming interface Register */ + 0x8f); + + /* Init IRQ routing, enable both ports, disable fast 16 */ + early_write_config_dword(hose, + 0, + devfn, + 0x40, /* IDE Control/Status Register */ + 0x00ff0011); + return; +} + +static void __init +sandpoint_find_bridges(void) +{ + struct pci_controller *hose; + + hose = pcibios_alloc_controller(); + + if (!hose) + return; + + hose->first_busno = 0; + hose->last_busno = 0xff; + + if (mpc10x_bridge_init(hose, + MPC10X_MEM_MAP_B, + MPC10X_MEM_MAP_B, + MPC10X_MAPB_EUMB_BASE) == 0) { + + /* Do early winbond init, then scan PCI bus */ + sandpoint_setup_winbond_83553(hose); + hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); + + ppc_md.pcibios_fixup = NULL; + ppc_md.pcibios_fixup_bus = NULL; + ppc_md.pci_swizzle = common_swizzle; + ppc_md.pci_map_irq = sandpoint_map_irq; + } + else { + if (ppc_md.progress) + ppc_md.progress("Bridge init failed", 0x100); + printk("Host bridge init failed\n"); + } + + return; +} + +#if defined(CONFIG_SERIAL_8250) && \ + (defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)) +static void __init +sandpoint_early_serial_map(void) +{ + struct uart_port serial_req; + + /* Setup serial port access */ + memset(&serial_req, 0, sizeof(serial_req)); + serial_req.uartclk = UART_CLK; + serial_req.irq = 4; + serial_req.flags = STD_COM_FLAGS; + serial_req.iotype = SERIAL_IO_MEM; + serial_req.membase = (u_char *)SANDPOINT_SERIAL_0; + + gen550_init(0, &serial_req); + + if (early_serial_setup(&serial_req) != 0) + printk(KERN_ERR "Early serial init of port 0 failed\n"); + + /* Assume early_serial_setup() doesn't modify 
serial_req */ + serial_req.line = 1; + serial_req.irq = 3; /* XXXX */ + serial_req.membase = (u_char *)SANDPOINT_SERIAL_1; + + gen550_init(1, &serial_req); + + if (early_serial_setup(&serial_req) != 0) + printk(KERN_ERR "Early serial init of port 1 failed\n"); +} +#endif + +static void __init +sandpoint_setup_arch(void) +{ + loops_per_jiffy = 100000000 / HZ; + +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + ROOT_DEV = Root_RAM0; + else +#endif +#ifdef CONFIG_ROOT_NFS + ROOT_DEV = Root_NFS; +#else + ROOT_DEV = Root_HDA1; +#endif + + /* Lookup PCI host bridges */ + sandpoint_find_bridges(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + printk(KERN_INFO "Motorola SPS Sandpoint Test Platform\n"); + printk(KERN_INFO "Port by MontaVista Software, Inc. (source@mvista.com)\n"); + + /* DINK32 12.3 and below do not correctly enable any caches. + * We will do this now with good known values. Future versions + * of DINK32 are supposed to get this correct. + */ + if (cur_cpu_spec[0]->cpu_features & CPU_FTR_SPEC7450) + /* 745x is different. We only want to pass along enable. */ + _set_L2CR(L2CR_L2E); + else if (cur_cpu_spec[0]->cpu_features & CPU_FTR_L2CR) + /* All modules have 1MB of L2. We also assume that an + * L2 divisor of 3 will work. + */ + _set_L2CR(L2CR_L2E | L2CR_L2SIZ_1MB | L2CR_L2CLK_DIV3 + | L2CR_L2RAM_PIPE | L2CR_L2OH_1_0 | L2CR_L2DF); +#if 0 + /* Untested right now. */ + if (cur_cpu_spec[0]->cpu_features & CPU_FTR_L3CR) { + /* Magic value. 
*/ + _set_L3CR(0x8f032000); + } +#endif +} + +#define SANDPOINT_87308_CFG_ADDR 0x15c +#define SANDPOINT_87308_CFG_DATA 0x15d + +#define SANDPOINT_87308_CFG_INB(addr, byte) { \ + outb((addr), SANDPOINT_87308_CFG_ADDR); \ + (byte) = inb(SANDPOINT_87308_CFG_DATA); \ +} + +#define SANDPOINT_87308_CFG_OUTB(addr, byte) { \ + outb((addr), SANDPOINT_87308_CFG_ADDR); \ + outb((byte), SANDPOINT_87308_CFG_DATA); \ +} + +#define SANDPOINT_87308_SELECT_DEV(dev_num) { \ + SANDPOINT_87308_CFG_OUTB(0x07, (dev_num)); \ +} + +#define SANDPOINT_87308_DEV_ENABLE(dev_num) { \ + SANDPOINT_87308_SELECT_DEV(dev_num); \ + SANDPOINT_87308_CFG_OUTB(0x30, 0x01); \ +} + +/* + * Initialize the ISA devices on the Nat'l PC87308VUL SuperIO chip. + */ +static int __init +sandpoint_setup_natl_87308(void) +{ + u_char reg; + + /* + * Enable all the devices on the Super I/O chip. + */ + SANDPOINT_87308_SELECT_DEV(0x00); /* Select kbd logical device */ + SANDPOINT_87308_CFG_OUTB(0xf0, 0x00); /* Set KBC clock to 8 Mhz */ + SANDPOINT_87308_DEV_ENABLE(0x00); /* Enable keyboard */ + SANDPOINT_87308_DEV_ENABLE(0x01); /* Enable mouse */ + SANDPOINT_87308_DEV_ENABLE(0x02); /* Enable rtc */ + SANDPOINT_87308_DEV_ENABLE(0x03); /* Enable fdc (floppy) */ + SANDPOINT_87308_DEV_ENABLE(0x04); /* Enable parallel */ + SANDPOINT_87308_DEV_ENABLE(0x05); /* Enable UART 2 */ + SANDPOINT_87308_CFG_OUTB(0xf0, 0x82); /* Enable bank select regs */ + SANDPOINT_87308_DEV_ENABLE(0x06); /* Enable UART 1 */ + SANDPOINT_87308_CFG_OUTB(0xf0, 0x82); /* Enable bank select regs */ + + /* Set up floppy in PS/2 mode */ + outb(0x09, SIO_CONFIG_RA); + reg = inb(SIO_CONFIG_RD); + reg = (reg & 0x3F) | 0x40; + outb(reg, SIO_CONFIG_RD); + outb(reg, SIO_CONFIG_RD); /* Have to write twice to change! */ + + return 0; +} + +arch_initcall(sandpoint_setup_natl_87308); + +/* + * Fix IDE interrupts. 
+ */ +static int __init +sandpoint_fix_winbond_83553(void) +{ + /* Make all 8259 interrupt level sensitive */ + outb(0xf8, 0x4d0); + outb(0xde, 0x4d1); + + return 0; +} + +arch_initcall(sandpoint_fix_winbond_83553); + +static int __init +sandpoint_request_io(void) +{ + request_region(0x00,0x20,"dma1"); + request_region(0x20,0x20,"pic1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xa0,0x20,"pic2"); + request_region(0xc0,0x20,"dma2"); + + return 0; +} + +arch_initcall(sandpoint_request_io); + +/* + * Interrupt setup and service. Interrupts on the Sandpoint come + * from the four PCI slots plus the 8259 in the Winbond Super I/O (SIO). + * The 8259 is cascaded from EPIC IRQ0, IRQ1-4 map to PCI slots 1-4, + * IDE is on EPIC 7 and 8. + */ +static void __init +sandpoint_init_IRQ(void) +{ + int i; + + OpenPIC_InitSenses = sandpoint_openpic_initsenses; + OpenPIC_NumInitSenses = sizeof(sandpoint_openpic_initsenses); + + /* + * We need to tell openpic_set_sources where things actually are. + * mpc10x_common will setup OpenPIC_Addr at ioremap(EUMB phys base + + * EPIC offset (0x40000)); The EPIC IRQ Register Address Map - + * Interrupt Source Configuration Registers gives these numbers + * as offsets starting at 0x50200, we need to adjust accordingly. + */ + /* Map serial interrupts 0-15 */ + openpic_set_sources(0, 16, OpenPIC_Addr + 0x10200); + + openpic_init(NUM_8259_INTERRUPTS); + + /* + * openpic_init() has set up irq_desc[16-31] to be openpic + * interrupts. We need to set irq_desc[0-15] to be i8259 + * interrupts. + */ + for(i=0; i < NUM_8259_INTERRUPTS; i++) + irq_desc[i].handler = &i8259_pic; + + /* + * The EPIC allows for a read in the range of 0xFEF00000 -> + * 0xFEFFFFFF to generate a PCI interrupt-acknowledge transaction.
+ */ + i8259_init(0xfef00000); +} + +static u32 +sandpoint_irq_canonicalize(u32 irq) +{ + if (irq == 2) + return 9; + else + return irq; +} + +static unsigned long __init +sandpoint_find_end_of_memory(void) +{ + bd_t *bp = (bd_t *)__res; + + if (bp->bi_memsize) + return bp->bi_memsize; + + /* DINK32 13.0 correctly initializes things, so iff you use + * this you _should_ be able to change this instead of a + * hardcoded value. */ +#if 0 + return mpc10x_get_mem_size(MPC10X_MEM_MAP_B); +#else + return 32*1024*1024; +#endif +} + +static void __init +sandpoint_map_io(void) +{ + io_block_mapping(0xfe000000, 0xfe000000, 0x02000000, _PAGE_IO); +} + +static void +sandpoint_restart(char *cmd) +{ + local_irq_disable(); + + /* Set exception prefix high - to the firmware */ + _nmask_and_or_msr(0, MSR_IP); + + /* Reset system via Port 92 */ + outb(0x00, 0x92); + outb(0x01, 0x92); + for(;;); /* Spin until reset happens */ +} + +static void +sandpoint_power_off(void) +{ + local_irq_disable(); + for(;;); /* No way to shut power off with software */ + /* NOTREACHED */ +} + +static void +sandpoint_halt(void) +{ + sandpoint_power_off(); + /* NOTREACHED */ +} + +static int +sandpoint_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: Motorola SPS\n"); + seq_printf(m, "machine\t\t: Sandpoint\n"); + + return 0; +} + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +/* + * IDE support.
+ */ +static int sandpoint_ide_ports_known = 0; +static unsigned long sandpoint_ide_regbase[MAX_HWIFS]; +static unsigned long sandpoint_ide_ctl_regbase[MAX_HWIFS]; +static unsigned long sandpoint_idedma_regbase; + +static void +sandpoint_ide_probe(void) +{ + struct pci_dev *pdev = pci_find_device(PCI_VENDOR_ID_WINBOND, + PCI_DEVICE_ID_WINBOND_82C105, NULL); + + if (pdev) { + sandpoint_ide_regbase[0]=pdev->resource[0].start; + sandpoint_ide_regbase[1]=pdev->resource[2].start; + sandpoint_ide_ctl_regbase[0]=pdev->resource[1].start; + sandpoint_ide_ctl_regbase[1]=pdev->resource[3].start; + sandpoint_idedma_regbase=pdev->resource[4].start; + } + + sandpoint_ide_ports_known = 1; +} + +static int +sandpoint_ide_default_irq(unsigned long base) +{ + if (sandpoint_ide_ports_known == 0) + sandpoint_ide_probe(); + + if (base == sandpoint_ide_regbase[0]) + return SANDPOINT_IDE_INT0; + else if (base == sandpoint_ide_regbase[1]) + return SANDPOINT_IDE_INT1; + else + return 0; +} + +static unsigned long +sandpoint_ide_default_io_base(int index) +{ + if (sandpoint_ide_ports_known == 0) + sandpoint_ide_probe(); + + return sandpoint_ide_regbase[index]; +} + +static void __init +sandpoint_ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + uint alt_status_base; + int i; + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg++; + } + + if (data_port == sandpoint_ide_regbase[0]) { + alt_status_base = sandpoint_ide_ctl_regbase[0] + 2; + hw->irq = 14; + } + else if (data_port == sandpoint_ide_regbase[1]) { + alt_status_base = sandpoint_ide_ctl_regbase[1] + 2; + hw->irq = 15; + } + else { + alt_status_base = 0; + hw->irq = 0; + } + + if (ctrl_port) { + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + } else { + hw->io_ports[IDE_CONTROL_OFFSET] = alt_status_base; + } + + if (irq != NULL) { + *irq = hw->irq; + } +} +#endif + +/* + * Set BAT 3 to map 0xf8000000 to end of physical 
memory space 1-to-1. + */ +static __inline__ void +sandpoint_set_bat(void) +{ + unsigned long bat3u, bat3l; + + __asm__ __volatile__( + " lis %0,0xf800\n \ + ori %1,%0,0x002a\n \ + ori %0,%0,0x0ffe\n \ + mtspr 0x21e,%0\n \ + mtspr 0x21f,%1\n \ + isync\n \ + sync " + : "=r" (bat3u), "=r" (bat3l)); +} + +TODC_ALLOC(); + +void __init +platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + parse_bootinfo(find_bootinfo()); + + /* ASSUMPTION: If both r3 (bd_t pointer) and r6 (cmdline pointer) + * are non-zero, then we should use the board info from the bd_t + * structure and the cmdline pointed to by r6 instead of the + * information from birecs, if any. Otherwise, use the information + * from birecs as discovered by the preceding call to + * parse_bootinfo(). This rule should work with both PPCBoot, which + * uses a bd_t board info structure, and the kernel boot wrapper, + * which uses birecs. + */ + if (r3 && r6) { + /* copy board info structure */ + memcpy( (void *)__res,(void *)(r3+KERNELBASE), sizeof(bd_t) ); + /* copy command line */ + *(char *)(r7+KERNELBASE) = 0; + strcpy(cmd_line, (char *)(r6+KERNELBASE)); + } + +#ifdef CONFIG_BLK_DEV_INITRD + /* take care of initrd if we have one */ + if (r4) { + initrd_start = r4 + KERNELBASE; + initrd_end = r5 + KERNELBASE; + } +#endif /* CONFIG_BLK_DEV_INITRD */ + + /* Map in board regs, etc.
*/ + sandpoint_set_bat(); + + isa_io_base = MPC10X_MAPB_ISA_IO_BASE; + isa_mem_base = MPC10X_MAPB_ISA_MEM_BASE; + pci_dram_offset = MPC10X_MAPB_DRAM_OFFSET; + ISA_DMA_THRESHOLD = 0x00ffffff; + DMA_MODE_READ = 0x44; + DMA_MODE_WRITE = 0x48; + + ppc_md.setup_arch = sandpoint_setup_arch; + ppc_md.show_cpuinfo = sandpoint_show_cpuinfo; + ppc_md.irq_canonicalize = sandpoint_irq_canonicalize; + ppc_md.init_IRQ = sandpoint_init_IRQ; + ppc_md.get_irq = openpic_get_irq; + + ppc_md.restart = sandpoint_restart; + ppc_md.power_off = sandpoint_power_off; + ppc_md.halt = sandpoint_halt; + + ppc_md.find_end_of_memory = sandpoint_find_end_of_memory; + ppc_md.setup_io_mappings = sandpoint_map_io; + + TODC_INIT(TODC_TYPE_PC97307, 0x70, 0x00, 0x71, 8); + ppc_md.time_init = todc_time_init; + ppc_md.set_rtc_time = todc_set_rtc_time; + ppc_md.get_rtc_time = todc_get_rtc_time; + ppc_md.calibrate_decr = todc_calibrate_decr; + + ppc_md.nvram_read_val = todc_mc146818_read_val; + ppc_md.nvram_write_val = todc_mc146818_write_val; + +#if defined(CONFIG_SERIAL_8250) && \ + (defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)) + sandpoint_early_serial_map(); +#ifdef CONFIG_SERIAL_TEXT_DEBUG + ppc_md.progress = gen550_progress; +#endif +#endif + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) + ppc_ide_md.default_irq = sandpoint_ide_default_irq; + ppc_ide_md.default_io_base = sandpoint_ide_default_io_base; + ppc_ide_md.ide_init_hwif = sandpoint_ide_init_hwif_ports; +#endif +} diff --git a/arch/ppc/platforms/sandpoint.h b/arch/ppc/platforms/sandpoint.h index e1a5b36c5de4..bdb7fb83f155 100644 --- a/arch/ppc/platforms/sandpoint.h +++ b/arch/ppc/platforms/sandpoint.h @@ -6,7 +6,7 @@ * Author: Mark A. Greer * mgreer@mvista.com * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under + * 2000-2003 (c) MontaVista, Software, Inc. This file is licensed under * the terms of the GNU General Public License version 2. 
This program * is licensed "as is" without any warranty of any kind, whether express * or implied. @@ -19,8 +19,8 @@ #ifndef __PPC_PLATFORMS_SANDPOINT_H #define __PPC_PLATFORMS_SANDPOINT_H -#ifdef CONFIG_SANDPOINT_X3 -#define SANDPOINT_SIO_SLOT 0 /* Cascaded from EPIC IRQ 0 */ +#include + #if 0 /* The Sandpoint X3 allows the IDE interrupt to be directly connected * from the Windbond (PCI INTC or INTD) to the serial EPIC. Someday @@ -28,27 +28,13 @@ * initialization than change it to route the different interrupts :-). * -- Dan */ -#define SANDPOINT_IDE_INT0 23 /* EPIC 7 */ -#define SANDPOINT_IDE_INT1 24 /* EPIC 8 */ -#else -#define SANDPOINT_IDE_INT0 14 /* 8259 Test */ -#define SANDPOINT_IDE_INT1 15 /* 8259 Test */ -#endif +#define SANDPOINT_IDE_INT0 23 /* EPIC 7 */ +#define SANDPOINT_IDE_INT1 24 /* EPIC 8 */ #else - /* - * Define the PCI slot that the 8259 is sharing interrupts with. - * Valid values are 1 (PCI slot 2) and 2 (PCI slot 3). - */ -#define SANDPOINT_SIO_SLOT 1 - -/* ...and for the IDE from the 8259.... -*/ -#define SANDPOINT_IDE_INT0 14 -#define SANDPOINT_IDE_INT1 15 +#define SANDPOINT_IDE_INT0 14 /* 8259 Test */ +#define SANDPOINT_IDE_INT1 15 /* 8259 Test */ #endif -#define SANDPOINT_SIO_IRQ (SANDPOINT_SIO_SLOT + NUM_8259_INTERRUPTS) - /* * The sandpoint boards have processor modules that either have an 8240 or * an MPC107 host bridge on them. These bridges have an IDSEL line that allows @@ -62,7 +48,33 @@ */ #define SANDPOINT_HOST_BRIDGE_IDSEL 12 +/* + * Serial defines. 
+ */ +#define SANDPOINT_SERIAL_0 0xfe0003f8 +#define SANDPOINT_SERIAL_1 0xfe0002f8 + +#define RS_TABLE_SIZE 2 + +/* Rate for the 1.8432 Mhz clock for the onboard serial chip */ +#define BASE_BAUD ( 1843200 / 16 ) +#define UART_CLK 1843200 + +#ifdef CONFIG_SERIAL_DETECT_IRQ +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST|ASYNC_AUTO_IRQ) +#else +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST) +#endif + +#define STD_SERIAL_PORT_DFNS \ + { 0, BASE_BAUD, SANDPOINT_SERIAL_0, 4, STD_COM_FLAGS, /* ttyS0 */ \ + iomem_base: (u8 *)SANDPOINT_SERIAL_0, \ + io_type: SERIAL_IO_MEM }, \ + { 0, BASE_BAUD, SANDPOINT_SERIAL_1, 3, STD_COM_FLAGS, /* ttyS1 */ \ + iomem_base: (u8 *)SANDPOINT_SERIAL_1, \ + io_type: SERIAL_IO_MEM }, -void sandpoint_find_bridges(void); +#define SERIAL_PORT_DFNS \ + STD_SERIAL_PORT_DFNS #endif /* __PPC_PLATFORMS_SANDPOINT_H */ diff --git a/arch/ppc/platforms/sandpoint_pci.c b/arch/ppc/platforms/sandpoint_pci.c deleted file mode 100644 index e585637f05f4..000000000000 --- a/arch/ppc/platforms/sandpoint_pci.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * arch/ppc/platforms/sandpoint_pci.c - * - * PCI setup routines for the Motorola SPS Sandpoint Test Platform - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "sandpoint.h" - -/* - * Motorola SPS Sandpoint interrupt routing. 
- */ -static inline int -sandpoint_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) -{ - static char pci_irq_table[][4] = - /* - * PCI IDSEL/INTPIN->INTLINE - * A B C D - */ - { - { SANDPOINT_SIO_IRQ, - 0, 0, 0 }, /* IDSEL 11 - i8259 on Winbond */ - { 0, 0, 0, 0 }, /* IDSEL 12 - unused */ -#ifdef CONFIG_SANDPOINT_X3 -#if 0 /* This is what it _should_ look like -- Dan */ - { 17, 20, 19, 18 }, /* IDSEL 13 - PCI slot 1 */ - { 18, 17, 20, 19 }, /* IDSEL 14 - PCI slot 2 */ - { 19, 18, 17, 20 }, /* IDSEL 15 - PCI slot 3 */ - { 20, 19, 18, 17 }, /* IDSEL 16 - PCI slot 4 */ -#else - { 18, 21, 20, 19 }, /* IDSEL 13 - PCI slot 1 */ - { 19, 18, 21, 20 }, /* IDSEL 14 - PCI slot 2 */ - { 20, 19, 18, 21 }, /* IDSEL 15 - PCI slot 3 */ - { 21, 20, 19, 18 }, /* IDSEL 16 - PCI slot 4 */ -#endif -#else - { 16, 19, 18, 17 }, /* IDSEL 13 - PCI slot 1 */ - { 17, 16, 19, 18 }, /* IDSEL 14 - PCI slot 2 */ - { 18, 17, 16, 19 }, /* IDSEL 15 - PCI slot 3 */ - { 19, 18, 17, 16 }, /* IDSEL 16 - PCI slot 4 */ -#endif - }; - - const long min_idsel = 11, max_idsel = 16, irqs_per_slot = 4; - return PCI_IRQ_TABLE_LOOKUP; -} - -static void __init -sandpoint_setup_winbond_83553(struct pci_controller *hose) -{ - int devfn; - - /* - * Route IDE interrupts directly to the 8259's IRQ 14 & 15. - * We can't route the IDE interrupt to PCI INTC# or INTD# because those - * woule interfere with the PMC's INTC# and INTD# lines. - */ - /* - * Winbond Fcn 0 - */ - devfn = PCI_DEVFN(11,0); - - early_write_config_byte(hose, - 0, - devfn, - 0x43, /* IDE Interrupt Routing Control */ - 0xef); - early_write_config_word(hose, - 0, - devfn, - 0x44, /* PCI Interrupt Routing Control */ - 0x0000); - - /* Want ISA memory cycles to be forwarded to PCI bus */ - early_write_config_byte(hose, - 0, - devfn, - 0x48, /* ISA-to-PCI Addr Decoder Control */ - 0xf0); - - /* Enable RTC and Keyboard address locations. 
*/ - early_write_config_byte(hose, - 0, - devfn, - 0x4d, /* Chip Select Control Register */ - 0x00); - - /* Enable Port 92. */ - early_write_config_byte(hose, - 0, - devfn, - 0x4e, /* AT System Control Register */ - 0x06); - /* - * Winbond Fcn 1 - */ - devfn = PCI_DEVFN(11,1); - - /* Put IDE controller into native mode. */ - early_write_config_byte(hose, - 0, - devfn, - 0x09, /* Programming interface Register */ - 0x8f); - - /* Init IRQ routing, enable both ports, disable fast 16 */ - early_write_config_dword(hose, - 0, - devfn, - 0x40, /* IDE Control/Status Register */ - 0x00ff0011); - return; -} - -static int -sandpoint_exclude_device(u_char bus, u_char devfn) -{ - if ((bus == 0) && (PCI_SLOT(devfn) == SANDPOINT_HOST_BRIDGE_IDSEL)) { - return PCIBIOS_DEVICE_NOT_FOUND; - } - else { - return PCIBIOS_SUCCESSFUL; - } -} - -void __init -sandpoint_find_bridges(void) -{ - struct pci_controller *hose; - - hose = pcibios_alloc_controller(); - - if (!hose) - return; - - hose->first_busno = 0; - hose->last_busno = 0xff; - - if (mpc10x_bridge_init(hose, - MPC10X_MEM_MAP_B, - MPC10X_MEM_MAP_B, - MPC10X_MAPB_EUMB_BASE) == 0) { - - /* Do early winbond init, then scan PCI bus */ - sandpoint_setup_winbond_83553(hose); - ppc_md.pci_exclude_device = sandpoint_exclude_device; - hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); - - ppc_md.pcibios_fixup = NULL; - ppc_md.pcibios_fixup_bus = NULL; - ppc_md.pci_swizzle = common_swizzle; - ppc_md.pci_map_irq = sandpoint_map_irq; - } - else { - if (ppc_md.progress) - ppc_md.progress("Bridge init failed", 0x100); - printk("Host bridge init failed\n"); - } - - return; -} diff --git a/arch/ppc/platforms/sandpoint_serial.h b/arch/ppc/platforms/sandpoint_serial.h deleted file mode 100644 index 0981c415fa57..000000000000 --- a/arch/ppc/platforms/sandpoint_serial.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * include/asm-ppc/sandpoint_serial.h - * - * Definitions for Motorola SPS Sandpoint Test Platform - * - * Author: Mark A. 
Greer - * mgreer@mvista.com - * - * 2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#ifndef __ASMPPC_SANDPOINT_SERIAL_H -#define __ASMPPC_SANDPOINT_SERIAL_H - -#include - -#define SANDPOINT_SERIAL_0 0xfe0003f8 -#define SANDPOINT_SERIAL_1 0xfe0002f8 - -#ifdef CONFIG_SERIAL_MANY_PORTS -#define RS_TABLE_SIZE 64 -#else -#define RS_TABLE_SIZE 2 -#endif - -/* Rate for the 1.8432 Mhz clock for the onboard serial chip */ -#define BASE_BAUD ( 1843200 / 16 ) - -#ifdef CONFIG_SERIAL_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST|ASYNC_AUTO_IRQ) -#else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST) -#endif - -#define STD_SERIAL_PORT_DFNS \ - { 0, BASE_BAUD, SANDPOINT_SERIAL_0, 4, STD_COM_FLAGS, /* ttyS0 */ \ - iomem_base: (u8 *)SANDPOINT_SERIAL_0, \ - io_type: SERIAL_IO_MEM }, \ - { 0, BASE_BAUD, SANDPOINT_SERIAL_1, 3, STD_COM_FLAGS, /* ttyS1 */ \ - iomem_base: (u8 *)SANDPOINT_SERIAL_1, \ - io_type: SERIAL_IO_MEM }, - -#define SERIAL_PORT_DFNS \ - STD_SERIAL_PORT_DFNS - -#endif /* __ASMPPC_SANDPOINT_SERIAL_H */ diff --git a/arch/ppc/platforms/sandpoint_setup.c b/arch/ppc/platforms/sandpoint_setup.c deleted file mode 100644 index 5860f3d0adef..000000000000 --- a/arch/ppc/platforms/sandpoint_setup.c +++ /dev/null @@ -1,643 +0,0 @@ -/* - * arch/ppc/platforms/sandpoint_setup.c - * - * Board setup routines for the Motorola SPS Sandpoint Test Platform. - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -/* - * This file adds support for the Motorola SPS Sandpoint Test Platform. 
- * These boards have a PPMC slot for the processor so any combination - * of cpu and host bridge can be attached. This port is for an 8240 PPMC - * module from Motorola SPS and other closely related cpu/host bridge - * combinations (e.g., 750/755/7400 with MPC107 host bridge). - * The sandpoint itself has a Windbond 83c553 (PCI-ISA bridge, 2 DMA ctlrs, 2 - * cascaded 8259 interrupt ctlrs, 8254 Timer/Counter, and an IDE ctlr), a - * National 87308 (RTC, 2 UARTs, Keyboard & mouse ctlrs, and a floppy ctlr), - * and 4 PCI slots (only 2 of which are usable; the other 2 are keyed for 3.3V - * but are really 5V). - * - * The firmware on the sandpoint is called DINK (not my acronym :). This port - * depends on DINK to do some basic initialization (e.g., initialize the memory - * ctlr) and to ensure that the processor is using MAP B (CHRP map). - * - * The switch settings for the Sandpoint board MUST be as follows: - * S3: down - * S4: up - * S5: up - * S6: down - * - * 'down' is in the direction from the PCI slots towards the PPMC slot; - * 'up' is in the direction from the PPMC slot towards the PCI slots. - * Be careful, the way the sandpoint board is installed in XT chasses will - * make the directions reversed. - * - * Since Motorola listened to our suggestions for improvement, we now have - * the Sandpoint X3 board. All of the PCI slots are available, it uses - * the serial interrupt interface (just a hardware thing we need to - * configure properly). - * - * Use the default X3 switch settings. 
The interrupts are then: - * EPIC Source - * 0 SIOINT (8259, active low) - * 1 PCI #1 - * 2 PCI #2 - * 3 PCI #3 - * 4 PCI #4 - * 7 Winbond INTC (IDE interrupt) - * 8 Winbond INTD (IDE interrupt) - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sandpoint.h" - -extern u_int openpic_irq(void); -extern void openpic_eoi(void); - -static void sandpoint_halt(void); - - -/* - * *** IMPORTANT *** - * - * The first 16 entries of 'sandpoint_openpic_initsenses[]' are there and - * initialized to 0 on purpose. DO NOT REMOVE THEM as the 'offset' parameter - * of 'openpic_init()' does not work for the sandpoint because the 8259 - * interrupt is NOT routed to the EPIC's IRQ 0 AND the EPIC's IRQ 0's offset is - * the same as a normal openpic's IRQ 16 offset. - */ -static u_char sandpoint_openpic_initsenses[] __initdata = { - 0, /* 0-15 not used by EPCI but by 8259 (std PC-type IRQs) */ - 0, /* 1 */ - 0, /* 2 */ - 0, /* 3 */ - 0, /* 4 */ - 0, /* 5 */ - 0, /* 6 */ - 0, /* 7 */ - 0, /* 8 */ - 0, /* 9 */ - 0, /* 10 */ - 0, /* 11 */ - 0, /* 12 */ - 0, /* 13 */ - 0, /* 14 */ - 0, /* 15 */ -#ifdef CONFIG_SANDPOINT_X3 - 1, /* 16: EPIC IRQ 0: Active Low -- SIOINT (8259) */ - 0, /* AACK! 
Shouldn't need this.....see sandpoint_pci.c for more info */ - 1, /* 17: EPIC IRQ 1: Active Low -- PCI Slot 1 */ - 1, /* 18: EPIC IRQ 2: Active Low -- PCI Slot 2 */ - 1, /* 19: EPIC IRQ 3: Active Low -- PCI Slot 3 */ - 1, /* 20: EPIC IRQ 4: Active Low -- PCI Slot 4 */ - 0, /* 21 -- Unused */ - 0, /* 22 -- Unused */ - 1, /* 23 -- IDE (Winbond INT C) */ - 1, /* 24 -- IDE (Winbond INT D) */ - /* 35 - 31 (EPIC 9 - 15) Unused */ -#else - 1, /* 16: EPIC IRQ 0: Active Low -- PCI intrs */ - 1, /* 17: EPIC IRQ 1: Active Low -- PCI (possibly 8259) intrs */ - 1, /* 18: EPIC IRQ 2: Active Low -- PCI (possibly 8259) intrs */ - 1 /* 19: EPIC IRQ 3: Active Low -- PCI intrs */ - /* 20: EPIC IRQ 4: Not used */ -#endif -}; - -static void __init -sandpoint_setup_arch(void) -{ - loops_per_jiffy = 100000000 / HZ; - -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - ROOT_DEV = Root_RAM0; - else -#endif -#ifdef CONFIG_ROOT_NFS - ROOT_DEV = Root_NFS; -#else - ROOT_DEV = Root_HDA1; -#endif - - /* Lookup PCI host bridges */ - sandpoint_find_bridges(); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - printk("Motorola SPS Sandpoint Test Platform\n"); - printk("Sandpoint port (MontaVista Software, Inc. (source@mvista.com))\n"); - - /* The Sandpoint rom doesn't enable any caches. Do that now. - * The 7450 portion will also set up the L3s once I get enough - * information do do so. If the processor running doesn't have - * and L2, the _set_L2CR is a no-op. - */ - if (cur_cpu_spec[0]->cpu_features & CPU_FTR_SPEC7450) { - /* Just enable L2, the bits are different from others. - */ - _set_L2CR(L2CR_L2E); - } - else { - /* The magic number for Sandpoint/74xx PrPMCs. 
- */ - _set_L2CR(0xbd014000); - } -} - -#define SANDPOINT_87308_CFG_ADDR 0x15c -#define SANDPOINT_87308_CFG_DATA 0x15d - -#define SANDPOINT_87308_CFG_INB(addr, byte) { \ - outb((addr), SANDPOINT_87308_CFG_ADDR); \ - (byte) = inb(SANDPOINT_87308_CFG_DATA); \ -} - -#define SANDPOINT_87308_CFG_OUTB(addr, byte) { \ - outb((addr), SANDPOINT_87308_CFG_ADDR); \ - outb((byte), SANDPOINT_87308_CFG_DATA); \ -} - -#define SANDPOINT_87308_SELECT_DEV(dev_num) { \ - SANDPOINT_87308_CFG_OUTB(0x07, (dev_num)); \ -} - -#define SANDPOINT_87308_DEV_ENABLE(dev_num) { \ - SANDPOINT_87308_SELECT_DEV(dev_num); \ - SANDPOINT_87308_CFG_OUTB(0x30, 0x01); \ -} - -/* - * Initialize the ISA devices on the Nat'l PC87308VUL SuperIO chip. - */ -static void __init -sandpoint_setup_natl_87308(void) -{ - u_char reg; - - /* - * Enable all the devices on the Super I/O chip. - */ - SANDPOINT_87308_SELECT_DEV(0x00); /* Select kbd logical device */ - SANDPOINT_87308_CFG_OUTB(0xf0, 0x00); /* Set KBC clock to 8 Mhz */ - SANDPOINT_87308_DEV_ENABLE(0x00); /* Enable keyboard */ - SANDPOINT_87308_DEV_ENABLE(0x01); /* Enable mouse */ - SANDPOINT_87308_DEV_ENABLE(0x02); /* Enable rtc */ - SANDPOINT_87308_DEV_ENABLE(0x03); /* Enable fdc (floppy) */ - SANDPOINT_87308_DEV_ENABLE(0x04); /* Enable parallel */ - SANDPOINT_87308_DEV_ENABLE(0x05); /* Enable UART 2 */ - SANDPOINT_87308_CFG_OUTB(0xf0, 0x82); /* Enable bank select regs */ - SANDPOINT_87308_DEV_ENABLE(0x06); /* Enable UART 1 */ - SANDPOINT_87308_CFG_OUTB(0xf0, 0x82); /* Enable bank select regs */ - - /* Set up floppy in PS/2 mode */ - outb(0x09, SIO_CONFIG_RA); - reg = inb(SIO_CONFIG_RD); - reg = (reg & 0x3F) | 0x40; - outb(reg, SIO_CONFIG_RD); - outb(reg, SIO_CONFIG_RD); /* Have to write twice to change! */ - - return; -} - -/* - * Fix IDE interrupts. 
- */ -static void __init -sandpoint_fix_winbond_83553(void) -{ - /* Make all 8259 interrupt level sensitive */ - outb(0xf8, 0x4d0); - outb(0xde, 0x4d1); - - return; -} - -static void __init -sandpoint_init2(void) -{ - /* Do Sandpoint board specific initialization. */ - sandpoint_fix_winbond_83553(); - sandpoint_setup_natl_87308(); - - request_region(0x00,0x20,"dma1"); - request_region(0x20,0x20,"pic1"); - request_region(0x40,0x20,"timer"); - request_region(0x80,0x10,"dma page reg"); - request_region(0xa0,0x20,"pic2"); - request_region(0xc0,0x20,"dma2"); - - return; -} - -/* - * Interrupt setup and service. Interrrupts on the Sandpoint come - * from the four PCI slots plus the 8259 in the Winbond Super I/O (SIO). - * These interrupts are sent to one of four IRQs on the EPIC. - * The SIO shares its interrupt with either slot 2 or slot 3 (INTA#). - * Slot numbering is confusing. Sometimes in the documentation they - * use 0,1,2,3 and others 1,2,3,4. We will use slots 1,2,3,4 and - * map this to IRQ 16, 17, 18, 19. - * For Sandpoint X3, this has been better designed. The 8259 is - * cascaded from EPIC IRQ0, IRQ1-4 map to PCI slots 1-4, IDE is on - * EPIC 7 and 8. - */ -static void __init -sandpoint_init_IRQ(void) -{ - int i; - - /* - * 3 things cause us to jump through some hoops: - * 1) the EPIC on the 8240 & 107 are not full-blown openpic pic's - * 2) the 8259 is NOT cascaded on the openpic IRQ 0 - * 3) the 8259 shares its interrupt line with some PCI interrupts. - * - * What we'll do is set up the 8259 to be level sensitive, active low - * just like a PCI device. Then, when an interrupt on the IRQ that is - * shared with the 8259 comes in, we'll take a peek at the 8259 to see - * it its generating an interrupt. If it is, we'll handle the 8259 - * interrupt. Otherwise, we'll handle it just like a normal PCI - * interrupt. This does give the 8259 interrupts a higher priority - * than the EPIC ones--hopefully, not a problem. 
- */ - OpenPIC_InitSenses = sandpoint_openpic_initsenses; - OpenPIC_NumInitSenses = sizeof(sandpoint_openpic_initsenses); - - openpic_init(1, 0, NULL, -1); - - /* - * openpic_init() has set up irq_desc[0-23] to be openpic - * interrupts. We need to set irq_desc[0-15] to be 8259 interrupts. - * We then need to request and enable the 8259 irq. - */ - for(i=0; i < NUM_8259_INTERRUPTS; i++) - irq_desc[i].handler = &i8259_pic; - - if (request_irq(SANDPOINT_SIO_IRQ, no_action, SA_INTERRUPT, - "8259 cascade to EPIC", NULL)) { - - printk("Unable to get OpenPIC IRQ %d for cascade\n", - SANDPOINT_SIO_IRQ); - } - - i8259_init(NULL); -} - -static int -sandpoint_get_irq(struct pt_regs *regs) -{ - int irq, cascade_irq; - - irq = openpic_irq(); - - if (irq == SANDPOINT_SIO_IRQ) { - cascade_irq = i8259_irq(regs); - - if (cascade_irq != -1) { - irq = cascade_irq; - openpic_eoi(); - } - } - else if (irq == OPENPIC_VEC_SPURIOUS) { - irq = -1; - } - - return irq; -} - -static u32 -sandpoint_irq_canonicalize(u32 irq) -{ - if (irq == 2) - { - return 9; - } - else - { - return irq; - } -} - -static ulong __init -sandpoint_find_end_of_memory(void) -{ - ulong size = 0; - -#if 0 /* Leave out until DINK sets mem ctlr correctly */ - size = mpc10x_get_mem_size(MPC10X_MEM_MAP_B); -#else - size = 32*1024*1024; -#endif - - return size; -} - -static void __init -sandpoint_map_io(void) -{ - io_block_mapping(0xfe000000, 0xfe000000, 0x02000000, _PAGE_IO); -} - -/* - * Due to Sandpoint X2 errata, the Port 92 will not work. 
- */ -static void -sandpoint_restart(char *cmd) -{ - local_irq_disable(); - - /* Set exception prefix high - to the firmware */ - _nmask_and_or_msr(0, MSR_IP); - - /* Reset system via Port 92 */ - outb(0x00, 0x92); - outb(0x01, 0x92); - for(;;); /* Spin until reset happens */ -} - -static void -sandpoint_power_off(void) -{ - local_irq_disable(); - for(;;); /* No way to shut power off with software */ - /* NOTREACHED */ -} - -static void -sandpoint_halt(void) -{ - sandpoint_power_off(); - /* NOTREACHED */ -} - -static int -sandpoint_show_cpuinfo(struct seq_file *m) -{ - uint pvid; - - pvid = mfspr(PVR); - - seq_printf(m, "vendor\t\t: Motorola SPS\n"); - seq_printf(m, "machine\t\t: Sandpoint\n"); - seq_printf(m, "processor\t: PVID: 0x%x, vendor: %s\n", - pvid, (pvid & (1<<15) ? "IBM" : "Motorola")); - - return 0; -} - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) -/* - * IDE support. - */ -static int sandpoint_ide_ports_known = 0; -static unsigned long sandpoint_ide_regbase[MAX_HWIFS]; -static unsigned long sandpoint_ide_ctl_regbase[MAX_HWIFS]; -static unsigned long sandpoint_idedma_regbase; - -static void -sandpoint_ide_probe(void) -{ - struct pci_dev *pdev = pci_find_device(PCI_VENDOR_ID_WINBOND, - PCI_DEVICE_ID_WINBOND_82C105, - NULL); - - if(pdev) { - sandpoint_ide_regbase[0]=pdev->resource[0].start; - sandpoint_ide_regbase[1]=pdev->resource[2].start; - sandpoint_ide_ctl_regbase[0]=pdev->resource[1].start; - sandpoint_ide_ctl_regbase[1]=pdev->resource[3].start; - sandpoint_idedma_regbase=pdev->resource[4].start; - } - - sandpoint_ide_ports_known = 1; - return; -} - -static int -sandpoint_ide_default_irq(unsigned long base) -{ - if (sandpoint_ide_ports_known == 0) - sandpoint_ide_probe(); - - if (base == sandpoint_ide_regbase[0]) - return SANDPOINT_IDE_INT0; - else if (base == sandpoint_ide_regbase[1]) - return SANDPOINT_IDE_INT1; - else - return 0; -} - -static unsigned long -sandpoint_ide_default_io_base(int index) -{ - if 
(sandpoint_ide_ports_known == 0) - sandpoint_ide_probe(); - - return sandpoint_ide_regbase[index]; -} - -static void __init -sandpoint_ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - uint alt_status_base; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg++; - } - - if (data_port == sandpoint_ide_regbase[0]) { - alt_status_base = sandpoint_ide_ctl_regbase[0] + 2; - hw->irq = 14; - } - else if (data_port == sandpoint_ide_regbase[1]) { - alt_status_base = sandpoint_ide_ctl_regbase[1] + 2; - hw->irq = 15; - } - else { - alt_status_base = 0; - hw->irq = 0; - } - - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = alt_status_base; - } - - if (irq != NULL) { - *irq = hw->irq; - } - - return; -} -#endif - -/* - * Set BAT 3 to map 0xf8000000 to end of physical memory space 1-to-1. - */ -static __inline__ void -sandpoint_set_bat(void) -{ - unsigned long bat3u, bat3l; - static int mapping_set = 0; - - if (!mapping_set) { - - __asm__ __volatile__( - " lis %0,0xf800\n \ - ori %1,%0,0x002a\n \ - ori %0,%0,0x0ffe\n \ - mtspr 0x21e,%0\n \ - mtspr 0x21f,%1\n \ - isync\n \ - sync " - : "=r" (bat3u), "=r" (bat3l)); - - mapping_set = 1; - } - - return; -} - -#ifdef CONFIG_SERIAL_TEXT_DEBUG -#include -#include -#include - -static struct serial_state rs_table[RS_TABLE_SIZE] = { - SERIAL_PORT_DFNS /* Defined in */ -}; - -static void -sandpoint_progress(char *s, unsigned short hex) -{ - volatile char c; - volatile unsigned long com_port; - u16 shift; - - com_port = rs_table[0].port; - shift = rs_table[0].iomem_reg_shift; - - while ((c = *s++) != 0) { - while ((*((volatile unsigned char *)com_port + - (UART_LSR << shift)) & UART_LSR_THRE) == 0) - ; - *(volatile unsigned char *)com_port = c; - - if (c == '\n') { - while ((*((volatile unsigned char *)com_port + - (UART_LSR << shift)) & UART_LSR_THRE) 
== 0) - ; - *(volatile unsigned char *)com_port = '\r'; - } - } -} -#endif /* CONFIG_SERIAL_TEXT_DEBUG */ - -__init void sandpoint_setup_pci_ptrs(void); - -TODC_ALLOC(); - -void __init -platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) -{ - parse_bootinfo(find_bootinfo()); - - /* Map in board regs, etc. */ - sandpoint_set_bat(); - - isa_io_base = MPC10X_MAPB_ISA_IO_BASE; - isa_mem_base = MPC10X_MAPB_ISA_MEM_BASE; - pci_dram_offset = MPC10X_MAPB_DRAM_OFFSET; - ISA_DMA_THRESHOLD = 0x00ffffff; - DMA_MODE_READ = 0x44; - DMA_MODE_WRITE = 0x48; - - ppc_md.setup_arch = sandpoint_setup_arch; - ppc_md.show_cpuinfo = sandpoint_show_cpuinfo; - ppc_md.irq_canonicalize = sandpoint_irq_canonicalize; - ppc_md.init_IRQ = sandpoint_init_IRQ; - ppc_md.get_irq = sandpoint_get_irq; - ppc_md.init = sandpoint_init2; - - ppc_md.restart = sandpoint_restart; - ppc_md.power_off = sandpoint_power_off; - ppc_md.halt = sandpoint_halt; - - ppc_md.find_end_of_memory = sandpoint_find_end_of_memory; - ppc_md.setup_io_mappings = sandpoint_map_io; - - TODC_INIT(TODC_TYPE_PC97307, 0x70, 0x00, 0x71, 8); - ppc_md.time_init = todc_time_init; - ppc_md.set_rtc_time = todc_set_rtc_time; - ppc_md.get_rtc_time = todc_get_rtc_time; - ppc_md.calibrate_decr = todc_calibrate_decr; - - ppc_md.nvram_read_val = todc_mc146818_read_val; - ppc_md.nvram_write_val = todc_mc146818_write_val; - - ppc_md.heartbeat = NULL; - ppc_md.heartbeat_reset = 0; - ppc_md.heartbeat_count = 0; - -#ifdef CONFIG_SERIAL_TEXT_DEBUG - ppc_md.progress = sandpoint_progress; -#else /* !CONFIG_SERIAL_TEXT_DEBUG */ - ppc_md.progress = NULL; -#endif /* CONFIG_SERIAL_TEXT_DEBUG */ - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) - ppc_ide_md.default_irq = sandpoint_ide_default_irq; - ppc_ide_md.default_io_base = sandpoint_ide_default_io_base; - ppc_ide_md.ide_init_hwif = sandpoint_ide_init_hwif_ports; -#endif - - return; -} diff --git a/include/asm-ppc/serial.h 
b/include/asm-ppc/serial.h index 887fa5302a9f..7496840db63d 100644 --- a/include/asm-ppc/serial.h +++ b/include/asm-ppc/serial.h @@ -25,7 +25,7 @@ #elif defined(CONFIG_PRPMC800) #include #elif defined(CONFIG_SANDPOINT) -#include +#include #elif defined(CONFIG_SPRUCE) #include #elif defined(CONFIG_ZX4500) -- cgit v1.2.3 From ed387adb14aa03ec837d027f16e02bdaae35660c Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Mon, 30 Jun 2003 20:57:28 -0700 Subject: PPC32: Fix CONFIG_NVRAM && !CONFIG_PPC_PMAC. Noted by Felix von Leitner --- arch/ppc/kernel/ppc_ksyms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index 2832d5580c1a..8ef2791d112b 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -272,8 +272,10 @@ EXPORT_SYMBOL(kd_mksound); #ifdef CONFIG_NVRAM EXPORT_SYMBOL(nvram_read_byte); EXPORT_SYMBOL(nvram_write_byte); +#ifdef CONFIG_PPC_PMAC EXPORT_SYMBOL(pmac_xpram_read); EXPORT_SYMBOL(pmac_xpram_write); +#endif #endif /* CONFIG_NVRAM */ EXPORT_SYMBOL(to_tm); -- cgit v1.2.3 From f9a4581cb66d91f31cf27584a1d7f87fd7b9939d Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Tue, 1 Jul 2003 02:50:37 -0700 Subject: Remove the Zynx 4500 platform code. It was old and unmaintained. 
This change can also be found in patch form at: ftp://source.mvista.com/pub/linuxppc/obsolete/zx4500/ --- arch/ppc/Kconfig | 7 +- arch/ppc/boot/simple/Makefile | 6 - arch/ppc/configs/adir_defconfig | 1 - arch/ppc/configs/apus_defconfig | 1 - arch/ppc/configs/common_defconfig | 1 - arch/ppc/configs/ev64260_defconfig | 1 - arch/ppc/configs/gemini_defconfig | 1 - arch/ppc/configs/ibmchrp_defconfig | 1 - arch/ppc/configs/k2_defconfig | 1 - arch/ppc/configs/lopec_defconfig | 1 - arch/ppc/configs/mcpn765_defconfig | 1 - arch/ppc/configs/menf1_defconfig | 1 - arch/ppc/configs/mvme5100_defconfig | 1 - arch/ppc/configs/pcore_defconfig | 1 - arch/ppc/configs/pmac_defconfig | 1 - arch/ppc/configs/power3_defconfig | 1 - arch/ppc/configs/pplus_defconfig | 1 - arch/ppc/configs/prpmc750_defconfig | 1 - arch/ppc/configs/prpmc800_defconfig | 1 - arch/ppc/configs/sandpoint_defconfig | 1 - arch/ppc/configs/spruce_defconfig | 1 - arch/ppc/configs/zx4500_defconfig | 560 ----------------------------------- arch/ppc/defconfig | 1 - arch/ppc/platforms/Makefile | 1 - arch/ppc/platforms/zx4500.h | 68 ----- arch/ppc/platforms/zx4500_pci.c | 138 --------- arch/ppc/platforms/zx4500_serial.h | 46 --- arch/ppc/platforms/zx4500_setup.c | 359 ---------------------- arch/ppc/syslib/Makefile | 2 - include/asm-ppc/serial.h | 2 - 30 files changed, 2 insertions(+), 1207 deletions(-) delete mode 100644 arch/ppc/configs/zx4500_defconfig delete mode 100644 arch/ppc/platforms/zx4500.h delete mode 100644 arch/ppc/platforms/zx4500_pci.c delete mode 100644 arch/ppc/platforms/zx4500_serial.h delete mode 100644 arch/ppc/platforms/zx4500_setup.c diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 9ade9f30eabb..7e65585ddd6d 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -490,9 +490,6 @@ config GEMINI series Single Board Computer. More information is available at: . 
-config ZX4500 - bool "Zynx-ZX4500" - endchoice config PPC_CHRP @@ -536,7 +533,7 @@ config WILLOW config MPC10X_STORE_GATHERING bool "Enable MPC10x store gathering" - depends on FORCE || MENF1 || SANDPOINT || ZX4500 + depends on FORCE || MENF1 || SANDPOINT config GT64260 bool @@ -1528,7 +1525,7 @@ config BOOTX_TEXT config SERIAL_TEXT_DEBUG bool "Support for early boot texts over serial port" - depends on 4xx || GT64260 || LOPEC || MCPN765 || PPLUS || PRPMC800 || SANDPOINT || ZX4500 + depends on 4xx || GT64260 || LOPEC || MCPN765 || PPLUS || PRPMC800 || SANDPOINT config OCP bool diff --git a/arch/ppc/boot/simple/Makefile b/arch/ppc/boot/simple/Makefile index bd7bdeaca772..b780626e3286 100644 --- a/arch/ppc/boot/simple/Makefile +++ b/arch/ppc/boot/simple/Makefile @@ -101,12 +101,6 @@ ENTRYPOINT := 0x00800000 MISC := misc-spruce.o TFTPIMAGE := /tftpboot/zImage.$(END) endif -ifeq ($(CONFIG_ZX4500),y) -ZIMAGE := zImage-STRIPELF -ZIMAGEINITRD := zImage.initrd-STRIPELF -END := zx4500 -TFTPIMAGE := /tftpboot/zImage.$(END) -endif ifeq ($(CONFIG_SMP),y) TFTPIMAGE += .smp endif diff --git a/arch/ppc/configs/adir_defconfig b/arch/ppc/configs/adir_defconfig index aef58609eb8a..d0dd92bbb842 100644 --- a/arch/ppc/configs/adir_defconfig +++ b/arch/ppc/configs/adir_defconfig @@ -67,7 +67,6 @@ CONFIG_ADIR=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff --git a/arch/ppc/configs/apus_defconfig b/arch/ppc/configs/apus_defconfig index 2ccbd93d1e85..559b52330407 100644 --- a/arch/ppc/configs/apus_defconfig +++ b/arch/ppc/configs/apus_defconfig @@ -68,7 +68,6 @@ CONFIG_APUS=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff --git a/arch/ppc/configs/common_defconfig b/arch/ppc/configs/common_defconfig index 
5e50731b4db1..2acfa03cafce 100644 --- a/arch/ppc/configs/common_defconfig +++ b/arch/ppc/configs/common_defconfig @@ -68,7 +68,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff --git a/arch/ppc/configs/ev64260_defconfig b/arch/ppc/configs/ev64260_defconfig index d6204290b3ad..1e3617a621b6 100644 --- a/arch/ppc/configs/ev64260_defconfig +++ b/arch/ppc/configs/ev64260_defconfig @@ -67,7 +67,6 @@ CONFIG_EV64260=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_GT64260=y CONFIG_SERIAL_CONSOLE_BAUD=115200 # CONFIG_SMP is not set diff --git a/arch/ppc/configs/gemini_defconfig b/arch/ppc/configs/gemini_defconfig index 98d499db74c3..d2e5789b2809 100644 --- a/arch/ppc/configs/gemini_defconfig +++ b/arch/ppc/configs/gemini_defconfig @@ -67,7 +67,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set CONFIG_GEMINI=y -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set CONFIG_ALTIVEC=y diff --git a/arch/ppc/configs/ibmchrp_defconfig b/arch/ppc/configs/ibmchrp_defconfig index 66017e5bbfeb..7503b06782de 100644 --- a/arch/ppc/configs/ibmchrp_defconfig +++ b/arch/ppc/configs/ibmchrp_defconfig @@ -67,7 +67,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff --git a/arch/ppc/configs/k2_defconfig b/arch/ppc/configs/k2_defconfig index c345891ae749..e111e715643b 100644 --- a/arch/ppc/configs/k2_defconfig +++ b/arch/ppc/configs/k2_defconfig @@ -67,7 +67,6 @@ CONFIG_PPC_STD_MMU=y CONFIG_K2=y # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_CPC710_DATA_GATHERING is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff --git 
a/arch/ppc/configs/lopec_defconfig b/arch/ppc/configs/lopec_defconfig index 70962e27db3d..d9034bab80b0 100644 --- a/arch/ppc/configs/lopec_defconfig +++ b/arch/ppc/configs/lopec_defconfig @@ -67,7 +67,6 @@ CONFIG_LOPEC=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_EPIC_SERIAL_MODE=y # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff --git a/arch/ppc/configs/mcpn765_defconfig b/arch/ppc/configs/mcpn765_defconfig index 851a94f7ed54..8e2704044880 100644 --- a/arch/ppc/configs/mcpn765_defconfig +++ b/arch/ppc/configs/mcpn765_defconfig @@ -62,7 +62,6 @@ CONFIG_MCPN765=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set CONFIG_ALTIVEC=y diff --git a/arch/ppc/configs/menf1_defconfig b/arch/ppc/configs/menf1_defconfig index b9d61c21ff37..64eece52ad33 100644 --- a/arch/ppc/configs/menf1_defconfig +++ b/arch/ppc/configs/menf1_defconfig @@ -67,7 +67,6 @@ CONFIG_MENF1=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_MPC10X_STORE_GATHERING=y # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff --git a/arch/ppc/configs/mvme5100_defconfig b/arch/ppc/configs/mvme5100_defconfig index 69e9c155f484..01f8a8366bdf 100644 --- a/arch/ppc/configs/mvme5100_defconfig +++ b/arch/ppc/configs/mvme5100_defconfig @@ -67,7 +67,6 @@ CONFIG_MVME5100=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_MVME5100_IPMC761_PRESENT is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff --git a/arch/ppc/configs/pcore_defconfig b/arch/ppc/configs/pcore_defconfig index b5568f2c374a..ebdf9b60c92c 100644 --- a/arch/ppc/configs/pcore_defconfig +++ b/arch/ppc/configs/pcore_defconfig @@ -67,7 +67,6 @@ CONFIG_PCORE=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# 
CONFIG_ZX4500 is not set CONFIG_FORCE=y # CONFIG_MPC10X_STORE_GATHERING is not set # CONFIG_SMP is not set diff --git a/arch/ppc/configs/pmac_defconfig b/arch/ppc/configs/pmac_defconfig index 411322433e7f..39171b94c976 100644 --- a/arch/ppc/configs/pmac_defconfig +++ b/arch/ppc/configs/pmac_defconfig @@ -69,7 +69,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff --git a/arch/ppc/configs/power3_defconfig b/arch/ppc/configs/power3_defconfig index a986e394dbff..e42225f01bc8 100644 --- a/arch/ppc/configs/power3_defconfig +++ b/arch/ppc/configs/power3_defconfig @@ -67,7 +67,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff --git a/arch/ppc/configs/pplus_defconfig b/arch/ppc/configs/pplus_defconfig index 8dfa789c9939..c50b0f327f56 100644 --- a/arch/ppc/configs/pplus_defconfig +++ b/arch/ppc/configs/pplus_defconfig @@ -67,7 +67,6 @@ CONFIG_PPLUS=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff --git a/arch/ppc/configs/prpmc750_defconfig b/arch/ppc/configs/prpmc750_defconfig index 24c993675118..8ed08b66bdbf 100644 --- a/arch/ppc/configs/prpmc750_defconfig +++ b/arch/ppc/configs/prpmc750_defconfig @@ -67,7 +67,6 @@ CONFIG_PRPMC750=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff --git a/arch/ppc/configs/prpmc800_defconfig b/arch/ppc/configs/prpmc800_defconfig index ee07a3c5ebb3..c45c85f7d573 100644 --- a/arch/ppc/configs/prpmc800_defconfig +++ b/arch/ppc/configs/prpmc800_defconfig @@ -67,7 +67,6 @@ 
CONFIG_PRPMC800=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff --git a/arch/ppc/configs/sandpoint_defconfig b/arch/ppc/configs/sandpoint_defconfig index a7b1f4bbbf00..5007a3b7063c 100644 --- a/arch/ppc/configs/sandpoint_defconfig +++ b/arch/ppc/configs/sandpoint_defconfig @@ -67,7 +67,6 @@ CONFIG_SANDPOINT=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_SANDPOINT_X3=y CONFIG_EPIC_SERIAL_MODE=y # CONFIG_MPC10X_STORE_GATHERING is not set diff --git a/arch/ppc/configs/spruce_defconfig b/arch/ppc/configs/spruce_defconfig index 4a5a09f95060..5af7d5179d5b 100644 --- a/arch/ppc/configs/spruce_defconfig +++ b/arch/ppc/configs/spruce_defconfig @@ -67,7 +67,6 @@ CONFIG_SPRUCE=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SPRUCE_BAUD_33M is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff --git a/arch/ppc/configs/zx4500_defconfig b/arch/ppc/configs/zx4500_defconfig deleted file mode 100644 index 8b1bf65a1a50..000000000000 --- a/arch/ppc/configs/zx4500_defconfig +++ /dev/null @@ -1,560 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_MMU=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_HAVE_DEC_LOCK=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# General setup -# -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y -CONFIG_FUTEX=y -# CONFIG_EPOLL is not set - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y -# CONFIG_MODVERSIONS is not set -CONFIG_KMOD=y - -# -# Platform support -# -CONFIG_PPC=y -CONFIG_PPC32=y -CONFIG_6xx=y -# CONFIG_40x is not set -# CONFIG_POWER3 is not set -# 
CONFIG_8xx is not set - -# -# IBM 4xx options -# -# CONFIG_8260 is not set -CONFIG_GENERIC_ISA_DMA=y -CONFIG_PPC_STD_MMU=y -# CONFIG_PPC_MULTIPLATFORM is not set -# CONFIG_APUS is not set -# CONFIG_WILLOW_2 is not set -# CONFIG_PCORE is not set -# CONFIG_POWERPMC250 is not set -# CONFIG_EV64260 is not set -# CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set -# CONFIG_LOPEC is not set -# CONFIG_MCPN765 is not set -# CONFIG_MVME5100 is not set -# CONFIG_PPLUS is not set -# CONFIG_PRPMC750 is not set -# CONFIG_PRPMC800 is not set -# CONFIG_SANDPOINT is not set -# CONFIG_ADIR is not set -# CONFIG_K2 is not set -# CONFIG_PAL4 is not set -# CONFIG_GEMINI is not set -CONFIG_ZX4500=y -# CONFIG_MPC10X_STORE_GATHERING is not set -# CONFIG_SMP is not set -# CONFIG_PREEMPT is not set -# CONFIG_ALTIVEC is not set -# CONFIG_TAU is not set -# CONFIG_CPU_FREQ is not set - -# -# General setup -# -# CONFIG_HIGHMEM is not set -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_KCORE_ELF=y -CONFIG_BINFMT_ELF=y -CONFIG_KERNEL_ELF=y -CONFIG_BINFMT_MISC=y -# CONFIG_PCI_LEGACY_PROC is not set -CONFIG_PCI_NAMES=y -# CONFIG_HOTPLUG is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set -CONFIG_PPC601_SYNC_FIX=y -# CONFIG_CMDLINE_BOOL is not set - -# -# Advanced setup -# -# CONFIG_ADVANCED_OPTIONS is not set - -# -# Default settings for advanced configuration options are used -# -CONFIG_HIGHMEM_START=0xfe000000 -CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_KERNEL_START=0xc0000000 -CONFIG_TASK_SIZE=0x80000000 -CONFIG_BOOT_LOAD=0x00800000 - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Plug and Play support -# -# CONFIG_PNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_BLK_DEV_INITRD=y - -# -# 
Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# ATA/IDE/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI support -# -# CONFIG_SCSI is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set -# CONFIG_NETFILTER is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_IPV6 is not set -# CONFIG_XFRM_USER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -CONFIG_IPV6_SCTP__=y -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_LLC is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_MII is not set -# CONFIG_OAKNET is not set -# CONFIG_HAPPYMEAL is not set -# 
CONFIG_SUNGEM is not set -# CONFIG_NET_VENDOR_3COM is not set - -# -# Tulip family network device support -# -# CONFIG_NET_TULIP is not set -# CONFIG_HP100 is not set -CONFIG_NET_PCI=y -# CONFIG_PCNET32 is not set -# CONFIG_AMD8111_ETH is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_B44 is not set -# CONFIG_DGRS is not set -CONFIG_EEPRO100=y -# CONFIG_EEPRO100_PIO is not set -# CONFIG_E100 is not set -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set -# CONFIG_NE2K_PCI is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set -# CONFIG_SIS900 is not set -# CONFIG_EPIC100 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_TLAN is not set -# CONFIG_VIA_RHINE is not set - -# -# Ethernet (1000 Mbit) -# -# CONFIG_ACENIC is not set -# CONFIG_DL2K is not set -# CONFIG_E1000 is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set -# CONFIG_SK98LIN is not set -# CONFIG_TIGON3 is not set - -# -# Ethernet (10000 Mbit) -# -# CONFIG_IXGB is not set -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices (depends on LLC=y) -# -# CONFIG_RCPCI is not set -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# IrDA (infrared) support -# -# CONFIG_IRDA is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN_BOOL is not set - -# -# Graphics support -# -# CONFIG_FB is not set - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input device support -# -# CONFIG_INPUT is not set - -# -# Userland interfaces -# - -# -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -# CONFIG_SERIO is not set - -# -# Input Device Drivers -# - -# -# Macintosh device drivers -# - -# -# Character devices -# -# 
CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_SERIAL_8250_EXTENDED is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# I2C Hardware Sensors Mainboard support -# - -# -# I2C Hardware Sensors Chip support -# -# CONFIG_I2C_SENSOR is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -# CONFIG_QIC02_TAPE is not set - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -CONFIG_GEN_RTC=y -# CONFIG_GEN_RTC_X is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set -# CONFIG_DRM is not set -# CONFIG_RAW_DRIVER is not set -# CONFIG_HANGCHECK_TIMER is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# File systems -# -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=y -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_FAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_DEVFS_FS=y -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is 
not set -CONFIG_DEVPTS_FS=y -# CONFIG_DEVPTS_FS_XATTR is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -# CONFIG_NFS_V3 is not set -# CONFIG_NFS_V4 is not set -CONFIG_NFSD=y -# CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_EXPORTFS=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_GSS is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB is not set -# CONFIG_USB_GADGET is not set - -# -# Bluetooth support -# -# CONFIG_BT is not set - -# -# Library routines -# -# CONFIG_CRC32 is not set - -# -# Kernel hacking -# -# CONFIG_DEBUG_KERNEL is not set -# CONFIG_KALLSYMS is not set -CONFIG_SERIAL_TEXT_DEBUG=y - -# -# Security options -# -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set diff --git a/arch/ppc/defconfig b/arch/ppc/defconfig index 119106ad7e80..23a49dc0ed1d 100644 --- a/arch/ppc/defconfig +++ b/arch/ppc/defconfig @@ -68,7 +68,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff --git a/arch/ppc/platforms/Makefile b/arch/ppc/platforms/Makefile index 263a40b0563a..eea39bf1e4aa 100644 --- a/arch/ppc/platforms/Makefile +++ 
b/arch/ppc/platforms/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_PRPMC750) += prpmc750_setup.o prpmc750_pci.o obj-$(CONFIG_PRPMC800) += prpmc800_setup.o prpmc800_pci.o obj-$(CONFIG_SANDPOINT) += sandpoint_setup.o sandpoint_pci.o obj-$(CONFIG_SPRUCE) += spruce_setup.o spruce_pci.o -obj-$(CONFIG_ZX4500) += zx4500_setup.o zx4500_pci.o ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_PPC_PMAC) += pmac_smp.o diff --git a/arch/ppc/platforms/zx4500.h b/arch/ppc/platforms/zx4500.h deleted file mode 100644 index 8a26e691e940..000000000000 --- a/arch/ppc/platforms/zx4500.h +++ /dev/null @@ -1,68 +0,0 @@ -/* * arch/ppc/platforms/zx4500.h - * - * Board setup routines for Znyx ZX4500 cPCI board. - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ -#ifndef __PPC_PLATFORMS_ZX4500_H_ -#define __PPC_PLATFORMS_ZX4500_H_ - -/* - * Define the addresses of CPLD registers in CLPD area. 
- */ -#define ZX4500_CPLD_BOARD_ID 0xff800001 -#define ZX4500_CPLD_REV 0xff800002 -#define ZX4500_CPLD_RESET 0xff800011 -#define ZX4500_CPLD_PHY1 0xff800014 -#define ZX4500_CPLD_PHY2 0xff800015 -#define ZX4500_CPLD_PHY3 0xff800016 -#define ZX4500_CPLD_SYSCTL 0xff800017 -#define ZX4500_CPLD_EXT_FLASH 0xff800018 -#define ZX4500_CPLD_DUAL1 0xff800019 -#define ZX4500_CPLD_DUAL2 0xff80001A -#define ZX4500_CPLD_STATUS 0xff800030 -#define ZX4500_CPLD_STREAM 0xff800032 -#define ZX4500_CPLD_PHY1_LED 0xff800034 -#define ZX4500_CPLD_PHY2_LED 0xff800035 -#define ZX4500_CPLD_PHY3_LED 0xff800036 -#define ZX4500_CPLD_PHY1_LNK 0xff80003C -#define ZX4500_CPLD_PHY2_LNK 0xff80003D -#define ZX4500_CPLD_PHY3_LNK 0xff80003E - -#define ZX4500_CPLD_RESET_SOFT 0x01 /* Soft Reset */ -#define ZX4500_CPLD_RESET_XBUS 0x40 /* Reset entire board */ - -#define ZX4500_CPLD_SYSCTL_PMC 0x01 /* Enable INTA/B/C/D from PMC */ -#define ZX4500_CPLD_SYSCTL_BCM 0x04 /* Enable INTA from BCM */ -#define ZX4500_CPLD_SYSCTL_SINTA 0x08 /* Enable SINTA from 21554 */ -#define ZX4500_CPLD_SYSCTL_WD 0x20 /* Enable Watchdog Timer */ -#define ZX4500_CPLD_SYSCTL_PMC_TRI 0x80 /* Tri-state PMC EREADY */ - -#define ZX4500_CPLD_DUAL2_LED_PULL 0x01 /* Pull LED */ -#define ZX4500_CPLD_DUAL2_LED_EXT_FAULT 0x02 /* External Fault LED */ -#define ZX4500_CPLD_DUAL2_LED_INT_FAULT 0x04 /* Internal Fault LED */ -#define ZX4500_CPLD_DUAL2_LED_OK 0x08 /* OK LED */ -#define ZX4500_CPLD_DUAL2_LED_CLK 0x10 /* CLK LED */ - -/* - * Defines related to boot string stored in flash. - */ -#define ZX4500_BOOT_STRING_ADDR 0xfff7f000 -#define ZX4500_BOOT_STRING_LEN 80 - -/* - * Define the IDSEL that the PCI bus side of the 8240 is connected to. - * This IDSEL must not be selected from the 8240 processor side. 
- */ -#define ZX4500_HOST_BRIDGE_IDSEL 20 - - -void zx4500_find_bridges(void); - -#endif /* __PPC_PLATFORMS_ZX4500_H_ */ diff --git a/arch/ppc/platforms/zx4500_pci.c b/arch/ppc/platforms/zx4500_pci.c deleted file mode 100644 index 325ecfe31c93..000000000000 --- a/arch/ppc/platforms/zx4500_pci.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * arch/ppc/platforms/zx4500_pci.c - * - * PCI setup routines for Znyx ZX4500 cPCI boards. - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "zx4500.h" - -/* - * Znyx ZX4500 interrupt routes. - */ -static inline int -zx4500_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) -{ - static char pci_irq_table[][4] = - /* - * PCI IDSEL/INTPIN->INTLINE - * A B C D - */ - { - { 19, 0, 0, 0 }, /* IDSEL 21 - 21554 PCI-cPCI bridge */ - { 18, 0, 0, 0 }, /* IDSEL 22 - BCM5600 INTA */ - { 16, 20, 16, 20 }, /* IDSEL 23 - PPMC Slot */ - }; - - const long min_idsel = 21, max_idsel = 23, irqs_per_slot = 4; - return PCI_IRQ_TABLE_LOOKUP; -} - -void __init -zx4500_board_init(struct pci_controller *hose) -{ - uint val; - u_char sysctl; - - /* - * CPLD Registers are mapped in by BAT 3 in zx4500_setup_arch(). - * - * Turn off all interrupts routed through the CPLD. - * Also, turn off watchdog timer and drive PMC EREADY low. 
- */ - sysctl = in_8((volatile u_char *)ZX4500_CPLD_SYSCTL); - sysctl &= ~(ZX4500_CPLD_SYSCTL_PMC | - ZX4500_CPLD_SYSCTL_BCM | - ZX4500_CPLD_SYSCTL_SINTA | - ZX4500_CPLD_SYSCTL_WD | - ZX4500_CPLD_SYSCTL_PMC_TRI); - out_8((volatile u_char *)ZX4500_CPLD_SYSCTL, sysctl); - - /* - * Kludge the size that BAR2 of the 21554 asks for - * (i.e., set Upstream I/O or Memory 0 Setup Register). - * Old versions of SROM wants 1 GB which is too large, make it ask - * for 256 MB. - */ - early_read_config_dword(hose, 0, PCI_DEVFN(21,0), 0xc4, &val); - - if (val != 0) { - early_write_config_dword(hose, - 0, - PCI_DEVFN(21,0), - 0xc4, - val | 0xf0000000); - } - - return; -} - -static int -zx4500_exclude_device(u_char bus, u_char devfn) -{ - if ((bus == 0) && (PCI_SLOT(devfn) == ZX4500_HOST_BRIDGE_IDSEL)) { - return PCIBIOS_DEVICE_NOT_FOUND; - } - else { - return PCIBIOS_SUCCESSFUL; - } -} - -void __init -zx4500_find_bridges(void) -{ - struct pci_controller *hose; - - hose = pcibios_alloc_controller(); - - if (!hose) - return; - - hose->first_busno = 0; - hose->last_busno = 0xff; - - if (mpc10x_bridge_init(hose, - MPC10X_MEM_MAP_B, - MPC10X_MEM_MAP_B, - MPC10X_MAPB_EUMB_BASE) == 0) { - - hose->mem_resources[0].end = 0xffffffff; - - /* Initialize the board */ - zx4500_board_init(hose); - - /* scan PCI bus */ - ppc_md.pci_exclude_device = zx4500_exclude_device; - hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); - - ppc_md.pcibios_fixup = NULL; - ppc_md.pcibios_fixup_bus = NULL; - ppc_md.pci_swizzle = common_swizzle; - ppc_md.pci_map_irq = zx4500_map_irq; - } - else { - if (ppc_md.progress) - ppc_md.progress("Bridge init failed", 0x100); - printk("Host bridge init failed\n"); - } - - return; -} diff --git a/arch/ppc/platforms/zx4500_serial.h b/arch/ppc/platforms/zx4500_serial.h deleted file mode 100644 index e1f8c70337ba..000000000000 --- a/arch/ppc/platforms/zx4500_serial.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * arch/ppc/platforms/zx4500_serial.h - * - * Definitions for 
Znyx ZX4500 board support - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#ifndef __ASMPPC_ZX4500_SERIAL_H -#define __ASMPPC_ZX4500_SERIAL_H - -#include - -/* Define the UART base address (only 1 UART) */ -#define ZX4500_SERIAL_1 0xff880000 - -#ifdef CONFIG_SERIAL_MANY_PORTS -#define RS_TABLE_SIZE 64 -#else -#define RS_TABLE_SIZE 1 -#endif - -/* Rate for the 1.8432 Mhz clock for the onboard serial chip */ -#define BASE_BAUD ( 1843200 / 16 ) - -#ifdef CONFIG_SERIAL_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST|ASYNC_AUTO_IRQ) -#else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST) -#endif - -#define STD_SERIAL_PORT_DFNS \ - { 0, BASE_BAUD, ZX4500_SERIAL_1, 17, STD_COM_FLAGS, /* ttyS0 */ \ - iomem_base: (u8 *)ZX4500_SERIAL_1, \ - io_type: SERIAL_IO_MEM }, - -#define SERIAL_PORT_DFNS \ - STD_SERIAL_PORT_DFNS - -#endif /* __ASMPPC_ZX4500_SERIAL_H */ diff --git a/arch/ppc/platforms/zx4500_setup.c b/arch/ppc/platforms/zx4500_setup.c deleted file mode 100644 index 7a71b270956a..000000000000 --- a/arch/ppc/platforms/zx4500_setup.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - * arch/ppc/platforms/zx4500_setup.c - * - * Board setup routines for Znyx ZX4500 family of cPCI boards. - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -/* - * This file adds support for the Znyx ZX4500 series of cPCI boards. 
- * These boards have an 8240, UART on the processor bus, a PPMC slot (for now - * the card in this slot can _not_ be a monarch), Broadcom BCM5600, and an - * Intel 21554 bridge. - * - * Currently, this port assumes that the 8240 is the master and performs PCI - * arbitration, etc. It is also assumed that the 8240 is wired to come up - * using memory MAP B (CHRP map). - * - * Note: This board port will not work properly as it is. You must apply the - * patch that is at ftp://ftp.mvista.com/pub/Area51/zx4500/zx_patch_2_5 - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "zx4500.h" - -static u_char zx4500_openpic_initsenses[] __initdata = { - 0, /* 0-15 are not used on an 8240 EPIC */ - 0, /* 1 */ - 0, /* 2 */ - 0, /* 3 */ - 0, /* 4 */ - 0, /* 5 */ - 0, /* 6 */ - 0, /* 7 */ - 0, /* 8 */ - 0, /* 9 */ - 0, /* 10 */ - 0, /* 11 */ - 0, /* 12 */ - 0, /* 13 */ - 0, /* 14 */ - 0, /* 15 */ - 1, /* 16: EPIC IRQ 0: Active Low -- PMC #INTA & #INTC */ - 1, /* 17: EPIC IRQ 1: Active Low -- UART */ - 1, /* 18: EPIC IRQ 2: Active Low -- BCM5600 #INTA */ - 1, /* 19: EPIC IRQ 3: Active Low -- 21554 #SINTA */ - 1, /* 20: EPIC IRQ 4: Active Low -- PMC #INTB & #INTD */ -}; - - -static void __init -zx4500_setup_arch(void) -{ - char boot_string[ZX4500_BOOT_STRING_LEN + 1]; - char *boot_arg; - extern char cmd_line[]; - - - loops_per_jiffy = 50000000 / HZ; - -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - ROOT_DEV = Root_RAM0; - else -#endif -#if defined(CONFIG_ROOT_NFS) - ROOT_DEV = Root_NFS; -#else - ROOT_DEV = Root_SDA1; -#endif - - /* Get boot string from flash */ - strlcpy(boot_string, - (char *)ZX4500_BOOT_STRING_ADDR, - sizeof(boot_string)); - boot_string[ZX4500_BOOT_STRING_LEN] = '\0'; - - /* Can be delimited by 0xff */ - boot_arg = 
strchr(boot_string, 0xff); - - if (boot_arg != NULL) { - *boot_arg = '\0'; - } - - /* First 3 chars must be 'dev'. If not, ignore. */ - if (!strncmp(boot_string, "dev", 3)) { - /* skip 'dev?' and any blanks after it */ - boot_arg = strchr(boot_string, ' '); - - if (boot_arg != NULL) { - while (*boot_arg == ' ') boot_arg++; - strcat(cmd_line, " "); - strcat(cmd_line, boot_arg); - } - } - - /* nothing but serial consoles... */ - printk("Znyx ZX4500 Series High Performance Switch\n"); - printk("ZX4500 port (C) 2000, 2001 MontaVista Software, Inc. (source@mvista.com)\n"); - - /* Lookup PCI host bridge */ - zx4500_find_bridges(); - - printk("ZX4500 Board ID: 0x%x, Revision #: 0x%x\n", - in_8((volatile u_char *)ZX4500_CPLD_BOARD_ID), - in_8((volatile u_char *)ZX4500_CPLD_REV)); - - return; -} - -static ulong __init -zx4500_find_end_of_memory(void) -{ - return mpc10x_get_mem_size(MPC10X_MEM_MAP_B); -} - -static void __init -zx4500_map_io(void) -{ - io_block_mapping(0xfe000000, 0xfe000000, 0x02000000, _PAGE_IO); -} - -/* - * Enable interrupts routed thru CPLD to reach the 8240's EPIC. - * Need to enable all 4 PMC intrs, BCM INTA, and 21554 SINTA to 8240. - * UART intrs routed directly to 8240 (not thru CPLD). 
- */ -static void __init -zx4500_enable_cpld_intrs(void) -{ - u_char sysctl; - - sysctl = in_8((volatile u_char *)ZX4500_CPLD_SYSCTL); - sysctl |= (ZX4500_CPLD_SYSCTL_PMC | - ZX4500_CPLD_SYSCTL_BCM | - ZX4500_CPLD_SYSCTL_SINTA); - out_8((volatile u_char *)ZX4500_CPLD_SYSCTL, sysctl); - - return; -} - -static void __init -zx4500_init_IRQ(void) -{ - OpenPIC_InitSenses = zx4500_openpic_initsenses; - OpenPIC_NumInitSenses = sizeof(zx4500_openpic_initsenses); - - openpic_init(1, 0, NULL, -1); - - zx4500_enable_cpld_intrs(); /* Allow CPLD to route intrs to 8240 */ - - return; -} - -static void -zx4500_restart(char *cmd) -{ - local_irq_disable(); - - out_8((volatile u_char *)ZX4500_CPLD_RESET, ZX4500_CPLD_RESET_XBUS); - for (;;); - - panic("Restart failed.\n"); - /* NOTREACHED */ -} - -static void -zx4500_power_off(void) -{ - local_irq_disable(); - for(;;); /* No way to shut power off with software */ - /* NOTREACHED */ -} - -static void -zx4500_halt(void) -{ - zx4500_power_off(); - /* NOTREACHED */ -} - -static int -zx4500_get_bus_speed(void) -{ - int bus_speed; - - bus_speed = 100000000; - - return bus_speed; -} - -static int -zx4500_show_cpuinfo(struct seq_file *m) -{ - uint pvid; - - seq_printf(m, "vendor\t\t: Znyx\n"); - seq_printf(m, "machine\t\t: ZX4500\n"); - seq_printf(m, "processor\t: PVID: 0x%x, vendor: %s\n", - pvid, (pvid & (1<<15) ? "IBM" : "Motorola")); - seq_printf(m, "bus speed\t: %dMhz\n", - zx4500_get_bus_speed()/1000000); - - return 0; -} - -static void __init -zx4500_calibrate_decr(void) -{ - ulong freq; - - freq = zx4500_get_bus_speed() / 4; - - printk("time_init: decrementer frequency = %lu.%.6lu MHz\n", - freq/1000000, freq%1000000); - - tb_ticks_per_jiffy = freq / HZ; - tb_to_us = mulhwu_scale_factor(freq, 1000000); - - return; -} - -/* - * Set BAT 3 to map 0xf0000000 to end of physical memory space 1-1. 
- */ -static __inline__ void -zx4500_set_bat(void) -{ - unsigned long bat3u, bat3l; - static int mapping_set = 0; - - if (!mapping_set) { - - __asm__ __volatile__( - " lis %0,0xf800\n \ - ori %1,%0,0x002a\n \ - ori %0,%0,0x0ffe\n \ - mtspr 0x21e,%0\n \ - mtspr 0x21f,%1\n \ - isync\n \ - sync " - : "=r" (bat3u), "=r" (bat3l)); - - mapping_set = 1; - } - - return; -} - -#ifdef CONFIG_SERIAL_TEXT_DEBUG -#include -#include -#include - -static struct serial_state rs_table[RS_TABLE_SIZE] = { - SERIAL_PORT_DFNS /* Defined in */ -}; - -void -zx4500_progress(char *s, unsigned short hex) -{ - volatile char c; - volatile unsigned long com_port; - u16 shift; - - com_port = rs_table[0].port; - shift = rs_table[0].iomem_reg_shift; - - while ((c = *s++) != 0) { - while ((*((volatile unsigned char *)com_port + - (UART_LSR << shift)) & UART_LSR_THRE) == 0) - ; - *(volatile unsigned char *)com_port = c; - - if (c == '\n') { - while ((*((volatile unsigned char *)com_port + - (UART_LSR << shift)) & UART_LSR_THRE) == 0) - ; - *(volatile unsigned char *)com_port = '\r'; - } - } -} -#endif /* CONFIG_SERIAL_TEXT_DEBUG */ - -void __init -platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) -{ - parse_bootinfo(find_bootinfo()); - - /* Map in board registers, etc. 
*/ - zx4500_set_bat(); - - isa_io_base = MPC10X_MAPB_ISA_IO_BASE; - isa_mem_base = MPC10X_MAPB_ISA_MEM_BASE; - pci_dram_offset = MPC10X_MAPB_DRAM_OFFSET; - - ppc_md.setup_arch = zx4500_setup_arch; - ppc_md.show_cpuinfo = zx4500_show_cpuinfo; - ppc_md.irq_canonicalize = NULL; - ppc_md.init_IRQ = zx4500_init_IRQ; - ppc_md.get_irq = openpic_get_irq; - ppc_md.init = NULL; - - ppc_md.restart = zx4500_restart; - ppc_md.power_off = zx4500_power_off; - ppc_md.halt = zx4500_halt; - - ppc_md.find_end_of_memory = zx4500_find_end_of_memory; - ppc_md.setup_io_mappings = zx4500_map_io; - - ppc_md.calibrate_decr = zx4500_calibrate_decr; - - ppc_md.heartbeat = NULL; - ppc_md.heartbeat_reset = 0; - ppc_md.heartbeat_count = 0; - -#ifdef CONFIG_SERIAL_TEXT_DEBUG - ppc_md.progress = zx4500_progress; -#else /* !CONFIG_SERIAL_TEXT_DEBUG */ - ppc_md.progress = NULL; -#endif /* CONFIG_SERIAL_TEXT_DEBUG */ - - return; -} diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile index 62bb56c8e6ce..b16cddaada63 100644 --- a/arch/ppc/syslib/Makefile +++ b/arch/ppc/syslib/Makefile @@ -61,8 +61,6 @@ obj-$(CONFIG_SANDPOINT) += i8259.o open_pic.o mpc10x_common.o \ pci_auto.o indirect_pci.o todc_time.o obj-$(CONFIG_SPRUCE) += cpc700_pic.o indirect_pci.o pci_auto.o \ todc_time.o -obj-$(CONFIG_ZX4500) += indirect_pci.o pci_auto.o mpc10x_common.o \ - i8259.o open_pic.o obj-$(CONFIG_8260) += m8260_setup.o ppc8260_pic.o obj-$(CONFIG_BOOTX_TEXT) += btext.o diff --git a/include/asm-ppc/serial.h b/include/asm-ppc/serial.h index 887fa5302a9f..caf3f4a5020b 100644 --- a/include/asm-ppc/serial.h +++ b/include/asm-ppc/serial.h @@ -28,8 +28,6 @@ #include #elif defined(CONFIG_SPRUCE) #include -#elif defined(CONFIG_ZX4500) -#include #elif defined(CONFIG_40x) #include #else -- cgit v1.2.3 From c5bddde0b5c55f99e5dc4c31652a4dd743fbea12 Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Tue, 1 Jul 2003 03:01:31 -0700 Subject: PPC32: Remove the MEN F1 platform code. It was old and unmaintained. 
This change can also be found in patch form at: ftp://source.mvista.com/pub/linuxppc/obsolete/menf1 --- arch/ppc/Kconfig | 5 +- arch/ppc/boot/simple/Makefile | 12 -- arch/ppc/configs/adir_defconfig | 1 - arch/ppc/configs/apus_defconfig | 1 - arch/ppc/configs/common_defconfig | 1 - arch/ppc/configs/ev64260_defconfig | 1 - arch/ppc/configs/gemini_defconfig | 1 - arch/ppc/configs/ibmchrp_defconfig | 1 - arch/ppc/configs/k2_defconfig | 1 - arch/ppc/configs/lopec_defconfig | 1 - arch/ppc/configs/mcpn765_defconfig | 1 - arch/ppc/configs/mvme5100_defconfig | 1 - arch/ppc/configs/pcore_defconfig | 1 - arch/ppc/configs/pmac_defconfig | 1 - arch/ppc/configs/power3_defconfig | 1 - arch/ppc/configs/pplus_defconfig | 1 - arch/ppc/configs/prpmc750_defconfig | 1 - arch/ppc/configs/prpmc800_defconfig | 1 - arch/ppc/configs/sandpoint_defconfig | 1 - arch/ppc/configs/spruce_defconfig | 1 - arch/ppc/defconfig | 1 - arch/ppc/platforms/menf1.h | 24 --- arch/ppc/platforms/menf1_pci.c | 98 ------------ arch/ppc/platforms/menf1_setup.c | 283 ----------------------------------- 24 files changed, 1 insertion(+), 440 deletions(-) delete mode 100644 arch/ppc/platforms/menf1.h delete mode 100644 arch/ppc/platforms/menf1_pci.c delete mode 100644 arch/ppc/platforms/menf1_setup.c diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 7e65585ddd6d..ee21b10f10ba 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -450,9 +450,6 @@ config EV64260 config SPRUCE bool "IBM-Spruce" -config MENF1 - bool "MEN-F1" - config LOPEC bool "Motorola-LoPEC" @@ -533,7 +530,7 @@ config WILLOW config MPC10X_STORE_GATHERING bool "Enable MPC10x store gathering" - depends on FORCE || MENF1 || SANDPOINT + depends on FORCE || SANDPOINT config GT64260 bool diff --git a/arch/ppc/boot/simple/Makefile b/arch/ppc/boot/simple/Makefile index b780626e3286..3c45f8bda059 100644 --- a/arch/ppc/boot/simple/Makefile +++ b/arch/ppc/boot/simple/Makefile @@ -61,12 +61,6 @@ ZIMAGEINITRD := zImage.initrd-STRIPELF END := gemini 
TFTPIMAGE := /tftpboot/zImage.$(END) endif -ifeq ($(CONFIG_MENF1),y) -ZIMAGE := zImage-MENF1 -ZIMAGEINITRD := zImage.initrd-MENF1 -EXTRA := chrpmap.o -TFTPIMAGE := /tftpboot/zImage.menf1 -endif ifeq ($(CONFIG_K2),y) EXTRA := legacy.o TFTPIMAGE := /tftpboot/zImage.k2 @@ -202,12 +196,6 @@ $(images)/zImage.initrd-TREE: $(obj)/zvmlinux.initrd $(MKTREE) $(MKTREE) $(obj)/zvmlinux.initrd $(images)/zImage.initrd.$(END) \ $(ENTRYPOINT) -$(images)/zImage-MENF1: $(obj)/zvmlinux $(MKPREP) - $(MKPREP) -pbp $(obj)/zvmlinux $(images)/zImage.menf1 - -$(images)/zImage.initrd-MENF1: $(obj)/zvmlinux.initrd $(MKPREP) - $(MKPREP) -pbp $(obj)/zvmlinux.initrd $(images)/zImage.initrd.menf1 - $(images)/zImage-PPLUS: $(obj)/zvmlinux $(MKPREP) $(MKBUGBOOT) $(MKPREP) -pbp $(obj)/zvmlinux $(images)/zImage.pplus $(MKBUGBOOT) $(obj)/zvmlinux $(images)/zImage.bugboot diff --git a/arch/ppc/configs/adir_defconfig b/arch/ppc/configs/adir_defconfig index d0dd92bbb842..90ddd552a9dc 100644 --- a/arch/ppc/configs/adir_defconfig +++ b/arch/ppc/configs/adir_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/apus_defconfig b/arch/ppc/configs/apus_defconfig index 559b52330407..718364870856 100644 --- a/arch/ppc/configs/apus_defconfig +++ b/arch/ppc/configs/apus_defconfig @@ -56,7 +56,6 @@ CONFIG_APUS=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/common_defconfig b/arch/ppc/configs/common_defconfig index 2acfa03cafce..23d13c9aa6ef 100644 --- a/arch/ppc/configs/common_defconfig +++ b/arch/ppc/configs/common_defconfig @@ -56,7 +56,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 
is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/ev64260_defconfig b/arch/ppc/configs/ev64260_defconfig index 1e3617a621b6..c5bfc64d7831 100644 --- a/arch/ppc/configs/ev64260_defconfig +++ b/arch/ppc/configs/ev64260_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set CONFIG_EV64260=y # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/gemini_defconfig b/arch/ppc/configs/gemini_defconfig index d2e5789b2809..324f8a7ae156 100644 --- a/arch/ppc/configs/gemini_defconfig +++ b/arch/ppc/configs/gemini_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/ibmchrp_defconfig b/arch/ppc/configs/ibmchrp_defconfig index 7503b06782de..ef4bd5c260de 100644 --- a/arch/ppc/configs/ibmchrp_defconfig +++ b/arch/ppc/configs/ibmchrp_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/k2_defconfig b/arch/ppc/configs/k2_defconfig index e111e715643b..a7bca0e729e9 100644 --- a/arch/ppc/configs/k2_defconfig +++ b/arch/ppc/configs/k2_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/lopec_defconfig b/arch/ppc/configs/lopec_defconfig index 
d9034bab80b0..f4d189fc3fe0 100644 --- a/arch/ppc/configs/lopec_defconfig +++ b/arch/ppc/configs/lopec_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set CONFIG_LOPEC=y # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/mcpn765_defconfig b/arch/ppc/configs/mcpn765_defconfig index 8e2704044880..23b969014ad2 100644 --- a/arch/ppc/configs/mcpn765_defconfig +++ b/arch/ppc/configs/mcpn765_defconfig @@ -50,7 +50,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set CONFIG_MCPN765=y # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/mvme5100_defconfig b/arch/ppc/configs/mvme5100_defconfig index 01f8a8366bdf..67a31c66cea5 100644 --- a/arch/ppc/configs/mvme5100_defconfig +++ b/arch/ppc/configs/mvme5100_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set CONFIG_MVME5100=y diff --git a/arch/ppc/configs/pcore_defconfig b/arch/ppc/configs/pcore_defconfig index ebdf9b60c92c..13db3dcada77 100644 --- a/arch/ppc/configs/pcore_defconfig +++ b/arch/ppc/configs/pcore_defconfig @@ -55,7 +55,6 @@ CONFIG_PCORE=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/pmac_defconfig b/arch/ppc/configs/pmac_defconfig index 39171b94c976..86cddf4e9b52 100644 --- a/arch/ppc/configs/pmac_defconfig +++ b/arch/ppc/configs/pmac_defconfig @@ -57,7 +57,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # 
CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/power3_defconfig b/arch/ppc/configs/power3_defconfig index e42225f01bc8..360f281bd1e6 100644 --- a/arch/ppc/configs/power3_defconfig +++ b/arch/ppc/configs/power3_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/pplus_defconfig b/arch/ppc/configs/pplus_defconfig index c50b0f327f56..ff29c2bae2be 100644 --- a/arch/ppc/configs/pplus_defconfig +++ b/arch/ppc/configs/pplus_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/prpmc750_defconfig b/arch/ppc/configs/prpmc750_defconfig index 8ed08b66bdbf..6500b795e2b4 100644 --- a/arch/ppc/configs/prpmc750_defconfig +++ b/arch/ppc/configs/prpmc750_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/prpmc800_defconfig b/arch/ppc/configs/prpmc800_defconfig index c45c85f7d573..7aa5e0cf3acd 100644 --- a/arch/ppc/configs/prpmc800_defconfig +++ b/arch/ppc/configs/prpmc800_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/sandpoint_defconfig b/arch/ppc/configs/sandpoint_defconfig index 5007a3b7063c..e561740f569c 100644 --- 
a/arch/ppc/configs/sandpoint_defconfig +++ b/arch/ppc/configs/sandpoint_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/configs/spruce_defconfig b/arch/ppc/configs/spruce_defconfig index 5af7d5179d5b..7b0c4e560306 100644 --- a/arch/ppc/configs/spruce_defconfig +++ b/arch/ppc/configs/spruce_defconfig @@ -55,7 +55,6 @@ CONFIG_PPC_STD_MMU=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set CONFIG_SPRUCE=y -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/defconfig b/arch/ppc/defconfig index 23a49dc0ed1d..b404c4872d8f 100644 --- a/arch/ppc/defconfig +++ b/arch/ppc/defconfig @@ -56,7 +56,6 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set diff --git a/arch/ppc/platforms/menf1.h b/arch/ppc/platforms/menf1.h deleted file mode 100644 index ecb9f094ce93..000000000000 --- a/arch/ppc/platforms/menf1.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * arch/ppc/platforms/menf1.h - * - * Definitions for MEN F1 board support - * - * Author: Matt Porter - * - * 2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ - -#ifndef __PPC_PLATFORMS_MENF1_H -#define __PPC_PLATFORMS_MENF1_H - -#define MENF1_NVRAM_AS0 0x70 -#define MENF1_NVRAM_AS1 0x72 -#define MENF1_NVRAM_DATA 0x71 - -#define MENF1_IDE0_BASE_ADDR 0x1f0 -#define MENF1_IDE1_BASE_ADDR 0x170 - -#endif /* __PPC_PLATFORMS_MENF1_H */ diff --git a/arch/ppc/platforms/menf1_pci.c b/arch/ppc/platforms/menf1_pci.c deleted file mode 100644 index 303d8756ffd6..000000000000 --- a/arch/ppc/platforms/menf1_pci.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * arch/ppc/platforms/menf1_pci.c - * - * PCI support for MEN F1 - * - * Author: Matt Porter - * - * 2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "menf1.h" - -#undef DEBUG -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) 
-#endif /* DEBUG */ - -static inline int __init -menf1_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) -{ - static char pci_irq_table[][4] = - /* - * PCI IDSEL/INTPIN->INTLINE - * A B C D - */ - { - {10, 11, 7, 9}, /* IDSEL 26 - PCMIP 0 */ - {0, 0, 0, 0}, /* IDSEL 27 - M5229 IDE */ - {0, 0, 0, 0}, /* IDSEL 28 - M7101 PMU */ - {9, 10, 11, 7}, /* IDSEL 29 - PCMIP 1 */ - {10, 11, 7, 9}, /* IDSEL 30 - P2P Bridge */ - }; - const long min_idsel = 26, max_idsel = 30, irqs_per_slot = 4; - return PCI_IRQ_TABLE_LOOKUP; -}; - -static int -menf1_exclude_device(u_char bus, u_char devfn) -{ - if ((bus == 0) && (devfn == 0xe0)) { - return PCIBIOS_DEVICE_NOT_FOUND; - } - else { - return PCIBIOS_SUCCESSFUL; - } -} - -void __init -menf1_find_bridges(void) -{ - struct pci_controller* hose; - - hose = pcibios_alloc_controller(); - if (!hose) - return; - - hose->first_busno = 0; - hose->last_busno = 0xff; - - ppc_md.pci_exclude_device = menf1_exclude_device; - - mpc10x_bridge_init(hose, - MPC10X_MEM_MAP_B, - MPC10X_MEM_MAP_B, - MPC10X_MAPB_EUMB_BASE); - - hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); - - { - /* Add ISA bus wait states */ - unsigned char isa_control; - - early_read_config_byte(hose, 0, 0x90, 0x43, &isa_control); - isa_control |= 0x33; - early_write_config_byte(hose, 0, 0x90, 0x43, isa_control); - } - - ppc_md.pci_swizzle = common_swizzle; - ppc_md.pci_map_irq = menf1_map_irq; -} diff --git a/arch/ppc/platforms/menf1_setup.c b/arch/ppc/platforms/menf1_setup.c deleted file mode 100644 index b202350a7657..000000000000 --- a/arch/ppc/platforms/menf1_setup.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * arch/ppc/platforms/menf1_setup.c - * - * Board setup routines for MEN F1 - * - * Author: Matt Porter - * - * 2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "menf1.h" - -extern void menf1_find_bridges(void); -extern unsigned long loops_per_jiffy; - -/* Dummy variable to satisfy mpc10x_common.o */ -void *OpenPIC_Addr; - -static int -menf1_show_cpuinfo(struct seq_file *m) -{ - seq_printf(m, "machine\t\t: MEN F1\n"); - - return 0; -} - -static void __init -menf1_setup_arch(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000/HZ; - - /* Lookup PCI host bridges */ - menf1_find_bridges(); - -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - ROOT_DEV = Root_RAM0; - else -#endif -#ifdef CONFIG_ROOT_NFS - ROOT_DEV = Root_NFS; -#else - ROOT_DEV = Root_HDA2; -#endif - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - printk("MEN F1 port (C) 2001 MontaVista Software, Inc. (source@mvista.com)\n"); -} - -static void -menf1_restart(char *cmd) -{ - - int picr1; - struct pci_dev *pdev; - - local_irq_disable(); - - /* - * Firmware doesn't like re-entry using Map B (CHRP), so make sure the - * PCI bridge is using MAP A (PReP). 
- */ - - pdev = pci_find_slot(0, PCI_DEVFN(0,0)); - - while(pdev == NULL); /* paranoia */ - - pci_read_config_dword(pdev, MPC10X_CFG_PICR1_REG, &picr1); - - picr1 = (picr1 & ~MPC10X_CFG_PICR1_ADDR_MAP_MASK) | - MPC10X_CFG_PICR1_ADDR_MAP_A; - - pci_write_config_dword(pdev, MPC10X_CFG_PICR1_REG, picr1); - - asm volatile("sync"); - - /* SRR0 has system reset vector, SRR1 has default MSR value */ - /* rfi restores MSR from SRR1 and sets the PC to the SRR0 value */ - __asm__ __volatile__ - ("\n\ - lis 3,0xfff0 - ori 3,3,0x0100 - mtspr 26,3 - li 3,0 - mtspr 27,3 - rfi - "); - while(1); -} - -static void -menf1_halt(void) -{ - local_irq_disable(); - while (1); -} - -static void -menf1_power_off(void) -{ - menf1_halt(); -} - -static void __init -menf1_init_IRQ(void) -{ - int i; - - for ( i = 0 ; i < NUM_8259_INTERRUPTS ; i++ ) - irq_desc[i].handler = &i8259_pic; - i8259_init(NULL); -} - -/* - * Set BAT 3 to map 0xF0000000. - */ -static __inline__ void -menf1_set_bat(void) -{ - static int mapping_set = 0; - - if (!mapping_set) - { - - /* wait for all outstanding memory accesses to complete */ - mb(); - - /* setup DBATs */ - mtspr(DBAT3U, 0xf0001ffe); - mtspr(DBAT3L, 0xf000002a); - - /* wait for updates */ - mb(); - - mapping_set = 1; - } - return; -} - -static unsigned long __init -menf1_find_end_of_memory(void) -{ - /* Cover the I/O with a BAT */ - menf1_set_bat(); - - /* Read the memory size from the MPC107 SMC */ - return mpc10x_get_mem_size(MPC10X_MEM_MAP_B); -} - -static void __init -menf1_map_io(void) -{ - io_block_mapping(0xfe000000, 0xfe000000, 0x02000000, _PAGE_IO); -} - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) -/* IDE functions */ - -static void __init -menf1_ide_init_hwif_ports (hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i = 8; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) - 
hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - else - hw->io_ports[IDE_CONTROL_OFFSET] = - hw->io_ports[IDE_DATA_OFFSET] + 0x206; - - if (irq != NULL) - *irq = 0; -} - -static int -menf1_ide_default_irq(unsigned long base) -{ - if (base == MENF1_IDE0_BASE_ADDR) - return 14; - else if (base == MENF1_IDE1_BASE_ADDR) - return 15; - else - return 0; -} - -static unsigned long -menf1_ide_default_io_base(int index) -{ - if (index == 0) - return MENF1_IDE0_BASE_ADDR; - else if (index == 1) - return MENF1_IDE1_BASE_ADDR; - else - return 0; -} -#endif - -TODC_ALLOC(); - -void __init -platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) -{ - parse_bootinfo(find_bootinfo()); - - isa_io_base = MPC10X_MAPB_ISA_IO_BASE; - isa_mem_base = MPC10X_MAPB_ISA_MEM_BASE; - pci_dram_offset = MPC10X_MAPB_DRAM_OFFSET; - - ppc_md.setup_arch = menf1_setup_arch; - ppc_md.show_cpuinfo = menf1_show_cpuinfo; - ppc_md.init_IRQ = menf1_init_IRQ; - ppc_md.get_irq = i8259_irq; - - ppc_md.find_end_of_memory = menf1_find_end_of_memory; - ppc_md.setup_io_mappings = menf1_map_io; - - ppc_md.restart = menf1_restart; - ppc_md.power_off = menf1_power_off; - ppc_md.halt = menf1_halt; - - TODC_INIT(TODC_TYPE_MK48T59, - MENF1_NVRAM_AS0, - MENF1_NVRAM_AS1, - MENF1_NVRAM_DATA, - 7); - - ppc_md.time_init = todc_time_init; - ppc_md.get_rtc_time = todc_get_rtc_time; - ppc_md.set_rtc_time = todc_set_rtc_time; - ppc_md.calibrate_decr = todc_calibrate_decr; - - ppc_md.nvram_read_val = todc_m48txx_read_val; - ppc_md.nvram_write_val = todc_m48txx_write_val; - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) - ppc_ide_md.default_io_base = menf1_ide_default_io_base; - ppc_ide_md.default_irq = menf1_ide_default_irq; - ppc_ide_md.ide_init_hwif = menf1_ide_init_hwif_ports; -#endif -} -- cgit v1.2.3 From eaab542a79cce648b67a7203e2c89528c03bf6a0 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Tue, 1 Jul 2003 08:55:31 -0400 Subject: [PATCH] Missing 
IrDA stuff for 2.5.73-bk8: sir_dev Cleanups, and kernel thread + swsuspend fix. --- drivers/net/irda/irtty-sir.c | 30 +++++++++++----------------- drivers/net/irda/sir_dev.c | 22 ++++++++------------- drivers/net/irda/sir_kthread.c | 45 ++++++------------------------------------ 3 files changed, 26 insertions(+), 71 deletions(-) diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index a2d387ced051..99d7afc609bc 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -212,13 +212,6 @@ static int irtty_set_dtr_rts(struct sir_dev *dev, int dtr, int rts) /* called from sir_dev when there is more data to send * context is either netdev->hard_xmit or some transmit-completion bh * i.e. we are under spinlock here and must not sleep. - * - * Note: as of 2.5.44 the usb-serial driver calls down() on a semaphore - * hence we are hitting the might_sleep bugcatcher. IMHO the whole tty-api - * would be pretty pointless if write_room/write would be allowed to sleep. - * Furthermore other tty ldiscs (like ppp) do also require the driver not - * to sleep there. Hence this is considered a current limitation of - * usb-serial. 
*/ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t len) @@ -269,16 +262,15 @@ static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, struct sirtty_cb *priv = tty->disc_data; int i; - if (unlikely(!priv || priv->magic!=IRTTY_MAGIC)) - return; - /* Please use ASSERT - Fix ASSERT as needed - Jean II */ + ASSERT(priv != NULL, return;); + ASSERT(priv->magic == IRTTY_MAGIC, return;); if (unlikely(count==0)) /* yes, this happens */ return; dev = priv->dev; if (!dev) { - printk(KERN_ERR "%s(), not ready yet!\n", __FUNCTION__); + WARNING("%s(), not ready yet!\n", __FUNCTION__); return; } @@ -306,8 +298,8 @@ static int irtty_receive_room(struct tty_struct *tty) { struct sirtty_cb *priv = tty->disc_data; - if (unlikely(!priv || priv->magic!=IRTTY_MAGIC)) - return 0; + ASSERT(priv != NULL, return 0;); + ASSERT(priv->magic == IRTTY_MAGIC, return 0;); return 65536; /* We can handle an infinite amount of data. :-) */ } @@ -323,8 +315,8 @@ static void irtty_write_wakeup(struct tty_struct *tty) { struct sirtty_cb *priv = tty->disc_data; - if (unlikely(!priv || priv->magic!=IRTTY_MAGIC)) - return; + ASSERT(priv != NULL, return;); + ASSERT(priv->magic == IRTTY_MAGIC, return;); tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); @@ -559,7 +551,7 @@ static int irtty_open(struct tty_struct *tty) up(&irtty_sem); - printk(KERN_INFO "%s - done\n", __FUNCTION__); + IRDA_DEBUG(0, "%s - %s: irda line discipline opened\n", __FUNCTION__, tty->name); return 0; @@ -580,8 +572,8 @@ static void irtty_close(struct tty_struct *tty) { struct sirtty_cb *priv = tty->disc_data; - if (!priv || priv->magic != IRTTY_MAGIC) - return; + ASSERT(priv != NULL, return;); + ASSERT(priv->magic == IRTTY_MAGIC, return;); /* Hm, with a dongle attached the dongle driver wants * to close the dongle - which requires the use of @@ -610,6 +602,8 @@ static void irtty_close(struct tty_struct *tty) tty->driver->stop(tty); kfree(priv); + + IRDA_DEBUG(0, "%s - %s: irda line 
discipline closed\n", __FUNCTION__, tty->name); } /* ------------------------------------------------------- */ diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c index d0af0c633be7..6572d11bbc1f 100644 --- a/drivers/net/irda/sir_dev.c +++ b/drivers/net/irda/sir_dev.c @@ -201,14 +201,12 @@ void sirdev_write_complete(struct sir_dev *dev) int sirdev_receive(struct sir_dev *dev, const unsigned char *cp, size_t count) { if (!dev || !dev->netdev) { - IRDA_DEBUG(0, "%s(), not ready yet!\n", __FUNCTION__); - /* Use WARNING instead of IRDA_DEBUG */ + WARNING("%s(), not ready yet!\n", __FUNCTION__); return -1; } if (!dev->irlap) { - IRDA_DEBUG(0, "%s - too early: %p / %d!\n", __FUNCTION__, cp, count); - /* Use WARNING instead of IRDA_DEBUG */ + WARNING("%s - too early: %p / %d!\n", __FUNCTION__, cp, count); return -1; } @@ -218,7 +216,7 @@ int sirdev_receive(struct sir_dev *dev, const unsigned char *cp, size_t count) */ irda_device_set_media_busy(dev->netdev, TRUE); dev->stats.rx_dropped++; - printk(KERN_INFO "%s; rx-drop: %d\n", __FUNCTION__, count); + IRDA_DEBUG(0, "%s; rx-drop: %d\n", __FUNCTION__, count); return 0; } @@ -431,7 +429,6 @@ static int sirdev_alloc_buffers(struct sir_dev *dev) return -ENOMEM; skb_reserve(dev->rx_buff.skb, 1); dev->rx_buff.head = dev->rx_buff.skb->data; - /* No need to memset the buffer, unless you are really pedantic */ dev->tx_buff.head = kmalloc(dev->tx_buff.truesize, GFP_KERNEL); if (dev->tx_buff.head == NULL) { @@ -439,8 +436,6 @@ static int sirdev_alloc_buffers(struct sir_dev *dev) dev->rx_buff.skb = NULL; dev->rx_buff.head = NULL; return -ENOMEM; - /* Hu ??? This should not be here, Martin ? 
*/ - memset(dev->tx_buff.head, 0, dev->tx_buff.truesize); } dev->tx_buff.data = dev->tx_buff.head; @@ -492,7 +487,7 @@ static int sirdev_open(struct net_device *ndev) netif_wake_queue(ndev); - printk(KERN_INFO "%s - done, speed = %d\n", __FUNCTION__, dev->speed); + IRDA_DEBUG(2, "%s - done, speed = %d\n", __FUNCTION__, dev->speed); return 0; @@ -512,7 +507,7 @@ static int sirdev_close(struct net_device *ndev) struct sir_dev *dev = ndev->priv; const struct sir_driver *drv; - printk(KERN_INFO "%s\n", __FUNCTION__); +// IRDA_DEBUG(0, "%s\n", __FUNCTION__); netif_stop_queue(ndev); @@ -570,7 +565,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n struct net_device *ndev; struct sir_dev *dev; - printk(KERN_INFO "%s - %s\n", __FUNCTION__, name); + IRDA_DEBUG(0, "%s - %s\n", __FUNCTION__, name); /* instead of adding tests to protect against drv->do_write==NULL * at several places we refuse to create a sir_dev instance for @@ -584,8 +579,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n */ dev = kmalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) { - printk(KERN_ERR "IrDA: Can't allocate memory for " - "IrDA control block!\n"); + ERROR("%s - Can't allocate memory for IrDA control block!\n", __FUNCTION__); goto out; } memset(dev, 0, sizeof(*dev)); @@ -638,7 +632,7 @@ int sirdev_put_instance(struct sir_dev *dev) { int err = 0; - printk(KERN_INFO "%s\n", __FUNCTION__); + IRDA_DEBUG(0, "%s\n", __FUNCTION__); atomic_set(&dev->enable_rx, 0); diff --git a/drivers/net/irda/sir_kthread.c b/drivers/net/irda/sir_kthread.c index cc38b8809b14..5e7f455432c2 100644 --- a/drivers/net/irda/sir_kthread.c +++ b/drivers/net/irda/sir_kthread.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -107,44 +108,12 @@ static void run_irda_queue(void) spin_unlock_irqrestore(&irda_rq_queue.lock, flags); } -static int irda_rt_prio = 0; /* MODULE_PARM? 
*/ - static int irda_thread(void *startup) { DECLARE_WAITQUEUE(wait, current); daemonize("kIrDAd"); - set_fs(KERNEL_DS); - - if (irda_rt_prio > 0) { -#if 0 /* works but requires EXPORT_SYMBOL(setscheduler) */ - struct sched_param param; - - param.sched_priority = irda_rt_prio; - setscheduler(0, SCHED_FIFO, ¶m); -#endif - -#if 0 /* doesn't work - has some tendency to trigger instant reboot! - * looks like we would have to deactivate current on the - * runqueue - which is only possible inside of kernel/sched.h - */ - - /* runqueues are per-cpu and we are current on this cpu. Hence - * The tasklist_lock with irq-off protects our runqueue too - * and we don't have to lock it (which would be impossible, - * because it is private in kernel/sched.c) - */ - - read_lock_irq(&tasklist_lock); - current->rt_priority = (irda_rt_priopolicy = SCHED_FIFO; - current->prio = MAX_USER_RT_PRIO-1 - irda_rt_prio; - read_unlock_irq(&tasklist_lock); -#endif - } - irda_rq_queue.thread = current; complete((struct completion *)startup); @@ -166,6 +135,10 @@ static int irda_thread(void *startup) set_task_state(current, TASK_RUNNING); remove_wait_queue(&irda_rq_queue.kick, &wait); + /* make swsusp happy with our thread */ + if (current->flags & PF_FREEZE) + refrigerator(PF_IOTHREAD); + run_irda_queue(); } @@ -442,7 +415,6 @@ static void irda_config_fsm(void *data) case SIRDEV_STATE_COMPLETE: /* config change finished, so we are not busy any longer */ sirdev_enable_rx(dev); - printk(KERN_INFO "%s - up\n", __FUNCTION__); up(&fsm->sem); return; } @@ -462,9 +434,7 @@ int sirdev_schedule_request(struct sir_dev *dev, int initial_state, unsigned par struct sir_fsm *fsm = &dev->fsm; int xmit_was_down; -// IRDA_DEBUG(2, "%s - state=0x%04x / param=%u\n", __FUNCTION__, initial_state, param); - - printk(KERN_INFO "%s - state=0x%04x / param=%u\n", __FUNCTION__, initial_state, param); + IRDA_DEBUG(2, "%s - state=0x%04x / param=%u\n", __FUNCTION__, initial_state, param); if (in_interrupt()) { if 
(down_trylock(&fsm->sem)) { @@ -474,12 +444,10 @@ int sirdev_schedule_request(struct sir_dev *dev, int initial_state, unsigned par } else down(&fsm->sem); - printk(KERN_INFO "%s - down\n", __FUNCTION__); if (fsm->state == SIRDEV_STATE_DEAD) { /* race with sirdev_close should never happen */ ERROR("%s(), instance staled!\n", __FUNCTION__); - printk(KERN_INFO "%s - up\n", __FUNCTION__); up(&fsm->sem); return -ESTALE; /* or better EPIPE? */ } @@ -501,7 +469,6 @@ int sirdev_schedule_request(struct sir_dev *dev, int initial_state, unsigned par atomic_set(&dev->enable_rx, 1); if (!xmit_was_down) netif_wake_queue(dev->netdev); - printk(KERN_INFO "%s - up\n", __FUNCTION__); up(&fsm->sem); return -EAGAIN; } -- cgit v1.2.3 From cb707d7d8072a75cf875829a5f4fcbeece1d1f13 Mon Sep 17 00:00:00 2001 From: Ben Collins Date: Tue, 1 Jul 2003 06:24:39 -0700 Subject: [SPARC64]: Fix formatting and typos in boot Makefile. --- arch/sparc64/boot/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc64/boot/Makefile b/arch/sparc64/boot/Makefile index 0e46fc2d9821..5eb14ff1c518 100644 --- a/arch/sparc64/boot/Makefile +++ b/arch/sparc64/boot/Makefile @@ -10,11 +10,11 @@ ELFTOAOUT := elftoaout host-progs := piggyback targets := image tftpboot.img vmlinux.aout -quiet_cmd_elftoaout = ELT2AOUT $@ +quiet_cmd_elftoaout = ELF2AOUT $@ cmd_elftoaout = $(ELFTOAOUT) vmlinux -o $@ -quiet_cmd_piggy = PIGGY $@ +quiet_cmd_piggy = PIGGY $@ cmd_piggy = $(obj)/piggyback $@ System.map $(ROOT_IMG) -quiet_cmd_strip = STRIP $@ +quiet_cmd_strip = STRIP $@ cmd_strip = $(STRIP) -R .comment -R .note -K sun4u_init -K _end -K _start vmlinux -o $@ -- cgit v1.2.3 From fd9afbd57291d8585e6041a703aa56b5a88c3cfb Mon Sep 17 00:00:00 2001 From: Ben Collins Date: Tue, 1 Jul 2003 06:25:32 -0700 Subject: [SPARC64]: Enable KALLSYMS, use print_symbol(). 
--- arch/sparc64/Kconfig | 7 +++++++ arch/sparc64/kernel/process.c | 2 ++ arch/sparc64/kernel/traps.c | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 5764acac8480..8ee1cc052721 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -924,6 +924,13 @@ config DEBUG_SPINLOCK best used in conjunction with the NMI watchdog so that spinlock deadlocks are also debuggable. +config KALLSYMS + bool "Load all symbols for debugging/ksymoops" + help + Say Y here to let the kernel print out symbolic crash information and + symbolic stack backtraces. This increases the size of the kernel + somewhat, as all symbols have to be loaded into the kernel image. + config DEBUG_SPINLOCK_SLEEP bool "Sleep-inside-spinlock checking" help diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 6326ab3761ea..46d473c10417 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -284,6 +285,7 @@ void __show_regs(struct pt_regs * regs) #endif printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate, regs->tpc, regs->tnpc, regs->y, print_tainted()); + print_symbol("TPC: <%s>\n", regs->tpc); printk("g0: %016lx g1: %016lx g2: %016lx g3: %016lx\n", regs->u_regs[0], regs->u_regs[1], regs->u_regs[2], regs->u_regs[3]); diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index bfeedd653c24..3ae4b144b27f 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -12,6 +12,7 @@ #include #include /* for jiffies */ #include +#include #include #include #include @@ -1644,7 +1645,9 @@ void die_if_kernel(char *str, struct pt_regs *regs) (char *) rw < ((char *) current) + sizeof (union thread_union) && !(((unsigned long) rw) & 0x7)) { - printk("Caller[%016lx]\n", rw->ins[7]); + printk("Caller[%016lx]", rw->ins[7]); + print_symbol(": %s\n", 
rw->ins[7]); + printk("\n"); lastrw = rw; rw = (struct reg_window *) (rw->ins[6] + STACK_BIAS); -- cgit v1.2.3 From 2ea58325e8b4b0c40b132dc02d037f69565c2769 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 1 Jul 2003 19:41:10 -0700 Subject: [PATCH] dm: fix memory leak --- drivers/md/dm-ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index eaeeb15358f7..2d7697c4624c 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -238,6 +238,7 @@ void __hash_remove(struct hash_cell *hc) list_del(&hc->name_list); unregister_with_devfs(hc); dm_put(hc->md); + free_cell(hc); } void dm_hash_remove_all(void) -- cgit v1.2.3 From 8732dde80439eb2e2da2d9b792f77233196ed209 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 1 Jul 2003 19:41:21 -0700 Subject: [PATCH] dm: remove bogus yields Replace a couple of bogus yields() with schedule() and io_schedule() respectively. --- drivers/md/dm-ioctl.c | 3 +-- drivers/md/dm.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 2d7697c4624c..744c3273e6e3 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -730,8 +730,7 @@ static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user) dm_table_put(table); dm_put(md); - yield(); - set_current_state(TASK_RUNNING); + schedule(); out: return results_to_user(user, param, NULL, 0); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 05e6ffc49d3e..2a9ae0ab6823 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -783,10 +783,9 @@ int dm_suspend(struct mapped_device *md) if (!atomic_read(&md->pending)) break; - yield(); + io_schedule(); } - - current->state = TASK_RUNNING; + set_current_state(TASK_RUNNING); down_write(&md->lock); remove_wait_queue(&md->wait, &wait); -- cgit v1.2.3 From 9d26f90e8e75cb403ee926624eeaa1529d1f37fa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 1 Jul 2003 21:08:01 -0700 Subject: [PATCH] 
PCI: change WARN_ON(irqs_disabled()) to WARN_ON(in_interrupt()) to keep the fusion drivers happy. --- drivers/pci/search.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 85c74126ee6a..4793caaa4989 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -9,6 +9,7 @@ #include #include +#include spinlock_t pci_bus_lock = SPIN_LOCK_UNLOCKED; @@ -66,7 +67,7 @@ pci_find_next_bus(const struct pci_bus *from) struct list_head *n; struct pci_bus *b = NULL; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->node.next : pci_root_buses.next; if (n != &pci_root_buses) @@ -125,7 +126,7 @@ pci_find_subsys(unsigned int vendor, unsigned int device, struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->global_list.next : pci_devices.next; @@ -190,7 +191,7 @@ pci_get_subsys(unsigned int vendor, unsigned int device, struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->global_list.next : pci_devices.next; @@ -256,7 +257,7 @@ pci_find_device_reverse(unsigned int vendor, unsigned int device, const struct p struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? 
from->global_list.prev : pci_devices.prev; -- cgit v1.2.3 From 495c3da118c8b380933cee1dc2a77a8f334ad579 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 1 Jul 2003 21:16:48 -0700 Subject: Linux 2.5.74 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4abb3ed46dff..51145c81be12 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 5 -SUBLEVEL = 73 +SUBLEVEL = 74 EXTRAVERSION = # *DOCUMENTATION* -- cgit v1.2.3 From eeb9647974eff3854b8fb2d285eb07c2e4b1a480 Mon Sep 17 00:00:00 2001 From: Dagfinn Ilmari Mannsåker Date: Wed, 2 Jul 2003 02:02:30 -0700 Subject: [PATCH] Allow modular DM With the recent fixes, io_schedule needs to be exported for modular dm to work. --- kernel/ksyms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 8544287c9045..816627adc50a 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -462,6 +462,7 @@ EXPORT_SYMBOL(preempt_schedule); #endif EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(yield); +EXPORT_SYMBOL(io_schedule); EXPORT_SYMBOL(__cond_resched); EXPORT_SYMBOL(set_user_nice); EXPORT_SYMBOL(task_nice); -- cgit v1.2.3 From 17003453e368de3c1ab7700ab134bce5f0b50e47 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:47:23 -0700 Subject: [PATCH] move_vma() make_pages_present() fix From: Hugh Dickins mremap's move_vma VM_LOCKED case was still wrong. If the do_munmap unmaps a part of new_vma, then its vm_start and vm_end from before cannot both be the right addresses for the make_pages_present range, and may BUG() there. We need [new_addr, new_addr+new_len) to be locked down; but move_page_tables already transferred the locked pages [new_addr, new_addr+old_len), and they're either held in a VM_LOCKED vma throughout, or temporarily in no vma: in neither case can be swapped out, so no need to run over that range again.
--- mm/mremap.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index 8c6ec8b926d4..3bab43a88125 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -244,9 +244,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, } if (!move_page_tables(vma, new_addr, addr, old_len)) { - unsigned long must_fault_in; - unsigned long fault_in_start; - unsigned long fault_in_end; + unsigned long vm_locked = vma->vm_flags & VM_LOCKED; if (allocated_vma) { *new_vma = *vma; @@ -272,14 +270,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, } else vma = NULL; /* nothing more to do */ - must_fault_in = new_vma->vm_flags & VM_LOCKED; - fault_in_start = new_vma->vm_start; - fault_in_end = new_vma->vm_end; - do_munmap(current->mm, addr, old_len); - /* new_vma could have been invalidated by do_munmap */ - /* Restore VM_ACCOUNT if one or two pieces of vma left */ if (vma) { vma->vm_flags |= VM_ACCOUNT; @@ -288,9 +280,11 @@ static unsigned long move_vma(struct vm_area_struct *vma, } current->mm->total_vm += new_len >> PAGE_SHIFT; - if (must_fault_in) { + if (vm_locked) { current->mm->locked_vm += new_len >> PAGE_SHIFT; - make_pages_present(fault_in_start, fault_in_end); + if (new_len > old_len) + make_pages_present(new_addr + old_len, + new_addr + new_len); } return new_addr; } -- cgit v1.2.3 From 98eb235b7febbb2941e1b442b92fc5e23b0d7a83 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:47:30 -0700 Subject: [PATCH] page unmapping debug From: Manfred Spraul Manfred's latest page unmapping debug patch. The patch adds support for a special debug mode to both the page and the slab allocator: Unused pages are removed from the kernel linear mapping. This means that now any access to freed memory will cause an immediate exception. Right now, read accesses remain totally unnoticed and write accesses may be caught by the slab poisoning, but usually far too late for a meaningful bug report.
The implementation is based on a new arch-dependent function, kernel_map_pages(), that removes the pages from the linear mapping. It's right now only implemented for i386. Changelog: - Add kernel_map_pages() for i386, based on change_page_attr. If DEBUG_PAGEALLOC is not set, then the function is an empty stub. The stub is in <linux/mm.h>, i.e. it exists for all archs. - Make change_page_attr irq safe. Note that it's not fully irq safe due to the lack of the tlb flush ipi, but it's good enough for kernel_map_pages(). Another problem is that kernel_map_pages is not permitted to fail, thus PSE is disabled if DEBUG_PAGEALLOC is enabled - use kernel_map_pages for the page allocator. - use kernel_map_pages for the slab allocator. I couldn't resist and added additional debugging support into mm/slab.c: * at kfree time, the complete backtrace of the kfree caller is stored in the freed object. * a ptrinfo() function that dumps all known data about a kernel virtual address: the pte value, if it belongs to a slab cache the cache name and additional info. * merging of common code: new helper functions obj_dbglen and obj_dbghdr for the conversion between the user visible object pointers/len and the actual, internal addresses and len values. --- arch/i386/Kconfig | 8 ++ arch/i386/kernel/cpu/common.c | 8 ++ arch/i386/mm/pageattr.c | 82 +++++++++------- include/asm-i386/cacheflush.h | 5 + include/linux/mm.h | 8 ++ include/linux/slab.h | 2 + mm/page_alloc.c | 15 ++- mm/slab.c | 218 ++++++++++++++++++++++++++++++++++-------- 8 files changed, 267 insertions(+), 79 deletions(-) diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index ed5254e36eeb..3d78369616c0 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1339,6 +1339,14 @@ config DEBUG_SPINLOCK best used in conjunction with the NMI watchdog so that spinlock deadlocks are also debuggable.
+config DEBUG_PAGEALLOC + bool "Page alloc debugging" + depends on DEBUG_KERNEL + help + Unmap pages from the kernel linear mapping after free_pages(). + This results in a large slowdown, but helps to find certain types + of memory corruptions. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index a114c2ab7f83..5e579ede103c 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -430,6 +430,14 @@ void __init early_cpu_init(void) rise_init_cpu(); nexgen_init_cpu(); umc_init_cpu(); + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* pse is not compatible with on-the-fly unmapping, + * disable it even if the cpus claim to support it. + */ + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; +#endif } /* * cpu_init() initializes state that is per-CPU. Some data is already diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index 7bb9f7ebe469..51b777c42d53 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -13,6 +13,10 @@ #include #include +static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED; +static struct list_head df_list = LIST_HEAD_INIT(df_list); + + static inline pte_t *lookup_address(unsigned long address) { pgd_t *pgd = pgd_offset_k(address); @@ -31,10 +35,15 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot) { int i; unsigned long addr; - struct page *base = alloc_pages(GFP_KERNEL, 0); + struct page *base; pte_t *pbase; + + spin_unlock_irq(&cpa_lock); + base = alloc_pages(GFP_KERNEL, 0); + spin_lock_irq(&cpa_lock); if (!base) return NULL; + address = __pa(address); addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); @@ -87,7 +96,7 @@ static inline void revert_page(struct page *kpte_page, unsigned long address) } static int -__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage) +__change_page_attr(struct page *page, pgprot_t 
prot) { pte_t *kpte; unsigned long address; @@ -123,7 +132,7 @@ __change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage) } if (cpu_has_pse && (atomic_read(&kpte_page->count) == 1)) { - *oldpage = kpte_page; + list_add(&kpte_page->list, &df_list); revert_page(kpte_page, address); } return 0; @@ -134,12 +143,6 @@ static inline void flush_map(void) on_each_cpu(flush_kernel_map, NULL, 1, 1); } -struct deferred_page { - struct deferred_page *next; - struct page *fpage; -}; -static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ - /* * Change the page attributes of an page in the linear mapping. * @@ -156,47 +159,54 @@ static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ int change_page_attr(struct page *page, int numpages, pgprot_t prot) { int err = 0; - struct page *fpage; int i; + unsigned long flags; - down_write(&init_mm.mmap_sem); + spin_lock_irqsave(&cpa_lock, flags); for (i = 0; i < numpages; i++, page++) { - fpage = NULL; - err = __change_page_attr(page, prot, &fpage); + err = __change_page_attr(page, prot); if (err) break; - if (fpage) { - struct deferred_page *df; - df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); - if (!df) { - flush_map(); - __free_page(fpage); - } else { - df->next = df_list; - df->fpage = fpage; - df_list = df; - } - } } - up_write(&init_mm.mmap_sem); + spin_unlock_irqrestore(&cpa_lock, flags); return err; } void global_flush_tlb(void) { - struct deferred_page *df, *next_df; + LIST_HEAD(l); + struct list_head* n; - down_read(&init_mm.mmap_sem); - df = xchg(&df_list, NULL); - up_read(&init_mm.mmap_sem); + BUG_ON(irqs_disabled()); + + spin_lock_irq(&cpa_lock); + list_splice_init(&df_list, &l); + spin_unlock_irq(&cpa_lock); flush_map(); - for (; df; df = next_df) { - next_df = df->next; - if (df->fpage) - __free_page(df->fpage); - kfree(df); - } + n = l.next; + while (n != &l) { + struct page *pg = list_entry(n, struct page, list); + n = n->next; + __free_page(pg); + } } 
+#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (PageHighMem(page)) + return; + /* the return value is ignored - the calls cannot fail, + * large pages are disabled at boot time. + */ + change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); + /* we should perform an IPI and flush all tlbs, + * but that can deadlock->flush only current cpu. + */ + __flush_tlb_all(); +} +EXPORT_SYMBOL(kernel_map_pages); +#endif + EXPORT_SYMBOL(change_page_attr); EXPORT_SYMBOL(global_flush_tlb); diff --git a/include/asm-i386/cacheflush.h b/include/asm-i386/cacheflush.h index adc632b97542..d3ce5511dbbc 100644 --- a/include/asm-i386/cacheflush.h +++ b/include/asm-i386/cacheflush.h @@ -17,4 +17,9 @@ void global_flush_tlb(void); int change_page_attr(struct page *page, int numpages, pgprot_t prot); +#ifdef CONFIG_DEBUG_PAGEALLOC +/* internal debugging function */ +void kernel_map_pages(struct page *page, int numpages, int enable); +#endif + #endif /* _I386_CACHEFLUSH_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 492bc8aeb053..4d183974fd36 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -609,5 +609,13 @@ extern struct page * follow_page(struct mm_struct *mm, unsigned long address, int write); extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); + +#ifndef CONFIG_DEBUG_PAGEALLOC +static inline void +kernel_map_pages(struct page *page, int numpages, int enable) +{ +} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 843c8d638d29..9f8bccba4ad3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -114,6 +114,8 @@ extern kmem_cache_t *signal_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; +void ptrinfo(unsigned long addr); + #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/mm/page_alloc.c 
b/mm/page_alloc.c index dfd254c2c94b..741866b59d7d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -32,6 +32,8 @@ #include #include +#include + DECLARE_BITMAP(node_online_map, MAX_NUMNODES); DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS); struct pglist_data *pgdat_list; @@ -265,6 +267,7 @@ void __free_pages_ok(struct page *page, unsigned int order) mod_page_state(pgfree, 1 << order); free_pages_check(__FUNCTION__, page); list_add(&page->list, &list); + kernel_map_pages(page, 1<pageset[get_cpu()].pcp[cold]; @@ -556,7 +560,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += z->pages_low * sysctl_lower_zone_protection; } @@ -579,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += local_min * sysctl_lower_zone_protection; } @@ -594,7 +598,7 @@ rebalance: page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } goto nopage; } @@ -622,7 +626,7 @@ rebalance: (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += z->pages_low * sysctl_lower_zone_protection; } @@ -653,6 +657,9 @@ nopage: current->comm, order, gfp_mask); } return NULL; +got_pg: + kernel_map_pages(page, 1 << order, 1); + return page; } /* diff --git a/mm/slab.c b/mm/slab.c index cad1bb2e8ebb..afb8d8415999 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -89,7 +89,11 @@ #include #include #include +#include + #include +#include +#include /* * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, @@ -351,6 +355,34 @@ struct kmem_cache_s { #define POISON_AFTER 0x6b /* for use-after-free poisoning */ #define POISON_END 0xa5 /* end-byte of poisoning */ +static inline int obj_dbghead(kmem_cache_t *cachep) +{ 
+ if (cachep->flags & SLAB_RED_ZONE) + return BYTES_PER_WORD; + return 0; +} + +static inline int obj_dbglen(kmem_cache_t *cachep) +{ + int len = 0; + + if (cachep->flags & SLAB_RED_ZONE) { + len += 2*BYTES_PER_WORD; + } + if (cachep->flags & SLAB_STORE_USER) { + len += BYTES_PER_WORD; + } + return len; +} +#else +static inline int obj_dbghead(kmem_cache_t *cachep) +{ + return 0; +} +static inline int obj_dbglen(kmem_cache_t *cachep) +{ + return 0; +} #endif /* @@ -765,16 +797,45 @@ static inline void kmem_freepages (kmem_cache_t *cachep, void *addr) } #if DEBUG -static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) + +#ifdef CONFIG_DEBUG_PAGEALLOC +static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller) { - int size = cachep->objsize; - if (cachep->flags & SLAB_RED_ZONE) { - addr += BYTES_PER_WORD; - size -= 2*BYTES_PER_WORD; - } - if (cachep->flags & SLAB_STORE_USER) { - size -= BYTES_PER_WORD; + int size = cachep->objsize-obj_dbglen(cachep); + + addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)]; + + if (size < 5*sizeof(unsigned long)) + return; + + *addr++=0x12345678; + *addr++=caller; + *addr++=smp_processor_id(); + size -= 3*sizeof(unsigned long); + { + unsigned long *sptr = &caller; + unsigned long svalue; + + while (((long) sptr & (THREAD_SIZE-1)) != 0) { + svalue = *sptr++; + if (kernel_text_address(svalue)) { + *addr++=svalue; + size -= sizeof(unsigned long); + if (size <= sizeof(unsigned long)) + break; + } + } + } + *addr++=0x87654321; +} +#endif + +static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) +{ + int size = cachep->objsize-obj_dbglen(cachep); + addr = &((char*)addr)[obj_dbghead(cachep)]; + memset(addr, val, size); *(unsigned char *)(addr+size-1) = POISON_END; } @@ -796,15 +857,11 @@ static void *scan_poisoned_obj(unsigned char* addr, unsigned int size) static void check_poison_obj(kmem_cache_t *cachep, void *addr) { - int size = cachep->objsize; void 
*end; - if (cachep->flags & SLAB_RED_ZONE) { - addr += BYTES_PER_WORD; - size -= 2*BYTES_PER_WORD; - } - if (cachep->flags & SLAB_STORE_USER) { - size -= BYTES_PER_WORD; - } + int size = cachep->objsize-obj_dbglen(cachep); + + addr = &((char*)addr)[obj_dbghead(cachep)]; + end = scan_poisoned_obj(addr, size); if (end) { int s; @@ -858,8 +915,16 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) void *objp = slabp->s_mem + cachep->objsize * i; int objlen = cachep->objsize; - if (cachep->flags & SLAB_POISON) + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep)) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1); + else + check_poison_obj(cachep, objp); +#else check_poison_obj(cachep, objp); +#endif + } if (cachep->flags & SLAB_STORE_USER) objlen -= BYTES_PER_WORD; @@ -952,6 +1017,10 @@ kmem_cache_create (const char *name, size_t size, size_t offset, } #if FORCED_DEBUG +#ifdef CONFIG_DEBUG_PAGEALLOC + if (size < PAGE_SIZE-3*BYTES_PER_WORD && size > 128) + size = PAGE_SIZE-3*BYTES_PER_WORD; +#endif /* * Enable redzoning and last user accounting, except * - for caches with forced alignment: redzoning would violate the @@ -1404,6 +1473,8 @@ static void cache_init_objs (kmem_cache_t * cachep, slab_error(cachep, "constructor overwrote the" " start of an object"); } + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); #else if (cachep->ctor) cachep->ctor(objp, cachep, ctor_flags); @@ -1584,24 +1655,27 @@ static inline void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, v * caller can perform a verify of its state (debugging). * Called without the cache-lock held. 
*/ - if (cachep->flags & SLAB_RED_ZONE) { - cachep->ctor(objp+BYTES_PER_WORD, + cachep->ctor(objp+obj_dbghead(cachep), cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); - } else { - cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); - } } if (cachep->flags & SLAB_POISON && cachep->dtor) { /* we want to cache poison the object, * call the destruction callback */ - if (cachep->flags & SLAB_RED_ZONE) - cachep->dtor(objp+BYTES_PER_WORD, cachep, 0); - else - cachep->dtor(objp, cachep, 0); + cachep->dtor(objp+obj_dbghead(cachep), cachep, 0); } - if (cachep->flags & SLAB_POISON) + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { + store_stackinfo(cachep, objp, POISON_AFTER); + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); + } else { + poison_obj(cachep, objp, POISON_AFTER); + } +#else poison_obj(cachep, objp, POISON_AFTER); +#endif + } #endif return objp; } @@ -1617,6 +1691,7 @@ static inline void check_slabp(kmem_cache_t *cachep, struct slab *slabp) for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { entries++; BUG_ON(entries > cachep->num); + BUG_ON(i < 0 || i >= cachep->num); } BUG_ON(entries != cachep->num - slabp->inuse); #endif @@ -1746,9 +1821,16 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, if (!objp) return objp; - if (cachep->flags & SLAB_POISON) { + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1); + else + check_poison_obj(cachep, objp); +#else check_poison_obj(cachep, objp); - poison_obj(cachep, objp, POISON_BEFORE); +#endif + poison_obj(cachep, objp, POISON_BEFORE); } if (cachep->flags & SLAB_STORE_USER) { objlen -= BYTES_PER_WORD; @@ -2085,16 +2167,7 @@ free_percpu(const void *objp) unsigned int kmem_cache_size(kmem_cache_t *cachep) { - unsigned int objlen = 
cachep->objsize; - -#if DEBUG - if (cachep->flags & SLAB_RED_ZONE) - objlen -= 2*BYTES_PER_WORD; - if (cachep->flags & SLAB_STORE_USER) - objlen -= BYTES_PER_WORD; -#endif - - return objlen; + return cachep->objsize-obj_dbglen(cachep); } kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags) @@ -2626,3 +2699,70 @@ unsigned int ksize(const void *objp) return size; } +void ptrinfo(unsigned long addr) +{ + struct page *page; + + printk("Dumping data about address %p.\n", (void*)addr); + if (!virt_addr_valid((void*)addr)) { + printk("virt addr invalid.\n"); + return; + } + do { + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd; + if (pgd_none(*pgd)) { + printk("No pgd.\n"); + break; + } + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd)) { + printk("No pmd.\n"); + break; + } +#ifdef CONFIG_X86 + if (pmd_large(*pmd)) { + printk("Large page.\n"); + break; + } +#endif + printk("normal page, pte_val 0x%llx\n", + (unsigned long long)pte_val(*pte_offset_kernel(pmd, addr))); + } while(0); + + page = virt_to_page((void*)addr); + printk("struct page at %p, flags %lxh.\n", page, page->flags); + if (PageSlab(page)) { + kmem_cache_t *c; + struct slab *s; + unsigned long flags; + int objnr; + void *objp; + + c = GET_PAGE_CACHE(page); + printk("belongs to cache %s.\n",c->name); + + spin_lock_irqsave(&c->spinlock, flags); + s = GET_PAGE_SLAB(page); + printk("slabp %p with %d inuse objects (from %d).\n", + s, s->inuse, c->num); + check_slabp(c,s); + + objnr = (addr-(unsigned long)s->s_mem)/c->objsize; + objp = s->s_mem+c->objsize*objnr; + printk("points into object no %d, starting at %p, len %d.\n", + objnr, objp, c->objsize); + if (objnr >= c->num) { + printk("Bad obj number.\n"); + } else { + kernel_map_pages(virt_to_page(objp), c->objsize/PAGE_SIZE, 1); + + printk("redzone: %lxh/%lxh/%lxh.\n", + ((unsigned long*)objp)[0], + ((unsigned long*)(objp+c->objsize))[-2], + ((unsigned long*)(objp+c->objsize))[-1]); + } + spin_unlock_irqrestore(&c->spinlock, flags); + + } +} -- 
cgit v1.2.3 From d4388840f41d71d1570326f77860431c7080f7ed Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:47:43 -0700 Subject: [PATCH] NUMA memory reporting fix From: Dave Hansen The current numa meminfo code exports (via sysfs) pgdat->node_size, as totalram. This variable is consistently used elsewhere to mean "the number of physical pages that this particular node spans". This is _not_ what we want to see from meminfo, which is: "how much actual memory does this node have?" The following patch removes pgdat->node_size, and replaces it with ->node_spanned_pages. This is to avoid confusion with a new variable, node_present_pages, which is the _actual_ value that we want to export in meminfo. Most of the patch is a simple s/node_size/node_spanned_pages/. The node_size() macro is also removed, and replaced with new ones for node_{spanned,present}_pages() to avoid confusion. We were bitten by this problem in this bug: http://bugme.osdl.org/show_bug.cgi?id=818 Compiled and tested on NUMA-Q. 
--- arch/alpha/mm/numa.c | 4 ++-- arch/arm/mm/init.c | 4 ++-- arch/arm26/mm/init.c | 4 ++-- arch/i386/mm/pgtable.c | 2 +- arch/ia64/mm/init.c | 4 ++-- arch/ppc64/mm/init.c | 4 ++-- arch/ppc64/mm/numa.c | 16 ++++++++-------- arch/x86_64/mm/init.c | 2 +- arch/x86_64/mm/numa.c | 2 +- include/asm-alpha/mmzone.h | 3 +-- include/asm-i386/mmzone.h | 5 ++--- include/asm-mips64/mmzone.h | 2 +- include/asm-ppc64/mmzone.h | 1 - include/asm-x86_64/mmzone.h | 3 +-- include/linux/mmzone.h | 7 ++++++- mm/page_alloc.c | 7 ++++--- 16 files changed, 36 insertions(+), 34 deletions(-) diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 5a7ad83d367c..3100bb87bd50 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -338,7 +338,7 @@ void __init mem_init(void) lmem_map = node_mem_map(nid); pfn = NODE_DATA(nid)->node_start_pfn; - for (i = 0; i < node_size(nid); i++, pfn++) + for (i = 0; i < node_spanned_pages(nid); i++, pfn++) if (page_is_ram(pfn) && PageReserved(lmem_map+i)) reservedpages++; } @@ -372,7 +372,7 @@ show_mem(void) printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for (nid = 0; nid < numnodes; nid++) { struct page * lmem_map = node_mem_map(nid); - i = node_size(nid); + i = node_spanned_pages(nid); while (i-- > 0) { total++; if (PageReserved(lmem_map+i)) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 2fbcb7f5766a..90dcf272009c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -79,7 +79,7 @@ void show_mem(void) struct page *page, *end; page = NODE_MEM_MAP(node); - end = page + NODE_DATA(node)->node_size; + end = page + NODE_DATA(node)->node_spanned_pages; do { total++; @@ -576,7 +576,7 @@ void __init mem_init(void) for (node = 0; node < numnodes; node++) { pg_data_t *pgdat = NODE_DATA(node); - if (pgdat->node_size != 0) + if (pgdat->node_spanned_pages != 0) totalram_pages += free_all_bootmem_node(pgdat); } diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c index c2105fb1a84c..01c772bef70e 100644 --- 
a/arch/arm26/mm/init.c +++ b/arch/arm26/mm/init.c @@ -68,7 +68,7 @@ void show_mem(void) page = NODE_MEM_MAP(0); - end = page + NODE_DATA(0)->node_size; + end = page + NODE_DATA(0)->node_spanned_pages; do { total++; @@ -353,7 +353,7 @@ void __init mem_init(void) max_mapnr = virt_to_page(high_memory) - mem_map; /* this will put all unused low memory onto the freelists */ - if (pgdat->node_size != 0) + if (pgdat->node_spanned_pages != 0) totalram_pages += free_all_bootmem_node(pgdat); printk(KERN_INFO "Memory:"); diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 7ab983c90c53..941c2aa5236c 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -34,7 +34,7 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; ++i) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat->node_mem_map + i; total++; if (PageHighMem(page)) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 8fc22262ce0f..4d37e437da3f 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -232,7 +232,7 @@ show_mem(void) printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { printk("Node ID: %d\n", pgdat->node_id); - for(i = 0; i < pgdat->node_size; i++) { + for(i = 0; i < pgdat->node_spanned_pages; i++) { if (PageReserved(pgdat->node_mem_map+i)) reserved++; else if (PageSwapCache(pgdat->node_mem_map+i)) @@ -240,7 +240,7 @@ show_mem(void) else if (page_count(pgdat->node_mem_map + i)) shared += page_count(pgdat->node_mem_map + i) - 1; } - printk("\t%d pages of RAM\n", pgdat->node_size); + printk("\t%d pages of RAM\n", pgdat->node_spanned_pages); printk("\t%d reserved pages\n", reserved); printk("\t%d pages shared\n", shared); printk("\t%d pages swap cached\n", cached); diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 79b716dbe6db..ca2472a9116a 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c 
@@ -109,7 +109,7 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; i++) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { page = pgdat->node_mem_map + i; total++; if (PageReserved(page)) @@ -564,7 +564,7 @@ void __init mem_init(void) int nid; for (nid = 0; nid < numnodes; nid++) { - if (node_data[nid].node_size != 0) { + if (node_data[nid].node_spanned_pages != 0) { printk("freeing bootmem node %x\n", nid); totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index fd86d7ec8267..19b4ee36ee8f 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -160,21 +160,21 @@ new_range: * this simple case and complain if there is a gap in * memory */ - if (node_data[numa_domain].node_size) { + if (node_data[numa_domain].node_spanned_pages) { unsigned long shouldstart = node_data[numa_domain].node_start_pfn + - node_data[numa_domain].node_size; + node_data[numa_domain].node_spanned_pages; if (shouldstart != (start / PAGE_SIZE)) { printk(KERN_ERR "Hole in node, disabling " "region start %lx length %lx\n", start, size); continue; } - node_data[numa_domain].node_size += size / PAGE_SIZE; + node_data[numa_domain].node_spanned_pages += size / PAGE_SIZE; } else { node_data[numa_domain].node_start_pfn = start / PAGE_SIZE; - node_data[numa_domain].node_size = size / PAGE_SIZE; + node_data[numa_domain].node_spanned_pages = size / PAGE_SIZE; } for (i = start ; i < (start+size); i += MEMORY_INCREMENT) @@ -202,7 +202,7 @@ void setup_nonnuma(void) map_cpu_to_node(i, 0); node_data[0].node_start_pfn = 0; - node_data[0].node_size = lmb_end_of_DRAM() / PAGE_SIZE; + node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE; for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; @@ -224,12 +224,12 @@ void __init do_init_bootmem(void) unsigned 
long bootmem_paddr; unsigned long bootmap_pages; - if (node_data[nid].node_size == 0) + if (node_data[nid].node_spanned_pages == 0) continue; start_paddr = node_data[nid].node_start_pfn * PAGE_SIZE; end_paddr = start_paddr + - (node_data[nid].node_size * PAGE_SIZE); + (node_data[nid].node_spanned_pages * PAGE_SIZE); dbg("node %d\n", nid); dbg("start_paddr = %lx\n", start_paddr); @@ -311,7 +311,7 @@ void __init paging_init(void) unsigned long start_pfn; unsigned long end_pfn; - if (node_data[nid].node_size == 0) + if (node_data[nid].node_spanned_pages == 0) continue; start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT; diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 3be6a8e4b679..cafd352ba636 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -64,7 +64,7 @@ void show_mem(void) printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; ++i) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat->node_mem_map + i; total++; if (PageReserved(page)) diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 0150d11586a7..738ae097faeb 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -86,7 +86,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_size = end_pfn - start_pfn; + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; /* Find a place for the bootmem map */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h index 6edb9c64aa7b..36e3130c6696 100644 --- a/include/asm-alpha/mmzone.h +++ b/include/asm-alpha/mmzone.h @@ -31,7 +31,6 @@ extern pg_data_t node_data[]; #define pa_to_nid(pa) alpha_pa_to_nid(pa) #define NODE_DATA(nid) 
(&node_data[(nid)]) -#define node_size(nid) (NODE_DATA(nid)->node_size) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) @@ -124,7 +123,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define pfn_to_nid(pfn) pa_to_nid(((u64)pfn << PAGE_SHIFT)) #define pfn_valid(pfn) \ (((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \ - node_size(pfn_to_nid(pfn))) \ + node_spanned_pages(pfn_to_nid(pfn))) \ #define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT)) diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index b6138f07c309..d5da17912a8c 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -32,8 +32,7 @@ extern struct pglist_data *node_data[]; #define alloc_bootmem_low_pages_node(ignore, x) \ __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) -#define node_size(nid) (node_data[nid]->node_size) -#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) +#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) /* * Following are macros that each numa implmentation must define. 
@@ -54,7 +53,7 @@ extern struct pglist_data *node_data[]; #define node_end_pfn(nid) \ ({ \ pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_size; \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) #define local_mapnr(kvaddr) \ diff --git a/include/asm-mips64/mmzone.h b/include/asm-mips64/mmzone.h index 25a42752b182..cba337b07be1 100644 --- a/include/asm-mips64/mmzone.h +++ b/include/asm-mips64/mmzone.h @@ -24,7 +24,7 @@ extern plat_pg_data_t *plat_node_data[]; #define PHYSADDR_TO_NID(pa) NASID_TO_COMPACT_NODEID(NASID_GET(pa)) #define PLAT_NODE_DATA(n) (plat_node_data[n]) -#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_size) +#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_spanned_pages) #define PLAT_NODE_DATA_LOCALNR(p, n) \ (((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) diff --git a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h index 8503e25b17b3..2e5136012845 100644 --- a/include/asm-ppc64/mmzone.h +++ b/include/asm-ppc64/mmzone.h @@ -54,7 +54,6 @@ static inline int pa_to_nid(unsigned long pa) */ #define NODE_DATA(nid) (&node_data[nid]) -#define node_size(nid) (NODE_DATA(nid)->node_size) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) /* diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index c1a69000c8d7..398c530270c2 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -40,8 +40,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ - NODE_DATA(nid)->node_size) -#define node_size(nid) (NODE_DATA(nid)->node_size) + NODE_DATA(nid)->node_spanned_pages) #define local_mapnr(kvaddr) \ ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) diff --git a/include/linux/mmzone.h 
b/include/linux/mmzone.h index 21e95664fdf8..e768f7ab8963 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -184,12 +184,17 @@ typedef struct pglist_data { unsigned long *valid_addr_bitmap; struct bootmem_data *bdata; unsigned long node_start_pfn; - unsigned long node_size; + unsigned long node_present_pages; /* total number of physical pages */ + unsigned long node_spanned_pages; /* total size of physical page + range, including holes */ int node_id; struct pglist_data *pgdat_next; wait_queue_head_t kswapd_wait; } pg_data_t; +#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) + extern int numnodes; extern struct pglist_data *pgdat_list; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 741866b59d7d..8b2a02c0350a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -903,7 +903,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) { pg_data_t *pgdat = NODE_DATA(nid); - val->totalram = pgdat->node_size; + val->totalram = pgdat->node_present_pages; val->freeram = nr_free_pages_pgdat(pgdat); val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; @@ -1138,12 +1138,13 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zones_size[i]; - pgdat->node_size = totalpages; + pgdat->node_spanned_pages = totalpages; realtotalpages = totalpages; if (zholes_size) for (i = 0; i < MAX_NR_ZONES; i++) realtotalpages -= zholes_size[i]; + pgdat->node_present_pages = realtotalpages; printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); } @@ -1349,7 +1350,7 @@ void __init free_area_init_node(int nid, struct pglist_data *pgdat, pgdat->node_start_pfn = node_start_pfn; calculate_zone_totalpages(pgdat, zones_size, zholes_size); if (!node_mem_map) { - size = (pgdat->node_size + 1) * sizeof(struct page); + size = 
(pgdat->node_spanned_pages + 1) * sizeof(struct page); node_mem_map = alloc_bootmem_node(pgdat, size); } pgdat->node_mem_map = node_mem_map; -- cgit v1.2.3 From c94f7f38722f9d121186196f4b05ca934fccd9f9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:47:51 -0700 Subject: [PATCH] ramfs: use generic_file_llseek Teach ramfs to use generic_file_llseek: default_llseek takes lock_kernel(). --- fs/ramfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 372f06515900..bd0a6765ec6d 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -146,6 +146,7 @@ static struct file_operations ramfs_file_operations = { .mmap = generic_file_mmap, .fsync = simple_sync_file, .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, }; static struct inode_operations ramfs_file_inode_operations = { -- cgit v1.2.3 From 3bd404cf66f024fac6e6071ead0f4032cf1f7c20 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:47:59 -0700 Subject: [PATCH] inode_change_ok(): remove lock_kernel() `attr' is on the stack, and the inode's contents can change as soon as we return from inode_change_ok() anyway. I can't see anything which is actually being locked in there. --- fs/attr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 0d9e778fb5d3..2048b99a112c 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -22,8 +22,6 @@ int inode_change_ok(struct inode *inode, struct iattr *attr) int retval = -EPERM; unsigned int ia_valid = attr->ia_valid; - lock_kernel(); - /* If force is set do it anyway.
*/ if (ia_valid & ATTR_FORCE) goto fine; @@ -58,7 +56,6 @@ int inode_change_ok(struct inode *inode, struct iattr *attr) fine: retval = 0; error: - unlock_kernel(); return retval; } -- cgit v1.2.3 From 1fe128d23d3d492e20cc693fcd9eff05a4071d45 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:48:06 -0700 Subject: [PATCH] nommu vmtruncate: remove lock_kernel() lock_kernel() need not be held across truncate. --- mm/nommu.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/nommu.c b/mm/nommu.c index cd7900bf3fe8..018262482d5a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -62,11 +62,8 @@ do_expand: inode->i_size = offset; out_truncate: - if (inode->i_op && inode->i_op->truncate) { - lock_kernel(); + if (inode->i_op && inode->i_op->truncate) inode->i_op->truncate(inode); - unlock_kernel(); - } return 0; out_sig: send_sig(SIGXFSZ, current, 0); -- cgit v1.2.3 From 10be509c3c9ea14a7de80252ca1b8b1c43c821fb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:48:18 -0700 Subject: [PATCH] procfs: remove some unneeded lock_kernel()s From: William Lee Irwin III Remove spurious BKL acquisitions in /proc/. The BKL is not required to access nr_threads for reporting, and get_locks_status() takes it internally, wrapping all operations with it. 
--- fs/proc/proc_misc.c | 9 ++++----- fs/proc/root.c | 10 ++++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 1268f3883f4e..eed19a3ea677 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -497,11 +497,10 @@ static int ds1286_read_proc(char *page, char **start, off_t off, static int locks_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len; - lock_kernel(); - len = get_locks_status(page, start, off, count); - unlock_kernel(); - if (len < count) *eof = 1; + int len = get_locks_status(page, start, off, count); + + if (len < count) + *eof = 1; return len; } diff --git a/fs/proc/root.c b/fs/proc/root.c index f6b7c065a969..fb40f8c53cb4 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -81,11 +81,13 @@ void __init proc_root_init(void) static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry) { - if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */ - lock_kernel(); + /* + * nr_threads is actually protected by the tasklist_lock; + * however, it's conventional to do reads, especially for + * reporting, without any locking whatsoever. + */ + if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */ dir->i_nlink = proc_root.nlink + nr_threads; - unlock_kernel(); - } if (!proc_lookup(dir, dentry)) { return NULL; -- cgit v1.2.3 From e90f7e031d2e727d9dc3a8282243496b3b389c53 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:48:26 -0700 Subject: [PATCH] remove lock_kernel() from file_ops.flush() Rework the file_ops.flush() API so that it is no longer called under lock_kernel(). Push lock_kernel() down to all implementations except CIFS, which doesn't want it.
--- Documentation/filesystems/Locking | 2 +- fs/coda/file.c | 11 ++++++++--- fs/nfs/file.c | 2 ++ fs/open.c | 5 +---- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 03f43b68e801..56d482903188 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -318,7 +318,7 @@ poll: no ioctl: yes (see below) mmap: no open: maybe (see below) -flush: yes +flush: no release: no fsync: yes (see below) fasync: yes (see below) diff --git a/fs/coda/file.c b/fs/coda/file.c index 486ca3af939c..8d4865cbc91a 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -153,19 +153,22 @@ int coda_flush(struct file *coda_file) struct inode *coda_inode; int err = 0, fcnt; + lock_kernel(); + coda_vfs_stat.flush++; /* last close semantics */ fcnt = file_count(coda_file); - if (fcnt > 1) return 0; + if (fcnt > 1) + goto out; /* No need to make an upcall when we have not made any modifications * to the file */ if ((coda_file->f_flags & O_ACCMODE) == O_RDONLY) - return 0; + goto out; if (use_coda_close) - return 0; + goto out; cfi = CODA_FTOC(coda_file); BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); @@ -180,6 +183,8 @@ int coda_flush(struct file *coda_file) err = 0; } +out: + unlock_kernel(); return err; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 26956c7de1d8..c440dc858825 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -104,11 +104,13 @@ nfs_file_flush(struct file *file) dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + lock_kernel(); status = nfs_wb_file(inode, file); if (!status) { status = file->f_error; file->f_error = 0; } + unlock_kernel(); return status; } diff --git a/fs/open.c b/fs/open.c index 2e2e4e4dae97..98ce4f7374ef 100644 --- a/fs/open.c +++ b/fs/open.c @@ -952,11 +952,8 @@ int filp_close(struct file *filp, fl_owner_t id) return 0; } retval = 0; - if (filp->f_op && filp->f_op->flush) { - lock_kernel(); + if (filp->f_op && 
filp->f_op->flush) retval = filp->f_op->flush(filp); - unlock_kernel(); - } dnotify_flush(filp, id); locks_remove_posix(filp, id); fput(filp); -- cgit v1.2.3 From 7e6e90121ebfc0c93eb8ea1b134aacae5096308e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:48:33 -0700 Subject: [PATCH] block_llseek(): remove lock_kernel() Replace it with the blockdev inode's i_sem. And we only really need that for atomic access to file->f_pos. --- fs/block_dev.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 52e956455310..74db58a40025 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -155,11 +155,13 @@ static int blkdev_commit_write(struct file *file, struct page *page, unsigned fr */ static loff_t block_llseek(struct file *file, loff_t offset, int origin) { - /* ewww */ - loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size; + struct inode *bd_inode; + loff_t size; loff_t retval; - lock_kernel(); + bd_inode = file->f_dentry->d_inode->i_bdev->bd_inode; + down(&bd_inode->i_sem); + size = bd_inode->i_size; switch (origin) { case 2: @@ -175,7 +177,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) } retval = offset; } - unlock_kernel(); + up(&bd_inode->i_sem); return retval; } -- cgit v1.2.3 From a7fd6e5df6e72130604d7254fcb33ee18ea0d48a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:48:41 -0700 Subject: [PATCH] Make CONFIG_TC35815 depend on CONFIG_TOSHIBA_JMR3927 From: Adrian Bunk I got an error at the final linking with CONFIG_TC35815 enabled since the variables tc_readl and tc_writel are not available. The only place where they are defined is arch/mips/pci/ops-jmr3927.c. 
--- drivers/net/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 35e9724bb622..19b7d611e38e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1397,7 +1397,7 @@ config CS89x0 config TC35815 tristate "TOSHIBA TC35815 Ethernet support" - depends on NET_PCI && PCI + depends on NET_PCI && PCI && TOSHIBA_JMR3927 config DGRS tristate "Digi Intl. RightSwitch SE-X support" -- cgit v1.2.3 From 4ef892a0a92e340e4f77ad4db4a88740a1e46b27 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:48:52 -0700 Subject: [PATCH] Report detached thread exit to the debugger From: Daniel Jacobowitz Right now, CLONE_DETACHED threads silently vanish from GDB's sight when they exit. This patch lets the thread report its exit to the debugger, and then be auto-reaped as soon as it is collected, instead of being reaped as soon as it exits and not reported at all. GDB works either way, but this is more correct and will be useful for some later GDB patches. --- kernel/exit.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 367854d246ef..2f090213e481 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -651,6 +651,8 @@ static void exit_notify(struct task_struct *tsk) if (tsk->exit_signal != -1) { int signal = tsk->parent == tsk->real_parent ? 
tsk->exit_signal : SIGCHLD; do_notify_parent(tsk, signal); + } else if (tsk->ptrace) { + do_notify_parent(tsk, SIGCHLD); } tsk->state = TASK_ZOMBIE; @@ -715,7 +717,7 @@ NORET_TYPE void do_exit(long code) tsk->exit_code = code; exit_notify(tsk); - if (tsk->exit_signal == -1) + if (tsk->exit_signal == -1 && tsk->ptrace == 0) release_task(tsk); schedule(); @@ -859,7 +861,7 @@ static int wait_task_zombie(task_t *p, unsigned int *stat_addr, struct rusage *r BUG_ON(state != TASK_DEAD); return 0; } - if (unlikely(p->exit_signal == -1)) + if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) /* * This can only happen in a race with a ptraced thread * dying on another processor. @@ -889,8 +891,12 @@ static int wait_task_zombie(task_t *p, unsigned int *stat_addr, struct rusage *r /* Double-check with lock held. */ if (p->real_parent != p->parent) { __ptrace_unlink(p); - do_notify_parent(p, p->exit_signal); p->state = TASK_ZOMBIE; + /* If this is a detached thread, this is where it goes away. */ + if (p->exit_signal == -1) + release_task (p); + else + do_notify_parent(p, p->exit_signal); p = NULL; } write_unlock_irq(&tasklist_lock); -- cgit v1.2.3 From c8cf1ab9d60f82526a6bb585763807294036b5d7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:48:59 -0700 Subject: [PATCH] timer renaming and cleanups From: john stultz This renames the bad "timer" variable to "cur_timer" and moves externs to .h files. 
--- arch/i386/kernel/io_apic.c | 2 +- arch/i386/kernel/time.c | 29 +++++++++++++---------------- arch/i386/kernel/timers/timer.c | 6 ------ arch/i386/lib/delay.c | 2 +- include/asm-i386/timer.h | 12 ++++++++++++ 5 files changed, 27 insertions(+), 24 deletions(-) diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 57159fe9face..3e5ec5c69f80 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -2052,7 +2053,6 @@ static inline void unlock_ExtINT_logic(void) */ static inline void check_timer(void) { - extern int timer_ack; int pin1, pin2; int vector; diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index db98194645c3..9309c404e7a2 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -80,8 +80,7 @@ spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; EXPORT_SYMBOL(i8253_lock); -extern struct timer_opts timer_none; -struct timer_opts* timer = &timer_none; +struct timer_opts *cur_timer = &timer_none; /* * This version of gettimeofday has microsecond resolution @@ -93,14 +92,14 @@ void do_gettimeofday(struct timeval *tv) unsigned long usec, sec; do { + unsigned long lost; + seq = read_seqbegin(&xtime_lock); - usec = timer->get_offset(); - { - unsigned long lost = jiffies - wall_jiffies; - if (lost) - usec += lost * (1000000 / HZ); - } + usec = cur_timer->get_offset(); + lost = jiffies - wall_jiffies; + if (lost) + usec += lost * (1000000 / HZ); sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); } while (read_seqretry(&xtime_lock, seq)); @@ -126,7 +125,7 @@ int do_settimeofday(struct timespec *tv) * wall time. Discover what correction gettimeofday() would have * made, and then undo it! 
*/ - tv->tv_nsec -= timer->get_offset() * NSEC_PER_USEC; + tv->tv_nsec -= cur_timer->get_offset() * NSEC_PER_USEC; tv->tv_nsec -= (jiffies - wall_jiffies) * TICK_NSEC; while (tv->tv_nsec < 0) { @@ -180,7 +179,7 @@ int timer_ack; */ unsigned long long monotonic_clock(void) { - return timer->monotonic_clock(); + return cur_timer->monotonic_clock(); } EXPORT_SYMBOL(monotonic_clock); @@ -189,7 +188,8 @@ EXPORT_SYMBOL(monotonic_clock); * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static inline void do_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { @@ -259,7 +259,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ write_seqlock(&xtime_lock); - timer->mark_offset(); + cur_timer->mark_offset(); do_timer_interrupt(irq, NULL, regs); @@ -301,16 +301,13 @@ static int time_init_device(void) device_initcall(time_init_device); - void __init time_init(void) { - xtime.tv_sec = get_cmos_time(); wall_to_monotonic.tv_sec = -xtime.tv_sec; xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); wall_to_monotonic.tv_nsec = -xtime.tv_nsec; - - timer = select_timer(); + cur_timer = select_timer(); time_init_hook(); } diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c index 538c2c33f4e6..d14eded75eee 100644 --- a/arch/i386/kernel/timers/timer.c +++ b/arch/i386/kernel/timers/timer.c @@ -3,12 +3,6 @@ #include #include -/* list of externed timers */ -extern struct timer_opts timer_pit; -extern struct timer_opts timer_tsc; -#ifdef CONFIG_X86_CYCLONE_TIMER -extern struct timer_opts timer_cyclone; -#endif /* list of timers, ordered by preference, NULL terminated */ static struct timer_opts* timers[] = { #ifdef CONFIG_X86_CYCLONE_TIMER diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c index 
a678560d383f..028dd9bbd229 100644 --- a/arch/i386/lib/delay.c +++ b/arch/i386/lib/delay.c @@ -25,7 +25,7 @@ extern struct timer_opts* timer; void __delay(unsigned long loops) { - timer->delay(loops); + cur_timer->delay(loops); } inline void __const_udelay(unsigned long xloops) diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 005ce2e61748..8a01f5391c62 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -25,4 +25,16 @@ extern struct timer_opts* select_timer(void); /* Modifiers for buggy PIT handling */ extern int pit_latch_buggy; + +extern struct timer_opts *cur_timer; +extern int timer_ack; + +/* list of externed timers */ +extern struct timer_opts timer_none; +extern struct timer_opts timer_pit; +extern struct timer_opts timer_tsc; +#ifdef CONFIG_X86_CYCLONE_TIMER +extern struct timer_opts timer_cyclone; +#endif + #endif -- cgit v1.2.3 From 48ecce4b8ed5888dc791d003961291340a467175 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:49:07 -0700 Subject: [PATCH] fix lost_tick detector for speedstep From: john stultz The patch tries to resolve issues caused by running the TSC based lost tick compensation code on CPUs that change frequency (speedstep, etc). Should the CPU be in slow mode when calibrate_tsc() executes, the kernel will assume we have so many cycles per tick. Later when the cpu speeds up, the kernel will start noting that too many cycles have passed since the last interrupt. Since this can occasionally happen, the lost tick compensation code then tries to fix this by incrementing jiffies. Thus every tick we end up incrementing jiffies many times, causing timers to expire too quickly and time to rush ahead. This patch detects when there have been 100 consecutive interrupts where we had to compensate for lost ticks. If this occurs, we spit out a warning and fall back to using the PIT as a time source.
I've tested this on my speedstep enabled laptop with success, and other laptop users seeing this problem have reported it works for them. Also to ensure we don't fall back to the slower PIT too quickly, I tested the code on a system I have that loses ~30 ticks about every second and it can still manage to use the TSC as a good time source. This solves most of the "time doubling" problems seen on laptops. Additionally this revision has been modified to use the cleanups made in rename-timer_A1. --- arch/i386/kernel/timers/timer.c | 9 +++++++++ arch/i386/kernel/timers/timer_tsc.c | 13 ++++++++++++- include/asm-i386/timer.h | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c index d14eded75eee..cf8f641d38f5 100644 --- a/arch/i386/kernel/timers/timer.c +++ b/arch/i386/kernel/timers/timer.c @@ -23,6 +23,15 @@ static int __init clock_setup(char* str) } __setup("clock=", clock_setup); + +/* The chosen timesource has been found to be bad. + * Fall back to a known good timesource (the PIT) + */ +void clock_fallback(void) +{ + cur_timer = &timer_pit; +} + /* iterates through the list of timers, returning the first * one that initializes successfully.
*/ diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 02de71f68d9e..e89d8da8e49b 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -124,6 +124,7 @@ static void mark_offset_tsc(void) int countmp; static int count1 = 0; unsigned long long this_offset, last_offset; + static int lost_count = 0; write_lock(&monotonic_lock); last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; @@ -178,9 +179,19 @@ static void mark_offset_tsc(void) delta += delay_at_last_interrupt; lost = delta/(1000000/HZ); delay = delta%(1000000/HZ); - if (lost >= 2) + if (lost >= 2) { jiffies += lost-1; + /* sanity check to ensure we're not always loosing ticks */ + if (lost_count++ > 100) { + printk(KERN_WARNING "Loosing too many ticks!\n"); + printk(KERN_WARNING "TSC cannot be used as a timesource." + " (Are you running with SpeedStep?)\n"); + printk(KERN_WARNING "Falling back to a sane timesource.\n"); + clock_fallback(); + } + } else + lost_count = 0; /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 8a01f5391c62..cfcfb5b7a128 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -21,6 +21,7 @@ struct timer_opts{ #define TICK_SIZE (tick_nsec / 1000) extern struct timer_opts* select_timer(void); +extern void clock_fallback(void); /* Modifiers for buggy PIT handling */ -- cgit v1.2.3 From 3271736784253fe68ccb46ec2320066f70ceecb6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:49:14 -0700 Subject: [PATCH] fix lost-tick compensation corner-case From: john stultz This patch catches a corner case in the lost-tick compensation code. 
There is a check to see if we overflowed between reads of the two time sources; however, should the high res time source be slightly slower than what we calibrated, it's possible to trigger this code when no ticks have been lost. This patch adds an extra check to ensure we have seen more than one tick before we check for this overflow. This seems to resolve the remaining "time doubling" issues that I've seen reported. --- arch/i386/kernel/timers/timer_cyclone.c | 2 +- arch/i386/kernel/timers/timer_tsc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c index 0d2c2baf2a64..f8b55ffd229d 100644 --- a/arch/i386/kernel/timers/timer_cyclone.c +++ b/arch/i386/kernel/timers/timer_cyclone.c @@ -88,7 +88,7 @@ static void mark_offset_cyclone(void) * between cyclone and pit reads (as noted when * usec delta is > 90% # of usecs/tick) */ - if (abs(delay - delay_at_last_interrupt) > (900000/HZ)) + if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) jiffies++; } diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index e89d8da8e49b..95a62acf713a 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -205,7 +205,7 @@ static void mark_offset_tsc(void) * between tsc and pit reads (as noted when * usec delta is > 90% # of usecs/tick) */ - if (abs(delay - delay_at_last_interrupt) > (900000/HZ)) + if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) jiffies++; } -- cgit v1.2.3 From cee396e281fc2c8a55261eea1a89a594e98f3e0f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:49:26 -0700 Subject: [PATCH] cleanup and generalise lowmem_page_address From: William Lee Irwin III This patch allows architectures to micro-optimize lowmem_page_address() at their whims.
Roman Zippel originally wrote and/or suggested this back when dependencies on page->virtual existing were being shaken out. That's long-settled, so it's fine to do this now. --- include/linux/mm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 4d183974fd36..d75f64725853 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -339,9 +339,14 @@ static inline void set_page_zone(struct page *page, unsigned long zone_num) page->flags |= zone_num << ZONE_SHIFT; } -static inline void * lowmem_page_address(struct page *page) +#ifndef CONFIG_DISCONTIGMEM +/* The array of struct pages - for discontigmem use pgdat->lmem_map */ +extern struct page *mem_map; +#endif + +static inline void *lowmem_page_address(struct page *page) { - return __va( ( (page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn) << PAGE_SHIFT); + return __va(page_to_pfn(page) << PAGE_SHIFT); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) @@ -395,11 +400,6 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_MINOR 1 #define VM_FAULT_MAJOR 2 -#ifndef CONFIG_DISCONTIGMEM -/* The array of struct pages - for discontigmem use pgdat->lmem_map */ -extern struct page *mem_map; -#endif - extern void show_free_areas(void); struct page *shmem_nopage(struct vm_area_struct * vma, -- cgit v1.2.3 From bc75ac4f1dcec256a65b531e2d5be84f5b0fe6bc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:49:35 -0700 Subject: [PATCH] Security hook for vm_enough_memory From: Stephen Smalley This patch against 2.5.73 replaces vm_enough_memory with a security hook per Alan Cox's suggestion so that security modules can completely replace the logic if desired. Note that the patch changes the interface to follow the convention of the other security hooks, i.e. return 0 if ok or -errno on failure (-ENOMEM in this case) rather than returning a boolean. 
It also exports various variables and functions required for the vm_enough_memory logic. --- arch/ia64/ia32/binfmt_elf32.c | 3 +- arch/ia64/kernel/sys_ia64.c | 1 - arch/mips/kernel/sysirix.c | 5 ++- arch/s390/kernel/compat_exec.c | 3 +- arch/x86_64/ia32/ia32_binfmt.c | 4 ++- fs/exec.c | 2 +- include/linux/mman.h | 3 +- include/linux/security.h | 16 ++++++++++ include/linux/slab.h | 2 ++ kernel/fork.c | 2 +- mm/mmap.c | 71 +++++------------------------------------- mm/mprotect.c | 2 +- mm/mremap.c | 3 +- mm/page_alloc.c | 5 +++ mm/shmem.c | 9 +++--- mm/slab.c | 2 ++ mm/swap.c | 2 ++ mm/swapfile.c | 6 +++- security/capability.c | 65 ++++++++++++++++++++++++++++++++++++++ security/dummy.c | 52 +++++++++++++++++++++++++++++++ 20 files changed, 178 insertions(+), 80 deletions(-) diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index dcc4982c2c66..8b2a41592746 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -177,7 +178,7 @@ ia32_setup_arg_pages (struct linux_binprm *bprm) if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 6de52294bd80..6d94e8b1a0ad 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -100,7 +100,6 @@ ia64_shmat (int shmid, void *shmaddr, int shmflg) asmlinkage unsigned long ia64_brk (unsigned long brk) { - extern int vm_enough_memory (long pages); unsigned long rlim, retval, newbrk, oldbrk; struct mm_struct *mm = current->mm; diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c index 5722c28c1e9d..fdcc9d5bd057 100644 --- a/arch/mips/kernel/sysirix.c +++ b/arch/mips/kernel/sysirix.c @@ 
-28,6 +28,7 @@ #include #include #include +#include #include #include @@ -527,8 +528,6 @@ asmlinkage int irix_gtime(struct pt_regs *regs) return get_seconds(); } -int vm_enough_memory(long pages); - /* * IRIX is completely broken... it returns 0 on success, otherwise * ENOMEM. @@ -585,7 +584,7 @@ asmlinkage int irix_brk(unsigned long brk) /* * Check if we have enough memory.. */ - if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { + if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { ret = -ENOMEM; goto out; } diff --git a/arch/s390/kernel/compat_exec.c b/arch/s390/kernel/compat_exec.c index 74245a64e514..33832846833f 100644 --- a/arch/s390/kernel/compat_exec.c +++ b/arch/s390/kernel/compat_exec.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,7 @@ int setup_arg_pages32(struct linux_binprm *bprm) if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 496dfa9da747..e4b86e6cbf9b 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include @@ -339,7 +341,7 @@ int setup_arg_pages(struct linux_binprm *bprm) if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff --git a/fs/exec.c b/fs/exec.c index 204d0a3a1565..f91b25952248 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -392,7 +392,7 @@ int setup_arg_pages(struct linux_binprm *bprm) if (!mpnt) return -ENOMEM; - if 
(!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff --git a/include/linux/mman.h b/include/linux/mman.h index 474d1c046436..a8956f6588ad 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -9,7 +9,8 @@ #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 -extern int vm_enough_memory(long pages); +extern int sysctl_overcommit_memory; +extern int sysctl_overcommit_ratio; extern atomic_t vm_committed_space; #ifdef CONFIG_SMP diff --git a/include/linux/security.h b/include/linux/security.h index 9589f99c3ef3..4d91dfc52c52 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -49,6 +49,7 @@ extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); extern int cap_syslog (int type); +extern int cap_vm_enough_memory (long pages); static inline int cap_netlink_send (struct sk_buff *skb) { @@ -958,6 +959,10 @@ struct swap_info_struct; * See the syslog(2) manual page for an explanation of the @type values. * @type contains the type of action. * Return 0 if permission is granted. + * @vm_enough_memory: + * Check permissions for allocating a new virtual mapping. + * @pages contains the number of pages. + * Return 0 if permission is granted. * * @register_security: * allow module stacking. 
@@ -989,6 +994,7 @@ struct security_operations { int (*quotactl) (int cmds, int type, int id, struct super_block * sb); int (*quota_on) (struct file * f); int (*syslog) (int type); + int (*vm_enough_memory) (long pages); int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); @@ -1238,6 +1244,11 @@ static inline int security_syslog(int type) return security_ops->syslog(type); } +static inline int security_vm_enough_memory(long pages) +{ + return security_ops->vm_enough_memory(pages); +} + static inline int security_bprm_alloc (struct linux_binprm *bprm) { return security_ops->bprm_alloc_security (bprm); @@ -1898,6 +1909,11 @@ static inline int security_syslog(int type) return cap_syslog(type); } +static inline int security_vm_enough_memory(long pages) +{ + return cap_vm_enough_memory(pages); +} + static inline int security_bprm_alloc (struct linux_binprm *bprm) { return 0; diff --git a/include/linux/slab.h b/include/linux/slab.h index 9f8bccba4ad3..d797c981f37e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -116,6 +116,8 @@ extern kmem_cache_t *bio_cachep; void ptrinfo(unsigned long addr); +extern atomic_t slab_reclaim_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 2abbc9c2da23..c17e05614c88 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -286,7 +286,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) continue; if (mpnt->vm_flags & VM_ACCOUNT) { unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - if (!vm_enough_memory(len)) + if (security_vm_enough_memory(len)) goto fail_nomem; charge += len; } diff --git a/mm/mmap.c b/mm/mmap.c index c83cf2a8b126..1052f84a82a2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -53,65 +54,9 @@ int sysctl_overcommit_memory = 0; /* default is heuristic overcommit */ int 
sysctl_overcommit_ratio = 50; /* default is 50% */ atomic_t vm_committed_space = ATOMIC_INIT(0); -/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 1 means there is enough memory for the allocation to - * succeed and 0 implies there is not. - * - * We currently support three overcommit policies, which are set via the - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-acounting - * - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. - * Additional code 2002 Jul 20 by Robert Love. - */ -extern atomic_t slab_reclaim_pages; -int vm_enough_memory(long pages) -{ - unsigned long free, allowed; - - vm_acct_memory(pages); - - /* - * Sometimes we want to use more memory than we have - */ - if (sysctl_overcommit_memory == 1) - return 1; - - if (sysctl_overcommit_memory == 0) { - free = get_page_cache_size(); - free += nr_free_pages(); - free += nr_swap_pages; - - /* - * Any slabs which are created with the - * SLAB_RECLAIM_ACCOUNT flag claim to have contents - * which are reclaimable, under pressure. 
The dentry - * cache and most inode caches should fall into this - */ - free += atomic_read(&slab_reclaim_pages); - - /* - * Leave the last 3% for root - */ - if (!capable(CAP_SYS_ADMIN)) - free -= free / 32; - - if (free > pages) - return 1; - vm_unacct_memory(pages); - return 0; - } - - allowed = totalram_pages * sysctl_overcommit_ratio / 100; - allowed += total_swap_pages; - - if (atomic_read(&vm_committed_space) < allowed) - return 1; - - vm_unacct_memory(pages); - - return 0; -} +EXPORT_SYMBOL(sysctl_overcommit_memory); +EXPORT_SYMBOL(sysctl_overcommit_ratio); +EXPORT_SYMBOL(vm_committed_space); /* * Requires inode->i_mapping->i_shared_sem @@ -646,7 +591,7 @@ munmap_back: * Private writable mapping: check memory availability */ charged = len >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; } @@ -950,7 +895,7 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address) grow = (address - vma->vm_end) >> PAGE_SHIFT; /* Overcommit.. */ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1004,7 +949,7 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address) grow = (vma->vm_start - address) >> PAGE_SHIFT; /* Overcommit.. 
*/ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1376,7 +1321,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; - if (!vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; diff --git a/mm/mprotect.c b/mm/mprotect.c index 978a9509c350..2c015794e3c1 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -175,7 +175,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, if (newflags & VM_WRITE) { if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { charged = (end - start) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; newflags |= VM_ACCOUNT; } diff --git a/mm/mremap.c b/mm/mremap.c index 3bab43a88125..088af945ac5e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -385,7 +386,7 @@ unsigned long do_mremap(unsigned long addr, if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) goto out_nc; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8b2a02c0350a..16077203e5a4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -43,6 +43,9 @@ int nr_swap_pages; int numnodes = 1; int sysctl_lower_zone_protection = 0; +EXPORT_SYMBOL(totalram_pages); +EXPORT_SYMBOL(nr_swap_pages); + /* * Used by page_zone() to look up the address of the struct zone whose * id is encoded in the upper bits of page->flags @@ -733,6 +736,7 @@ unsigned int nr_free_pages(void) return sum; } +EXPORT_SYMBOL(nr_free_pages); unsigned int nr_used_zone_pages(void) { @@ -825,6 +829,7 @@ DEFINE_PER_CPU(struct page_state, page_states) = {0}; EXPORT_PER_CPU_SYMBOL(page_states); atomic_t 
nr_pagecache = ATOMIC_INIT(0); +EXPORT_SYMBOL(nr_pagecache); #ifdef CONFIG_SMP DEFINE_PER_CPU(long, nr_pagecache_local) = 0; #endif diff --git a/mm/shmem.c b/mm/shmem.c index 73301cee3f41..1f4ed8fece45 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -507,7 +508,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) */ change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size); if (change > 0) { - if (!vm_enough_memory(change)) + if (security_vm_enough_memory(change)) return -ENOMEM; } else if (attr->ia_size < inode->i_size) { vm_unacct_memory(-change); @@ -1139,7 +1140,7 @@ shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t maxpos = inode->i_size; if (maxpos < pos + count) { maxpos = pos + count; - if (!vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { + if (security_vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { err = -ENOMEM; goto out; } @@ -1493,7 +1494,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s memcpy(info, symname, len); inode->i_op = &shmem_symlink_inline_operations; } else { - if (!vm_enough_memory(VM_ACCT(1))) { + if (security_vm_enough_memory(VM_ACCT(1))) { iput(inode); return -ENOMEM; } @@ -1887,7 +1888,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) if (size > SHMEM_MAX_BYTES) return ERR_PTR(-EINVAL); - if ((flags & VM_ACCOUNT) && !vm_enough_memory(VM_ACCT(size))) + if ((flags & VM_ACCOUNT) && security_vm_enough_memory(VM_ACCT(size))) return ERR_PTR(-ENOMEM); error = -ENOMEM; diff --git a/mm/slab.c b/mm/slab.c index afb8d8415999..e05fcba90af2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -462,6 +463,7 @@ struct list_head cache_chain; * SLAB_RECLAIM_ACCOUNT turns this on per-slab */ atomic_t slab_reclaim_pages; +EXPORT_SYMBOL(slab_reclaim_pages); /* * chicken and egg 
problem: delay the per-cpu array allocation diff --git a/mm/swap.c b/mm/swap.c index 5818b0a5a72d..37302961e371 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* for try_to_release_page() */ #include @@ -370,6 +371,7 @@ void vm_acct_memory(long pages) } preempt_enable(); } +EXPORT_SYMBOL(vm_acct_memory); #endif diff --git a/mm/swapfile.c b/mm/swapfile.c index bdfd09be8d4c..bc31505b689f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -30,6 +32,8 @@ unsigned int nr_swapfiles; int total_swap_pages; static int swap_overflow; +EXPORT_SYMBOL(total_swap_pages); + static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -1042,7 +1046,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) swap_list_unlock(); goto out_dput; } - if (vm_enough_memory(p->pages)) + if (!security_vm_enough_memory(p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; diff --git a/security/capability.c b/security/capability.c index e01bc5271c36..cff54dd440fc 100644 --- a/security/capability.c +++ b/security/capability.c @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -275,6 +278,65 @@ int cap_syslog (int type) return 0; } +/* + * Check that a process has enough memory to allocate a new virtual + * mapping. 0 means there is enough memory for the allocation to + * succeed and -ENOMEM implies there is not. + * + * We currently support three overcommit policies, which are set via the + * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-acounting + * + * Strict overcommit modes added 2002 Feb 26 by Alan Cox. + * Additional code 2002 Jul 20 by Robert Love. 
+ */ +int cap_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (!capable(CAP_SYS_ADMIN)) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + EXPORT_SYMBOL(cap_capable); EXPORT_SYMBOL(cap_ptrace); EXPORT_SYMBOL(cap_capget); @@ -286,6 +348,7 @@ EXPORT_SYMBOL(cap_bprm_secureexec); EXPORT_SYMBOL(cap_task_post_setuid); EXPORT_SYMBOL(cap_task_reparent_to_init); EXPORT_SYMBOL(cap_syslog); +EXPORT_SYMBOL(cap_vm_enough_memory); #ifdef CONFIG_SECURITY @@ -307,6 +370,8 @@ static struct security_operations capability_ops = { .task_reparent_to_init = cap_task_reparent_to_init, .syslog = cap_syslog, + + .vm_enough_memory = cap_vm_enough_memory, }; #if defined(CONFIG_SECURITY_CAPABILITIES_MODULE) diff --git a/security/dummy.c b/security/dummy.c index a4307e78a168..76c6560a76c2 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -97,6 +100,54 @@ static int dummy_syslog (int type) return 0; } +static int dummy_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more 
memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (current->euid) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + static int dummy_bprm_alloc_security (struct linux_binprm *bprm) { return 0; @@ -793,6 +844,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, quota_on); set_to_dummy_if_null(ops, sysctl); set_to_dummy_if_null(ops, syslog); + set_to_dummy_if_null(ops, vm_enough_memory); set_to_dummy_if_null(ops, bprm_alloc_security); set_to_dummy_if_null(ops, bprm_free_security); set_to_dummy_if_null(ops, bprm_compute_creds); -- cgit v1.2.3 From 6501a85b74f6111b1a3bc13eb10e89602879dfbe Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:49:43 -0700 Subject: [PATCH] ext2: inode allocation race fix ext2's inode allocator will call find_group_orlov(), which will return a suitable blockgroup in which the inode should be allocated. But by the time we actually try to allocate an inode in the blockgroup, other CPUs could have used them all up. ext2 will bogusly fail with "ext2_new_inode: Free inodes count corrupted in group NN". To fix this we just advance onto the next blockgroup if the rare race happens. If we've scanned all blockgroups then return -ENOSPC. 
(This is a bit inaccurate: after we've scanned all blockgroups, there may still be available inodes due to inode freeing activity in other blockgroups. This cannot be fixed without fs-wide locking. The effect is a slightly early ENOSPC in a nearly-full filesystem). --- fs/ext2/ialloc.c | 87 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index af48364bcab8..107ff87b7208 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -489,17 +489,18 @@ found: return group; } -struct inode * ext2_new_inode(struct inode * dir, int mode) +struct inode *ext2_new_inode(struct inode *dir, int mode) { struct super_block *sb; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; int group, i; - ino_t ino; + ino_t ino = 0; struct inode * inode; - struct ext2_group_desc * desc; - struct ext2_super_block * es; + struct ext2_group_desc *gdp; + struct ext2_super_block *es; struct ext2_inode_info *ei; + struct ext2_sb_info *sbi; int err; sb = dir->i_sb; @@ -508,36 +509,62 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) return ERR_PTR(-ENOMEM); ei = EXT2_I(inode); - es = EXT2_SB(sb)->s_es; + sbi = EXT2_SB(sb); + es = sbi->s_es; repeat: if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) + if (test_opt(sb, OLDALLOC)) group = find_group_dir(sb, dir); else group = find_group_orlov(sb, dir); } else group = find_group_other(sb, dir); - err = -ENOSPC; - if (group == -1) + if (group == -1) { + err = -ENOSPC; goto fail; + } - err = -EIO; - bitmap_bh = read_inode_bitmap(sb, group); - if (!bitmap_bh) - goto fail2; - - i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data, - EXT2_INODES_PER_GROUP(sb)); - if (i >= EXT2_INODES_PER_GROUP(sb)) - goto bad_count; - if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), - i, (void *) bitmap_bh->b_data)) { + for (i = 0; i < sbi->s_groups_count; i++) { + gdp = ext2_get_group_desc(sb, group, &bh2); brelse(bitmap_bh); 
- ext2_release_inode(sb, group, S_ISDIR(mode)); - goto repeat; + bitmap_bh = read_inode_bitmap(sb, group); + if (!bitmap_bh) { + err = -EIO; + goto fail2; + } + + i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data, + EXT2_INODES_PER_GROUP(sb)); + if (i >= EXT2_INODES_PER_GROUP(sb)) { + /* + * Rare race: find_group_xx() decided that there were + * free inodes in this group, but by the time we tried + * to allocate one, they're all gone. This can also + * occur because the counters which find_group_orlov() + * uses are approximate. So just go and search the + * next block group. + */ + if (++group == sbi->s_groups_count) + group = 0; + continue; + } + if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), + i, bitmap_bh->b_data)) { + brelse(bitmap_bh); + bitmap_bh = NULL; + ext2_release_inode(sb, group, S_ISDIR(mode)); + goto repeat; + } + goto got; } + /* + * Scanned all blockgroups. + */ + err = -ENOSPC; + goto fail2; +got: mark_buffer_dirty(bitmap_bh); if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); @@ -605,8 +632,9 @@ repeat: inode->i_generation = EXT2_SB(sb)->s_next_generation++; insert_inode_hash(inode); - if(DQUOT_ALLOC_INODE(inode)) { + if (DQUOT_ALLOC_INODE(inode)) { DQUOT_DROP(inode); + err = -ENOSPC; goto fail3; } err = ext2_init_acl(inode, dir); @@ -631,21 +659,6 @@ fail: make_bad_inode(inode); iput(inode); return ERR_PTR(err); - -bad_count: - brelse(bitmap_bh); - ext2_error (sb, "ext2_new_inode", - "Free inodes count corrupted in group %d", - group); - /* Is it really ENOSPC? 
*/ - err = -ENOSPC; - if (sb->s_flags & MS_RDONLY) - goto fail; - - desc = ext2_get_group_desc (sb, group, &bh2); - desc->bg_free_inodes_count = 0; - mark_buffer_dirty(bh2); - goto repeat; } unsigned long ext2_count_free_inodes (struct super_block * sb) -- cgit v1.2.3 From 610a61e01f5fd6e1b9e8d8e2f4905d24c7875aa5 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:49:50 -0700 Subject: [PATCH] fix double mmdrop() on exec path If load_elf_binary() (and the other binary handlers) fail after flush_old_exec() (for example, in setup_arg_pages()) then do_execve() will go through and do mmdrop(bprm.mm). But bprm.mm is now current->mm. We've just freed the current process's mm. The kernel dies in a most ghastly manner. Fix that up by nulling out bprm.mm in flush_old_exec(), at the point where we consumed the mm. Handle the null pointer in the do_execve() error path. Also: don't open-code free_arg_pages() in do_execve(): call it instead. --- fs/exec.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index f91b25952248..ef73cbeff536 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -441,9 +441,9 @@ static inline void free_arg_pages(struct linux_binprm *bprm) { int i; - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { + for (i = 0; i < MAX_ARG_PAGES; i++) { if (bprm->page[i]) - __free_page(bprm->page[i]); + __free_page(bprm->page[i]); bprm->page[i] = NULL; } } @@ -772,6 +772,8 @@ int flush_old_exec(struct linux_binprm * bprm) if (retval) goto out; + bprm->mm = NULL; /* We're using it now */ + /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -999,7 +1001,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) } read_lock(&binfmt_lock); put_binfmt(fmt); - if (retval != -ENOEXEC) + if (retval != -ENOEXEC || bprm->mm == NULL) break; if (!bprm->file) { read_unlock(&binfmt_lock); @@ -1007,7 +1009,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct 
pt_regs *regs) } } read_unlock(&binfmt_lock); - if (retval != -ENOEXEC) { + if (retval != -ENOEXEC || bprm->mm == NULL) { break; #ifdef CONFIG_KMOD }else{ @@ -1035,7 +1037,6 @@ int do_execve(char * filename, struct linux_binprm bprm; struct file *file; int retval; - int i; sched_balance_exec(); @@ -1103,17 +1104,14 @@ int do_execve(char * filename, out: /* Something went wrong, return the inode and free the argument pages*/ - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page * page = bprm.page[i]; - if (page) - __free_page(page); - } + free_arg_pages(&bprm); if (bprm.security) security_bprm_free(&bprm); out_mm: - mmdrop(bprm.mm); + if (bprm.mm) + mmdrop(bprm.mm); out_file: if (bprm.file) { -- cgit v1.2.3 From 90153a16f179e04f2f58c9e4e428b669f2282178 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:50:04 -0700 Subject: [PATCH] ext3: fix journal_release_buffer() race CPU0 CPU1 journal_get_write_access(bh) (Add buffer to t_reserved_list) journal_get_write_access(bh) (It's already on t_reserved_list: nothing to do) (We decide we don't want to journal the buffer after all) journal_release_buffer() (It gets pulled off the transaction) journal_dirty_metadata() (The buffer isn't on the reserved list! The kernel explodes) Simple fix: just leave the buffer on t_reserved_list in journal_release_buffer(). If nobody ends up claiming the buffer then it will get thrown away at start of transaction commit. --- fs/jbd/commit.c | 21 +++++++++++++++++---- fs/jbd/transaction.c | 35 +++++++++++------------------------ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 54f7862a3717..2580162cad52 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -169,10 +169,23 @@ void journal_commit_transaction(journal_t *journal) * that multiple journal_get_write_access() calls to the same * buffer are perfectly permissable. 
*/ - while (commit_transaction->t_reserved_list) { - jh = commit_transaction->t_reserved_list; - JBUFFER_TRACE(jh, "reserved, unused: refile"); - journal_refile_buffer(journal, jh); + { + int nr = 0; + while (commit_transaction->t_reserved_list) { + jh = commit_transaction->t_reserved_list; + JBUFFER_TRACE(jh, "reserved, unused: refile"); + journal_refile_buffer(journal, jh); + nr++; + } + if (nr) { + static int noisy; + + if (noisy < 10) { + noisy++; + printk("%s: freed %d reserved buffers\n", + __FUNCTION__, nr); + } + } } /* diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 54e16b97fdaa..12ad174e7662 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1168,37 +1168,24 @@ out: * journal_release_buffer: undo a get_write_access without any buffer * updates, if the update decided in the end that it didn't need access. * - * journal_get_write_access() can block, so it is quite possible for a - * journaling component to decide after the write access is returned - * that global state has changed and the update is no longer required. - * * The caller passes in the number of credits which should be put back for * this buffer (zero or one). + * + * We leave the buffer attached to t_reserved_list because even though this + * handle doesn't want it, some other concurrent handle may want to journal + * this buffer. If that handle is curently in between get_write_access() and + * journal_dirty_metadata() then it expects the buffer to be reserved. If + * we were to rip it off t_reserved_list here, the other handle will explode + * when journal_dirty_metadata is presented with a non-reserved buffer. + * + * If nobody really wants to journal this buffer then it will be thrown + * away at the start of commit. 
*/ void journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits) { - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); - - JBUFFER_TRACE(jh, "entry"); - - /* If the buffer is reserved but not modified by this - * transaction, then it is safe to release it. In all other - * cases, just leave the buffer as it is. */ - - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - if (jh->b_jlist == BJ_Reserved && jh->b_transaction == transaction && - !buffer_jbddirty(jh2bh(jh))) { - JBUFFER_TRACE(jh, "unused: refiling it"); - __journal_refile_buffer(jh); - } - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); + BUFFER_TRACE(bh, "entry"); handle->h_buffer_credits += credits; - JBUFFER_TRACE(jh, "exit"); } /** -- cgit v1.2.3 From e46e0cf298c7e658b6e57d97ddffdaeb1a5b5b8c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:50:11 -0700 Subject: [PATCH] Set limits on CONFIG_LOG_BUF_SHIFT From: bert hubert Attached patch adds a range check to LOG_BUF_SHIFT and clarifies the configuration somewhat. I managed to build a non-booting kernel because I thought 64 was a nice power of two, which led to the kernel blocking when it tried to actually use or allocate a 2^64 buffer. --- init/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index d3a9874335aa..ab6212b48202 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -93,7 +93,8 @@ config SYSCTL limited in memory.
config LOG_BUF_SHIFT - int "Kernel log buffer size" if DEBUG_KERNEL + int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL + range 12 20 default 17 if ARCH_S390 default 16 if X86_NUMAQ || IA64 default 15 if SMP -- cgit v1.2.3 From 403d24bf1c2805892ed9280fd51c525e40c43737 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:50:19 -0700 Subject: [PATCH] Fix cciss hang From: Jens Axboe It fixes a hang when performing large I/O's. Has been tested and acked by the maintainer, "Wiran, Francis" . --- drivers/block/cciss.c | 5 +++-- drivers/block/ll_rw_blk.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 2933c96a6142..03165688cccd 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1887,7 +1887,7 @@ queue: BUG(); if (( c = cmd_alloc(h, 1)) == NULL) - goto startio; + goto full; blkdev_dequeue_request(creq); @@ -1960,8 +1960,9 @@ queue: h->maxQsinceinit = h->Qdepth; goto queue; -startio: +full: blk_stop_queue(q); +startio: start_io(h); } diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 6287b0064d9b..0f11567e5277 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1072,8 +1072,8 @@ static void blk_unplug_timeout(unsigned long data) **/ void blk_start_queue(request_queue_t *q) { - if (test_and_clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) - schedule_work(&q->unplug_work); + clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); + schedule_work(&q->unplug_work); } /** -- cgit v1.2.3 From b9d6ea3caf701a205b626016713f0fe3079afc09 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:50:27 -0700 Subject: [PATCH] e100 use-after-free fix I thought Scott had recently merged this but it seems not. We'll be needing this patch if you merge Manfred's page unmapping debug patch.
--- drivers/net/e100/e100_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/e100/e100_main.c b/drivers/net/e100/e100_main.c index 9d8d7a1e85aa..d102b207be15 100644 --- a/drivers/net/e100/e100_main.c +++ b/drivers/net/e100/e100_main.c @@ -1085,10 +1085,10 @@ e100_xmit_frame(struct sk_buff *skb, struct net_device *dev) goto exit1; } - e100_prepare_xmit_buff(bdp, skb); - bdp->drv_stats.net_stats.tx_bytes += skb->len; + e100_prepare_xmit_buff(bdp, skb); + dev->trans_start = jiffies; exit1: -- cgit v1.2.3 From 26e48e571aba7b6cba0cebb41d832949137b5fd5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 Jul 2003 10:32:49 -0700 Subject: [PATCH] Remove cpu arg from cpu_raise_irq The function cpu_raise_softirq() takes a softirq number, and a cpu number, but cannot be used with cpu != smp_processor_id(), because there's no locking around the pending softirq lists. Since no one does this, remove that arg. As per Linus' suggestion, names changed: raise_softirq(int nr) cpu_raise_softirq(int cpu, int nr) -> raise_softirq_irqoff(int nr) __cpu_raise_softirq(int cpu, int nr) -> __raise_softirq_irqoff(int nr) --- drivers/scsi/scsi.c | 2 +- include/linux/interrupt.h | 4 ++-- include/linux/netdevice.h | 8 ++++---- kernel/ksyms.c | 2 +- kernel/softirq.c | 14 +++++++------- net/core/dev.c | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index af35f91fe3b2..633c9a028e29 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -582,7 +582,7 @@ void scsi_done(struct scsi_cmnd *cmd) local_irq_save(flags); cpu = smp_processor_id(); list_add_tail(&cmd->eh_entry, &done_q[cpu]); - cpu_raise_softirq(cpu, SCSI_SOFTIRQ); + raise_softirq_irqoff(SCSI_SOFTIRQ); local_irq_restore(flags); } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index da2eaeb18118..21e48723b386 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -94,8 +94,8 @@ struct
softirq_action asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); -#define __cpu_raise_softirq(cpu, nr) do { softirq_pending(cpu) |= 1UL << (nr); } while (0) -extern void FASTCALL(cpu_raise_softirq(unsigned int cpu, unsigned int nr)); +#define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0) +extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); extern void FASTCALL(raise_softirq(unsigned int nr)); #ifndef invoke_softirq diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d79375c33273..3aef822b4493 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -561,7 +561,7 @@ static inline void __netif_schedule(struct net_device *dev) cpu = smp_processor_id(); dev->next_sched = softnet_data[cpu].output_queue; softnet_data[cpu].output_queue = dev; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } @@ -612,7 +612,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff *skb) cpu = smp_processor_id(); skb->next = softnet_data[cpu].completion_queue; softnet_data[cpu].completion_queue = skb; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } @@ -779,7 +779,7 @@ static inline void __netif_rx_schedule(struct net_device *dev) dev->quota += dev->weight; else dev->quota = dev->weight; - __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); } @@ -805,7 +805,7 @@ static inline int netif_rx_reschedule(struct net_device *dev, int undo) local_irq_save(flags); cpu = smp_processor_id(); list_add_tail(&dev->poll_list, &softnet_data[cpu].poll_list); - __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); return 1; } diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 
816627adc50a..66ea4b6b4d84 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -587,7 +587,7 @@ EXPORT_SYMBOL(tasklet_kill); EXPORT_SYMBOL(do_softirq); EXPORT_SYMBOL(raise_softirq); EXPORT_SYMBOL(open_softirq); -EXPORT_SYMBOL(cpu_raise_softirq); +EXPORT_SYMBOL(raise_softirq_irqoff); EXPORT_SYMBOL(__tasklet_schedule); EXPORT_SYMBOL(__tasklet_hi_schedule); diff --git a/kernel/softirq.c b/kernel/softirq.c index 20bf233a14c3..e581740a6e26 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -117,9 +117,9 @@ EXPORT_SYMBOL(local_bh_enable); /* * This function must run with irqs disabled! */ -inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) +inline void raise_softirq_irqoff(unsigned int nr) { - __cpu_raise_softirq(cpu, nr); + __raise_softirq_irqoff(nr); /* * If we're in an interrupt or softirq, we're done @@ -139,7 +139,7 @@ void raise_softirq(unsigned int nr) unsigned long flags; local_irq_save(flags); - cpu_raise_softirq(smp_processor_id(), nr); + raise_softirq_irqoff(nr); local_irq_restore(flags); } @@ -168,7 +168,7 @@ void __tasklet_schedule(struct tasklet_struct *t) local_irq_save(flags); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - cpu_raise_softirq(smp_processor_id(), TASKLET_SOFTIRQ); + raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_restore(flags); } @@ -179,7 +179,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) local_irq_save(flags); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - cpu_raise_softirq(smp_processor_id(), HI_SOFTIRQ); + raise_softirq_irqoff(HI_SOFTIRQ); local_irq_restore(flags); } @@ -211,7 +211,7 @@ static void tasklet_action(struct softirq_action *a) local_irq_disable(); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - __cpu_raise_softirq(smp_processor_id(), TASKLET_SOFTIRQ); + __raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_enable(); } } @@ -244,7 +244,7 @@ static void tasklet_hi_action(struct 
softirq_action *a) local_irq_disable(); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - __cpu_raise_softirq(smp_processor_id(), HI_SOFTIRQ); + __raise_softirq_irqoff(HI_SOFTIRQ); local_irq_enable(); } } diff --git a/net/core/dev.c b/net/core/dev.c index 5102b235b57c..0605391589ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1712,7 +1712,7 @@ out: softnet_break: netdev_rx_stat[this_cpu].time_squeeze++; - __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } -- cgit v1.2.3 From 6f9199b58b53c44e340ca15bfb332769493af72c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 Jul 2003 10:32:57 -0700 Subject: [PATCH] Per-cpu variable in mm/slab.c Rather trivial conversion. Tested on SMP. --- mm/slab.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index e05fcba90af2..bee3dfdf8619 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -475,7 +475,7 @@ enum { FULL } g_cpucache_up; -static struct timer_list reap_timers[NR_CPUS]; +static DEFINE_PER_CPU(struct timer_list, reap_timers); static void reap_timer_fnc(unsigned long data); @@ -525,7 +525,7 @@ static void __slab_error(const char *function, kmem_cache_t *cachep, char *msg) */ static void start_cpu_timer(int cpu) { - struct timer_list *rt = &reap_timers[cpu]; + struct timer_list *rt = &per_cpu(reap_timers, cpu); if (rt->function == NULL) { init_timer(rt); @@ -2457,7 +2457,7 @@ next: static void reap_timer_fnc(unsigned long data) { int cpu = smp_processor_id(); - struct timer_list *rt = &reap_timers[cpu]; + struct timer_list *rt = &__get_cpu_var(reap_timers); cache_reap(); mod_timer(rt, jiffies + REAPTIMEOUT_CPUC + cpu); -- cgit v1.2.3 From 7b957b78f962cf3e844b7ddf8d740cb21dd276b9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 Jul 2003 10:38:21 -0700 Subject: [PATCH] Remove unused __syscall_count No one seems to use __syscall_count.
Remove the field from i386 irq_cpustat_t struct, and the generic accessor macros. Because some archs have hardcoded asm references to offsets in this structure, I haven't touched non-x86, but doing so is usually trivial. --- include/asm-i386/hardirq.h | 1 - include/linux/irq_cpustat.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/asm-i386/hardirq.h b/include/asm-i386/hardirq.h index e8b9149f0b29..a711a1890d97 100644 --- a/include/asm-i386/hardirq.h +++ b/include/asm-i386/hardirq.h @@ -7,7 +7,6 @@ typedef struct { unsigned int __softirq_pending; - unsigned int __syscall_count; struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h index 3f49c2ba63ed..641e7964a0d7 100644 --- a/include/linux/irq_cpustat.h +++ b/include/linux/irq_cpustat.h @@ -29,8 +29,6 @@ extern irq_cpustat_t irq_stat[]; /* defined in asm/hardirq.h */ /* arch independent irq_stat fields */ #define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending) #define local_softirq_pending() softirq_pending(smp_processor_id()) -#define syscall_count(cpu) __IRQ_STAT((cpu), __syscall_count) -#define local_syscall_count() syscall_count(smp_processor_id()) #define ksoftirqd_task(cpu) __IRQ_STAT((cpu), __ksoftirqd_task) #define local_ksoftirqd_task() ksoftirqd_task(smp_processor_id()) -- cgit v1.2.3 From 3ac57d3424bca0406b5349f5187f5e3d84f64013 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 Jul 2003 10:38:29 -0700 Subject: [PATCH] Make ksoftirqd a normal per-cpu variable. This moves the ksoftirqd pointers out of the irq_stat struct, and uses a normal per-cpu variable. It's not that time critical, nor referenced in assembler. This moves us closer to making irq_stat a per-cpu variable. Because some archs have hardcoded asm references to offsets in this structure, I haven't touched non-x86. 
The __ksoftirqd_task field is unused in other archs, too. --- include/asm-i386/hardirq.h | 1 - include/linux/irq_cpustat.h | 2 -- kernel/softirq.c | 16 ++++++++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/asm-i386/hardirq.h b/include/asm-i386/hardirq.h index a711a1890d97..5a14545af179 100644 --- a/include/asm-i386/hardirq.h +++ b/include/asm-i386/hardirq.h @@ -7,7 +7,6 @@ typedef struct { unsigned int __softirq_pending; - struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ unsigned int apic_timer_irqs; /* arch dependent */ diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h index 641e7964a0d7..03b3e17de805 100644 --- a/include/linux/irq_cpustat.h +++ b/include/linux/irq_cpustat.h @@ -29,8 +29,6 @@ extern irq_cpustat_t irq_stat[]; /* defined in asm/hardirq.h */ /* arch independent irq_stat fields */ #define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending) #define local_softirq_pending() softirq_pending(smp_processor_id()) -#define ksoftirqd_task(cpu) __IRQ_STAT((cpu), __ksoftirqd_task) -#define local_ksoftirqd_task() ksoftirqd_task(smp_processor_id()) /* arch dependent irq_stat fields */ #define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386 */ diff --git a/kernel/softirq.c b/kernel/softirq.c index e581740a6e26..96294a3d673f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /* @@ -41,15 +42,18 @@ EXPORT_SYMBOL(irq_stat); static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); + /* * we cannot loop indefinitely here to avoid userspace starvation, * but we also don't want to introduce a worst case 1/HZ latency * to the pending events, so lets the scheduler to balance * the softirq load for us. 
*/ -static inline void wakeup_softirqd(unsigned cpu) +static inline void wakeup_softirqd(void) { - struct task_struct * tsk = ksoftirqd_task(cpu); + /* Interrupts are disabled: no need to stop preemption */ + struct task_struct *tsk = __get_cpu_var(ksoftirqd); if (tsk && tsk->state != TASK_RUNNING) wake_up_process(tsk); @@ -96,7 +100,7 @@ restart: goto restart; } if (pending) - wakeup_softirqd(smp_processor_id()); + wakeup_softirqd(); __local_bh_enable(); } @@ -131,7 +135,7 @@ inline void raise_softirq_irqoff(unsigned int nr) * schedule the softirq soon. */ if (!in_interrupt()) - wakeup_softirqd(cpu); + wakeup_softirqd(); } void raise_softirq(unsigned int nr) @@ -325,7 +329,7 @@ static int ksoftirqd(void * __bind_cpu) __set_current_state(TASK_INTERRUPTIBLE); mb(); - local_ksoftirqd_task() = current; + __get_cpu_var(ksoftirqd) = current; for (;;) { if (!local_softirq_pending()) @@ -354,7 +358,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, return NOTIFY_BAD; } - while (!ksoftirqd_task(hotcpu)) + while (!per_cpu(ksoftirqd, hotcpu)) yield(); } return NOTIFY_OK; -- cgit v1.2.3 From bf948e74a8a11c84ff04ba75fb395545bb89bcaf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 2 Jul 2003 10:39:31 -0700 Subject: The sbp2 driver needs , but didn't include it. It apparently used to work due to some random magic indirect include, but broke lately. Do the obvious fix. 
--- drivers/ieee1394/sbp2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c index 3fd94842cfb9..e93e9e3fece9 100644 --- a/drivers/ieee1394/sbp2.c +++ b/drivers/ieee1394/sbp2.c @@ -56,6 +56,8 @@ #include #include #include +#include + #include #include #include -- cgit v1.2.3 From 5b34c381fc70278c6c2d141e38fbd44fec55f04b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 3 Jul 2003 01:50:39 -0700 Subject: [PATCH] PCI: Improve documentation Fix some grammar problems Add a note about Fast Back to Back support Change the slot_name recommendation to pci_name(). --- Documentation/pci.txt | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/pci.txt b/Documentation/pci.txt index 15c52173d047..d30dc107ac62 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -7,14 +7,14 @@ The world of PCI is vast and it's full of (mostly unpleasant) surprises. Different PCI devices have different requirements and different bugs -- because of this, the PCI support layer in Linux kernel is not as trivial as one would wish. This short pamphlet tries to help all potential driver -authors to find their way through the deep forests of PCI handling. +authors find their way through the deep forests of PCI handling. 0. Structure of PCI drivers ~~~~~~~~~~~~~~~~~~~~~~~~~~~ There exist two kinds of PCI drivers: new-style ones (which leave most of probing for devices to the PCI layer and support online insertion and removal -of devices [thus supporting PCI, hot-pluggable PCI and CardBus in single +of devices [thus supporting PCI, hot-pluggable PCI and CardBus in a single driver]) and old-style ones which just do all the probing themselves. Unless you have a very good reason to do so, please don't use the old way of probing in any new code. 
After the driver finds the devices it wishes to operate @@ -174,7 +174,7 @@ which enables the bus master bit in PCI_COMMAND register and also fixes the latency timer value if it's set to something bogus by the BIOS. If you want to use the PCI Memory-Write-Invalidate transaction, -call pci_set_mwi(). This enables bit PCI_COMMAND bit for Mem-Wr-Inval +call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval and also ensures that the cache line size register is set correctly. Make sure to check the return value of pci_set_mwi(), not all architectures may support Memory-Write-Invalidate. @@ -236,7 +236,7 @@ pci_clear_mwi() Disable Memory-Write-Invalidate transactions. 7. Miscellaneous hints ~~~~~~~~~~~~~~~~~~~~~~ When displaying PCI slot names to the user (for example when a driver wants -to tell the user what card has it found), please use pci_dev->slot_name +to tell the user what card has it found), please use pci_name(pci_dev) for this purpose. Always refer to the PCI devices by a pointer to the pci_dev structure. @@ -248,6 +248,10 @@ can be pretty complex. If you're going to use PCI bus mastering DMA, take a look at Documentation/DMA-mapping.txt. +Don't try to turn on Fast Back to Back writes in your driver. All devices +on the bus need to be capable of doing it, so this is something which needs +to be handled by platform and generic code, not individual drivers. + 8. Obsolete functions ~~~~~~~~~~~~~~~~~~~~~ -- cgit v1.2.3 From 235d9018e3b26211418db29bb0bc32d3826bc74a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 3 Jul 2003 01:50:59 -0700 Subject: [PATCH] PCI: arch/i386/pci/direct.c can use __init, not __devinit pci_sanity_check() is only called from functions marked __init, so it can be __init too. 
--- arch/i386/pci/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c index 3a8be667ddfd..b99be74dc91b 100644 --- a/arch/i386/pci/direct.c +++ b/arch/i386/pci/direct.c @@ -177,7 +177,7 @@ static struct pci_raw_ops pci_direct_conf2 = { * This should be close to trivial, but it isn't, because there are buggy * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. */ -static int __devinit pci_sanity_check(struct pci_raw_ops *o) +static int __init pci_sanity_check(struct pci_raw_ops *o) { u32 x = 0; int devfn; -- cgit v1.2.3 From f9cc1da5f29f35a0ebb69124092df437b4ab41fe Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 3 Jul 2003 01:51:15 -0700 Subject: [PATCH] PCI: pci_find_bus needs a domain Give pci_find_bus a domain argument and move its declaration to <linux/pci.h> --- drivers/pci/hotplug/acpiphp_glue.c | 2 +- drivers/pci/hotplug/cpci_hotplug_pci.c | 2 +- drivers/pci/hotplug/ibmphp_core.c | 6 +++--- drivers/pci/pci.h | 1 - drivers/pci/search.c | 18 ++++++++++-------- include/linux/pci.h | 1 + 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 3af6ad4adbe7..4e8ddf184341 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -385,7 +385,7 @@ static void add_host_bridge (acpi_handle *handle, int seg, int bus) bridge->seg = seg; bridge->bus = bus; - bridge->pci_bus = pci_find_bus(bus); + bridge->pci_bus = pci_find_bus(seg, bus); bridge->res_lock = SPIN_LOCK_UNLOCKED; diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c index 8ffe245a1ca2..88bc69c50539 100644 --- a/drivers/pci/hotplug/cpci_hotplug_pci.c +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c @@ -395,7 +395,7 @@ static int cpci_configure_bridge(struct pci_bus* bus, struct pci_dev* dev) /* Scan behind bridge */ n = pci_scan_bridge(bus, dev, max, 2); - child =
pci_find_bus(max + 1); + child = pci_find_bus(0, max + 1); if (!child) return -ENODEV; pci_proc_attach_bus(child); diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index 1e44444e6287..1f0fa666cf8b 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -774,7 +774,7 @@ static u8 bus_structure_fixup (u8 busno) struct pci_dev *dev; u16 l; - if (pci_find_bus(busno) || !(ibmphp_find_same_bus_num (busno))) + if (pci_find_bus(0, busno) || !(ibmphp_find_same_bus_num (busno))) return 1; bus = kmalloc (sizeof (*bus), GFP_KERNEL); @@ -819,7 +819,7 @@ static int ibm_configure_device (struct pci_func *func) func->dev = pci_find_slot (func->busno, PCI_DEVFN(func->device, func->function)); if (func->dev == NULL) { - struct pci_bus *bus = pci_find_bus(func->busno); + struct pci_bus *bus = pci_find_bus(0, func->busno); if (!bus) return 0; @@ -1335,7 +1335,7 @@ static int __init ibmphp_init (void) goto exit; } - bus = pci_find_bus(0); + bus = pci_find_bus(0, 0); if (!bus) { err ("Can't find the root pci bus, can not continue\n"); rc = -ENODEV; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 3288e401d914..2ad19d3f928e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -29,7 +29,6 @@ extern int pci_remove_device_safe(struct pci_dev *dev); extern unsigned char pci_max_busnr(void); extern unsigned char pci_bus_max_busnr(struct pci_bus *bus); extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); -extern struct pci_bus *pci_find_bus(unsigned char busnr); struct pci_dev_wrapped { struct pci_dev *dev; diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 4793caaa4989..15c687f74343 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -31,22 +31,24 @@ pci_do_find_bus(struct pci_bus* bus, unsigned char busnr) } /** - * pci_find_bus - locate PCI bus from a given bus number + * pci_find_bus - locate PCI bus from a given domain and bus number + * @domain: 
number of PCI domain to search * @busnr: number of desired PCI bus * - * Given a PCI bus number, the desired PCI bus is located in system - * global list of PCI buses. If the bus is found, a pointer to its + * Given a PCI bus number and domain number, the desired PCI bus is located + * in the global list of PCI buses. If the bus is found, a pointer to its * data structure is returned. If no bus is found, %NULL is returned. */ -struct pci_bus * -pci_find_bus(unsigned char busnr) +struct pci_bus * pci_find_bus(int domain, int busnr) { - struct pci_bus* bus = NULL; - struct pci_bus* tmp_bus; + struct pci_bus *bus = NULL; + struct pci_bus *tmp_bus; while ((bus = pci_find_next_bus(bus)) != NULL) { + if (pci_domain_nr(bus) != domain) + continue; tmp_bus = pci_do_find_bus(bus, busnr); - if(tmp_bus) + if (tmp_bus) return tmp_bus; } return NULL; diff --git a/include/linux/pci.h b/include/linux/pci.h index 3ceb5d7da821..72f08971a232 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -543,6 +543,7 @@ void pcibios_update_irq(struct pci_dev *, int irq); /* Generic PCI functions used internally */ +extern struct pci_bus *pci_find_bus(int domain, int busnr); int pci_bus_exists(const struct list_head *list, int nr); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) -- cgit v1.2.3 From fed2058e09d7ddf242079e0dd409fc25e5f428c0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 3 Jul 2003 01:51:30 -0700 Subject: [PATCH] PCI: Remove pci_bus_exists Convert all callers of pci_bus_exists() to call pci_find_bus() instead. Since all callers of pci_find_bus() are __init or __devinit, mark it as __devinit too. 
--- arch/i386/pci/legacy.c | 2 +- arch/sh/kernel/cpu/sh4/pci-sh7751.c | 2 +- drivers/pci/probe.c | 13 +------------ drivers/pci/search.c | 5 +++-- include/linux/pci.h | 1 - 5 files changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c index 29fea7d6ad6c..71ca3133cdeb 100644 --- a/arch/i386/pci/legacy.c +++ b/arch/i386/pci/legacy.c @@ -28,7 +28,7 @@ static void __devinit pcibios_fixup_peer_bridges(void) } for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) + if (pci_find_bus(0, n)) continue; bus->number = n; bus->ops = &pci_root_ops; diff --git a/arch/sh/kernel/cpu/sh4/pci-sh7751.c b/arch/sh/kernel/cpu/sh4/pci-sh7751.c index 365c71a4fbe0..0831b1c646ac 100644 --- a/arch/sh/kernel/cpu/sh4/pci-sh7751.c +++ b/arch/sh/kernel/cpu/sh4/pci-sh7751.c @@ -200,7 +200,7 @@ static void __init pcibios_fixup_peer_bridges(void) return; PCIDBG(2,"PCI: Peer bridge fixup\n"); for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) + if (pci_find_bus(0, n)) continue; bus.number = n; bus.ops = pci_root_ops; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0894f4aed331..af83b3936f6f 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -633,22 +633,11 @@ unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus) return max; } -int __devinit pci_bus_exists(const struct list_head *list, int nr) -{ - const struct pci_bus *b; - - list_for_each_entry(b, list, node) { - if (b->number == nr || pci_bus_exists(&b->children, nr)) - return 1; - } - return 0; -} - struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { struct pci_bus *b; - if (pci_bus_exists(&pci_root_buses, bus)) { + if (pci_find_bus(0, bus)) { /* If we already got to this bus through a different bridge, ignore it */ DBG("PCI: Bus %02x already known\n", bus); return NULL; diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 
15c687f74343..366f1f16fb2f 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -7,13 +7,14 @@ * Copyright 2003 -- Greg Kroah-Hartman */ +#include #include #include #include spinlock_t pci_bus_lock = SPIN_LOCK_UNLOCKED; -static struct pci_bus * +static struct pci_bus * __devinit pci_do_find_bus(struct pci_bus* bus, unsigned char busnr) { struct pci_bus* child; @@ -39,7 +40,7 @@ pci_do_find_bus(struct pci_bus* bus, unsigned char busnr) * in the global list of PCI buses. If the bus is found, a pointer to its * data structure is returned. If no bus is found, %NULL is returned. */ -struct pci_bus * pci_find_bus(int domain, int busnr) +struct pci_bus * __devinit pci_find_bus(int domain, int busnr) { struct pci_bus *bus = NULL; struct pci_bus *tmp_bus; diff --git a/include/linux/pci.h b/include/linux/pci.h index 72f08971a232..a219c58ad88e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -544,7 +544,6 @@ void pcibios_update_irq(struct pci_dev *, int irq); /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -int pci_bus_exists(const struct list_head *list, int nr); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) { -- cgit v1.2.3 From 8a8cd91cf2d5a16fc6a61c9c64856163e0ca204d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 3 Jul 2003 01:51:45 -0700 Subject: [PATCH] PCI: arch/i386/pci/irq.c should use pci_find_bus Use pci_find_bus rather than relying on the return value of pci_scan_bus. 
--- arch/i386/pci/irq.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 53db10c42fea..2fc6431a43ef 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -102,13 +102,12 @@ static void __init pirq_peer_trick(void) #endif busmap[e->bus] = 1; } - for(i=1; i<256; i++) - /* - * It might be a secondary bus, but in this case its parent is already - * known (ascending bus order) and therefore pci_scan_bus returns immediately. - */ - if (busmap[i] && pci_scan_bus(i, &pci_root_ops, NULL)) + for(i = 1; i < 256; i++) { + if (!busmap[i] || pci_find_bus(0, i)) + continue; + if (pci_scan_bus(i, &pci_root_ops, NULL)) printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); + } pcibios_last_bus = -1; } -- cgit v1.2.3 From bac11c6fd5987f2ac30c34ef046c7af1a8fdd660 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 3 Jul 2003 01:51:59 -0700 Subject: [PATCH] PCI: arch/i386/pci/legacy.c: use raw_pci_ops Make pcibios_fixup_peer_bridges() use raw_pci_ops directly instead of faking pci_bus and pci_dev. 
--- arch/i386/pci/legacy.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c index 71ca3133cdeb..c72bf1a59b7e 100644 --- a/arch/i386/pci/legacy.c +++ b/arch/i386/pci/legacy.c @@ -11,40 +11,26 @@ */ static void __devinit pcibios_fixup_peer_bridges(void) { - int n; - struct pci_bus *bus; - struct pci_dev *dev; - u16 l; + int n, devfn; if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) return; DBG("PCI: Peer bridge fixup\n"); - bus = kmalloc(sizeof(*bus), GFP_ATOMIC); - dev = kmalloc(sizeof(*dev), GFP_ATOMIC); - if (!bus || !dev) { - printk(KERN_ERR "Out of memory in %s\n", __FUNCTION__); - goto exit; - } - for (n=0; n <= pcibios_last_bus; n++) { + u32 l; if (pci_find_bus(0, n)) continue; - bus->number = n; - bus->ops = &pci_root_ops; - dev->bus = bus; - for (dev->devfn=0; dev->devfn<256; dev->devfn += 8) - if (!pci_read_config_word(dev, PCI_VENDOR_ID, &l) && + for (devfn = 0; devfn < 256; devfn += 8) { + if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", n, dev->devfn, l); printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); pci_scan_bus(n, &pci_root_ops, NULL); break; } + } } -exit: - kfree(dev); - kfree(bus); } static int __init pci_legacy_init(void) -- cgit v1.2.3 From 93482f5c3c3c255ee835cfcfab75d250277f83a4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 3 Jul 2003 01:52:14 -0700 Subject: [PATCH] PCI config space in sysfs - Fix a couple of bugs in sysfs's handling of binary files (my fault). 
- Implement pci config space reads and writes in sysfs --- drivers/pci/pci-sysfs.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/sysfs/bin.c | 23 +++++------ 2 files changed, 114 insertions(+), 14 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 4e13d7141b78..dc560bf77421 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -3,6 +3,8 @@ * * (C) Copyright 2002 Greg Kroah-Hartman * (C) Copyright 2002 IBM Corp. + * (C) Copyright 2003 Matthew Wilcox + * (C) Copyright 2003 Hewlett-Packard * * File attributes for PCI devices * @@ -60,6 +62,108 @@ pci_show_resources(struct device * dev, char * buf) static DEVICE_ATTR(resource,S_IRUGO,pci_show_resources,NULL); +static ssize_t +pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +{ + struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); + unsigned int size = 64; + + /* Several chips lock up trying to read undefined config space */ + if (capable(CAP_SYS_ADMIN)) { + size = 256; + } else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { + size = 128; + } + + if (off > size) + return 0; + if (off + count > size) { + size -= off; + count = size; + } else { + size = count; + } + + while (off & 3) { + unsigned char val; + pci_read_config_byte(dev, off, &val); + buf[off] = val; + off++; + if (--size == 0) + break; + } + + while (size > 3) { + unsigned int val; + pci_read_config_dword(dev, off, &val); + buf[off] = val & 0xff; + buf[off + 1] = (val >> 8) & 0xff; + buf[off + 2] = (val >> 16) & 0xff; + buf[off + 3] = (val >> 24) & 0xff; + off += 4; + size -= 4; + } + + while (size > 0) { + unsigned char val; + pci_read_config_byte(dev, off, &val); + buf[off] = val; + off++; + --size; + } + + return count; +} + +static ssize_t +pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +{ + struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); + unsigned int size = count; + + if (off > 256) + return 
0; + if (off + count > 256) { + size = 256 - off; + count = size; + } + + while (off & 3) { + pci_write_config_byte(dev, off, buf[off]); + off++; + if (--size == 0) + break; + } + + while (size > 3) { + unsigned int val = buf[off]; + val |= (unsigned int) buf[off + 1] << 8; + val |= (unsigned int) buf[off + 2] << 16; + val |= (unsigned int) buf[off + 3] << 24; + pci_write_config_dword(dev, off, val); + off += 4; + size -= 4; + } + + while (size > 0) { + pci_write_config_byte(dev, off, buf[off]); + off++; + --size; + } + + return count; +} + +static struct bin_attribute pci_config_attr = { + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, + }, + .size = 256, + .read = pci_read_config, + .write = pci_write_config, +}; + void pci_create_sysfs_dev_files (struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -72,4 +176,5 @@ void pci_create_sysfs_dev_files (struct pci_dev *pdev) device_create_file (dev, &dev_attr_class); device_create_file (dev, &dev_attr_irq); device_create_file (dev, &dev_attr_resource); + sysfs_create_bin_file(&dev->kobj, &pci_config_attr); } diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index e5439f1aafa5..09ef6dcec6b2 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -42,18 +42,17 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off) ret = fill_read(dentry, buffer, offs, count); if (ret < 0) - goto Done; + return ret; count = ret; - ret = -EFAULT; - if (copy_to_user(userbuf, buffer, count) != 0) - goto Done; + if (copy_to_user(userbuf, buffer + offs, count) != 0) + return -EINVAL; + + printk("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); *off = offs + count; - ret = count; - Done: - return ret; + return count; } static int @@ -72,7 +71,6 @@ static ssize_t write(struct file * file, const char __user * userbuf, struct dentry *dentry = file->f_dentry; int size = dentry->d_inode->i_size; loff_t offs = *off; - int ret; if (count > PAGE_SIZE) count = PAGE_SIZE; @@ -83,16 +81,13 @@ static ssize_t 
write(struct file * file, const char __user * userbuf, count = size - offs; } - ret = -EFAULT; - if (copy_from_user(buffer, userbuf, count)) - goto Done; + if (copy_from_user(buffer + offs, userbuf, count)) + return -EFAULT; count = flush_write(dentry, buffer, offs, count); if (count > 0) *off = offs + count; - ret = count; - Done: - return ret; + return count; } static int open(struct inode * inode, struct file * file) -- cgit v1.2.3 From adf9e8aee44bec56adabb9895d24a65c8ffa146b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 3 Jul 2003 01:52:29 -0700 Subject: [PATCH] Driver Core: fix firmware binary files Fixes the sysfs binary file bug. --- drivers/base/firmware_class.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index b186dba8d2d8..f3981af94570 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -149,7 +149,7 @@ firmware_data_read(struct kobject *kobj, if (offset + count > fw->size) count = fw->size - offset; - memcpy(buffer, fw->data + offset, count); + memcpy(buffer + offset, fw->data + offset, count); return count; } static int @@ -198,7 +198,7 @@ firmware_data_write(struct kobject *kobj, if (retval) return retval; - memcpy(fw->data + offset, buffer, count); + memcpy(fw->data + offset, buffer + offset, count); fw->size = max_t(size_t, offset + count, fw->size); -- cgit v1.2.3 From 687c7f9e2be367cbe9438dd47093ff97fedc9dc2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Jul 2003 02:06:08 -0700 Subject: [PATCH] sysfs: change print() to pr_debug() to not annoy everyone. --- fs/sysfs/bin.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 09ef6dcec6b2..7f0d265d0bb9 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -2,6 +2,8 @@ * bin.c - binary file operations for sysfs. 
*/ +#undef DEBUG + #include #include #include @@ -48,7 +50,7 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off) if (copy_to_user(userbuf, buffer + offs, count) != 0) return -EINVAL; - printk("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); + pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); *off = offs + count; -- cgit v1.2.3 From 98823466c86c19e7c5e7d8ebe75527acf7f47f6a Mon Sep 17 00:00:00 2001 From: Adam Belay Date: Thu, 3 Jul 2003 15:39:09 +0000 Subject: [PNP] Handle Disabled Resources Properly Some devices will allow for individual resources to be disabled, even when the device as a whole is active. The current PnP resource manager is not handling this situation properly. This patch corrects the issue by detecting disabled resources and then flagging them. The pnp layer will now skip over any disabled resources. Interface updates have also been included so that we can properly display resource tables when a resource is disabled. Also note that a new flag "IORESOURCE_DISABLED" has been added to linux/ioport.h.
--- drivers/pnp/interface.c | 22 ++++++++++++++++++---- drivers/pnp/manager.c | 12 ++++++++++++ drivers/pnp/resource.c | 8 ++++++++ drivers/pnp/support.c | 24 ++++++++++++++++++++---- include/linux/ioport.h | 1 + 5 files changed, 59 insertions(+), 8 deletions(-) diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index eb2094bf72d0..e2b7388f7ec2 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -259,7 +259,10 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) for (i = 0; i < PNP_MAX_PORT; i++) { if (pnp_port_valid(dev, i)) { pnp_printf(buffer,"io"); - pnp_printf(buffer," 0x%lx-0x%lx \n", + if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," 0x%lx-0x%lx\n", pnp_port_start(dev, i), pnp_port_end(dev, i)); } @@ -267,7 +270,10 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) for (i = 0; i < PNP_MAX_MEM; i++) { if (pnp_mem_valid(dev, i)) { pnp_printf(buffer,"mem"); - pnp_printf(buffer," 0x%lx-0x%lx \n", + if (pnp_mem_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," 0x%lx-0x%lx\n", pnp_mem_start(dev, i), pnp_mem_end(dev, i)); } @@ -275,13 +281,21 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) for (i = 0; i < PNP_MAX_IRQ; i++) { if (pnp_irq_valid(dev, i)) { pnp_printf(buffer,"irq"); - pnp_printf(buffer," %ld \n", pnp_irq(dev, i)); + if (pnp_irq_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," %ld\n", + pnp_irq(dev, i)); } } for (i = 0; i < PNP_MAX_DMA; i++) { if (pnp_dma_valid(dev, i)) { pnp_printf(buffer,"dma"); - pnp_printf(buffer," %ld \n", pnp_dma(dev, i)); + if (pnp_dma_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," %ld\n", + pnp_dma(dev, i)); } } ret = (buffer->curr - buf); diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c 
index a56dfac58b36..cc2bd90ae990 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -40,6 +40,9 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) if (!(dev->res.port_resource[idx].flags & IORESOURCE_AUTO)) return 1; + if (!rule->size) + return 1; /* skip disabled resource requests */ + start = &dev->res.port_resource[idx].start; end = &dev->res.port_resource[idx].end; flags = &dev->res.port_resource[idx].flags; @@ -76,6 +79,9 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) if (!(dev->res.mem_resource[idx].flags & IORESOURCE_AUTO)) return 1; + if (!rule->size) + return 1; /* skip disabled resource requests */ + start = &dev->res.mem_resource[idx].start; end = &dev->res.mem_resource[idx].end; flags = &dev->res.mem_resource[idx].flags; @@ -128,6 +134,9 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx) if (!(dev->res.irq_resource[idx].flags & IORESOURCE_AUTO)) return 1; + if (!rule->map) + return 1; /* skip disabled resource requests */ + start = &dev->res.irq_resource[idx].start; end = &dev->res.irq_resource[idx].end; flags = &dev->res.irq_resource[idx].flags; @@ -168,6 +177,9 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) if (!(dev->res.dma_resource[idx].flags & IORESOURCE_AUTO)) return 1; + if (!rule->map) + return 1; /* skip disabled resource requests */ + start = &dev->res.dma_resource[idx].start; end = &dev->res.dma_resource[idx].end; flags = &dev->res.dma_resource[idx].flags; diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 978decf7504a..c7c664a3035f 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -286,6 +286,8 @@ int pnp_check_port(struct pnp_dev * dev, int idx) continue; for (tmp = 0; tmp < PNP_MAX_PORT; tmp++) { if (tdev->res.port_resource[tmp].flags & IORESOURCE_IO) { + if (pnp_port_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; tport = &tdev->res.port_resource[tmp].start; 
tend = &tdev->res.port_resource[tmp].end; if (ranged_conflict(port,end,tport,tend)) @@ -340,6 +342,8 @@ int pnp_check_mem(struct pnp_dev * dev, int idx) continue; for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) { if (tdev->res.mem_resource[tmp].flags & IORESOURCE_MEM) { + if (pnp_mem_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; taddr = &tdev->res.mem_resource[tmp].start; tend = &tdev->res.mem_resource[tmp].end; if (ranged_conflict(addr,end,taddr,tend)) @@ -409,6 +413,8 @@ int pnp_check_irq(struct pnp_dev * dev, int idx) continue; for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) { if (tdev->res.irq_resource[tmp].flags & IORESOURCE_IRQ) { + if (pnp_irq_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; if ((tdev->res.irq_resource[tmp].start == *irq)) return 0; } @@ -462,6 +468,8 @@ int pnp_check_dma(struct pnp_dev * dev, int idx) continue; for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) { if (tdev->res.dma_resource[tmp].flags & IORESOURCE_DMA) { + if (pnp_dma_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; if ((tdev->res.dma_resource[tmp].start == *dma)) return 0; } diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index af359e092ed0..375aa2172239 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -68,9 +68,13 @@ static void current_irqresource(struct pnp_resource_table * res, int irq) int i = 0; while ((res->irq_resource[i].flags & IORESOURCE_IRQ) && i < PNP_MAX_IRQ) i++; if (i < PNP_MAX_IRQ) { + res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag + if (irq == -1) { + res->irq_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->irq_resource[i].start = res->irq_resource[i].end = (unsigned long) irq; - res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag } } @@ -79,9 +83,13 @@ static void current_dmaresource(struct pnp_resource_table * res, int dma) int i = 0; while ((res->dma_resource[i].flags & IORESOURCE_DMA) && i < PNP_MAX_DMA) i++; if (i < PNP_MAX_DMA) { + res->dma_resource[i].flags = IORESOURCE_DMA; // Also 
clears _UNSET flag + if (dma == -1) { + res->dma_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->dma_resource[i].start = res->dma_resource[i].end = (unsigned long) dma; - res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag } } @@ -90,9 +98,13 @@ static void current_ioresource(struct pnp_resource_table * res, int io, int len) int i = 0; while ((res->port_resource[i].flags & IORESOURCE_IO) && i < PNP_MAX_PORT) i++; if (i < PNP_MAX_PORT) { + res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag + if (len <= 0 || (io + len -1) >= 0x10003) { + res->port_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->port_resource[i].start = (unsigned long) io; res->port_resource[i].end = (unsigned long)(io + len - 1); - res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag } } @@ -101,9 +113,13 @@ static void current_memresource(struct pnp_resource_table * res, int mem, int le int i = 0; while ((res->mem_resource[i].flags & IORESOURCE_MEM) && i < PNP_MAX_MEM) i++; if (i < PNP_MAX_MEM) { + res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag + if (len <= 0) { + res->mem_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->mem_resource[i].start = (unsigned long) mem; res->mem_resource[i].end = (unsigned long)(mem + len - 1); - res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag } } diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 9193a8df0122..26d6293ed4c9 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -43,6 +43,7 @@ struct resource_list { #define IORESOURCE_SHADOWABLE 0x00010000 #define IORESOURCE_BUS_HAS_VGA 0x00080000 +#define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ -- cgit v1.2.3 From 8a60bbfc9e5c9db789110d769bf424624e15f4ae Mon Sep 17 00:00:00 2001 From: Adam Belay Date: Thu, 3 
Jul 2003 15:42:36 +0000 Subject: [PNP] Allow resource auto config to assign disabled resources This patch updates the resource manager so that it actually assigns disabled resources when they are requested by the device. --- drivers/pnp/manager.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index cc2bd90ae990..cdd637ce4dd1 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -40,17 +40,20 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) if (!(dev->res.port_resource[idx].flags & IORESOURCE_AUTO)) return 1; - if (!rule->size) - return 1; /* skip disabled resource requests */ - start = &dev->res.port_resource[idx].start; end = &dev->res.port_resource[idx].end; flags = &dev->res.port_resource[idx].flags; /* set the initial values */ + *flags = *flags | rule->flags | IORESOURCE_IO; + + if (!rule->size) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + *start = rule->min; *end = *start + rule->size - 1; - *flags = *flags | rule->flags | IORESOURCE_IO; /* run through until pnp_check_port is happy */ while (!pnp_check_port(dev, idx)) { @@ -79,16 +82,11 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) if (!(dev->res.mem_resource[idx].flags & IORESOURCE_AUTO)) return 1; - if (!rule->size) - return 1; /* skip disabled resource requests */ - start = &dev->res.mem_resource[idx].start; end = &dev->res.mem_resource[idx].end; flags = &dev->res.mem_resource[idx].flags; /* set the initial values */ - *start = rule->min; - *end = *start + rule->size -1; *flags = *flags | rule->flags | IORESOURCE_MEM; /* convert pnp flags to standard Linux flags */ @@ -101,6 +99,14 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) if (rule->flags & IORESOURCE_MEM_SHADOWABLE) *flags |= IORESOURCE_SHADOWABLE; + if (!rule->size) { + *flags |= 
IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + + *start = rule->min; + *end = *start + rule->size -1; + /* run through until pnp_check_mem is happy */ while (!pnp_check_mem(dev, idx)) { *start += rule->align; @@ -134,9 +140,6 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx) if (!(dev->res.irq_resource[idx].flags & IORESOURCE_AUTO)) return 1; - if (!rule->map) - return 1; /* skip disabled resource requests */ - start = &dev->res.irq_resource[idx].start; end = &dev->res.irq_resource[idx].end; flags = &dev->res.irq_resource[idx].flags; @@ -144,6 +147,11 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx) /* set the initial values */ *flags = *flags | rule->flags | IORESOURCE_IRQ; + if (!rule->map) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + for (i = 0; i < 16; i++) { if(rule->map & (1<res.dma_resource[idx].flags & IORESOURCE_AUTO)) return 1; - if (!rule->map) - return 1; /* skip disabled resource requests */ - start = &dev->res.dma_resource[idx].start; end = &dev->res.dma_resource[idx].end; flags = &dev->res.dma_resource[idx].flags; @@ -187,6 +192,11 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) /* set the initial values */ *flags = *flags | rule->flags | IORESOURCE_DMA; + if (!rule->map) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + for (i = 0; i < 8; i++) { if(rule->map & (1< Date: Thu, 3 Jul 2003 15:45:44 +0000 Subject: [PNP] Fix manual resource setting API This patch corrects a trivial thinko in the manual resource api. 
--- drivers/pnp/manager.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index cdd637ce4dd1..d902dfc47164 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -400,25 +400,24 @@ int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table * res, dev->res = *res; if (!(mode & PNP_CONFIG_FORCE)) { for (i = 0; i < PNP_MAX_PORT; i++) { - if(pnp_check_port(dev,i)) + if(!pnp_check_port(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_MEM; i++) { - if(pnp_check_mem(dev,i)) + if(!pnp_check_mem(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_IRQ; i++) { - if(pnp_check_irq(dev,i)) + if(!pnp_check_irq(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_DMA; i++) { - if(pnp_check_dma(dev,i)) + if(!pnp_check_dma(dev,i)) goto fail; } } up(&pnp_res_mutex); - pnp_auto_config_dev(dev); kfree(bak); return 0; -- cgit v1.2.3 From 1cf6d20f607854e784041115edc5709b5c847937 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Jul 2003 02:28:49 -0700 Subject: [PATCH] SYSFS: add module referencing to sysfs attribute files. --- fs/sysfs/file.c | 9 +++++++++ include/linux/device.h | 11 ++++++----- include/linux/sysfs.h | 2 ++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1d25a84702b2..2cedefe8c4a0 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -247,6 +247,12 @@ static int check_perm(struct inode * inode, struct file * file) if (!kobj || !attr) goto Einval; + /* Grab the module reference for this attribute if we have one */ + if (!try_module_get(attr->owner)) { + error = -ENODEV; + goto Done; + } + /* if the kobject has no ktype, then we assume that it is a subsystem * itself, and use ops for it. 
*/ @@ -300,6 +306,7 @@ static int check_perm(struct inode * inode, struct file * file) goto Done; Eaccess: error = -EACCES; + module_put(attr->owner); Done: if (error && kobj) kobject_put(kobj); @@ -314,10 +321,12 @@ static int sysfs_open_file(struct inode * inode, struct file * filp) static int sysfs_release(struct inode * inode, struct file * filp) { struct kobject * kobj = filp->f_dentry->d_parent->d_fsdata; + struct attribute * attr = filp->f_dentry->d_fsdata; struct sysfs_buffer * buffer = filp->private_data; if (kobj) kobject_put(kobj); + module_put(attr->owner); if (buffer) { if (buffer->page) diff --git a/include/linux/device.h b/include/linux/device.h index 1bd92551c077..edf43ff2ffb2 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -95,7 +96,7 @@ struct bus_attribute { #define BUS_ATTR(_name,_mode,_show,_store) \ struct bus_attribute bus_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -136,7 +137,7 @@ struct driver_attribute { #define DRIVER_ATTR(_name,_mode,_show,_store) \ struct driver_attribute driver_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -176,7 +177,7 @@ struct class_attribute { #define CLASS_ATTR(_name,_mode,_show,_store) \ struct class_attribute class_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -226,7 +227,7 @@ struct class_device_attribute { #define CLASS_DEVICE_ATTR(_name,_mode,_show,_store) \ struct class_device_attribute class_device_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = 
_mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -324,7 +325,7 @@ struct device_attribute { #define DEVICE_ATTR(_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index f054416c8145..6d8af386ab1d 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -10,9 +10,11 @@ #define _SYSFS_H_ struct kobject; +struct module; struct attribute { char * name; + struct module * owner; mode_t mode; }; -- cgit v1.2.3 From 047981803ae7c69c502b5e2b4a4ddfb480827f7b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 3 Jul 2003 02:39:18 -0700 Subject: [PATCH] jiffies include fix This patch fixes a bad declaration of jiffies in timer_tsc.c and timer_cyclone.c, replacing it with the proper usage of jiffies.h. Caught by gregkh. 
--- arch/i386/kernel/timers/timer_cyclone.c | 2 +- arch/i386/kernel/timers/timer_tsc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c index 0d2c2baf2a64..ecf003393ac4 100644 --- a/arch/i386/kernel/timers/timer_cyclone.c +++ b/arch/i386/kernel/timers/timer_cyclone.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -18,7 +19,6 @@ #include extern spinlock_t i8253_lock; -extern unsigned long jiffies; extern unsigned long calibrate_tsc(void); /* Number of usecs that the last interrupt was delayed */ diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 02de71f68d9e..09e1ce484183 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -21,7 +22,6 @@ int tsc_disable __initdata = 0; extern spinlock_t i8253_lock; -extern unsigned long jiffies; static int use_tsc; /* Number of usecs that the last interrupt was delayed */ -- cgit v1.2.3 From f91c01ac74c4970d1d31324d6e80d78aaceae2b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Jul 2003 03:43:18 -0700 Subject: [PATCH] sysfs: add sysfs_rename_dir() Based on a patch written by Dan Aloni --- fs/sysfs/dir.c | 22 ++++++++++++++++++++++ include/linux/sysfs.h | 3 +++ 2 files changed, 25 insertions(+) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 1ca3a06db1df..0b1588ab9259 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -121,7 +121,29 @@ void sysfs_remove_dir(struct kobject * kobj) dput(parent); } +void sysfs_rename_dir(struct kobject * kobj, char *new_name) +{ + struct dentry * new_dentry, * parent; + + if (!strcmp(kobj->name, new_name)) + return; + + if (!kobj->parent) + return; + + parent = kobj->parent->dentry; + + down(&parent->d_inode->i_sem); + + new_dentry = sysfs_get_dentry(parent, new_name); + d_move(kobj->dentry, 
new_dentry); + + strlcpy(kobj->name, new_name, KOBJ_NAME_LEN); + + up(&parent->d_inode->i_sem); +} EXPORT_SYMBOL(sysfs_create_dir); EXPORT_SYMBOL(sysfs_remove_dir); +EXPORT_SYMBOL(sysfs_rename_dir); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 6d8af386ab1d..441c0d91f583 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -39,6 +39,9 @@ sysfs_create_dir(struct kobject *); extern void sysfs_remove_dir(struct kobject *); +extern void +sysfs_rename_dir(struct kobject *, char *new_name); + extern int sysfs_create_file(struct kobject *, struct attribute *); -- cgit v1.2.3 From e956d3ab2a9fd0387d41f5035e0902e06bcbc219 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Jul 2003 03:43:34 -0700 Subject: [PATCH] kobject: add kobject_rename() Based on a patch written by Dan Aloni --- include/linux/kobject.h | 2 ++ lib/kobject.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 5d42248dd95f..e744a55d07d5 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -39,6 +39,8 @@ extern void kobject_cleanup(struct kobject *); extern int kobject_add(struct kobject *); extern void kobject_del(struct kobject *); +extern void kobject_rename(struct kobject *, char *new_name); + extern int kobject_register(struct kobject *); extern void kobject_unregister(struct kobject *); diff --git a/lib/kobject.c b/lib/kobject.c index fb49131f5ff9..15fa0ba4dd88 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -313,6 +313,21 @@ int kobject_register(struct kobject * kobj) return error; } +/** + * kobject_rename - change the name of an object + * @kobj: object in question. + * @new_name: object's new name + */ + +void kobject_rename(struct kobject * kobj, char *new_name) +{ + kobj = kobject_get(kobj); + if (!kobj) + return; + sysfs_rename_dir(kobj, new_name); + kobject_put(kobj); +} + /** * kobject_del - unlink kobject from hierarchy. * @kobj: object. 
-- cgit v1.2.3 From 59c6630a851e15bc6bcecac9656e916574203b95 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Jul 2003 03:43:49 -0700 Subject: [PATCH] driver core: added class_device_rename() Based on a patch written by Dan Aloni --- drivers/base/class.c | 18 ++++++++++++++++++ include/linux/device.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/drivers/base/class.c b/drivers/base/class.c index ea551b8dc28b..d9eff17c0bb0 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -339,6 +339,24 @@ void class_device_unregister(struct class_device *class_dev) class_device_put(class_dev); } +int class_device_rename(struct class_device *class_dev, char *new_name) +{ + class_dev = class_device_get(class_dev); + if (!class_dev) + return -EINVAL; + + pr_debug("CLASS: renaming '%s' to '%s'\n", class_dev->class_id, + new_name); + + strlcpy(class_dev->class_id, new_name, KOBJ_NAME_LEN); + + kobject_rename(&class_dev->kobj, new_name); + + class_device_put(class_dev); + + return 0; +} + struct class_device * class_device_get(struct class_device *class_dev) { if (class_dev) diff --git a/include/linux/device.h b/include/linux/device.h index edf43ff2ffb2..2795b85ac6f1 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -216,6 +216,8 @@ extern void class_device_initialize(struct class_device *); extern int class_device_add(struct class_device *); extern void class_device_del(struct class_device *); +extern int class_device_rename(struct class_device *, char *); + extern struct class_device * class_device_get(struct class_device *); extern void class_device_put(struct class_device *); -- cgit v1.2.3 From 7e2fa9927e8b1601ae947f87a50fdd5860a9599d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Jul 2003 03:51:08 -0700 Subject: driver core: add my copyright to class.c --- drivers/base/class.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/class.c b/drivers/base/class.c index d9eff17c0bb0..2a9c349bd7c9 100644 --- 
a/drivers/base/class.c +++ b/drivers/base/class.c @@ -3,6 +3,8 @@ * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs + * Copyright (c) 2003 Greg Kroah-Hartman + * Copyright (c) 2003 IBM Corp. * * This file is released under the GPLv2 * -- cgit v1.2.3 From 4b22645477b933f5cf2a972beebef367b628cdc2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 3 Jul 2003 04:54:42 -0700 Subject: Add an asynchronous buffer read-ahead facility. Nobody uses it for now, but I needed it for some tuning tests, and it is potentially useful for others. --- fs/buffer.c | 22 ++++++++++++++++++++++ include/linux/buffer_head.h | 7 +++++++ 2 files changed, 29 insertions(+) diff --git a/fs/buffer.c b/fs/buffer.c index 56c9f4e03bdd..f063200c5b66 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1447,6 +1447,28 @@ __getblk(struct block_device *bdev, sector_t block, int size) } EXPORT_SYMBOL(__getblk); +/* + * Do async read-ahead on a buffer.. + */ +void +__breadahead(struct block_device *bdev, sector_t block, int size) +{ + struct buffer_head *bh = __getblk(bdev, block, size); + if (!test_set_buffer_locked(bh)) { + if (!buffer_uptodate(bh)) { + /* + * This eats the bh count from __getblk() and + * unlocks when the read is done. 
+ */ + bh->b_end_io = end_buffer_io_sync; + submit_bh(READ, bh); + return; + } + unlock_buffer(bh); + } + brelse(bh); +} + /** * __bread() - reads a specified block and returns the bh * @block: number of block diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 22d3ac8efc6b..1f468b0491ed 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -167,6 +167,7 @@ struct buffer_head *__find_get_block(struct block_device *, sector_t, int); struct buffer_head * __getblk(struct block_device *, sector_t, int); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); +void __breadahead(struct block_device *, sector_t block, int size); struct buffer_head *__bread(struct block_device *, sector_t block, int size); struct buffer_head *alloc_buffer_head(int gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -241,6 +242,12 @@ sb_bread(struct super_block *sb, sector_t block) return __bread(sb->s_bdev, block, sb->s_blocksize); } +static inline void +sb_breadahead(struct super_block *sb, sector_t block) +{ + __breadahead(sb->s_bdev, block, sb->s_blocksize); +} + static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { -- cgit v1.2.3 From 9c67eccb82d6ce0fb44a812ef5f76be970eedd1b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 3 Jul 2003 05:20:52 -0700 Subject: Re-organize "ext3_get_inode_loc()" and make it easier to follow by splitting it into two functions: one that calculates the position, and the other that actually reads the inode block off the disk. 
--- fs/ext3/inode.c | 101 +++++++++++++++++++++++++----------------------- include/linux/ext3_fs.h | 6 ++- 2 files changed, 57 insertions(+), 50 deletions(-) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index c2b0304b1855..aa632b07899a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2290,68 +2290,72 @@ out_stop: ext3_journal_stop(handle); } -/* - * ext3_get_inode_loc returns with an extra refcount against the - * inode's underlying buffer_head on success. - */ - -int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) +static unsigned long ext3_get_inode_block(struct super_block *sb, + unsigned long ino, struct ext3_iloc *iloc) { - struct buffer_head *bh = 0; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; - unsigned long desc; - unsigned long offset; + unsigned long desc, group_desc, block_group; + unsigned long offset, block; + struct buffer_head *bh; struct ext3_group_desc * gdp; - if ((inode->i_ino != EXT3_ROOT_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( - EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "bad inode number: %lu", inode->i_ino); - goto bad_inode; + if ((ino != EXT3_ROOT_INO && + ino != EXT3_JOURNAL_INO && + ino < EXT3_FIRST_INO(sb)) || + ino > le32_to_cpu( + EXT3_SB(sb)->s_es->s_inodes_count)) { + ext3_error (sb, "ext3_get_inode_block", + "bad inode number: %lu", ino); + return 0; } - block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); - if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); + if (block_group >= EXT3_SB(sb)->s_groups_count) { + ext3_error (sb, "ext3_get_inode_block", "group >= groups count"); - goto bad_inode; + return 0; } - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); - desc = block_group & 
(EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); - bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc]; + group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); + desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); + bh = EXT3_SB(sb)->s_group_desc[group_desc]; if (!bh) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", + ext3_error (sb, "ext3_get_inode_block", "Descriptor not loaded"); - goto bad_inode; + return 0; } gdp = (struct ext3_group_desc *) bh->b_data; /* * Figure out the offset within the block group inode table */ - offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * - EXT3_INODE_SIZE(inode->i_sb); + offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * + EXT3_INODE_SIZE(sb); block = le32_to_cpu(gdp[desc].bg_inode_table) + - (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); - if (!(bh = sb_bread(inode->i_sb, block))) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); - goto bad_inode; - } - offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); + (offset >> EXT3_BLOCK_SIZE_BITS(sb)); - iloc->bh = bh; - iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); iloc->block_group = block_group; + iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); + return block; +} - return 0; +/* + * ext3_get_inode_loc returns with an extra refcount against the + * inode's underlying buffer_head on success. 
+ */ + +int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) +{ + unsigned long block; - bad_inode: + block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); + if (block) { + struct buffer_head *bh = sb_bread(inode->i_sb, block); + if (bh) { + iloc->bh = bh; + return 0; + } + ext3_error (inode->i_sb, "ext3_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block=%lu", inode->i_ino, block); + } return -EIO; } @@ -2388,7 +2392,7 @@ void ext3_read_inode(struct inode * inode) if (ext3_get_inode_loc(inode, &iloc)) goto bad_inode; bh = iloc.bh; - raw_inode = iloc.raw_inode; + raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); @@ -2454,11 +2458,9 @@ void ext3_read_inode(struct inode * inode) * even on big-endian machines: we do NOT byteswap the block numbers! */ for (block = 0; block < EXT3_N_BLOCKS; block++) - ei->i_data[block] = iloc.raw_inode->i_block[block]; + ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); - brelse (iloc.bh); - if (S_ISREG(inode->i_mode)) { inode->i_op = &ext3_file_inode_operations; inode->i_fop = &ext3_file_operations; @@ -2476,8 +2478,9 @@ void ext3_read_inode(struct inode * inode) } else { inode->i_op = &ext3_special_inode_operations; init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); + le32_to_cpu(raw_inode->i_block[0])); } + brelse (iloc.bh); ext3_set_inode_flags(inode); return; @@ -2497,7 +2500,7 @@ static int ext3_do_update_inode(handle_t *handle, struct inode *inode, struct ext3_iloc *iloc) { - struct ext3_inode *raw_inode = iloc->raw_inode; + struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index f077563bcfc3..c360f84fed3d 100644 --- 
a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -636,10 +636,14 @@ struct dx_hash_info struct ext3_iloc { struct buffer_head *bh; - struct ext3_inode *raw_inode; + unsigned long offset; unsigned long block_group; }; +static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc) +{ + return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset); +} /* * This structure is stuffed into the struct file's private_data field -- cgit v1.2.3 From 81523bf22b35bb51d40a4553c6e2591969ff40ad Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 3 Jul 2003 06:23:39 -0700 Subject: [PATCH] fix via irq routing Via irq routing has a funky PIRQD location. I checked my datasheets and, yep, this is correct all the way back to via686a. This bug existed for _ages_. I wonder if I created it, even... --- arch/i386/pci/irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 2fc6431a43ef..4f9feba2fc95 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -195,15 +195,16 @@ static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, /* * The VIA pirq rules are nibble-based, like ALI, * but without the ugly irq number munging. + * However, PIRQD is in the upper instead of lower 4 bits. */ static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { - return read_config_nybble(router, 0x55, pirq); + return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); } static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - write_config_nybble(router, 0x55, pirq, irq); + write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq); return 1; } -- cgit v1.2.3 From fc8b427ef827733152f3e9d9e8b61ac7d69e06a5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Jul 2003 22:06:06 -0700 Subject: [PATCH] Add open intent information to the 'struct nameidata' - Add open intent information to the 'struct nameidata'. 
- Pass the struct nameidata as an optional parameter to the lookup() inode operation. - Pass the struct nameidata as an optional parameter to the d_revalidate() dentry operation. - Make link_path_walk() set the LOOKUP_CONTINUE flag in nd->flags instead of passing it as an extra parameter to d_revalidate(). - Make open_namei(), and sys_uselib() set the open()/create() intent data. --- drivers/net/wan/comx.c | 4 ++-- fs/adfs/adfs.h | 2 +- fs/adfs/dir.c | 2 +- fs/affs/namei.c | 2 +- fs/afs/dir.c | 10 ++++----- fs/afs/mntpt.c | 4 ++-- fs/autofs/root.c | 8 ++++---- fs/autofs4/root.c | 14 ++++++------- fs/befs/linuxvfs.c | 4 ++-- fs/bfs/dir.c | 2 +- fs/cifs/cifsfs.h | 2 +- fs/cifs/dir.c | 4 ++-- fs/coda/dir.c | 8 ++++---- fs/cramfs/inode.c | 2 +- fs/devfs/base.c | 6 +++--- fs/efs/namei.c | 2 +- fs/exec.c | 3 ++- fs/ext2/namei.c | 2 +- fs/ext3/namei.c | 2 +- fs/freevxfs/vxfs_lookup.c | 5 +++-- fs/hfs/dir_cap.c | 4 ++-- fs/hfs/dir_dbl.c | 4 ++-- fs/hfs/dir_nat.c | 4 ++-- fs/hfs/sysdep.c | 4 ++-- fs/hpfs/dir.c | 2 +- fs/hpfs/hpfs_fn.h | 2 +- fs/intermezzo/dcache.c | 2 +- fs/intermezzo/dir.c | 4 ++-- fs/intermezzo/intermezzo_fs.h | 2 +- fs/isofs/namei.c | 2 +- fs/jffs/inode-v23.c | 2 +- fs/jffs2/dir.c | 4 ++-- fs/jfs/namei.c | 2 +- fs/libfs.c | 2 +- fs/minix/namei.c | 2 +- fs/msdos/namei.c | 2 +- fs/namei.c | 48 ++++++++++++++++++++++++++++--------------- fs/ncpfs/dir.c | 12 +++++------ fs/nfs/dir.c | 6 +++--- fs/ntfs/namei.c | 3 ++- fs/openpromfs/inode.c | 4 ++-- fs/proc/base.c | 10 ++++----- fs/proc/generic.c | 2 +- fs/proc/root.c | 6 +++--- fs/qnx4/namei.c | 2 +- fs/reiserfs/namei.c | 2 +- fs/romfs/inode.c | 2 +- fs/smbfs/dir.c | 8 ++++---- fs/sysv/namei.c | 2 +- fs/udf/namei.c | 3 ++- fs/ufs/namei.c | 2 +- fs/umsdos/dir.c | 4 ++-- fs/umsdos/rdir.c | 4 ++-- fs/vfat/namei.c | 6 +++--- fs/xfs/linux/xfs_iops.c | 3 ++- include/linux/affs_fs.h | 2 +- include/linux/dcache.h | 3 ++- include/linux/efs_fs.h | 2 +- include/linux/fs.h | 4 ++-- include/linux/iso_fs.h | 2 +- 
include/linux/msdos_fs.h | 4 ++-- include/linux/namei.h | 16 ++++++++++++++- include/linux/proc_fs.h | 4 ++-- include/linux/qnx4_fs.h | 2 +- include/linux/umsdos_fs.p | 4 ++-- 65 files changed, 169 insertions(+), 135 deletions(-) diff --git a/drivers/net/wan/comx.c b/drivers/net/wan/comx.c index 1039bf85ea0a..8cd73bc9a3d5 100644 --- a/drivers/net/wan/comx.c +++ b/drivers/net/wan/comx.c @@ -86,7 +86,7 @@ static struct comx_protocol *comx_lines = NULL; static int comx_mkdir(struct inode *, struct dentry *, int); static int comx_rmdir(struct inode *, struct dentry *); -static struct dentry *comx_lookup(struct inode *, struct dentry *); +static struct dentry *comx_lookup(struct inode *, struct dentry *, struct nameidata *); static struct inode_operations comx_root_inode_ops = { .lookup = comx_lookup, @@ -922,7 +922,7 @@ static int comx_rmdir(struct inode *dir, struct dentry *dentry) return 0; } -static struct dentry *comx_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *comx_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct proc_dir_entry *de; struct inode *inode = NULL; diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index f4fde1c2310a..6e4a1b3a4e15 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -88,7 +88,7 @@ void __adfs_error(struct super_block *sb, const char *function, #define adfs_error(sb, fmt...) 
__adfs_error(sb, __FUNCTION__, fmt) /* namei.c */ -extern struct dentry *adfs_lookup(struct inode *dir, struct dentry *dentry); +extern struct dentry *adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); /* super.c */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index df29ce99c6ed..aae5b4e066db 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -269,7 +269,7 @@ struct dentry_operations adfs_dentry_operations = { .d_compare = adfs_compare, }; -struct dentry *adfs_lookup(struct inode *dir, struct dentry *dentry) +struct dentry *adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = NULL; struct object_info obj; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 01defe3d0ff8..55beff12444f 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -210,7 +210,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry) } struct dentry * -affs_lookup(struct inode *dir, struct dentry *dentry) +affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct buffer_head *bh; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 12effcc6f896..a63e3e9679fe 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -23,10 +23,10 @@ #include "super.h" #include "internal.h" -static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry); +static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir); -static int afs_d_revalidate(struct dentry *dentry, int flags); +static int afs_d_revalidate(struct dentry *dentry, struct nameidata *); static int afs_d_delete(struct dentry *dentry); static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos, ino_t ino, unsigned dtype); @@ -414,7 +414,7 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, 
int nlen, lof /* * look up an entry in a directory */ -static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct afs_dir_lookup_cookie cookie; struct afs_super_info *as; @@ -487,7 +487,7 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry) * - NOTE! the hit can be a negative hit too, so we can't assume we have an inode * (derived from nfs_lookup_revalidate) */ -static int afs_d_revalidate(struct dentry *dentry, int flags) +static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) { struct afs_dir_lookup_cookie cookie; struct dentry *parent; @@ -495,7 +495,7 @@ static int afs_d_revalidate(struct dentry *dentry, int flags) unsigned fpos; int ret; - _enter("%s,%x",dentry->d_name.name,flags); + _enter("%s,%p",dentry->d_name.name,nd); parent = dget_parent(dentry); dir = parent->d_inode; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 0279fcbf8329..d22887d47f38 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -21,7 +21,7 @@ #include "internal.h" -static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry); +static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); static int afs_mntpt_open(struct inode *inode, struct file *file); struct file_operations afs_mntpt_file_operations = { @@ -93,7 +93,7 @@ int afs_mntpt_check_symlink(afs_vnode_t *vnode) /* * no valid lookup procedure on this sort of dir */ -static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { return ERR_PTR(-EREMOTE); } /* end afs_mntpt_lookup() */ diff --git a/fs/autofs/root.c b/fs/autofs/root.c index e6e3b0c468d7..546ac2f9af87 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -18,7 +18,7 @@ #include "autofs_i.h" static int 
autofs_root_readdir(struct file *,void *,filldir_t); -static struct dentry *autofs_root_lookup(struct inode *,struct dentry *); +static struct dentry *autofs_root_lookup(struct inode *,struct dentry *, struct nameidata *); static int autofs_root_symlink(struct inode *,struct dentry *,const char *); static int autofs_root_unlink(struct inode *,struct dentry *); static int autofs_root_rmdir(struct inode *,struct dentry *); @@ -144,7 +144,7 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str * yet completely filled in, and revalidate has to delay such * lookups.. */ -static int autofs_revalidate(struct dentry * dentry, int flags) +static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode * dir; struct autofs_sb_info *sbi; @@ -195,7 +195,7 @@ static struct dentry_operations autofs_dentry_operations = { .d_revalidate = autofs_revalidate, }; -static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi; int oz_mode; @@ -230,7 +230,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr d_add(dentry, NULL); up(&dir->i_sem); - autofs_revalidate(dentry, 0); + autofs_revalidate(dentry, nd); down(&dir->i_sem); /* diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index de61c25779c1..49f9f4d3b406 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -18,13 +18,13 @@ #include #include "autofs_i.h" -static struct dentry *autofs4_dir_lookup(struct inode *,struct dentry *); +static struct dentry *autofs4_dir_lookup(struct inode *,struct dentry *, struct nameidata *); static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); static int autofs4_dir_unlink(struct inode *,struct dentry *); static int autofs4_dir_rmdir(struct inode *,struct dentry *); static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 
static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); -static struct dentry *autofs4_root_lookup(struct inode *,struct dentry *); +static struct dentry *autofs4_root_lookup(struct inode *,struct dentry *, struct nameidata *); struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -143,7 +143,7 @@ static int try_to_fill_dentry(struct dentry *dentry, * yet completely filled in, and revalidate has to delay such * lookups.. */ -static int autofs4_root_revalidate(struct dentry * dentry, int flags) +static int autofs4_root_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode * dir = dentry->d_parent->d_inode; struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); @@ -183,7 +183,7 @@ static int autofs4_root_revalidate(struct dentry * dentry, int flags) return 1; } -static int autofs4_revalidate(struct dentry *dentry, int flags) +static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); @@ -225,7 +225,7 @@ static struct dentry_operations autofs4_dentry_operations = { /* Lookups in non-root dirs never find anything - if it's there, it's already in the dcache */ /* SMP-safe */ -static struct dentry *autofs4_dir_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *autofs4_dir_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { #if 0 DPRINTK(("autofs_dir_lookup: ignoring lookup of %.*s/%.*s\n", @@ -239,7 +239,7 @@ static struct dentry *autofs4_dir_lookup(struct inode *dir, struct dentry *dentr } /* Lookups in the root directory */ -static struct dentry *autofs4_root_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *autofs4_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi; int oz_mode; @@ -276,7 +276,7 @@ static struct dentry *autofs4_root_lookup(struct inode *dir, struct dentry *dent if (dentry->d_op && 
dentry->d_op->d_revalidate) { up(&dir->i_sem); - (dentry->d_op->d_revalidate)(dentry, 0); + (dentry->d_op->d_revalidate)(dentry, nd); down(&dir->i_sem); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 4fb5a163e50d..d7846d65b361 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -33,7 +33,7 @@ static int befs_readdir(struct file *, void *, filldir_t); static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_readpage(struct file *file, struct page *page); static sector_t befs_bmap(struct address_space *mapping, sector_t block); -static struct dentry *befs_lookup(struct inode *, struct dentry *); +static struct dentry *befs_lookup(struct inode *, struct dentry *, struct nameidata *); static void befs_read_inode(struct inode *ino); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); @@ -163,7 +163,7 @@ befs_get_block(struct inode *inode, sector_t block, } static struct dentry * -befs_lookup(struct inode *dir, struct dentry *dentry) +befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = NULL; struct super_block *sb = dir->i_sb; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 386f5fff4a77..d1f665826065 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -127,7 +127,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode) return 0; } -static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry) +static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { struct inode * inode = NULL; struct buffer_head * bh; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 2776f7c0b7c2..92aef944dcab 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -47,7 +47,7 @@ extern void cifs_delete_inode(struct inode *); /* Functions related to inodes */ extern struct inode_operations cifs_dir_inode_ops; extern int cifs_create(struct inode *, 
struct dentry *, int); -extern struct dentry *cifs_lookup(struct inode *, struct dentry *); +extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *, struct dentry *); extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); extern int cifs_mkdir(struct inode *, struct dentry *, int); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 123639718e99..b8b546eb8489 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -178,7 +178,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode) } struct dentry * -cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry) +cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct nameidata *nd) { int rc, xid; struct cifs_sb_info *cifs_sb; @@ -262,7 +262,7 @@ cifs_dir_open(struct inode *inode, struct file *file) } static int -cifs_d_revalidate(struct dentry *direntry, int flags) +cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) { int isValid = 1; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index a7952879bd8f..030977f42952 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -30,7 +30,7 @@ /* dir inode-ops */ static int coda_create(struct inode *dir, struct dentry *new, int mode); static int coda_mknod(struct inode *dir, struct dentry *new, int mode, dev_t rdev); -static struct dentry *coda_lookup(struct inode *dir, struct dentry *target); +static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd); static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, struct dentry *entry); static int coda_unlink(struct inode *dir_inode, struct dentry *entry); @@ -45,7 +45,7 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, static int coda_readdir(struct file *file, void *dirent, filldir_t filldir); /* dentry ops */ -static int coda_dentry_revalidate(struct dentry *de, int); +static int coda_dentry_revalidate(struct dentry 
*de, struct nameidata *nd); static int coda_dentry_delete(struct dentry *); /* support routines */ @@ -90,7 +90,7 @@ struct file_operations coda_dir_operations = { /* inode operations for directories */ /* access routines: lookup, readlink, permission */ -static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry) +static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) { struct inode *res_inode = NULL; struct ViceFid resfid = {0,0,0}; @@ -627,7 +627,7 @@ static int coda_venus_readdir(struct file *filp, filldir_t filldir, } /* called when a cache lookup succeeds */ -static int coda_dentry_revalidate(struct dentry *de, int flags) +static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) { struct inode *inode = de->d_inode; struct coda_inode_info *cii; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index c6d6844796bb..b6a83ad7b325 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -342,7 +342,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Lookup and fill in the inode data.. 
*/ -static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { unsigned int offset = 0; int sorted; diff --git a/fs/devfs/base.c b/fs/devfs/base.c index c632affe5dae..5c787aaa4901 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -2175,7 +2175,7 @@ static struct dentry_operations devfs_dops = .d_iput = devfs_d_iput, }; -static int devfs_d_revalidate_wait (struct dentry *dentry, int flags); +static int devfs_d_revalidate_wait (struct dentry *dentry, struct nameidata *); static struct dentry_operations devfs_wait_dops = { @@ -2212,7 +2212,7 @@ struct devfs_lookup_struct /* XXX: this doesn't handle the case where we got a negative dentry but a devfs entry has been registered in the meanwhile */ -static int devfs_d_revalidate_wait (struct dentry *dentry, int flags) +static int devfs_d_revalidate_wait (struct dentry *dentry, struct nameidata *nd) { struct inode *dir = dentry->d_parent->d_inode; struct fs_info *fs_info = dir->i_sb->s_fs_info; @@ -2265,7 +2265,7 @@ static int devfs_d_revalidate_wait (struct dentry *dentry, int flags) /* Inode operations for device entries follow */ -static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry) +static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct devfs_entry tmp; /* Must stay in scope until devfsd idle again */ struct devfs_lookup_struct lookup_info; diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 086630cc435a..e6c7210f0a68 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -57,7 +57,7 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) return(0); } -struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry) { +struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { efs_ino_t inodenum; struct inode * inode = NULL; diff --git a/fs/exec.c 
b/fs/exec.c index ef73cbeff536..68a64ee4b234 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -117,7 +117,8 @@ asmlinkage long sys_uselib(const char __user * library) struct nameidata nd; int error; - error = user_path_walk(library, &nd); + nd.intent.open.flags = O_RDONLY; + error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) goto out; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 04489df5a2e5..9b9b713c8472 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -66,7 +66,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) * Methods themselves. */ -static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; ino_t ino; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index ae995cad505a..cf521814314a 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -970,7 +970,7 @@ errout: } #endif -static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; struct ext3_dir_entry_2 * de; diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 1f2c91676ee7..9c7f99f7bd01 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -51,7 +51,7 @@ #define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_CACHE_SIZE / (sbp)->s_blocksize)) -static struct dentry * vxfs_lookup(struct inode *, struct dentry *); +static struct dentry * vxfs_lookup(struct inode *, struct dentry *, struct nameidata *); static int vxfs_readdir(struct file *, void *, filldir_t); struct inode_operations vxfs_dir_inode_ops = { @@ -193,6 +193,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp) * vxfs_lookup - lookup pathname component * @dip: dir in which we lookup * @dp: dentry we lookup + * @nd: lookup nameidata * * Description: * vxfs_lookup tries to lookup the 
pathname component described @@ -203,7 +204,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp) * in the return pointer. */ static struct dentry * -vxfs_lookup(struct inode *dip, struct dentry *dp) +vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd) { struct inode *ip = NULL; ino_t ino; diff --git a/fs/hfs/dir_cap.c b/fs/hfs/dir_cap.c index 78da551630a4..62bbda0a6311 100644 --- a/fs/hfs/dir_cap.c +++ b/fs/hfs/dir_cap.c @@ -28,7 +28,7 @@ /*================ Forward declarations ================*/ -static struct dentry *cap_lookup(struct inode *, struct dentry *); +static struct dentry *cap_lookup(struct inode *, struct dentry *, struct nameidata *); static int cap_readdir(struct file *, void *, filldir_t); /*================ Global variables ================*/ @@ -95,7 +95,7 @@ struct inode_operations hfs_cap_rdir_inode_operations = { * inode corresponding to an entry in a directory, given the inode for * the directory and the name (and its length) of the entry. */ -static struct dentry *cap_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *cap_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { ino_t dtype; struct hfs_name cname; diff --git a/fs/hfs/dir_dbl.c b/fs/hfs/dir_dbl.c index 36b7abd1eb6b..9ccdc5afa5d5 100644 --- a/fs/hfs/dir_dbl.c +++ b/fs/hfs/dir_dbl.c @@ -24,7 +24,7 @@ /*================ Forward declarations ================*/ -static struct dentry *dbl_lookup(struct inode *, struct dentry *); +static struct dentry *dbl_lookup(struct inode *, struct dentry *, struct nameidata *); static int dbl_readdir(struct file *, void *, filldir_t); static int dbl_create(struct inode *, struct dentry *, int); static int dbl_mkdir(struct inode *, struct dentry *, int); @@ -108,7 +108,7 @@ static int is_hdr(struct inode *dir, const char *name, int len) * the inode for the directory and the name (and its length) of the * entry. 
*/ -static struct dentry *dbl_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *dbl_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct hfs_name cname; struct hfs_cat_entry *entry; diff --git a/fs/hfs/dir_nat.c b/fs/hfs/dir_nat.c index 5dda709ebdf5..9688bcf7c145 100644 --- a/fs/hfs/dir_nat.c +++ b/fs/hfs/dir_nat.c @@ -30,7 +30,7 @@ /*================ Forward declarations ================*/ -static struct dentry *nat_lookup(struct inode *, struct dentry *); +static struct dentry *nat_lookup(struct inode *, struct dentry *, struct nameidata *); static int nat_readdir(struct file *, void *, filldir_t); static int nat_rmdir(struct inode *, struct dentry *); static int nat_hdr_unlink(struct inode *, struct dentry *); @@ -97,7 +97,7 @@ struct inode_operations hfs_nat_hdir_inode_operations = { * the inode corresponding to an entry in a directory, given the inode * for the directory and the name (and its length) of the entry. */ -static struct dentry *nat_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *nat_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { ino_t dtype; struct hfs_name cname; diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c index f0a08037ab1e..1b083b8b9a2f 100644 --- a/fs/hfs/sysdep.c +++ b/fs/hfs/sysdep.c @@ -19,7 +19,7 @@ #include #include -static int hfs_revalidate_dentry(struct dentry *, int); +static int hfs_revalidate_dentry(struct dentry *, struct nameidata *); static int hfs_hash_dentry(struct dentry *, struct qstr *); static int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); static void hfs_dentry_iput(struct dentry *, struct inode *); @@ -90,7 +90,7 @@ static void hfs_dentry_iput(struct dentry *dentry, struct inode *inode) iput(inode); } -static int hfs_revalidate_dentry(struct dentry *dentry, int flags) +static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; int diff; 
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 1c1e10c72822..9f0a0d3b2382 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -198,7 +198,7 @@ out: * to tell read_inode to read fnode or not. */ -struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry) +struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { const char *name = dentry->d_name.name; unsigned len = dentry->d_name.len; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 91f880e88362..2c2565358d49 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -216,7 +216,7 @@ void hpfs_set_dentry_operations(struct dentry *); int hpfs_dir_release(struct inode *, struct file *); loff_t hpfs_dir_lseek(struct file *, loff_t, int); int hpfs_readdir(struct file *, void *, filldir_t); -struct dentry *hpfs_lookup(struct inode *, struct dentry *); +struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *); /* dnode.c */ diff --git a/fs/intermezzo/dcache.c b/fs/intermezzo/dcache.c index 2d3ebd2a7611..91cd4d94b5e6 100644 --- a/fs/intermezzo/dcache.c +++ b/fs/intermezzo/dcache.c @@ -50,7 +50,7 @@ kmem_cache_t * presto_dentry_slab; /* called when a cache lookup succeeds */ -static int presto_d_revalidate(struct dentry *de, int flag) +static int presto_d_revalidate(struct dentry *de, struct nameidata *nd) { struct inode *inode = de->d_inode; struct presto_file_set * root_fset; diff --git a/fs/intermezzo/dir.c b/fs/intermezzo/dir.c index 0446fb4dc174..e7b22dd30a16 100644 --- a/fs/intermezzo/dir.c +++ b/fs/intermezzo/dir.c @@ -239,7 +239,7 @@ struct dentry *presto_add_ilookup_dentry(struct dentry *parent, return de; } -struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry) +struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { int rc = 0; struct dentry *de; @@ -286,7 +286,7 @@ struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry) (dir, dentry, ino, generation); is_ilookup = 
1; } else - de = iops->lookup(dir, dentry); + de = iops->lookup(dir, dentry, nd); #if 0 } #endif diff --git a/fs/intermezzo/intermezzo_fs.h b/fs/intermezzo/intermezzo_fs.h index 8d2d33fcee0e..3a7c60be8f26 100644 --- a/fs/intermezzo/intermezzo_fs.h +++ b/fs/intermezzo/intermezzo_fs.h @@ -370,7 +370,7 @@ extern int presto_ilookup_uid; # define PRESTO_ILOOKUP_MAGIC "...ino:" # define PRESTO_ILOOKUP_SEP ':' int izo_dentry_is_ilookup(struct dentry *, ino_t *id, unsigned int *generation); -struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry); +struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd); struct presto_dentry_data { int dd_count; /* how mnay dentries are using this dentry */ diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 840cb90d4897..8d525f6bf606 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -158,7 +158,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, return 0; } -struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry) +struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { unsigned long ino; struct inode *inode; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index e7e6d5442774..141fadbf8438 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -642,7 +642,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* Find a file in a directory. If the file exists, return its corresponding dentry. 
*/ static struct dentry * -jffs_lookup(struct inode *dir, struct dentry *dentry) +jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct jffs_file *d; struct jffs_file *f; diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 61d1b71bc20c..65dd67235f61 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -33,7 +33,7 @@ typedef dev_t mknod_arg_t; static int jffs2_readdir (struct file *, void *, filldir_t); static int jffs2_create (struct inode *,struct dentry *,int); -static struct dentry *jffs2_lookup (struct inode *,struct dentry *); +static struct dentry *jffs2_lookup (struct inode *,struct dentry *, struct nameidata *); static int jffs2_link (struct dentry *,struct inode *,struct dentry *); static int jffs2_unlink (struct inode *,struct dentry *); static int jffs2_symlink (struct inode *,struct dentry *,const char *); @@ -73,7 +73,7 @@ struct inode_operations jffs2_dir_inode_operations = and we use the same hash function as the dentries. Makes this nice and simple */ -static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target) +static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, struct nameidata *nd) { struct jffs2_inode_info *dir_f; struct jffs2_sb_info *c; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index fd1467521794..b4aa9941a51d 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1373,7 +1373,7 @@ int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) return -rc; } -static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry) +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) { struct btstack btstack; ino_t inum; diff --git a/fs/libfs.c b/fs/libfs.c index 62fb3c0fbc24..884da83cf77a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -29,7 +29,7 @@ int simple_statfs(struct super_block *sb, struct kstatfs *buf) * exist, we know it is negative. 
*/ -struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry) +struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { d_add(dentry, NULL); return NULL; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index d2b9ae264ce1..007fb7786236 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -54,7 +54,7 @@ struct dentry_operations minix_dentry_operations = { .d_hash = minix_hash, }; -static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 31eb0d076c1f..f0651cd1b996 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -193,7 +193,7 @@ static struct dentry_operations msdos_dentry_operations = { */ /***** Get inode using directory and name */ -struct dentry *msdos_lookup(struct inode *dir,struct dentry *dentry) +struct dentry *msdos_lookup(struct inode *dir,struct dentry *dentry, struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct inode *inode = NULL; diff --git a/fs/namei.c b/fs/namei.c index 8c847a1963f8..a04cf1aaceb2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -273,7 +273,7 @@ void path_release(struct nameidata *nd) * Internal lookup() using the new generic dcache. 
* SMP-safe */ -static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) +static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) { struct dentry * dentry = __d_lookup(parent, name); @@ -284,7 +284,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, dentry = d_lookup(parent, name); if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { + if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { dput(dentry); dentry = NULL; } @@ -336,7 +336,7 @@ ok: * make sure that nobody added the entry to the dcache in the meantime.. * SMP-safe */ -static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) +static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) { struct dentry * result; struct inode *dir = parent->d_inode; @@ -361,7 +361,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, i struct dentry * dentry = d_alloc(parent, name); result = ERR_PTR(-ENOMEM); if (dentry) { - result = dir->i_op->lookup(dir, dentry); + result = dir->i_op->lookup(dir, dentry, nd); if (result) dput(dentry); else @@ -377,7 +377,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, i */ up(&dir->i_sem); if (result->d_op && result->d_op->d_revalidate) { - if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) { + if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { dput(result); result = ERR_PTR(-ENOENT); } @@ -524,7 +524,7 @@ struct path { * It _is_ time-critical. 
*/ static int do_lookup(struct nameidata *nd, struct qstr *name, - struct path *path, int flags) + struct path *path) { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); @@ -539,13 +539,13 @@ done: return 0; need_lookup: - dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE); + dentry = real_lookup(nd->dentry, name, nd); if (IS_ERR(dentry)) goto fail; goto done; need_revalidate: - if (dentry->d_op->d_revalidate(dentry, flags)) + if (dentry->d_op->d_revalidate(dentry, nd)) goto done; if (d_invalidate(dentry)) goto done; @@ -638,8 +638,9 @@ int link_path_walk(const char * name, struct nameidata *nd) if (err < 0) break; } + nd->flags |= LOOKUP_CONTINUE; /* This does the actual lookups.. */ - err = do_lookup(nd, &this, &next, LOOKUP_CONTINUE); + err = do_lookup(nd, &this, &next); if (err) break; /* Check mountpoints.. */ @@ -681,6 +682,7 @@ int link_path_walk(const char * name, struct nameidata *nd) last_with_slashes: lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; last_component: + nd->flags &= ~LOOKUP_CONTINUE; if (lookup_flags & LOOKUP_PARENT) goto lookup_parent; if (this.name[0] == '.') switch (this.len) { @@ -700,7 +702,7 @@ last_component: if (err < 0) break; } - err = do_lookup(nd, &this, &next, 0); + err = do_lookup(nd, &this, &next); if (err) break; follow_mount(&next.mnt, &next.dentry); @@ -769,6 +771,7 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) */ nd_root.last_type = LAST_ROOT; nd_root.flags = nd->flags; + memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent)); read_lock(&current->fs->lock); nd_root.mnt = mntget(current->fs->rootmnt); nd_root.dentry = dget(current->fs->root); @@ -866,7 +869,7 @@ int path_lookup(const char *name, unsigned int flags, struct nameidata *nd) * needs parent already locked. Doesn't follow mounts. * SMP-safe. 
*/ -struct dentry * lookup_hash(struct qstr *name, struct dentry * base) +static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd) { struct dentry * dentry; struct inode *inode; @@ -889,13 +892,13 @@ struct dentry * lookup_hash(struct qstr *name, struct dentry * base) goto out; } - dentry = cached_lookup(base, name, 0); + dentry = cached_lookup(base, name, nd); if (!dentry) { struct dentry *new = d_alloc(base, name); dentry = ERR_PTR(-ENOMEM); if (!new) goto out; - dentry = inode->i_op->lookup(inode, new); + dentry = inode->i_op->lookup(inode, new, nd); if (!dentry) dentry = new; else @@ -905,6 +908,11 @@ out: return dentry; } +struct dentry * lookup_hash(struct qstr *name, struct dentry * base) +{ + return __lookup_hash(name, base, NULL); +} + /* SMP-safe */ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) { @@ -1222,11 +1230,15 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) if (flag & O_APPEND) acc_mode |= MAY_APPEND; + /* Fill in the open() intent data */ + nd->intent.open.flags = flag; + nd->intent.open.create_mode = mode; + /* * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { - error = path_lookup(pathname, lookup_flags(flag), nd); + error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); if (error) return error; dentry = nd->dentry; @@ -1236,7 +1248,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) /* * Create - we need to know the parent. 
*/ - error = path_lookup(pathname, LOOKUP_PARENT, nd); + error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); if (error) return error; @@ -1250,8 +1262,9 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) goto exit; dir = nd->dentry; + nd->flags &= ~LOOKUP_PARENT; down(&dir->d_inode->i_sem); - dentry = lookup_hash(&nd->last, nd->dentry); + dentry = __lookup_hash(&nd->last, nd->dentry, nd); do_last: error = PTR_ERR(dentry); @@ -1354,7 +1367,7 @@ do_link: } dir = nd->dentry; down(&dir->d_inode->i_sem); - dentry = lookup_hash(&nd->last, nd->dentry); + dentry = __lookup_hash(&nd->last, nd->dentry, nd); putname(nd->last.name); goto do_last; } @@ -1368,6 +1381,7 @@ static struct dentry *lookup_create(struct nameidata *nd, int is_dir) dentry = ERR_PTR(-EEXIST); if (nd->last_type != LAST_NORM) goto fail; + nd->flags &= ~LOOKUP_PARENT; dentry = lookup_hash(&nd->last, nd->dentry); if (IS_ERR(dentry)) goto fail; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index d4577dc7a551..d695f6db5baa 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -35,7 +35,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t, static int ncp_readdir(struct file *, void *, filldir_t); static int ncp_create(struct inode *, struct dentry *, int); -static struct dentry *ncp_lookup(struct inode *, struct dentry *); +static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *); static int ncp_unlink(struct inode *, struct dentry *); static int ncp_mkdir(struct inode *, struct dentry *, int); static int ncp_rmdir(struct inode *, struct dentry *); @@ -72,7 +72,7 @@ struct inode_operations ncp_dir_inode_operations = /* * Dentry operations routines */ -static int ncp_lookup_validate(struct dentry *, int); +static int ncp_lookup_validate(struct dentry *, struct nameidata *); static int ncp_hash_dentry(struct dentry *, struct qstr *); static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); static 
int ncp_delete_dentry(struct dentry *); @@ -264,7 +264,7 @@ leave_me:; static int -__ncp_lookup_validate(struct dentry * dentry, int flags) +__ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) { struct ncp_server *server; struct dentry *parent; @@ -333,11 +333,11 @@ finished: } static int -ncp_lookup_validate(struct dentry * dentry, int flags) +ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) { int res; lock_kernel(); - res = __ncp_lookup_validate(dentry, flags); + res = __ncp_lookup_validate(dentry, nd); unlock_kernel(); return res; } @@ -797,7 +797,7 @@ out: return result; } -static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct ncp_server *server = NCP_SERVER(dir); struct inode *inode = NULL; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d5499baadd1c..abf189a02e50 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -37,7 +37,7 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); -static struct dentry *nfs_lookup(struct inode *, struct dentry *); +static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); static int nfs_cached_lookup(struct inode *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); static int nfs_create(struct inode *, struct dentry *, int); @@ -515,7 +515,7 @@ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry) * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. 
*/ -static int nfs_lookup_revalidate(struct dentry * dentry, int flags) +static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode *dir; struct inode *inode; @@ -630,7 +630,7 @@ struct dentry_operations nfs_dentry_operations = { .d_iput = nfs_dentry_iput, }; -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry) +static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct inode *inode = NULL; int error; diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 84d43247ba6e..a8c6e8a4e3b8 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -29,6 +29,7 @@ * ntfs_lookup - find the inode represented by a dentry in a directory inode * @dir_ino: directory inode in which to look for the inode * @dent: dentry representing the inode to look for + * @nd: lookup nameidata * * In short, ntfs_lookup() looks for the inode represented by the dentry @dent * in the directory inode @dir_ino and if found attaches the inode to the @@ -87,7 +88,7 @@ * name. We then convert the name to the current NLS code page, and proceed * searching for a dentry with this name, etc, as in case 2), above. 
*/ -static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent) +static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, struct nameidata *nd) { ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); struct inode *dent_inode; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 5a1fb89449be..3c11c87e2f22 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -61,7 +61,7 @@ static char *alias_names [ALIASES_NNODES]; static int openpromfs_create (struct inode *, struct dentry *, int); static int openpromfs_readdir(struct file *, void *, filldir_t); -static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry); +static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry, struct nameidata *nd); static int openpromfs_unlink (struct inode *, struct dentry *dentry); static ssize_t nodenum_read(struct file *file, char *buf, @@ -639,7 +639,7 @@ static int lookup_children(u16 n, const char * name, int len) return 0; } -static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { int ino = 0; #define OPFSL_DIR 0 diff --git a/fs/proc/base.c b/fs/proc/base.c index e843c6584cc9..3d05ee25f8e9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -864,7 +864,7 @@ out_unlock: * directory. In this case, however, we can do it - no aliasing problems * due to the way we treat inodes. 
*/ -static int pid_revalidate(struct dentry * dentry, int flags) +static int pid_revalidate(struct dentry * dentry, struct nameidata *nd) { if (pid_alive(proc_task(dentry->d_inode))) return 1; @@ -872,7 +872,7 @@ static int pid_revalidate(struct dentry * dentry, int flags) return 0; } -static int pid_fd_revalidate(struct dentry * dentry, int flags) +static int pid_fd_revalidate(struct dentry * dentry, struct nameidata *nd) { struct task_struct *task = proc_task(dentry->d_inode); int fd = proc_type(dentry->d_inode) - PROC_PID_FD_DIR; @@ -961,7 +961,7 @@ out: } /* SMP-safe */ -static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) +static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { struct task_struct *task = proc_task(dir); unsigned fd = name_to_int(dentry); @@ -1219,7 +1219,7 @@ out: return ERR_PTR(error); } -static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry){ +static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ return proc_pident_lookup(dir, dentry, base_stuff); } @@ -1326,7 +1326,7 @@ void proc_pid_flush(struct dentry *proc_dentry) } /* SMP-safe */ -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry) +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct task_struct *task; struct inode *inode; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 6f658ceafc3a..979237c72966 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -336,7 +336,7 @@ static struct dentry_operations proc_dentry_operations = * Don't create negative dentries here, return -ENOENT by hand * instead. 
*/ -struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry) +struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = NULL; struct proc_dir_entry * de; diff --git a/fs/proc/root.c b/fs/proc/root.c index fb40f8c53cb4..936962d01c28 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -79,7 +79,7 @@ void __init proc_root_init(void) proc_bus = proc_mkdir("bus", 0); } -static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry) +static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { /* * nr_threads is actually protected by the tasklist_lock; @@ -89,11 +89,11 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */ dir->i_nlink = proc_root.nlink + nr_threads; - if (!proc_lookup(dir, dentry)) { + if (!proc_lookup(dir, dentry, nd)) { return NULL; } - return proc_pid_lookup(dir, dentry); + return proc_pid_lookup(dir, dentry, nd); } static int proc_root_readdir(struct file * filp, diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 0a7592c5b958..12e423ae6de0 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -107,7 +107,7 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir, return NULL; } -struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry) +struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { int ino; struct qnx4_inode_entry *de; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 46bc6549577d..18a3353274c4 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -316,7 +316,7 @@ static int reiserfs_find_entry (struct inode * dir, const char * name, int namel } -static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry) +static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry, struct nameidata *nd) 
{ int retval; struct inode * inode = NULL; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index fb60389d42fc..24cd428a521e 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -329,7 +329,7 @@ out: } static struct dentry * -romfs_lookup(struct inode *dir, struct dentry *dentry) +romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { unsigned long offset, maxoff; int fslen, res; diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index af4b42beab8f..081402f119dc 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -24,7 +24,7 @@ static int smb_readdir(struct file *, void *, filldir_t); static int smb_dir_open(struct inode *, struct file *); -static struct dentry *smb_lookup(struct inode *, struct dentry *); +static struct dentry *smb_lookup(struct inode *, struct dentry *, struct nameidata *); static int smb_create(struct inode *, struct dentry *, int); static int smb_mkdir(struct inode *, struct dentry *, int); static int smb_rmdir(struct inode *, struct dentry *); @@ -268,7 +268,7 @@ smb_dir_open(struct inode *dir, struct file *file) /* * Dentry operations routines */ -static int smb_lookup_validate(struct dentry *, int); +static int smb_lookup_validate(struct dentry *, struct nameidata *); static int smb_hash_dentry(struct dentry *, struct qstr *); static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); static int smb_delete_dentry(struct dentry *); @@ -292,7 +292,7 @@ static struct dentry_operations smbfs_dentry_operations_case = * This is the callback when the dcache has a lookup hit. 
*/ static int -smb_lookup_validate(struct dentry * dentry, int flags) +smb_lookup_validate(struct dentry * dentry, struct nameidata *nd) { struct smb_sb_info *server = server_from_dentry(dentry); struct inode * inode = dentry->d_inode; @@ -420,7 +420,7 @@ smb_renew_times(struct dentry * dentry) } static struct dentry * -smb_lookup(struct inode *dir, struct dentry *dentry) +smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct smb_fattr finfo; struct inode *inode; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index f2988f107696..4c0eb5730065 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -64,7 +64,7 @@ struct dentry_operations sysv_dentry_operations = { .d_hash = sysv_hash, }; -static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry) +static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index b5be4880deac..7881ffbbd82e 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -289,6 +289,7 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, * PRE-CONDITIONS * dir Pointer to inode of parent directory. * dentry Pointer to dentry to complete. + * nd Pointer to lookup nameidata * * POST-CONDITIONS * Zero on success. 
@@ -299,7 +300,7 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, */ static struct dentry * -udf_lookup(struct inode *dir, struct dentry *dentry) +udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = NULL; struct fileIdentDesc cfi, *fi; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 24c6c5d2938d..55496ec96e56 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -62,7 +62,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c index befb7545f53c..775f02021128 100644 --- a/fs/umsdos/dir.c +++ b/fs/umsdos/dir.c @@ -30,7 +30,7 @@ extern struct inode *pseudo_root; */ /* nothing for now ... */ -static int umsdos_dentry_validate(struct dentry *dentry, int flags) +static int umsdos_dentry_validate(struct dentry *dentry, struct nameidata *nd) { return 1; } @@ -564,7 +564,7 @@ out_remove: * Called by VFS; should fill dentry->d_inode via d_add. 
*/ -struct dentry *UMSDOS_lookup (struct inode *dir, struct dentry *dentry) +struct dentry *UMSDOS_lookup (struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *ret; diff --git a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c index d4ac89d1e668..2f32539b1a37 100644 --- a/fs/umsdos/rdir.c +++ b/fs/umsdos/rdir.c @@ -101,7 +101,7 @@ struct dentry *umsdos_rlookup_x ( struct inode *dir, struct dentry *dentry, int goto out; } - ret = msdos_lookup (dir, dentry); + ret = msdos_lookup (dir, dentry, NULL); if (ret) { printk(KERN_WARNING "umsdos_rlookup_x: %s/%s failed, ret=%ld\n", @@ -129,7 +129,7 @@ out: } -struct dentry *UMSDOS_rlookup ( struct inode *dir, struct dentry *dentry) +struct dentry *UMSDOS_rlookup ( struct inode *dir, struct dentry *dentry, struct nameidata *nd) { return umsdos_rlookup_x (dir, dentry, 0); } diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index 1f83a9d77e8a..04f6754fe235 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -45,7 +45,7 @@ static int vfat_hashi(struct dentry *parent, struct qstr *qstr); static int vfat_hash(struct dentry *parent, struct qstr *qstr); static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b); -static int vfat_revalidate(struct dentry *dentry, int); +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd); static struct dentry_operations vfat_dentry_ops[4] = { { @@ -68,7 +68,7 @@ static struct dentry_operations vfat_dentry_ops[4] = { } }; -static int vfat_revalidate(struct dentry *dentry, int flags) +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) { PRINTK1(("vfat_revalidate: %s\n", dentry->d_name.name)); spin_lock(&dcache_lock); @@ -860,7 +860,7 @@ static int vfat_find(struct inode *dir,struct qstr* qname, return res ? 
res : -ENOENT; } -struct dentry *vfat_lookup(struct inode *dir,struct dentry *dentry) +struct dentry *vfat_lookup(struct inode *dir,struct dentry *dentry, struct nameidata *nd) { int res; struct vfat_slot_info sinfo; diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c index 14d393eb6d2e..c4ff85065f41 100644 --- a/fs/xfs/linux/xfs_iops.c +++ b/fs/xfs/linux/xfs_iops.c @@ -192,7 +192,8 @@ linvfs_mkdir( STATIC struct dentry * linvfs_lookup( struct inode *dir, - struct dentry *dentry) + struct dentry *dentry, + struct nameidata *nd) { struct inode *ip = NULL; vnode_t *vp, *cvp = NULL; diff --git a/include/linux/affs_fs.h b/include/linux/affs_fs.h index 47ed05c8b744..837fe37aba93 100644 --- a/include/linux/affs_fs.h +++ b/include/linux/affs_fs.h @@ -41,7 +41,7 @@ extern int affs_init_bitmap(struct super_block *sb); /* namei.c */ extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); -extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry); +extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); extern int affs_unlink(struct inode *dir, struct dentry *dentry); extern int affs_create(struct inode *dir, struct dentry *dentry, int mode); extern int affs_mkdir(struct inode *dir, struct dentry *dentry, int mode); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1442779bd893..a25d9f0443a4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -10,6 +10,7 @@ #include #include +struct nameidata; struct vfsmount; /* @@ -106,7 +107,7 @@ struct dentry { #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) struct dentry_operations { - int (*d_revalidate)(struct dentry *, int); + int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash) (struct dentry *, struct qstr *); int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); int (*d_delete)(struct dentry *); diff --git a/include/linux/efs_fs.h 
b/include/linux/efs_fs.h index c78e9c2a7b3a..1640eb875d4e 100644 --- a/include/linux/efs_fs.h +++ b/include/linux/efs_fs.h @@ -46,7 +46,7 @@ extern int efs_statfs(struct super_block *, struct kstatfs *); extern void efs_read_inode(struct inode *); extern efs_block_t efs_map_block(struct inode *, efs_block_t); -extern struct dentry *efs_lookup(struct inode *, struct dentry *); +extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int efs_bmap(struct inode *, int); #endif /* __EFS_FS_H__ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index c3bda88631bc..3ddf4c4edfb7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -731,7 +731,7 @@ struct file_operations { struct inode_operations { int (*create) (struct inode *,struct dentry *,int); - struct dentry * (*lookup) (struct inode *,struct dentry *); + struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); @@ -1291,7 +1291,7 @@ extern int simple_prepare_write(struct file *file, struct page *page, extern int simple_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to); -extern struct dentry *simple_lookup(struct inode *, struct dentry *); +extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern struct file_operations simple_dir_operations; extern struct inode_operations simple_dir_inode_operations; diff --git a/include/linux/iso_fs.h b/include/linux/iso_fs.h index 4763b595287a..223f161da018 100644 --- a/include/linux/iso_fs.h +++ b/include/linux/iso_fs.h @@ -227,7 +227,7 @@ extern int isofs_name_translate(struct iso_directory_record *, char *, struct in int get_joliet_filename(struct iso_directory_record *, unsigned char *, 
struct inode *); int get_acorn_filename(struct iso_directory_record *, char *, struct inode *); -extern struct dentry *isofs_lookup(struct inode *, struct dentry *); +extern struct dentry *isofs_lookup(struct inode *, struct dentry *, struct nameidata *); extern struct buffer_head *isofs_bread(struct inode *, sector_t); extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 4268ed112436..d1b00ab916ce 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -307,7 +307,7 @@ extern int fat_scan(struct inode *dir, const char *name, struct msdos_dir_entry **res_de, loff_t *i_pos); /* msdos/namei.c - these are for Umsdos */ -extern struct dentry *msdos_lookup(struct inode *dir, struct dentry *); +extern struct dentry *msdos_lookup(struct inode *dir, struct dentry *, struct nameidata *); extern int msdos_create(struct inode *dir, struct dentry *dentry, int mode); extern int msdos_rmdir(struct inode *dir, struct dentry *dentry); extern int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode); @@ -317,7 +317,7 @@ extern int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, extern int msdos_fill_super(struct super_block *sb, void *data, int silent); /* vfat/namei.c - these are for dmsdos */ -extern struct dentry *vfat_lookup(struct inode *dir, struct dentry *); +extern struct dentry *vfat_lookup(struct inode *dir, struct dentry *, struct nameidata *); extern int vfat_create(struct inode *dir, struct dentry *dentry, int mode); extern int vfat_rmdir(struct inode *dir, struct dentry *dentry); extern int vfat_unlink(struct inode *dir, struct dentry *dentry); diff --git a/include/linux/namei.h b/include/linux/namei.h index 16baf5cdb9c7..256ceac1fc69 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -5,12 +5,22 @@ struct vfsmount; +struct open_intent { + int flags; + int create_mode; +}; + struct nameidata { struct 
dentry *dentry; struct vfsmount *mnt; struct qstr last; unsigned int flags; int last_type; + + /* Intent data */ + union { + struct open_intent open; + } intent; }; /* @@ -31,7 +41,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_CONTINUE 4 #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 - +/* + * Intent data + */ +#define LOOKUP_OPEN (0x0100) +#define LOOKUP_CREATE (0x0200) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); #define user_path_walk(name,nd) \ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index c0144a1ba4cb..e2e54ee6186d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -92,7 +92,7 @@ extern struct proc_dir_entry *proc_root_kcore; extern void proc_root_init(void); extern void proc_misc_init(void); -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry); +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); struct dentry *proc_pid_unhash(struct task_struct *p); void proc_pid_flush(struct dentry *proc_dentry); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); @@ -115,7 +115,7 @@ extern int proc_match(int, const char *,struct proc_dir_entry *); * of the /proc/ subdirectories. 
*/ extern int proc_readdir(struct file *, void *, filldir_t); -extern struct dentry *proc_lookup(struct inode *, struct dentry *); +extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); extern struct file_operations proc_kcore_operations; extern struct file_operations proc_kmsg_operations; diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 5710620989d5..2aa7a7ca3d54 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -110,7 +110,7 @@ struct qnx4_inode_info { struct inode vfs_inode; }; -extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry); +extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); extern unsigned long qnx4_count_free_blocks(struct super_block *sb); extern unsigned long qnx4_block_map(struct inode *inode, long iblock); diff --git a/include/linux/umsdos_fs.p b/include/linux/umsdos_fs.p index 7034b7eb6b16..1c284c5a7aec 100644 --- a/include/linux/umsdos_fs.p +++ b/include/linux/umsdos_fs.p @@ -10,7 +10,7 @@ char * umsdos_d_path(struct dentry *, char *, int); void umsdos_lookup_patch_new(struct dentry *, struct umsdos_info *); int umsdos_is_pseudodos (struct inode *dir, struct dentry *dentry); struct dentry *umsdos_lookup_x ( struct inode *dir, struct dentry *dentry, int nopseudo); -struct dentry *UMSDOS_lookup(struct inode *, struct dentry *); +struct dentry *UMSDOS_lookup(struct inode *, struct dentry *, struct nameidata *); struct dentry *umsdos_lookup_dentry(struct dentry *, char *, int, int); struct dentry *umsdos_covered(struct dentry *, char *, int); @@ -92,7 +92,7 @@ int UMSDOS_rename (struct inode *old_dir, /* rdir.c 22/03/95 03.31.42 */ struct dentry *umsdos_rlookup_x (struct inode *dir, struct dentry *dentry, int nopseudo); -struct dentry *UMSDOS_rlookup (struct inode *dir, struct dentry *dentry); +struct dentry *UMSDOS_rlookup (struct inode *dir, struct dentry *dentry, struct nameidata *nd); static inline 
struct umsdos_inode_info *UMSDOS_I(struct inode *inode) { -- cgit v1.2.3 From 675b5da0145fb4b54c4d9f6ea0106df844e4f75d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Jul 2003 22:06:21 -0700 Subject: [PATCH] Pass 'nameidata' to ->create() - Make the VFS pass the struct nameidata as an optional argument to the create inode operation. - Patch vfs_create() to take a struct nameidata as an optional argument. --- fs/affs/namei.c | 2 +- fs/bfs/dir.c | 3 ++- fs/cifs/cifsfs.h | 2 +- fs/cifs/dir.c | 3 ++- fs/coda/dir.c | 4 ++-- fs/ext2/namei.c | 2 +- fs/ext3/namei.c | 3 ++- fs/hfs/dir.c | 2 +- fs/hfs/dir_dbl.c | 6 +++--- fs/hpfs/hpfs_fn.h | 2 +- fs/hpfs/namei.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/intermezzo/dir.c | 3 ++- fs/intermezzo/vfs.c | 2 +- fs/jffs/inode-v23.c | 3 ++- fs/jffs2/dir.c | 5 +++-- fs/jfs/namei.c | 4 +++- fs/minix/namei.c | 3 ++- fs/msdos/namei.c | 3 ++- fs/namei.c | 9 +++++---- fs/ncpfs/dir.c | 5 +++-- fs/nfs/dir.c | 5 +++-- fs/nfsd/vfs.c | 4 ++-- fs/openpromfs/inode.c | 5 +++-- fs/qnx4/namei.c | 3 ++- fs/ramfs/inode.c | 2 +- fs/reiserfs/namei.c | 3 ++- fs/smbfs/dir.c | 5 +++-- fs/sysv/namei.c | 2 +- fs/udf/namei.c | 2 +- fs/ufs/namei.c | 3 ++- fs/umsdos/emd.c | 2 +- fs/umsdos/namei.c | 4 ++-- fs/vfat/namei.c | 3 ++- fs/xfs/linux/xfs_iops.c | 3 ++- include/linux/affs_fs.h | 2 +- include/linux/fs.h | 4 ++-- include/linux/hfs_fs.h | 2 +- include/linux/msdos_fs.h | 4 ++-- include/linux/qnx4_fs.h | 3 +-- mm/shmem.c | 3 ++- 41 files changed, 77 insertions(+), 57 deletions(-) diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 55beff12444f..f2cbba3b7578 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -256,7 +256,7 @@ affs_unlink(struct inode *dir, struct dentry *dentry) } int -affs_create(struct inode *dir, struct dentry *dentry, int mode) +affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct inode *inode; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 
d1f665826065..7e5b4781eb25 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -78,7 +78,8 @@ struct file_operations bfs_dir_operations = { extern void dump_imap(const char *, struct super_block *); -static int bfs_create(struct inode * dir, struct dentry * dentry, int mode) +static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { int err; struct inode * inode; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 92aef944dcab..9c493d50c3fe 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -46,7 +46,7 @@ extern void cifs_delete_inode(struct inode *); /* Functions related to inodes */ extern struct inode_operations cifs_dir_inode_ops; -extern int cifs_create(struct inode *, struct dentry *, int); +extern int cifs_create(struct inode *, struct dentry *, int, struct nameidata *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *, struct dentry *); extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index b8b546eb8489..69c4b70e6b46 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -119,7 +119,8 @@ build_wildcard_path_from_dentry(struct dentry *direntry) /* Inode operations in similar order to how they appear in the Linux file fs.h */ int -cifs_create(struct inode *inode, struct dentry *direntry, int mode) +cifs_create(struct inode *inode, struct dentry *direntry, int mode, + struct nameidata *nd) { int rc = -ENOENT; int xid; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 030977f42952..8b3627e0d0e2 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -28,7 +28,7 @@ #include /* dir inode-ops */ -static int coda_create(struct inode *dir, struct dentry *new, int mode); +static int coda_create(struct inode *dir, struct dentry *new, int mode, struct nameidata *nd); static int coda_mknod(struct inode *dir, struct dentry *new, int mode, dev_t rdev); static struct dentry *coda_lookup(struct 
inode *dir, struct dentry *target, struct nameidata *nd); static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, @@ -190,7 +190,7 @@ static inline void coda_dir_changed(struct inode *dir, int link) } /* creation routines: create, mknod, mkdir, link, symlink */ -static int coda_create(struct inode *dir, struct dentry *de, int mode) +static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd) { int error=0; const char *name=de->d_name.name; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 9b9b713c8472..52fb0eb666bf 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -120,7 +120,7 @@ struct dentry *ext2_get_parent(struct dentry *child) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext2_create (struct inode * dir, struct dentry * dentry, int mode) +static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { struct inode * inode = ext2_new_inode (dir, mode); int err = PTR_ERR(inode); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index cf521814314a..74e53bcc480e 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1623,7 +1623,8 @@ static int ext3_add_nondir(handle_t *handle, * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode) +static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { handle_t *handle; struct inode * inode; diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index fe696c097d27..40df8a2b116c 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -163,7 +163,7 @@ static inline void mark_inodes_deleted(struct hfs_cat_entry *entry, * a directory and return a corresponding inode, given the inode for * the directory and the name (and its length) of the new file. 
*/ -int hfs_create(struct inode * dir, struct dentry *dentry, int mode) +int hfs_create(struct inode * dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct hfs_cat_entry *entry = HFS_I(dir)->entry; struct hfs_cat_entry *new; diff --git a/fs/hfs/dir_dbl.c b/fs/hfs/dir_dbl.c index 9ccdc5afa5d5..ee2ccef70fe7 100644 --- a/fs/hfs/dir_dbl.c +++ b/fs/hfs/dir_dbl.c @@ -26,7 +26,7 @@ static struct dentry *dbl_lookup(struct inode *, struct dentry *, struct nameidata *); static int dbl_readdir(struct file *, void *, filldir_t); -static int dbl_create(struct inode *, struct dentry *, int); +static int dbl_create(struct inode *, struct dentry *, int, struct nameidata *); static int dbl_mkdir(struct inode *, struct dentry *, int); static int dbl_unlink(struct inode *, struct dentry *); static int dbl_rmdir(struct inode *, struct dentry *); @@ -272,7 +272,7 @@ out: * the directory and the name (and its length) of the new file. */ static int dbl_create(struct inode * dir, struct dentry *dentry, - int mode) + int mode, struct nameidata *nd) { int error; @@ -280,7 +280,7 @@ static int dbl_create(struct inode * dir, struct dentry *dentry, if (is_hdr(dir, dentry->d_name.name, dentry->d_name.len)) { error = -EEXIST; } else { - error = hfs_create(dir, dentry, mode); + error = hfs_create(dir, dentry, mode, nd); } unlock_kernel(); return error; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 2c2565358d49..a4dc5bab6efd 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -285,7 +285,7 @@ void hpfs_decide_conv(struct inode *, unsigned char *, unsigned); /* namei.c */ int hpfs_mkdir(struct inode *, struct dentry *, int); -int hpfs_create(struct inode *, struct dentry *, int); +int hpfs_create(struct inode *, struct dentry *, int, struct nameidata *); int hpfs_mknod(struct inode *, struct dentry *, int, dev_t); int hpfs_symlink(struct inode *, struct dentry *, const char *); int hpfs_unlink(struct inode *, struct dentry *); diff --git a/fs/hpfs/namei.c 
b/fs/hpfs/namei.c index 8540f23659a0..128647db8ffc 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -106,7 +106,7 @@ bail: return -ENOSPC; } -int hpfs_create(struct inode *dir, struct dentry *dentry, int mode) +int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { const char *name = dentry->d_name.name; unsigned len = dentry->d_name.len; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index f0d2a2c65170..5888e05f81bf 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -462,7 +462,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) return retval; } -static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode) +static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); } diff --git a/fs/intermezzo/dir.c b/fs/intermezzo/dir.c index e7b22dd30a16..61cd7b4d54ea 100644 --- a/fs/intermezzo/dir.c +++ b/fs/intermezzo/dir.c @@ -412,7 +412,8 @@ int presto_prep(struct dentry *dentry, struct presto_cache **cache, return 0; } -static int presto_create(struct inode * dir, struct dentry * dentry, int mode) +static int presto_create(struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { int error; struct presto_cache *cache; diff --git a/fs/intermezzo/vfs.c b/fs/intermezzo/vfs.c index 5dd78cfed581..c3e124c6777f 100644 --- a/fs/intermezzo/vfs.c +++ b/fs/intermezzo/vfs.c @@ -598,7 +598,7 @@ int presto_do_create(struct presto_file_set *fset, struct dentry *dir, } DQUOT_INIT(dir->d_inode); lock_kernel(); - error = iops->create(dir->d_inode, dentry, mode); + error = iops->create(dir->d_inode, dentry, mode, NULL); if (error) { EXIT; goto exit_lock; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 141fadbf8438..94d3560caeae 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1273,7 +1273,8 @@ jffs_symlink(struct inode *dir, struct 
dentry *dentry, const char *symname) * with d_instantiate(). */ static int -jffs_create(struct inode *dir, struct dentry *dentry, int mode) +jffs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { struct jffs_raw_inode raw_inode; struct jffs_control *c; diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 65dd67235f61..9a2df58cb486 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -32,7 +32,7 @@ typedef dev_t mknod_arg_t; static int jffs2_readdir (struct file *, void *, filldir_t); -static int jffs2_create (struct inode *,struct dentry *,int); +static int jffs2_create (struct inode *,struct dentry *,int, struct nameidata *); static struct dentry *jffs2_lookup (struct inode *,struct dentry *, struct nameidata *); static int jffs2_link (struct dentry *,struct inode *,struct dentry *); static int jffs2_unlink (struct inode *,struct dentry *); @@ -175,7 +175,8 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) /***********************************************************************/ -static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode) +static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, + struct nameidata *nd) { struct jffs2_raw_inode *ri; struct jffs2_inode_info *f, *dir_f; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index b4aa9941a51d..3bf710dd0901 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -54,11 +54,13 @@ s64 commitZeroLink(tid_t, struct inode *); * PARAMETER: dip - parent directory vnode * dentry - dentry of new file * mode - create mode (rwxrwxrwx). 
+ * nd- nd struct * * RETURN: Errors from subroutines * */ -int jfs_create(struct inode *dip, struct dentry *dentry, int mode) +int jfs_create(struct inode *dip, struct dentry *dentry, int mode, + struct nameidata *nd) { int rc = 0; tid_t tid; /* transaction id */ diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 007fb7786236..2b9e6c64d25a 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -89,7 +89,8 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_ return error; } -static int minix_create(struct inode * dir, struct dentry *dentry, int mode) +static int minix_create(struct inode * dir, struct dentry *dentry, int mode, + struct nameidata *nd) { return minix_mknod(dir, dentry, mode, 0); } diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index f0651cd1b996..19c047776ecd 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -261,7 +261,8 @@ static int msdos_add_entry(struct inode *dir, const char *name, */ /***** Create a file */ -int msdos_create(struct inode *dir,struct dentry *dentry,int mode) +int msdos_create(struct inode *dir,struct dentry *dentry,int mode, + struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct buffer_head *bh; diff --git a/fs/namei.c b/fs/namei.c index a04cf1aaceb2..ae67748c2fc8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1105,7 +1105,8 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) } } -int vfs_create(struct inode *dir, struct dentry *dentry, int mode) +int vfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { int error = may_create(dir, dentry); @@ -1120,7 +1121,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode) if (error) return error; DQUOT_INIT(dir); - error = dir->i_op->create(dir, dentry, mode); + error = dir->i_op->create(dir, dentry, mode, nd); if (!error) { inode_dir_notify(dir, DN_CREATE); security_inode_post_create(dir, dentry, mode); @@ -1277,7 +1278,7 @@ do_last: if (!dentry->d_inode) { if 
(!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; - error = vfs_create(dir->d_inode, dentry, mode); + error = vfs_create(dir->d_inode, dentry, mode, nd); up(&dir->d_inode->i_sem); dput(nd->dentry); nd->dentry = dentry; @@ -1445,7 +1446,7 @@ asmlinkage long sys_mknod(const char __user * filename, int mode, dev_t dev) if (!IS_ERR(dentry)) { switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(nd.dentry->d_inode,dentry,mode); + error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index d695f6db5baa..f10460e559a5 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -34,7 +34,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t, static int ncp_readdir(struct file *, void *, filldir_t); -static int ncp_create(struct inode *, struct dentry *, int); +static int ncp_create(struct inode *, struct dentry *, int, struct nameidata *); static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *); static int ncp_unlink(struct inode *, struct dentry *); static int ncp_mkdir(struct inode *, struct dentry *, int); @@ -942,7 +942,8 @@ out: return error; } -static int ncp_create(struct inode *dir, struct dentry *dentry, int mode) +static int ncp_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { return ncp_create_new(dir, dentry, mode, 0, 0); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index abf189a02e50..c1bd1794de60 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -40,7 +40,7 @@ static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); static int nfs_cached_lookup(struct inode *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); -static int nfs_create(struct inode *, struct dentry *, int); +static int nfs_create(struct inode *, struct 
dentry *, int, struct nameidata *); static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_rmdir(struct inode *, struct dentry *); static int nfs_unlink(struct inode *, struct dentry *); @@ -787,7 +787,8 @@ out_err: * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -static int nfs_create(struct inode *dir, struct dentry *dentry, int mode) +static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { struct iattr attr; struct nfs_fattr fattr; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8759cb1076ad..29114b798e56 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -924,7 +924,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_perm; switch (type) { case S_IFREG: - err = vfs_create(dirp, dchild, iap->ia_mode); + err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: err = vfs_mkdir(dirp, dchild, iap->ia_mode); @@ -1067,7 +1067,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - err = vfs_create(dirp, dchild, iap->ia_mode); + err = vfs_create(dirp, dchild, iap->ia_mode, NULL); if (err < 0) goto out_nfserr; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 3c11c87e2f22..c0df469c9dc6 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -59,7 +59,7 @@ static char *alias_names [ALIASES_NNODES]; #define NODE2INO(node) (node + OPENPROM_FIRST_INO) #define NODEP2INO(no) (no + OPENPROM_FIRST_INO + last_node) -static int openpromfs_create (struct inode *, struct dentry *, int); +static int openpromfs_create (struct inode *, struct dentry *, int, struct nameidata *); static int openpromfs_readdir(struct file *, void *, filldir_t); static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry, struct nameidata *nd); static int openpromfs_unlink (struct inode *, struct dentry *dentry); @@ -854,7 +854,8 @@ out: return 0; } -static int openpromfs_create 
(struct inode *dir, struct dentry *dentry, int mode) +static int openpromfs_create (struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { char *p; struct inode *inode; diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 12e423ae6de0..36e903d89777 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -142,7 +142,8 @@ out: } #ifdef CONFIG_QNX4FS_RW -int qnx4_create(struct inode *dir, struct dentry *dentry, int mode) +int qnx4_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { QNX4DEBUG(("qnx4: qnx4_create\n")); if (dir == NULL) { diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index bd0a6765ec6d..362ee3135e69 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -111,7 +111,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) return retval; } -static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode) +static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 18a3353274c4..93151fb285c0 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -558,7 +558,8 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode) { return 0 ; } -static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode) +static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode, + struct nameidata *nd) { int retval; struct inode * inode; diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 081402f119dc..f0b62740ae89 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -25,7 +25,7 @@ static int smb_readdir(struct file *, void *, filldir_t); static int smb_dir_open(struct inode *, struct file *); static struct dentry *smb_lookup(struct inode *, struct dentry *, struct nameidata *); -static int smb_create(struct inode *, struct dentry *, int); +static int smb_create(struct inode 
*, struct dentry *, int, struct nameidata *); static int smb_mkdir(struct inode *, struct dentry *, int); static int smb_rmdir(struct inode *, struct dentry *); static int smb_unlink(struct inode *, struct dentry *); @@ -510,7 +510,8 @@ out_close: /* N.B. How should the mode argument be used? */ static int -smb_create(struct inode *dir, struct dentry *dentry, int mode) +smb_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { struct smb_sb_info *server = server_from_dentry(dentry); __u16 fileid; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 4c0eb5730065..cbf08f04d07c 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -96,7 +96,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_ return err; } -static int sysv_create(struct inode * dir, struct dentry * dentry, int mode) +static int sysv_create(struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { return sysv_mknod(dir, dentry, mode, 0); } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 7881ffbbd82e..d2ac88dae447 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -621,7 +621,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); } -static int udf_create(struct inode *dir, struct dentry *dentry, int mode) +static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct udf_fileident_bh fibh; struct inode *inode; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 55496ec96e56..82f391298c48 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -92,7 +92,8 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru * If the create succeeds, we fill in the inode information * with d_instantiate(). 
*/ -static int ufs_create (struct inode * dir, struct dentry * dentry, int mode) +static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { struct inode * inode = ufs_new_inode(dir, mode); int err = PTR_ERR(inode); diff --git a/fs/umsdos/emd.c b/fs/umsdos/emd.c index 06190391d47e..7fadb55b7e44 100644 --- a/fs/umsdos/emd.c +++ b/fs/umsdos/emd.c @@ -105,7 +105,7 @@ int umsdos_make_emd(struct dentry *parent) Printk(("umsdos_make_emd: creating EMD %s/%s\n", parent->d_name.name, demd->d_name.name)); - err = msdos_create(parent->d_inode, demd, S_IFREG | 0777); + err = msdos_create(parent->d_inode, demd, S_IFREG | 0777, NULL); if (err) { printk (KERN_WARNING "umsdos_make_emd: create %s/%s failed, err=%d\n", diff --git a/fs/umsdos/namei.c b/fs/umsdos/namei.c index 3d89ba970a06..2d8a64af1aed 100644 --- a/fs/umsdos/namei.c +++ b/fs/umsdos/namei.c @@ -274,7 +274,7 @@ static int umsdos_create_any (struct inode *dir, struct dentry *dentry, if (fake->d_inode) goto out_remove_dput; - ret = msdos_create (dir, fake, S_IFREG | 0777); + ret = msdos_create (dir, fake, S_IFREG | 0777, NULL); if (ret) goto out_remove_dput; @@ -311,7 +311,7 @@ out_remove: * * Return the status of the operation. 0 mean success. 
*/ -int UMSDOS_create (struct inode *dir, struct dentry *dentry, int mode) +int UMSDOS_create (struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { return umsdos_create_any (dir, dentry, mode, 0, 0); } diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index 04f6754fe235..12c067c8355d 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -912,7 +912,8 @@ error: return dentry; } -int vfat_create(struct inode *dir,struct dentry* dentry,int mode) +int vfat_create(struct inode *dir,struct dentry* dentry,int mode, + struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct inode *inode = NULL; diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c index c4ff85065f41..e94d003ee0a4 100644 --- a/fs/xfs/linux/xfs_iops.c +++ b/fs/xfs/linux/xfs_iops.c @@ -175,7 +175,8 @@ STATIC int linvfs_create( struct inode *dir, struct dentry *dentry, - int mode) + int mode, + struct nameidata *nd) { return linvfs_mknod(dir, dentry, mode, 0); } diff --git a/include/linux/affs_fs.h b/include/linux/affs_fs.h index 837fe37aba93..c849309b1131 100644 --- a/include/linux/affs_fs.h +++ b/include/linux/affs_fs.h @@ -43,7 +43,7 @@ extern int affs_init_bitmap(struct super_block *sb); extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); extern int affs_unlink(struct inode *dir, struct dentry *dentry); -extern int affs_create(struct inode *dir, struct dentry *dentry, int mode); +extern int affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *); extern int affs_mkdir(struct inode *dir, struct dentry *dentry, int mode); extern int affs_rmdir(struct inode *dir, struct dentry *dentry); extern int affs_link(struct dentry *olddentry, struct inode *dir, diff --git a/include/linux/fs.h b/include/linux/fs.h index 3ddf4c4edfb7..66cf193c6e64 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,7 +639,7 @@ 
static inline void unlock_super(struct super_block * sb) /* * VFS helper functions.. */ -extern int vfs_create(struct inode *, struct dentry *, int); +extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); extern int vfs_mkdir(struct inode *, struct dentry *, int); extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); @@ -730,7 +730,7 @@ struct file_operations { }; struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); diff --git a/include/linux/hfs_fs.h b/include/linux/hfs_fs.h index 40971822e3e4..7bebd13150dd 100644 --- a/include/linux/hfs_fs.h +++ b/include/linux/hfs_fs.h @@ -234,7 +234,7 @@ extern struct hfs_cat_entry *hfs_cat_get(struct hfs_mdb *, const struct hfs_cat_key *); /* dir.c */ -extern int hfs_create(struct inode *, struct dentry *, int); +extern int hfs_create(struct inode *, struct dentry *, int, struct nameidata *); extern int hfs_mkdir(struct inode *, struct dentry *, int); extern int hfs_unlink(struct inode *, struct dentry *); extern int hfs_rmdir(struct inode *, struct dentry *); diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index d1b00ab916ce..1ce9ba2f57b0 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -308,7 +308,7 @@ extern int fat_scan(struct inode *dir, const char *name, /* msdos/namei.c - these are for Umsdos */ extern struct dentry *msdos_lookup(struct inode *dir, struct dentry *, struct nameidata *); -extern int msdos_create(struct inode *dir, struct dentry *dentry, int mode); +extern int msdos_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *); extern int 
msdos_rmdir(struct inode *dir, struct dentry *dentry); extern int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode); extern int msdos_unlink(struct inode *dir, struct dentry *dentry); @@ -318,7 +318,7 @@ extern int msdos_fill_super(struct super_block *sb, void *data, int silent); /* vfat/namei.c - these are for dmsdos */ extern struct dentry *vfat_lookup(struct inode *dir, struct dentry *, struct nameidata *); -extern int vfat_create(struct inode *dir, struct dentry *dentry, int mode); +extern int vfat_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *); extern int vfat_rmdir(struct inode *dir, struct dentry *dentry); extern int vfat_unlink(struct inode *dir, struct dentry *dentry); extern int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode); diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 2aa7a7ca3d54..53233c8fb3ef 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -117,14 +117,13 @@ extern unsigned long qnx4_block_map(struct inode *inode, long iblock); extern struct buffer_head *qnx4_getblk(struct inode *, int, int); extern struct buffer_head *qnx4_bread(struct inode *, int, int); -extern int qnx4_create(struct inode *dir, struct dentry *dentry, int mode); extern struct inode_operations qnx4_file_inode_operations; extern struct inode_operations qnx4_dir_inode_operations; extern struct file_operations qnx4_file_operations; extern struct file_operations qnx4_dir_operations; extern int qnx4_is_free(struct super_block *sb, long block); extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); -extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode); +extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); extern void qnx4_truncate(struct inode *inode); extern void qnx4_free_inode(struct inode *inode); extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); diff --git a/mm/shmem.c 
b/mm/shmem.c index 1f4ed8fece45..e9d5042bc13b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1398,7 +1398,8 @@ static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode) return 0; } -static int shmem_create(struct inode *dir, struct dentry *dentry, int mode) +static int shmem_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { return shmem_mknod(dir, dentry, mode | S_IFREG, 0); } -- cgit v1.2.3 From a574f324dab607946682f60b9efdc1b3d810cf03 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Jul 2003 22:06:43 -0700 Subject: [PATCH] Pass 'nameidata' to ->permission() - Make the VFS pass the struct nameidata as an optional parameter to the permission() inode operation. - Patch may_create()/may_open() so it passes the struct nameidata from vfs_create()/open_namei() as an argument to permission(). - Add an intent flag for the sys_access() function. --- drivers/block/floppy.c | 2 +- fs/cifs/cifsfs.c | 2 +- fs/coda/dir.c | 2 +- fs/coda/pioctl.c | 6 ++++-- fs/exec.c | 6 +++--- fs/ext2/acl.c | 2 +- fs/ext2/acl.h | 2 +- fs/ext2/xattr_user.c | 4 ++-- fs/ext3/acl.c | 2 +- fs/ext3/acl.h | 2 +- fs/ext3/xattr_user.c | 4 ++-- fs/hpfs/namei.c | 2 +- fs/intermezzo/dir.c | 10 +++++----- fs/intermezzo/file.c | 2 +- fs/intermezzo/vfs.c | 6 +++--- fs/jfs/acl.c | 2 +- fs/jfs/jfs_acl.h | 2 +- fs/jfs/xattr.c | 4 ++-- fs/namei.c | 34 ++++++++++++++++++---------------- fs/namespace.c | 2 +- fs/ncpfs/ioctl.c | 22 +++++++++++----------- fs/nfs/dir.c | 2 +- fs/nfsd/nfsfh.c | 2 +- fs/nfsd/vfs.c | 4 ++-- fs/open.c | 16 ++++++++-------- fs/proc/base.c | 2 +- fs/smbfs/file.c | 2 +- fs/udf/file.c | 2 +- fs/xfs/linux/xfs_iops.c | 3 ++- include/linux/coda_linux.h | 2 +- include/linux/fs.h | 4 ++-- include/linux/namei.h | 1 + include/linux/nfs_fs.h | 2 +- kernel/sysctl.c | 4 ++-- net/unix/af_unix.c | 2 +- 35 files changed, 87 insertions(+), 81 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index ca2332522f9a..40ff4c76558b 
100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3767,7 +3767,7 @@ static int floppy_open(struct inode * inode, struct file * filp) * Needed so that programs such as fdrawcmd still can work on write * protected disks */ if ((filp->f_mode & 2) || - (inode->i_sb && (permission(inode,2) == 0))) + (inode->i_sb && (permission(inode,2, NULL) == 0))) filp->private_data = (void*) 8; if (UFDCS->rawcmd == 1) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 1b3c43949f33..2201681095ca 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -178,7 +178,7 @@ cifs_statfs(struct super_block *sb, struct kstatfs *buf) return 0; /* always return success? what if volume is no longer available? */ } -static int cifs_permission(struct inode * inode, int mask) +static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) { /* the server does permission checks, we do not need to do it here */ return 0; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 8b3627e0d0e2..2917ab9f4976 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -147,7 +147,7 @@ exit: } -int coda_permission(struct inode *inode, int mask) +int coda_permission(struct inode *inode, int mask, struct nameidata *nd) { int error = 0; diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 67228f3c2122..e10ac76438c0 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -24,7 +24,8 @@ #include /* pioctl ops */ -static int coda_ioctl_permission(struct inode *inode, int mask); +static int coda_ioctl_permission(struct inode *inode, int mask, + struct nameidata *nd); static int coda_pioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long user_data); @@ -41,7 +42,8 @@ struct file_operations coda_ioctl_operations = { }; /* the coda pioctl inode ops */ -static int coda_ioctl_permission(struct inode *inode, int mask) +static int coda_ioctl_permission(struct inode *inode, int mask, + struct nameidata *nd) { return 0; } diff --git a/fs/exec.c b/fs/exec.c index 
68a64ee4b234..4f37deb79e00 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -126,7 +126,7 @@ asmlinkage long sys_uselib(const char __user * library) if (!S_ISREG(nd.dentry->d_inode->i_mode)) goto exit; - error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC); + error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd); if (error) goto exit; @@ -462,7 +462,7 @@ struct file *open_exec(const char *name) file = ERR_PTR(-EACCES); if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && S_ISREG(inode->i_mode)) { - int err = permission(inode, MAY_EXEC); + int err = permission(inode, MAY_EXEC, &nd); if (!err && !(inode->i_mode & 0111)) err = -EACCES; file = ERR_PTR(err); @@ -794,7 +794,7 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || - permission(bprm->file->f_dentry->d_inode,MAY_READ)) + permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL)) current->mm->dumpable = 0; /* An exec changes our domain. We are no longer part of the thread diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 9367f43f4acb..4db56bdd8fe5 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -309,7 +309,7 @@ check_capabilities: * BKL held [before 2.5.x] */ int -ext2_permission(struct inode *inode, int mask) +ext2_permission(struct inode *inode, int mask, struct nameidata *nd) { return __ext2_permission(inode, mask, 1); } diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 0cfbf4d1029b..2e0560130b63 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -59,7 +59,7 @@ static inline int ext2_acl_count(size_t size) #define EXT2_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext2_permission (struct inode *, int); +extern int ext2_permission (struct inode *, int, struct nameidata *); extern int ext2_permission_locked (struct inode *, int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 027beb89c7e0..fc0ec86f4928 100644 
--- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -47,7 +47,7 @@ ext2_xattr_user_get(struct inode *inode, const char *name, #ifdef CONFIG_EXT2_FS_POSIX_ACL error = ext2_permission_locked(inode, MAY_READ); #else - error = permission(inode, MAY_READ); + error = permission(inode, MAY_READ, NULL); #endif if (error) return error; @@ -71,7 +71,7 @@ ext2_xattr_user_set(struct inode *inode, const char *name, #ifdef CONFIG_EXT2_FS_POSIX_ACL error = ext2_permission_locked(inode, MAY_WRITE); #else - error = permission(inode, MAY_WRITE); + error = permission(inode, MAY_WRITE, NULL); #endif if (error) return error; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 9313430093c5..d29f14efb253 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -312,7 +312,7 @@ check_capabilities: * inode->i_sem: up */ int -ext3_permission(struct inode *inode, int mask) +ext3_permission(struct inode *inode, int mask, struct nameidata *nd) { return __ext3_permission(inode, mask, 1); } diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index e0962a6c24b2..6aaef97a5fc3 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -59,7 +59,7 @@ static inline int ext3_acl_count(size_t size) #define EXT3_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext3_permission (struct inode *, int); +extern int ext3_permission (struct inode *, int, struct nameidata *); extern int ext3_permission_locked (struct inode *, int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index b93a74ded763..b8c789e60fa0 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -49,7 +49,7 @@ ext3_xattr_user_get(struct inode *inode, const char *name, #ifdef CONFIG_EXT3_FS_POSIX_ACL error = ext3_permission_locked(inode, MAY_READ); #else - error = permission(inode, MAY_READ); + error = permission(inode, MAY_READ, NULL); #endif if (error) return error; @@ -73,7 +73,7 @@ ext3_xattr_user_set(struct inode *inode, const 
char *name, #ifdef CONFIG_EXT3_FS_POSIX_ACL error = ext3_permission_locked(inode, MAY_WRITE); #else - error = permission(inode, MAY_WRITE); + error = permission(inode, MAY_WRITE, NULL); #endif if (error) return error; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 128647db8ffc..866976557245 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -374,7 +374,7 @@ again: d_drop(dentry); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) > 1 || - permission(inode, MAY_WRITE) || + permission(inode, MAY_WRITE, NULL) || get_write_access(inode)) { spin_unlock(&dentry->d_lock); d_rehash(dentry); diff --git a/fs/intermezzo/dir.c b/fs/intermezzo/dir.c index 61cd7b4d54ea..7e646f9c2211 100644 --- a/fs/intermezzo/dir.c +++ b/fs/intermezzo/dir.c @@ -81,7 +81,7 @@ static inline void presto_unlock(struct inode *dir) /* * these are initialized in super.c */ -extern int presto_permission(struct inode *inode, int mask); +extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd); static int izo_authorized_uid = 0; int izo_dentry_is_ilookup(struct dentry *dentry, ino_t *id, @@ -830,7 +830,7 @@ int presto_rename(struct inode *old_dir, struct dentry *old_dentry, * appropriate permission function. Thus we do not worry here about ACLs * or EAs. -SHP */ -int presto_permission(struct inode *inode, int mask) +int presto_permission(struct inode *inode, int mask, struct nameidata *nd) { unsigned short mode = inode->i_mode; struct presto_cache *cache; @@ -852,11 +852,11 @@ int presto_permission(struct inode *inode, int mask) if ( S_ISREG(mode) && fiops && fiops->permission ) { EXIT; - return fiops->permission(inode, mask); + return fiops->permission(inode, mask, nd); } if ( S_ISDIR(mode) && diops && diops->permission ) { EXIT; - return diops->permission(inode, mask); + return diops->permission(inode, mask, nd); } } @@ -867,7 +867,7 @@ int presto_permission(struct inode *inode, int mask) * the VFS permission function. 
*/ inode->i_op->permission = NULL; - rc = permission(inode, mask); + rc = permission(inode, mask, nd); inode->i_op->permission = &presto_permission; EXIT; diff --git a/fs/intermezzo/file.c b/fs/intermezzo/file.c index 9f0b10422c4e..a1efcbfaa2c9 100644 --- a/fs/intermezzo/file.c +++ b/fs/intermezzo/file.c @@ -53,7 +53,7 @@ /* * these are initialized in super.c */ -extern int presto_permission(struct inode *inode, int mask); +extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd); static int presto_open_upcall(int minor, struct dentry *de) diff --git a/fs/intermezzo/vfs.c b/fs/intermezzo/vfs.c index c3e124c6777f..1cfa4c9a4b60 100644 --- a/fs/intermezzo/vfs.c +++ b/fs/intermezzo/vfs.c @@ -134,7 +134,7 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) int error; if (!victim->d_inode || victim->d_parent->d_inode != dir) return -ENOENT; - error = permission(dir,MAY_WRITE | MAY_EXEC); + error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); if (error) return error; if (IS_APPEND(dir)) @@ -158,7 +158,7 @@ static inline int may_create(struct inode *dir, struct dentry *child) { return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - return permission(dir,MAY_WRITE | MAY_EXEC); + return permission(dir,MAY_WRITE | MAY_EXEC, NULL); } #ifdef PRESTO_DEBUG @@ -1840,7 +1840,7 @@ int presto_rename_dir(struct presto_file_set *fset, struct dentry *old_parent, * we'll need to flip '..'. 
*/ if (new_dir != old_dir) { - error = permission(old_dentry->d_inode, MAY_WRITE); + error = permission(old_dentry->d_inode, MAY_WRITE, NULL); } if (error) return error; diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 758d370e6419..a83ab660a0b7 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -208,7 +208,7 @@ check_capabilities: return -EACCES; } -int jfs_permission(struct inode * inode, int mask) +int jfs_permission(struct inode * inode, int mask, struct nameidata *nd) { return __jfs_permission(inode, mask, 0); } diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 179a3893a945..cfb445231972 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -25,7 +25,7 @@ struct posix_acl *jfs_get_acl(struct inode *, int); int jfs_set_acl(struct inode *, int, struct posix_acl *); int jfs_permission_have_sem(struct inode *, int); -int jfs_permission(struct inode *, int); +int jfs_permission(struct inode *, int, struct nameidata *); int jfs_init_acl(struct inode *, struct inode *); int jfs_setattr(struct dentry *, struct iattr *); diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index a9c455de618b..4ae1b0ffaf05 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -731,7 +731,7 @@ static int can_set_xattr(struct inode *inode, const char *name, #ifdef CONFIG_JFS_POSIX_ACL return jfs_permission_have_sem(inode, MAY_WRITE); #else - return permission(inode, MAY_WRITE); + return permission(inode, MAY_WRITE, NULL); #endif } @@ -893,7 +893,7 @@ static int can_get_xattr(struct inode *inode, const char *name) else return jfs_permission_have_sem(inode, MAY_READ); #else - return permission(inode, MAY_READ); + return permission(inode, MAY_READ, NULL); #endif } diff --git a/fs/namei.c b/fs/namei.c index ae67748c2fc8..2fc6f11fe795 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -203,7 +203,7 @@ int vfs_permission(struct inode * inode, int mask) return -EACCES; } -int permission(struct inode * inode,int mask) +int permission(struct inode * inode,int mask, struct nameidata *nd) { int retval; int 
submask; @@ -212,7 +212,7 @@ int permission(struct inode * inode,int mask) submask = mask & ~MAY_APPEND; if (inode->i_op && inode->i_op->permission) - retval = inode->i_op->permission(inode, submask); + retval = inode->i_op->permission(inode, submask, nd); else retval = vfs_permission(inode, submask); if (retval) @@ -588,7 +588,7 @@ int link_path_walk(const char * name, struct nameidata *nd) err = exec_permission_lite(inode); if (err == -EAGAIN) { - err = permission(inode, MAY_EXEC); + err = permission(inode, MAY_EXEC, nd); } if (err) break; @@ -876,7 +876,7 @@ static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, st int err; inode = base->d_inode; - err = permission(inode, MAY_EXEC); + err = permission(inode, MAY_EXEC, nd); dentry = ERR_PTR(err); if (err) goto out; @@ -996,12 +996,12 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) +static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) { int error; if (!victim->d_inode || victim->d_parent->d_inode != dir) return -ENOENT; - error = permission(dir,MAY_WRITE | MAY_EXEC); + error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); if (error) return error; if (IS_APPEND(dir)) @@ -1031,12 +1031,14 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) * 3. We should have write and exec permissions on dir * 4. 
We can't do it if dir is immutable (done in permission()) */ -static inline int may_create(struct inode *dir, struct dentry *child) { +static inline int may_create(struct inode *dir, struct dentry *child, + struct nameidata *nd) +{ if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - return permission(dir,MAY_WRITE | MAY_EXEC); + return permission(dir,MAY_WRITE | MAY_EXEC, nd); } /* @@ -1108,7 +1110,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) int vfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { - int error = may_create(dir, dentry); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1144,7 +1146,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) return -EISDIR; - error = permission(inode, acc_mode); + error = permission(inode, acc_mode, nd); if (error) return error; @@ -1398,7 +1400,7 @@ fail: int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { - int error = may_create(dir, dentry); + int error = may_create(dir, dentry, NULL); if (error) return error; @@ -1469,7 +1471,7 @@ out: int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - int error = may_create(dir, dentry); + int error = may_create(dir, dentry, NULL); if (error) return error; @@ -1715,7 +1717,7 @@ slashes: int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) { - int error = may_create(dir, dentry); + int error = may_create(dir, dentry, NULL); if (error) return error; @@ -1777,7 +1779,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de if (!inode) return -ENOENT; - error = may_create(dir, new_dentry); + error = may_create(dir, new_dentry, NULL); if (error) return error; @@ -1898,7 +1900,7 @@ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, * we'll need to flip '..'. 
*/ if (new_dir != old_dir) { - error = permission(old_dentry->d_inode, MAY_WRITE); + error = permission(old_dentry->d_inode, MAY_WRITE, NULL); if (error) return error; } @@ -1976,7 +1978,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, return error; if (!new_dentry->d_inode) - error = may_create(new_dir, new_dentry); + error = may_create(new_dir, new_dentry, NULL); else error = may_delete(new_dir, new_dentry, is_dir); if (error) diff --git a/fs/namespace.c b/fs/namespace.c index 61e5ec891363..a31cd95801cb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -403,7 +403,7 @@ static int mount_is_safe(struct nameidata *nd) if (current->uid != nd->dentry->d_inode->i_uid) return -EPERM; } - if (permission(nd->dentry->d_inode, MAY_WRITE)) + if (permission(nd->dentry->d_inode, MAY_WRITE, nd)) return -EPERM; return 0; #endif diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 3497f67be924..fb3e550a9abc 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -40,7 +40,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, switch (cmd) { case NCP_IOC_NCPREQUEST: - if ((permission(inode, MAY_WRITE) != 0) + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; } @@ -99,7 +99,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, { struct ncp_fs_info info; - if ((permission(inode, MAY_WRITE) != 0) + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; } @@ -127,7 +127,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, { struct ncp_fs_info_v2 info2; - if ((permission(inode, MAY_WRITE) != 0) + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; } @@ -155,7 +155,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, { unsigned long tmp = server->m.mounted_uid; - if ( (permission(inode, MAY_READ) != 0) + if ( (permission(inode, MAY_READ, NULL) != 0) && (current->uid != 
server->m.mounted_uid)) { return -EACCES; @@ -169,7 +169,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, { struct ncp_setroot_ioctl sr; - if ( (permission(inode, MAY_READ) != 0) + if ( (permission(inode, MAY_READ, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -249,7 +249,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, #ifdef CONFIG_NCPFS_PACKET_SIGNING case NCP_IOC_SIGN_INIT: - if ((permission(inode, MAY_WRITE) != 0) + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -272,7 +272,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, return 0; case NCP_IOC_SIGN_WANTED: - if ( (permission(inode, MAY_READ) != 0) + if ( (permission(inode, MAY_READ, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -285,7 +285,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, { int newstate; - if ( (permission(inode, MAY_WRITE) != 0) + if ( (permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -306,7 +306,7 @@ int ncp_ioctl(struct inode *inode, struct file *filp, #ifdef CONFIG_NCPFS_IOCTL_LOCKING case NCP_IOC_LOCKUNLOCK: - if ( (permission(inode, MAY_WRITE) != 0) + if ( (permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -608,7 +608,7 @@ outrel: } #endif /* CONFIG_NCPFS_NLS */ case NCP_IOC_SETDENTRYTTL: - if ((permission(inode, MAY_WRITE) != 0) && + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) return -EACCES; { @@ -637,7 +637,7 @@ outrel: /* NCP_IOC_GETMOUNTUID may be same as NCP_IOC_GETMOUNTUID2, so we have this out of switch */ if (cmd == NCP_IOC_GETMOUNTUID) { - if ((permission(inode, MAY_READ) != 0) + if ((permission(inode, MAY_READ, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c1bd1794de60..93585f0099fb 
100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1240,7 +1240,7 @@ out: } int -nfs_permission(struct inode *inode, int mask) +nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; struct rpc_cred *cred; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 6d023b27ff6b..32a50f1bed11 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -56,7 +56,7 @@ int nfsd_acceptable(void *expv, struct dentry *dentry) /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); - err = permission(parent->d_inode, S_IXOTH); + err = permission(parent->d_inode, S_IXOTH, NULL); if (err < 0) { dput(parent); break; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 29114b798e56..663f4839cc33 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1584,12 +1584,12 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) inode->i_uid == current->fsuid) return 0; - err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); + err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && acc == (MAY_READ | MAY_OWNER_OVERRIDE)) - err = permission(inode, MAY_EXEC); + err = permission(inode, MAY_EXEC, NULL); return err? 
nfserrno(err) : 0; } diff --git a/fs/open.c b/fs/open.c index 98ce4f7374ef..8a4197969f27 100644 --- a/fs/open.c +++ b/fs/open.c @@ -219,7 +219,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length) if (!S_ISREG(inode->i_mode)) goto dput_and_out; - error = permission(inode,MAY_WRITE); + error = permission(inode,MAY_WRITE,&nd); if (error) goto dput_and_out; @@ -365,7 +365,7 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; } else { if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) + (error = permission(inode,MAY_WRITE,&nd)) != 0) goto dput_and_out; } down(&inode->i_sem); @@ -410,7 +410,7 @@ long do_utimes(char __user * filename, struct timeval * times) newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; } else { if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) + (error = permission(inode,MAY_WRITE,&nd)) != 0) goto dput_and_out; } down(&inode->i_sem); @@ -467,9 +467,9 @@ asmlinkage long sys_access(const char __user * filename, int mode) else current->cap_effective = current->cap_permitted; - res = user_path_walk(filename, &nd); + res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); if (!res) { - res = permission(nd.dentry->d_inode, mode); + res = permission(nd.dentry->d_inode, mode, &nd); /* SuS v2 requires we report a read only fs too */ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) && !special_file(nd.dentry->d_inode->i_mode)) @@ -493,7 +493,7 @@ asmlinkage long sys_chdir(const char __user * filename) if (error) goto out; - error = permission(nd.dentry->d_inode,MAY_EXEC); + error = permission(nd.dentry->d_inode,MAY_EXEC,&nd); if (error) goto dput_and_out; @@ -526,7 +526,7 @@ asmlinkage long sys_fchdir(unsigned int fd) if (!S_ISDIR(inode->i_mode)) goto out_putf; - error = permission(inode, MAY_EXEC); + error = permission(inode, MAY_EXEC, NULL); if (!error) 
set_fs_pwd(current->fs, mnt, dentry); out_putf: @@ -544,7 +544,7 @@ asmlinkage long sys_chroot(const char __user * filename) if (error) goto out; - error = permission(nd.dentry->d_inode,MAY_EXEC); + error = permission(nd.dentry->d_inode,MAY_EXEC,&nd); if (error) goto dput_and_out; diff --git a/fs/proc/base.c b/fs/proc/base.c index 3d05ee25f8e9..2c8d50e98d48 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -334,7 +334,7 @@ out: goto exit; } -static int proc_permission(struct inode *inode, int mask) +static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) { if (vfs_permission(inode, mask) != 0) return -EACCES; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index a174775b2d13..6b25d7c89177 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -367,7 +367,7 @@ smb_file_release(struct inode *inode, struct file * file) * privileges, so we need our own check for this. */ static int -smb_file_permission(struct inode *inode, int mask) +smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) { int mode = inode->i_mode; int error = 0; diff --git a/fs/udf/file.c b/fs/udf/file.c index 9fd46aff63ae..b1cf9999e902 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -188,7 +188,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, { int result = -EINVAL; - if ( permission(inode, MAY_READ) != 0 ) + if ( permission(inode, MAY_READ, NULL) != 0 ) { udf_debug("no permission to access inode %lu\n", inode->i_ino); diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c index e94d003ee0a4..8a90e5495a3a 100644 --- a/fs/xfs/linux/xfs_iops.c +++ b/fs/xfs/linux/xfs_iops.c @@ -431,7 +431,8 @@ linvfs_follow_link( STATIC int linvfs_permission( struct inode *inode, - int mode) + int mode, + struct nameidata *nd) { vnode_t *vp = LINVFS_GET_VP(inode); int error; diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index b22d34fcfb6d..650a6f997f8a 100644 --- a/include/linux/coda_linux.h +++ 
b/include/linux/coda_linux.h @@ -38,7 +38,7 @@ extern struct file_operations coda_ioctl_operations; int coda_open(struct inode *i, struct file *f); int coda_flush(struct file *f); int coda_release(struct inode *i, struct file *f); -int coda_permission(struct inode *inode, int mask); +int coda_permission(struct inode *inode, int mask, struct nameidata *nd); int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 66cf193c6e64..7a5f305101c5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -743,7 +743,7 @@ struct inode_operations { int (*readlink) (struct dentry *, char __user *,int); int (*follow_link) (struct dentry *, struct nameidata *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); + int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -1121,7 +1121,7 @@ extern int do_remount_sb(struct super_block *sb, int flags, extern sector_t bmap(struct inode *, sector_t); extern int setattr_mask(unsigned int); extern int notify_change(struct dentry *, struct iattr *); -extern int permission(struct inode *, int); +extern int permission(struct inode *, int, struct nameidata *); extern int vfs_permission(struct inode *, int); extern int get_write_access(struct inode *); extern int deny_write_access(struct file *); diff --git a/include/linux/namei.h b/include/linux/namei.h index 256ceac1fc69..4117cd90a345 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -46,6 +46,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; */ #define LOOKUP_OPEN (0x0100) #define LOOKUP_CREATE (0x0200) +#define LOOKUP_ACCESS (0x0400) extern int 
FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); #define user_path_walk(name,nd) \ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3d7525998534..a6d594bb252c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -240,7 +240,7 @@ extern struct inode *nfs_fhget(struct dentry *, struct nfs_fh *, struct nfs_fattr *); extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -extern int nfs_permission(struct inode *, int); +extern int nfs_permission(struct inode *, int, struct nameidata *); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7f0cc00cfa4d..edebad7ddec4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -130,7 +130,7 @@ extern ctl_table random_table[]; static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); -static int proc_sys_permission(struct inode *, int); +static int proc_sys_permission(struct inode *, int, struct nameidata *); struct file_operations proc_sys_file_operations = { .read = proc_readsys, @@ -1177,7 +1177,7 @@ static ssize_t proc_writesys(struct file * file, const char __user * buf, return do_rw_proc(1, file, (char __user *) buf, count, ppos); } -static int proc_sys_permission(struct inode *inode, int op) +static int proc_sys_permission(struct inode *inode, int op, struct nameidata *nd) { return test_perm(inode->i_mode, op); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 64b97aec0312..f249d4388e36 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -594,7 +594,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, 
&nd); if (err) goto fail; - err = permission(nd.dentry->d_inode,MAY_WRITE); + err = permission(nd.dentry->d_inode,MAY_WRITE, &nd); if (err) goto put_fail; -- cgit v1.2.3 From 52d1430de35a9cfa1019162c0582971f7b4ce5e1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Jul 2003 22:07:27 -0700 Subject: [PATCH] Use the intents in 'nameidata' to improve NFS close-to-open consistency - Make use of the open intents to improve close-to-open cache consistency. Only force data cache revalidation when we're doing an open(). - Add true exclusive create to NFSv3. - Optimize away the redundant ->lookup() to check for an existing file when we know that we're doing NFSv3 exclusive create. - Optimize away all ->permission() checks other than those for path traversal, open(), and sys_access(). --- fs/nfs/dir.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++----------- fs/nfs/file.c | 3 --- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 93585f0099fb..fd894fa6584b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -78,13 +78,9 @@ struct inode_operations nfs_dir_inode_operations = { static int nfs_opendir(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); int res = 0; lock_kernel(); - /* Do cto revalidation */ - if (!(server->flags & NFS_MOUNT_NOCTO)) - res = __nfs_revalidate_inode(server, inode); /* Call generic open code in order to cache credentials */ if (!res) res = nfs_open(inode, filp); @@ -485,9 +481,13 @@ static inline void nfs_renew_times(struct dentry * dentry) } static inline -int nfs_lookup_verify_inode(struct inode *inode) +int nfs_lookup_verify_inode(struct inode *inode, int isopen) { - return nfs_revalidate_inode(NFS_SERVER(inode), inode); + struct nfs_server *server = NFS_SERVER(inode); + + if (isopen && !(server->flags & NFS_MOUNT_NOCTO)) + return __nfs_revalidate_inode(server, inode); + return nfs_revalidate_inode(server, inode); } /* @@ -497,8 +497,17 @@ int 
nfs_lookup_verify_inode(struct inode *inode) * If parent mtime has changed, we revalidate, else we wait for a * period corresponding to the parent's attribute cache timeout value. */ -static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry) +static inline +int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { + int ndflags = 0; + + if (nd) + ndflags = nd->flags; + /* Don't revalidate a negative dentry if we're creating a new file */ + if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE)) + return 0; if (!nfs_check_verifier(dir, dentry)) return 1; return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); @@ -523,14 +532,18 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) int error; struct nfs_fh fhandle; struct nfs_fattr fattr; + int isopen = 0; parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; inode = dentry->d_inode; + if (nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_OPEN)) + isopen = 1; + if (!inode) { - if (nfs_neg_need_reval(dir, dentry)) + if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; goto out_valid; } @@ -543,7 +556,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode)) + if (nfs_lookup_verify_inode(inode, isopen)) goto out_bad; goto out_valid; } @@ -552,7 +565,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) if (!error) { if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) goto out_bad; - if (nfs_lookup_verify_inode(inode)) + if (nfs_lookup_verify_inode(inode, isopen)) goto out_bad; goto out_valid_renew; } @@ -630,6 +643,16 @@ struct dentry_operations nfs_dentry_operations = { .d_iput = nfs_dentry_iput, }; +static inline +int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +{ + if 
(NFS_PROTO(dir)->version == 2) + return 0; + if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) + return 0; + return (nd->intent.open.flags & O_EXCL) != 0; +} + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct inode *inode = NULL; @@ -647,6 +670,10 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru error = -ENOMEM; dentry->d_op = &nfs_dentry_operations; + /* If we're doing an exclusive create, optimize away the lookup */ + if (nfs_is_exclusive_create(dir, nd)) + return NULL; + lock_kernel(); error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); if (!error) { @@ -794,6 +821,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nfs_fattr fattr; struct nfs_fh fhandle; int error; + int open_flags = 0; dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -801,6 +829,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + if (nd && (nd->flags & LOOKUP_CREATE)) + open_flags = nd->intent.open.flags; + /* * The 0 argument passed into the create function should one day * contain the O_EXCL flag if requested. This allows NFSv3 to @@ -810,7 +841,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, lock_kernel(); nfs_zap_caches(dir); error = NFS_PROTO(dir)->create(dir, &dentry->d_name, - &attr, 0, &fhandle, &fattr); + &attr, open_flags, &fhandle, &fattr); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -1247,6 +1278,13 @@ nfs_permission(struct inode *inode, int mask, struct nameidata *nd) int mode = inode->i_mode; int res; + /* Are we checking permissions on anything other than lookup? 
*/ + if (!(mask & MAY_EXEC)) { + /* We only need to check permissions on file open() and access() */ + if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) + return 0; + } + if (mask & MAY_WRITE) { /* * diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c440dc858825..7375f369e517 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -82,9 +82,6 @@ nfs_file_open(struct inode *inode, struct file *filp) /* Do NFSv4 open() call */ if ((open = server->rpc_ops->file_open) != NULL) res = open(inode, filp); - /* Do cto revalidation */ - else if (!(server->flags & NFS_MOUNT_NOCTO)) - res = __nfs_revalidate_inode(server, inode); /* Call generic open code in order to cache credentials */ if (!res) res = nfs_open(inode, filp); -- cgit v1.2.3 -- cgit v1.2.3 From fdcd7356f9d3c53965ab87d826156369c2d61adc Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 4 Jul 2003 01:07:32 -0700 Subject: Signing fixes part 2 --- fs/cifs/Makefile | 2 +- fs/cifs/cifsencrypt.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/cifsglob.h | 3 ++ fs/cifs/cifspdu.h | 8 ++++- fs/cifs/cifsproto.h | 7 +++-- fs/cifs/cifssmb.c | 12 ++++---- fs/cifs/connect.c | 12 ++++---- fs/cifs/smbencrypt.c | 2 +- fs/cifs/transport.c | 18 ++++++----- 9 files changed, 121 insertions(+), 26 deletions(-) create mode 100755 fs/cifs/cifsencrypt.c diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 76f667fe61af..94fd2b73c451 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -3,4 +3,4 @@ # obj-$(CONFIG_CIFS) += cifs.o -cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o +cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c new file mode 100755 index 000000000000..e7c468f1812d 
--- /dev/null +++ b/fs/cifs/cifsencrypt.c @@ -0,0 +1,83 @@ +/* + * fs/cifs/cifsencrypt.c + * + * Copyright (c) International Business Machines Corp., 2003 + * Author(s): Steve French (sfrench@us.ibm.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/fs.h> +#include "cifspdu.h" +#include "cifsglob.h" +#include "cifs_debug.h" + +/* Calculate and return the CIFS signature based on the mac key and the smb pdu */ +/* the eight byte signature must be allocated by the caller.
*/ +/* Note that the smb header signature field on input contains the + sequence number before this function is called */ + +static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, const char * mac_key, char * signature) +{ + + if((cifs_pdu == NULL) || (signature == NULL)) + return -EINVAL; + + /* MD5(mac_key, text) */ + /* return 1st eight bytes in signature */ + + return 0; +} + +int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct cifsSesInfo * ses) +{ + int rc = 0; + char smb_signature[8]; + + /* BB remember to initialize sequence number elsewhere and initialize mac_signing key elsewhere BB */ + /* BB remember to add code to save expected sequence number in midQ entry BB */ + + if((cifs_pdu == NULL) || (ses == NULL)) + return -EINVAL; + + if((le32_to_cpu(cifs_pdu->Flags2) & SMBFLG2_SECURITY_SIGNATURE) == 0) + return rc; + + cifs_pdu->Signature.Sequence.SequenceNumber = ses->sequence_number; + cifs_pdu->Signature.Sequence.Reserved = 0; + rc = cifs_calculate_signature(cifs_pdu, ses->mac_signing_key,smb_signature); + if(rc) + memset(cifs_pdu->Signature.SecuritySignature, 0, 8); + else + memcpy(cifs_pdu->Signature.SecuritySignature, smb_signature, 8); + + return rc; +} + +int cifs_verify_signature(const struct smb_hdr * cifs_pdu, const char * mac_key, + __u32 expected_sequence_number) +{ + unsigned int rc = 0; + + if((cifs_pdu == NULL) || (mac_key == NULL)) + return -EINVAL; + + /* BB no need to verify negprot or if flag is not on for session (or for frame?? */ + /* BB what if signatures are supposed to be on for session but server does not + send one? 
BB */ + /* BB also do not verify oplock breaks for signature */ + + return rc; +} diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ae3e49d1de45..f79a0695fb36 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -149,7 +149,9 @@ struct cifsSesInfo { struct TCP_Server_Info *server; /* pointer to server info */ atomic_t inUse; /* # of CURRENT users of this ses */ enum statusEnum status; + __u32 sequence_number; /* needed for CIFS PDU signature */ __u16 ipc_tid; /* special tid for connection to IPC share */ + char mac_signing_key[CIFS_SESSION_KEY_SIZE]; char *serverOS; /* name of operating system underlying the server */ char *serverNOS; /* name of network operating system that the server is running */ char *serverDomain; /* security realm of server */ @@ -249,6 +251,7 @@ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ __u16 mid; /* multiplex id */ __u16 pid; /* process id */ + __u32 sequence_number; /* for CIFS signing */ __u16 command; /* smb command code */ struct timeval when_sent; /* time when smb sent */ struct cifsSesInfo *ses; /* smb was sent to this server */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 1e6be8054126..bd9e6183deb2 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -307,7 +307,13 @@ struct smb_hdr { __u8 Flags; __u16 Flags2; /* note: le */ __u16 PidHigh; /* note: le */ - __u8 SecuritySignature[8]; /* note le */ + union { + struct { + __u32 SequenceNumber; /* le */ + __u32 Reserved; /* zero */ + } Sequence; + __u8 SecuritySignature[8]; /* le */ + } Signature; __u8 pad[2]; __u16 Tid; __u16 Pid; /* note: le */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e557442f61fd..ba9d0c64c0e2 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -79,8 +79,7 @@ extern int setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, struct nls_table * nls_info); extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses); extern int 
CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, - char *session_key, char *ntlm_session_key, - const struct nls_table *); + char *ntlm_session_key, const struct nls_table *); extern int CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, char *SecurityBlob,int SecurityBlobLength, const struct nls_table *); @@ -226,6 +225,10 @@ extern void tconInfoFree(struct cifsTconInfo *); extern int cifs_demultiplex_thread(struct TCP_Server_Info *); extern int cifs_reconnect(struct TCP_Server_Info *server); +extern int cifs_sign_smb(struct smb_hdr *, struct cifsSesInfo *); +extern int cifs_verify_signature(const struct smb_hdr *, const char * mac_key, + __u32 expected_sequence_number); + /* BB routines below not implemented yet BB */ extern int CIFSBuildServerList(int xid, char *serverBufferList, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1d2d347e3adb..5761b8349297 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -106,9 +106,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; if (extended_security) pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; - if (sign_CIFS_PDUs) { - pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - } pSMB->ByteCount = strlen(protocols[0].name) + 1; strncpy(pSMB->DialectsArray, protocols[0].name, 30); @@ -260,10 +257,13 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) up(&ses->sesSem); return -EBUSY; } - if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; + rc = smb_init(SMB_COM_LOGOFF_ANDX, 2, 0 /* no tcon anymore */, (void **) &pSMB, (void **) &smb_buffer_response); + + if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; + if (rc) { up(&ses->sesSem); return rc; @@ -1657,8 +1657,6 @@ CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, if (ses->capabilities & CAP_DFS) { pSMB->hdr.Flags2 |= SMBFLG2_DFS; } - 
if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; if (ses->capabilities & CAP_UNICODE) { pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index add558cc01af..bd4c4e9a309f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -641,6 +641,7 @@ int setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, struct nls_tab if(pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ rc = CIFSSMBNegotiate(xid, pSesInfo); pSesInfo->capabilities = pSesInfo->server->capabilities; + pSesInfo->sequence_number = 0; if (!rc) { cFYI(1,("Security Mode: 0x%x Capabilities: 0x%x Time Zone: %d", pSesInfo->server->secMode, @@ -690,9 +691,7 @@ int setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, struct nls_tab pSesInfo->server->cryptKey, ntlm_session_key); rc = CIFSSessSetup(xid, pSesInfo, - session_key, - ntlm_session_key, - nls_info); + ntlm_session_key, nls_info); } if (rc) { cERROR(1,("Send error in SessSetup = %d",rc)); @@ -1024,7 +1023,6 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, int CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, char session_key[CIFS_SESSION_KEY_SIZE], - char session_key2[CIFS_SESSION_KEY_SIZE], const struct nls_table *nls_codepage) { struct smb_hdr *smb_buffer; @@ -1081,9 +1079,9 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, pSMB->req_no_secext.CaseSensitivePasswordLength = cpu_to_le16(CIFS_SESSION_KEY_SIZE); bcc_ptr = pByteArea(smb_buffer); - /* memcpy(bcc_ptr, (char *) session_key, CIFS_SESSION_KEY_SIZE); + /* memcpy(bcc_ptr, (char *) lm_session_key, CIFS_SESSION_KEY_SIZE); bcc_ptr += CIFS_SESSION_KEY_SIZE; */ - memcpy(bcc_ptr, (char *) session_key2, CIFS_SESSION_KEY_SIZE); + memcpy(bcc_ptr, (char *) session_key, CIFS_SESSION_KEY_SIZE); bcc_ptr += CIFS_SESSION_KEY_SIZE; if (ses->capabilities & CAP_UNICODE) { @@ -1094,7 +1092,7 @@ CIFSSessSetup(unsigned int xid, struct 
cifsSesInfo *ses, if(user == NULL) bytes_returned = 0; /* skill null user */ else - bytes_returned = + bytes_returned = cifs_strtoUCS((wchar_t *) bcc_ptr, user, 100, nls_codepage); bcc_ptr += 2 * bytes_returned; /* convert num 16 bit words to bytes */ bcc_ptr += 2; /* trailing null */ diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 83e772647425..9c810c517cf0 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -520,7 +520,7 @@ cli_caclulate_sign_mac(struct smb_hdr *outbuf, __u8 * mac_key, be32_to_cpu(outbuf->smb_buf_length)); MD5Final(calc_md5_mac, &md5_ctx); - memcpy(outbuf->SecuritySignature, calc_md5_mac, 8); + memcpy(outbuf->Signature.SecuritySignature, calc_md5_mac, 8); (*send_seq_num)++; *reply_seq_num = *send_seq_num; (*send_seq_num)++; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 4705a16d9ef5..62d6136f293a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -154,12 +154,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, /* smb header is converted in header_assemble. 
bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in - cifssmb.c and RFC1001 len is converted to bigendian in smb_send */ - if (smb_buf_length > 12) - smb_buffer->Flags2 = cpu_to_le16(smb_buffer->Flags2); - - /* if(smb_buffer->Flags2 & SMBFLG2_SECURITY_SIGNATURE) - sign_smb(smb_buffer); */ /* BB enable when signing tested more */ + cifssmb.c and RFC1001 len is converted to bigendian in smb_send + Flags2 is converted in SendReceive */ smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length); cFYI(1, ("Sending smb of length %d ", smb_buf_length)); @@ -200,6 +196,12 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, DeleteMidQEntry(midQ); return -EIO; } + + if (in_buf->smb_buf_length > 12) + in_buf->Flags2 = cpu_to_le16(in_buf->Flags2); + + rc = cifs_sign_smb(in_buf, ses); + midQ->midState = MID_REQUEST_SUBMITTED; rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, (struct sockaddr *) &(ses->server->sockAddr)); @@ -247,9 +249,11 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, memcpy(out_buf, midQ->resp_buf, receive_len + 4 /* include 4 byte RFC1001 header */ ); - /* convert the length back to a form that we can use */ +/* int cifs_verify_signature(out_buf, ses->mac_signing_key, + __u32 expected_sequence_number); */ dump_smb(out_buf, 92); + /* convert the length into a more usable form */ out_buf->smb_buf_length = be32_to_cpu(out_buf->smb_buf_length); if (out_buf->smb_buf_length > 12) -- cgit v1.2.3 From 7fa5b96b3e2a6eceb4302c46cb682fabba84e725 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 4 Jul 2003 02:32:31 -0700 Subject: Update cifs vfs information and readme --- fs/cifs/CHANGES | 5 ++ fs/cifs/README | 146 ++++++++++++++++++++++++++++++++++++++++++++------------ fs/cifs/TODO | 22 ++++----- 3 files changed, 132 insertions(+), 41 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 03af9fb35147..ee8682efbada 100644 --- a/fs/cifs/CHANGES +++ 
b/fs/cifs/CHANGES @@ -1,3 +1,8 @@ +Version 0.81 +------------ +Finish up CIFS packet digital signing for the default +NTLM security case. + Version 0.80 ----------- Fix oops on stopping oplock thread when removing cifs when diff --git a/fs/cifs/README b/fs/cifs/README index 20626fb1aca4..12d93b7fadd1 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -1,5 +1,5 @@ -This is the CIFS VFS support for Linux. It supports many advanced network filesystem -features such as heirarchical dfs like filesystem, hardlinks, locking and more. +The CIFS VFS support for Linux supports many advanced network filesystem +features such as heirarchical dfs like namespace, hardlinks, locking and more. It was designed to comply with the SNIA CIFS Technical Reference (which supersedes the 1992 X/Open SMB Standard) as well as to perform best practice practical interoperability with Windows 2000, Windows XP, Samba and equivalent @@ -9,43 +9,65 @@ For questions or bug reports please contact sfrench@samba.org (sfrench@us.ibm.co Build instructions: ================== -Get the kernel source e.g. http://linux.bkbits.net/linux-2.5 or http://www.kernel.org -http://cifs.bkbits.net/linux-2.4 -make menuconfig (or make xconfig) -select cifs from within the network filesystem choices -save and exit -make dep -make modules (or "make" if you did not select CIFS VFS to be built as a module) +For Linux 2.4: +1a) Get the linux kernel source with cifs vfs already in it +from bitkeeper via bk://cifs.bkbits.net/linux-2.4 +or +1b) Get the kernel source (e.g.from http://www.kernel.org) +and download the cifs vfs source (see the project page +at http://us1.samba.org/samba/Linux_CIFS_client.html) +and change directory into the top of the kernel directory +then patch the kernel (e.g. "patch -p1 < cifs_24.patch") +to add the cifs vfs to your kernel configure options if +it has not already been added (e.g. 
current SuSE and UL +users do not need that patch since the cifs vfs is +already in the kernel configure menu) and then +mkdir linux/fs/cifs and then copy the current cifs vfs files from +the cifs download to your kernel build directory e.g. + cp /fs/cifs/* to /fs/cifs +2) make menuconfig (or make xconfig) +3) select cifs from within the network filesystem choices +4) save and exit +5) make dep +6) make modules (or "make" if CIFS VFS not to be built as a module) + +For Linux 2.5: +1) Download the kernel (e.g. from http://www.kernel.org or from bitkeeper +at bk://linux.bkbits.net/linux-2.5) and change directory into the top +of the kernel directory tree (e.g. /usr/src/linux-2.5.73) +2) make menuconfig (or make xconfig) +3) select cifs from within the network filesystem choices +4) save and exit +5) make + Installation instructions: ========================= -If you have built the CIFS vfs as module (successfully)you -simply type "make modules_install" (or if you prefer manually copy the file to +If you have built the CIFS vfs as module (successfully) simply +type "make modules_install" (or if you prefer, manually copy the file to the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.o). If you have built the CIFS vfs into the kernel itself, follow the instructions for your distribution on how to install a new kernel (usually you would simply type "make install"). -If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on the -CIFS VFS web site) copy it to the directory /sbin (or the same directory in which -mount.smbfs resides). Although no helper software is required, the installation -of mount.cifs is recommended. Eventually the Samba 3.0 utility program "net" +If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on +the CIFS VFS web site) copy it to the same directory in which mount.smbfs and +similar files reside (usually /sbin).
Although the helper software is not required, +mount.cifs is recommended. Eventually the Samba 3.0 utility program "net" may also be helpful since it may someday provide easier mount syntax for users used to Windows e.g. net use -and there will likely be other helper programs available ala smbmount to provide -additional optional function in the future. Note that running Winbind on all -of your Linux clients is useful in mapping Uids and Gids consistently to the -proper network user. +Note that running Winbind on all of your Linux clients is useful +in mapping Uids and Gids consistently to the proper network user. Samba Considerations ==================== To get the maximum benefit from the CIFS VFS, we recommend using a server that -supports the SNIA CIFS Unix Extensions standard (e.g. Samba 2.2.5 or Samba 3.0) -but the CIFS vfs works fine with a wide variety of CIFS servers. Note that the -uid, gid and file permissions will display default values if you do not have -a server that supports the Unix extensions for CIFS (such as Samba 2.2.3 or +supports the SNIA CIFS Unix Extensions standard (e.g. Samba 2.2.5 or later or +Samba 3.0) but the CIFS vfs works fine with a wide variety of CIFS servers. +Note that uid, gid and file permissions will display default values if you do +not have a server that supports the Unix extensions for CIFS (such as Samba 2.2.3 or later). To enable the Unix CIFS Extensions in the Samba server, add the line: unix extensions = yes to your smb.conf file on the server. Note that the following smb.conf settings are @@ -81,15 +103,79 @@ either "pure-TCP" (port 445 TCP/IP CIFS connections) or RFC 1001/1002 support fo "Netbios-Over-TCP/IP." Neither of these is likely to be a problem as most servers support this. IPv6 support is planned for the future. +CIFS VFS Mount Options +====================== +A partial list of the supported mount options follows: + user The user name to use when trying to establish + the CIFS session.
+ password The user password. If the mount helper is + installed, the user will be prompted for password + if it is not supplied. + ip The ip address of the target server + unc The target server Universal Network Name (export) to + mount. + domain Set the SMB/CIFS workgroup name prepended to the + username during CIFS session establishment + uid If CIFS Unix extensions are not supported by the server + this overrides the default uid for inodes. + gid If CIFS Unix extensions are not supported by the server + this overrides the default gid for inodes. + file_mode If CIFS Unix extensions are not supported by the server + this overrides the default mode for file inodes. + dir_mode If CIFS Unix extensions are not supported by the server + this overrides the default mode for directory inodes. + port attempt to contact the server on this tcp port, before + trying the usual ports (port 445, then 139). + rsize default read size + wsize default write size + rw mount the network share read-write (note that the + server may still consider the share read-only) + ro mount network share read-only + version used to distinguish different versions of the + mount helper utility (not typically needed) + Misc /proc/fs/cifs Flags and Debug Info ======================================= -Various experimental features and tracing can be enabled by changing flags in /proc/fs/cifs (after -the cifs module has been installed or built into the kernel, e.g. insmod cifs). To enable -a feature you can set it to 1 e.g. to enable tracing to the kernel message log you can do -"echo 1 > /proc/fs/cifs/cifsFYI" and "echo 1 > /proc/fs/cifs/traceSMB" -Also note that "cat /proc/fs/cifs/DebugData" will display some information about the currently -active sessions and the shares that are mounted. 
Currently the ntlmv2 enablement and packet -signing will not work since they the implementation is not quite complete, so do not enable +Informational pseudo-files: + DebugData Displays information about active CIFS sessions + SimultaneousOps Counter which holds maximum number of + simultaneous outstanding SMB/CIFS requests. + Stats Lists summary resource usage information + +Configuration pseudo-files: + MultiuserMount If set to one, more than one CIFS session to + the same server ip address can be established + if more than one uid accesses the same mount + point and if the uids user/password mapping + information is available. (default is 0) + PacketSigningEnabled If set to one, cifs packet signing is enabled + (default 0) + cifsFYI If set to one, additional debug information is + logged to the system error log. (default 0) + ExtendedSecurity If set to one, SPNEGO session establishment + is allowed which enables more advanced + secure CIFS session establishment (default 0) + NTLMV2Enabled If set to one, more secure password hashes + are used when the server supports them and + when kerberos is not negotiated (default 0) + traceSMB If set to one, debug information is logged to the + system error log with the start of smb requests + and responses (default 0) + LookupCacheEnable If set to one, inode information is kept cached + for one second improving performance of lookups + (default 1) + OplockEnabled If set to one, safe distributed caching enabled. + +These experimental features and tracing can be enabled by changing flags in /proc/fs/cifs +(after the cifs module has been installed or built into the kernel, e.g. insmod cifs). +To enable a feature set it to 1 e.g. 
to enable tracing to the kernel message log +type: + echo 1 > /proc/fs/cifs/cifsFYI +and for more extensive tracing including the start of smb requests and responses + echo 1 > /proc/fs/cifs/traceSMB +Also note that "cat /proc/fs/cifs/DebugData" will display some information about the +active sessions and the shares that are mounted. NTLMv2 enablement and packet +signing will not work since the implementation is not quite complete. Do not enable these flags unless you are doing specific testing. Enabling extended security works to Windows 2000 Workstations and XP but not to Windows 2000 server or Samba since it does not usually send "raw NTLMSSP" (instead it sends NTLMSSP encapsulated in SPNEGO/GSSAPI, which diff --git a/fs/cifs/TODO b/fs/cifs/TODO index e47e28db4f28..28a2c85fb926 100644 --- a/fs/cifs/TODO +++ b/fs/cifs/TODO @@ -1,4 +1,4 @@ -version 0.6.5 February 15, 2003 +version 0.8.1 July 4th, 2003 A Partial List of Known Problems and Missing Features ===================================================== @@ -19,10 +19,9 @@ d) Kerberos/SPNEGO session setup support - (started) e) NTLMv2 authentication and MD5-HMAC signing SMB PDUs - (mostly implemented) signing necessary for some Windows 2003 servers in domain - controller mode. + mode. -f) oplock support (ie safe CIFS distributed file caching) is not quite complete. -In addition Directory entry caching relies on a 1 second timer, rather than +f) Directory entry caching relies on a 1 second timer, rather than using FindNotify or equivalent. - (started) g) There may be a few additional changes that could be done to take advantage @@ -47,15 +46,16 @@ m) finish support for IPv6 n) send oplock break response when sent (oplock currently disabled in /proc/fs/cifs) -o) remove calls to set end of file by name when we already have file open -(use the existing handle since some servers only support that and it -reduces the oplock breaks coming from windows).
Piggyback identical +o) reduces the oplock breaks coming from windows). Piggyback identical file opens on top of each other by incrementing reference count rather than resending (helps reduce server resource utilization and avoid spurious oplock breaks). +p) Improve performance of readpages by sending more than one read +at a time when 8 pages or more are requested. -KNOWN BUGS (updated May 16, 2003) + +KNOWN BUGS (updated July 4th, 2003) ==================================== 1) existing symbolic links (Windows reparse points) are recognized but can not be created remotely. They are implemented for Samba and those that @@ -74,8 +74,8 @@ Misc testing to do 1) check out max path names and max path name components against various server types. -2) Run dbench +2) Run dbench. Modify file portion of ltp so it can run against a mounted network +share and run it against cifs vfs. -3) Finish high stress fsx testing on SMP clients +3) Additional performance testing and optimization using iozone and similar tools. -4) Additional performance testing and optimization -- cgit v1.2.3 From ddb6ee510226e05aaea1b90a3e4d672a2a0be857 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Jul 2003 03:00:12 -0700 Subject: [PATCH] EISA: core changes - Now reserves I/O ranges according to EISA specs (four 256 bytes regions instead of a single 4KB region). - By default, do not try to probe the bus if the mainboard does not seems to support EISA (allow this behaviour to be changed through a command-line option). - Use parent bridge device dma_mask as default for each discovered device. - Allow devices to be enabled or disabled from the kernel command line (useful for non-x86 platforms where the firmware simply disable devices it doesn't know about...). 
--- drivers/eisa/eisa-bus.c | 234 +++++++++++++++++++++++++++++++++++++++--------- include/linux/eisa.h | 21 ++++- 2 files changed, 209 insertions(+), 46 deletions(-) diff --git a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c index a5241c65d63b..116298c5f7b8 100644 --- a/drivers/eisa/eisa-bus.c +++ b/drivers/eisa/eisa-bus.c @@ -1,7 +1,7 @@ /* * EISA bus support functions for sysfs. * - * (C) 2002 Marc Zyngier + * (C) 2002, 2003 Marc Zyngier * * This code is released under the GPL version 2. */ @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,7 @@ struct eisa_device_info { char name[DEVICE_NAME_SIZE]; }; -struct eisa_device_info __initdata eisa_table[] = { +static struct eisa_device_info __initdata eisa_table[] = { #ifdef CONFIG_EISA_NAMES #include "devlist.h" #endif @@ -32,6 +33,30 @@ struct eisa_device_info __initdata eisa_table[] = { #define EISA_INFOS (sizeof (eisa_table) / (sizeof (struct eisa_device_info))) +#define EISA_MAX_FORCED_DEV 16 +#define EISA_FORCED_OFFSET 2 + +static int enable_dev[EISA_MAX_FORCED_DEV + EISA_FORCED_OFFSET] = { 1, EISA_MAX_FORCED_DEV, }; +static int disable_dev[EISA_MAX_FORCED_DEV + EISA_FORCED_OFFSET] = { 1, EISA_MAX_FORCED_DEV, }; + +static int is_forced_dev (int *forced_tab, + struct eisa_root_device *root, + struct eisa_device *edev) +{ + int i, x; + + for (i = 0; i < EISA_MAX_FORCED_DEV; i++) { + if (!forced_tab[EISA_FORCED_OFFSET + i]) + return 0; + + x = (root->bus_nr << 8) | edev->slot; + if (forced_tab[EISA_FORCED_OFFSET + i] == x) + return 1; + } + + return 0; +} + static void __init eisa_name_device (struct eisa_device *edev) { int i; @@ -92,7 +117,8 @@ static int eisa_bus_match (struct device *dev, struct device_driver *drv) return 0; while (strlen (eids->sig)) { - if (!strcmp (eids->sig, edev->id.sig)) { + if (!strcmp (eids->sig, edev->id.sig) && + edev->state & EISA_CONFIG_ENABLED) { edev->id.driver_data = eids->driver_data; return 1; } @@ -132,41 +158,160 @@ static 
ssize_t eisa_show_sig (struct device *dev, char *buf) static DEVICE_ATTR(signature, S_IRUGO, eisa_show_sig, NULL); -static int __init eisa_register_device (struct eisa_root_device *root, - struct eisa_device *edev, - char *sig, int slot) +static ssize_t eisa_show_state (struct device *dev, char *buf) +{ + struct eisa_device *edev = to_eisa_device (dev); + return sprintf (buf,"%d\n", edev->state & EISA_CONFIG_ENABLED); +} + +static DEVICE_ATTR(enabled, S_IRUGO, eisa_show_state, NULL); + +static int __init eisa_init_device (struct eisa_root_device *root, + struct eisa_device *edev, + int slot) { + char *sig; + unsigned long sig_addr; + int i; + + sig_addr = SLOT_ADDRESS (root, slot) + EISA_VENDOR_ID_OFFSET; + + if (!(sig = decode_eisa_sig (sig_addr))) + return -1; /* No EISA device here */ + memcpy (edev->id.sig, sig, EISA_SIG_LEN); edev->slot = slot; + edev->state = inb (SLOT_ADDRESS (root, slot) + EISA_CONFIG_OFFSET) & EISA_CONFIG_ENABLED; edev->base_addr = SLOT_ADDRESS (root, slot); - edev->dma_mask = 0xffffffff; /* Default DMA mask */ + edev->dma_mask = root->dma_mask; /* Default DMA mask */ eisa_name_device (edev); edev->dev.parent = root->dev; edev->dev.bus = &eisa_bus_type; edev->dev.dma_mask = &edev->dma_mask; sprintf (edev->dev.bus_id, "%02X:%02X", root->bus_nr, slot); - edev->res.name = edev->dev.name; + for (i = 0; i < EISA_MAX_RESOURCES; i++) + edev->res[i].name = edev->dev.name; + + if (is_forced_dev (enable_dev, root, edev)) + edev->state = EISA_CONFIG_ENABLED | EISA_CONFIG_FORCED; + + if (is_forced_dev (disable_dev, root, edev)) + edev->state = EISA_CONFIG_FORCED; + + return 0; +} +static int __init eisa_register_device (struct eisa_device *edev) +{ if (device_register (&edev->dev)) return -1; device_create_file (&edev->dev, &dev_attr_signature); + device_create_file (&edev->dev, &dev_attr_enabled); + + return 0; +} + +static int __init eisa_request_resources (struct eisa_root_device *root, + struct eisa_device *edev, + int slot) +{ + int i; + + for (i 
= 0; i < EISA_MAX_RESOURCES; i++) { + /* Don't register resource for slot 0, since this is + * very likely to fail... :-( Instead, grab the EISA + * id, now we can display something in /proc/ioports. + */ + + /* Only one region for mainboard */ + if (!slot && i > 0) { + edev->res[i].start = edev->res[i].end = 0; + continue; + } + + if (slot) { + edev->res[i].name = NULL; + edev->res[i].start = SLOT_ADDRESS (root, slot) + (i * 0x400); + edev->res[i].end = edev->res[i].start + 0xff; + edev->res[i].flags = IORESOURCE_IO; + } else { + edev->res[i].name = NULL; + edev->res[i].start = SLOT_ADDRESS (root, slot) + EISA_VENDOR_ID_OFFSET; + edev->res[i].end = edev->res[i].start + 3; + edev->res[i].flags = IORESOURCE_BUSY; + } + + if (request_resource (root->res, &edev->res[i])) + goto failed; + } return 0; + + failed: + while (--i >= 0) + release_resource (&edev->res[i]); + + return -1; +} + +static void __init eisa_release_resources (struct eisa_device *edev) +{ + int i; + + for (i = 0; i < EISA_MAX_RESOURCES; i++) + if (edev->res[i].start || edev->res[i].end) + release_resource (&edev->res[i]); } static int __init eisa_probe (struct eisa_root_device *root) { int i, c; - char *str; - unsigned long sig_addr; struct eisa_device *edev; printk (KERN_INFO "EISA: Probing bus %d at %s\n", root->bus_nr, root->dev->name); + + /* First try to get hold of slot 0. If there is no device + * here, simply fail, unless root->force_probe is set. 
*/ - for (c = 0, i = 0; i <= root->slots; i++) { + if (!(edev = kmalloc (sizeof (*edev), GFP_KERNEL))) { + printk (KERN_ERR "EISA: Couldn't allocate mainboard slot\n"); + return -ENOMEM; + } + + memset (edev, 0, sizeof (*edev)); + + if (eisa_request_resources (root, edev, 0)) { + printk (KERN_WARNING \ + "EISA: Cannot allocate resource for mainboard\n"); + kfree (edev); + if (!root->force_probe) + return -EBUSY; + goto force_probe; + } + + if (eisa_init_device (root, edev, 0)) { + eisa_release_resources (edev); + kfree (edev); + if (!root->force_probe) + return -ENODEV; + goto force_probe; + } + + printk (KERN_INFO "EISA: Mainboard %s detected.\n", edev->id.sig); + + if (eisa_register_device (edev)) { + printk (KERN_ERR "EISA: Failed to register %s\n", + edev->id.sig); + eisa_release_resources (edev); + kfree (edev); + } + + force_probe: + + for (c = 0, i = 1; i <= root->slots; i++) { if (!(edev = kmalloc (sizeof (*edev), GFP_KERNEL))) { printk (KERN_ERR "EISA: Out of memory for slot %d\n", i); @@ -175,24 +320,7 @@ static int __init eisa_probe (struct eisa_root_device *root) memset (edev, 0, sizeof (*edev)); - /* Don't register resource for slot 0, since this is - * very likely to fail... :-( Instead, grab the EISA - * id, now we can display something in /proc/ioports. 
- */ - - if (i) { - edev->res.name = NULL; - edev->res.start = SLOT_ADDRESS (root, i); - edev->res.end = edev->res.start + 0xfff; - edev->res.flags = IORESOURCE_IO; - } else { - edev->res.name = NULL; - edev->res.start = SLOT_ADDRESS (root, i) + EISA_VENDOR_ID_OFFSET; - edev->res.end = edev->res.start + 3; - edev->res.flags = IORESOURCE_BUSY; - } - - if (request_resource (root->res, &edev->res)) { + if (eisa_request_resources (root, edev, i)) { printk (KERN_WARNING \ "Cannot allocate resource for EISA slot %d\n", i); @@ -200,30 +328,41 @@ static int __init eisa_probe (struct eisa_root_device *root) continue; } - sig_addr = SLOT_ADDRESS (root, i) + EISA_VENDOR_ID_OFFSET; - - if (!(str = decode_eisa_sig (sig_addr))) { - release_resource (&edev->res); + if (eisa_init_device (root, edev, i)) { + eisa_release_resources (edev); kfree (edev); continue; } - if (!i) - printk (KERN_INFO "EISA: Motherboard %s detected\n", - str); - else { - printk (KERN_INFO "EISA: slot %d : %s detected.\n", - i, str); - - c++; + printk (KERN_INFO "EISA: slot %d : %s detected", + i, edev->id.sig); + + switch (edev->state) { + case EISA_CONFIG_ENABLED | EISA_CONFIG_FORCED: + printk (" (forced enabled)"); + break; + + case EISA_CONFIG_FORCED: + printk (" (forced disabled)"); + break; + + case 0: + printk (" (disabled)"); + break; } + + printk (".\n"); - if (eisa_register_device (root, edev, str, i)) { - printk (KERN_ERR "EISA: Failed to register %s\n", str); - release_resource (&edev->res); + c++; + + if (eisa_register_device (edev)) { + printk (KERN_ERR "EISA: Failed to register %s\n", + edev->id.sig); + eisa_release_resources (edev); kfree (edev); } } + printk (KERN_INFO "EISA: Detected %d card%s.\n", c, c == 1 ? "" : "s"); return 0; @@ -274,6 +413,13 @@ static int __init eisa_init (void) return 0; } +/* Couldn't use intarray with checking on... 
:-( */ +#undef param_check_intarray +#define param_check_intarray(name, p) + +module_param(enable_dev, intarray, 0444); +module_param(disable_dev, intarray, 0444); + postcore_initcall (eisa_init); EXPORT_SYMBOL (eisa_bus_type); diff --git a/include/linux/eisa.h b/include/linux/eisa.h index d77fdf0f9f01..93e4c5503d46 100644 --- a/include/linux/eisa.h +++ b/include/linux/eisa.h @@ -4,6 +4,8 @@ #define EISA_SIG_LEN 8 #define EISA_MAX_SLOTS 8 +#define EISA_MAX_RESOURCES 4 + /* A few EISA constants/offsets... */ #define EISA_DMA1_STATUS 8 @@ -17,6 +19,10 @@ #define EISA_INT1_EDGE_LEVEL 0x4D0 #define EISA_INT2_EDGE_LEVEL 0x4D1 #define EISA_VENDOR_ID_OFFSET 0xC80 +#define EISA_CONFIG_OFFSET 0xC84 + +#define EISA_CONFIG_ENABLED 1 +#define EISA_CONFIG_FORCED 2 /* The EISA signature, in ASCII form, null terminated */ struct eisa_device_id { @@ -26,19 +32,28 @@ struct eisa_device_id { /* There is not much we can say about an EISA device, apart from * signature, slot number, and base address. dma_mask is set by - * default to 32 bits.*/ + * default to parent device mask..*/ struct eisa_device { struct eisa_device_id id; int slot; + int state; unsigned long base_addr; - struct resource res; + struct resource res[EISA_MAX_RESOURCES]; u64 dma_mask; struct device dev; /* generic device */ }; #define to_eisa_device(n) container_of(n, struct eisa_device, dev) +static inline int eisa_get_region_index (void *addr) +{ + unsigned long x = (unsigned long) addr; + + x &= 0xc00; + return (x >> 12); +} + struct eisa_driver { const struct eisa_device_id *id_table; struct device_driver driver; @@ -69,6 +84,8 @@ struct eisa_root_device { struct resource *res; unsigned long bus_base_addr; int slots; /* Max slot number */ + int force_probe; /* Probe even when no slot 0 */ + u64 dma_mask; /* from bridge device */ int bus_nr; /* Set by eisa_root_register */ struct resource eisa_root_res; /* ditto */ }; -- cgit v1.2.3 From d8d9c9e8e86b51067118ec842a84e84e455b1d82 Mon Sep 17 00:00:00 2001 From: Marc 
Zyngier Date: Fri, 4 Jul 2003 03:00:19 -0700 Subject: [PATCH] EISA: Documentation update --- Documentation/eisa.txt | 60 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/Documentation/eisa.txt b/Documentation/eisa.txt index c27a2676dcea..8c8388da868a 100644 --- a/Documentation/eisa.txt +++ b/Documentation/eisa.txt @@ -46,12 +46,14 @@ root of an EISA bus. The eisa_root_device structure holds a reference to this device, as well as some parameters for probing purposes. struct eisa_root_device { - struct list_head node; - struct device *dev; /* Pointer to bridge device */ - struct resource *res; - unsigned long bus_base_addr; - int slots; /* Max slot number */ - int bus_nr; /* Set by eisa_root_register */ + struct device *dev; /* Pointer to bridge device */ + struct resource *res; + unsigned long bus_base_addr; + int slots; /* Max slot number */ + int force_probe; /* Probe even when no slot 0 */ + u64 dma_mask; /* from bridge device */ + int bus_nr; /* Set by eisa_root_register */ + struct resource eisa_root_res; /* ditto */ }; node : used for eisa_root_register internal purpose @@ -59,6 +61,8 @@ dev : pointer to the root device res : root device I/O resource bus_base_addr : slot 0 address on this bus slots : max slot number to probe +force_probe : Probe even when slot 0 is empty (no EISA mainboard) +dma_mask : Default DMA mask. Usually the bridge device dma_mask. bus_nr : unique bus id, set by eisa_root_register ** Driver : @@ -87,7 +91,7 @@ driver : a generic driver, such as described in Documentation/driver-model/driver.txt. Only .name, .probe and .remove members are mandatory. 
-An example is the 3c509 driver : +An example is the 3c59x driver : static struct eisa_device_id vortex_eisa_ids[] = { { "TCM5920", EISA_3C592_OFFSET }, @@ -116,15 +120,20 @@ encapsulated in a 'struct eisa_device' described as follows : struct eisa_device { struct eisa_device_id id; int slot; - unsigned long base_addr; - struct resource res; + int state; + unsigned long base_addr; + struct resource res[EISA_MAX_RESOURCES]; + u64 dma_mask; struct device dev; /* generic device */ }; id : EISA id, as read from device. id.driver_data is set from the matching driver EISA id. slot : slot number which the device was detected on -res : I/O resource allocated to this device +state : set of flags indicating the state of the device. Current + flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED. +res : set of four 256 bytes I/O regions allocated to this device +dma_mask: DMA mask set from the parent device. dev : generic device (see Documentation/driver-model/device.txt) You can get the 'struct eisa_device' from 'struct device' using the @@ -140,6 +149,32 @@ void *eisa_get_drvdata (struct eisa_device *edev): Gets the pointer previously stored into the device's driver_data area. +int eisa_get_region_index (void *addr); + +Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given +address. + +** Kernel parameters : + +eisa_bus.enable_dev : + +A comma-separated list of slots to be enabled, even if the firmware +set the card as disabled. The driver must be able to properly +initialize the device in such conditions. + +eisa_bus.disable_dev : + +A comma-separated list of slots to be disabled, even if the firmware +set the card as enabled. The driver won't be called to handle this +device. + +virtual_root.force_probe : + +Force the probing code to probe EISA slots even when it cannot find an +EISA compliant mainboard (nothing appears on slot 0). Defaults to 0 +(don't force), and set to 1 (force probing) when either +CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set. 
+ ** Random notes : Converting an EISA driver to the new API mostly involves *deleting* @@ -156,10 +191,13 @@ Unfortunately, most drivers are doing the probing by themselves, and expect to have explored the whole machine when they exit their probe routine. +For example, switching your favorite EISA SCSI card to the "hotplug" +model is "the right thing"(tm). + ** Thanks : I'd like to thank the following people for their help : - Xavier Benigni for lending me a wonderful Alpha Jensen, - James Bottomley, Jeff Garzik for getting this stuff into the kernel, - Andries Brouwer for contributing numerous EISA ids, -- Catrin Jones for coping with too many machines at home +- Catrin Jones for coping with far too many machines at home. -- cgit v1.2.3 From 5fe1dbf48ce9657b5f6066278499e8022a881fea Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Jul 2003 03:00:26 -0700 Subject: [PATCH] EISA: More EISA ids --- drivers/eisa/eisa.ids | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/eisa/eisa.ids b/drivers/eisa/eisa.ids index 819f9470d7c9..ed69837d8b74 100644 --- a/drivers/eisa/eisa.ids +++ b/drivers/eisa/eisa.ids @@ -504,6 +504,7 @@ DTK0001 "DTK PLM-3300I 80486 EISA Board" DTK0003 "DTK PLM-3331P EISACACHE486 33/25/50 MHZ" ECS0580 "DI-580A EISA SCSI Host Adapter" ECS0590 "DI-590 EISA SCSI Cache Host Adapter" +EGL0101 "Eagle Technology EP3210 EtherXpert EISA Adapter" ELS8041 "ELSA WINNER 1000 Enhanced VGA" ETI1001 "NE3300 Ethernet Rev. C & D" EVX0002 "PN-3000 System Board" @@ -515,6 +516,9 @@ FIX1516 "15-16MB Memory Hole Patch - Netserver LF/LC 5/66" FSI2001 "ESA-200 ATM" FSI2002 "ESA-200A ATM" FSI2003 "ESA-200E ATM" +GCI0101 "Gateway G/Ethernet 32EB -- 32-Bit EISA Bus Master Ethernet Adpater" +GCI0102 "Gateway G/Ethernet 32EB -- 32-Bit EISA Bus Master Ethernet Adapter" +GCI0103 "Gateway G/Ethernet 32EB -- 32-Bit EISA Bus Master Ethernet Adapter" GDT2001 "GDT2000/GDT2020 Fast-SCSI Cache Controller - Rev. 
1.0" GDT3001 "GDT3000/GDT3020 Dual Channel SCSI Controller - Rev. 1.0" GDT3002 "GDT30x0A Cache Controller" @@ -1138,12 +1142,14 @@ NON0501 "c't Universal 16-Bit Sound Adapter" NON0601 "c't Universal 8-Bit Adapter" NSS0011 "Newport Systems Solutions WNIC Adapter" NVL0701 "Novell NE3200 Bus Master Ethernet" +NVL0702 "Novell NE3200T Bus Master Ethernet" NVL0901 "Novell NE2100 Ethernet/Cheapernet Adapter" NVL1001 "Novell NMSL (Netware Mirrored Server Link)" NVL1201 "Novell NE32HUB 32-bit Base EISA Adapter" NVL1301 "Novell NE32HUB 32-bit TPE EISA Adapter" NVL1401 "Novell NE32HUB PME ISA Adapter" NVL1501 "Novell NE2000PLUS Ethernet Adapter" +NVL1801 "Eagle Technology NE3210 EISA Ethernet LAN Adapter" OLC0701 "Olicom ISA 16/4 Token-Ring Network Adapter" OLC0702 "Olicom OC-3117, ISA 16/4 Adapter (NIC)" OLC0801 "OC-3118 Olicom ISA 16/4 Token-Ring Network Adapter" -- cgit v1.2.3 From e0e5907e3c25dc31c6c43cbb4317ef5e6858e0e5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Jul 2003 03:00:33 -0700 Subject: [PATCH] EISA: PA-RISC changes - Probe the right number of EISA slots on PA-RISC. No more, no less. --- drivers/parisc/eisa.c | 24 +++++++++++++----------- drivers/parisc/eisa_enumerator.c | 9 ++++++--- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c index 547ef6adffe9..92959266a026 100644 --- a/drivers/parisc/eisa.c +++ b/drivers/parisc/eisa.c @@ -378,19 +378,21 @@ static int __devinit eisa_probe(struct parisc_device *dev) } } eisa_eeprom_init(eisa_dev.eeprom_addr); - eisa_enumerator(eisa_dev.eeprom_addr, &eisa_dev.hba.io_space, &eisa_dev.hba.lmmio_space); + result = eisa_enumerator(eisa_dev.eeprom_addr, &eisa_dev.hba.io_space, &eisa_dev.hba.lmmio_space); init_eisa_pic(); - /* FIXME : Get the number of slots from the enumerator, not a - * hadcoded value. Also don't enumerate the bus twice. 
*/ - eisa_dev.root.dev = &dev->dev; - dev->dev.driver_data = &eisa_dev.root; - eisa_dev.root.bus_base_addr = 0; - eisa_dev.root.res = &eisa_dev.hba.io_space; - eisa_dev.root.slots = EISA_MAX_SLOTS; - if (eisa_root_register (&eisa_dev.root)) { - printk(KERN_ERR "EISA: Failed to register EISA root\n"); - return -1; + if (result >= 0) { + /* FIXME : Don't enumerate the bus twice. */ + eisa_dev.root.dev = &dev->dev; + dev->dev.driver_data = &eisa_dev.root; + eisa_dev.root.bus_base_addr = 0; + eisa_dev.root.res = &eisa_dev.hba.io_space; + eisa_dev.root.slots = result; + eisa_dev.root.dma_mask = 0xffffffff; /* wild guess */ + if (eisa_root_register (&eisa_dev.root)) { + printk(KERN_ERR "EISA: Failed to register EISA root\n"); + return -1; + } } return 0; diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c index ebd15f128ffb..6d8aae003f6c 100644 --- a/drivers/parisc/eisa_enumerator.c +++ b/drivers/parisc/eisa_enumerator.c @@ -438,6 +438,10 @@ static int init_slot(int slot, struct eeprom_eisa_slot_info *es) id = le32_to_cpu(inl(SLOT2PORT(slot)+EPI)); if (0xffffffff == id) { + /* Maybe we didn't expect a card to be here... */ + if (es->eisa_slot_id == 0xffffffff) + return -1; + /* this board is not here or it does not * support readid */ @@ -499,8 +503,7 @@ int eisa_enumerator(unsigned long eeprom_addr, (&eeprom_buf[HPEE_SLOT_INFO(i)]); if (-1==init_slot(i+1, es)) { - return -1; - + continue; } if (es->config_data_offset < HPEE_MAX_LENGTH) { @@ -513,6 +516,6 @@ int eisa_enumerator(unsigned long eeprom_addr, return -1; } } - return 0; + return eh->num_slots; } -- cgit v1.2.3 From e34121f75e49cf1bbc8e04e58fba65de09ebbe85 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Jul 2003 03:00:39 -0700 Subject: [PATCH] EISA: PCI-EISA dma_mask - Use parent bridge device dma_mask as default for each discovered device. 
--- drivers/eisa/pci_eisa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c index 635c2997a041..3dedb1997b8d 100644 --- a/drivers/eisa/pci_eisa.c +++ b/drivers/eisa/pci_eisa.c @@ -20,7 +20,7 @@ static struct eisa_root_device pci_eisa_root; static int __devinit pci_eisa_init (struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { int rc; @@ -35,6 +35,7 @@ static int __devinit pci_eisa_init (struct pci_dev *pdev, pci_eisa_root.res = pdev->bus->resource[0]; pci_eisa_root.bus_base_addr = pdev->bus->resource[0]->start; pci_eisa_root.slots = EISA_MAX_SLOTS; + pci_eisa_root.dma_mask = pdev->dma_mask; if (eisa_root_register (&pci_eisa_root)) { printk (KERN_ERR "pci_eisa : Could not register EISA root\n"); -- cgit v1.2.3 From c4404d6516d511008abbd1253898759dbab6855d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Jul 2003 03:00:47 -0700 Subject: [PATCH] EISA: avoid unnecessary probing - By default, do not try to probe the bus if the mainboard does not seems to support EISA (allow this behaviour to be changed through a command-line option). --- drivers/eisa/virtual_root.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/eisa/virtual_root.c b/drivers/eisa/virtual_root.c index 12f684f9dc8c..3c384ccfe489 100644 --- a/drivers/eisa/virtual_root.c +++ b/drivers/eisa/virtual_root.c @@ -7,12 +7,22 @@ * This code is released under the GPL version 2. */ +#include #include #include #include #include +#include #include +#if defined(CONFIG_ALPHA_JENSEN) || defined(CONFIG_EISA_VLB_PRIMING) +#define EISA_FORCE_PROBE_DEFAULT 1 +#else +#define EISA_FORCE_PROBE_DEFAULT 0 +#endif + +static int force_probe = EISA_FORCE_PROBE_DEFAULT; + /* The default EISA device parent (virtual root device). * Now use a platform device, since that's the obvious choice. 
*/ @@ -29,6 +39,7 @@ static struct eisa_root_device eisa_bus_root = { .bus_base_addr = 0, .res = &ioport_resource, .slots = EISA_MAX_SLOTS, + .dma_mask = 0xffffffff, }; static int virtual_eisa_root_init (void) @@ -39,6 +50,8 @@ static int virtual_eisa_root_init (void) return r; } + eisa_bus_root.force_probe = force_probe; + eisa_root_dev.dev.driver_data = &eisa_bus_root; if (eisa_root_register (&eisa_bus_root)) { @@ -51,4 +64,6 @@ static int virtual_eisa_root_init (void) return 0; } +module_param (force_probe, int, 0444); + device_initcall (virtual_eisa_root_init); -- cgit v1.2.3 From b79c85248a7cede77706226ec00e4eeb79eadfbe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 4 Jul 2003 03:24:32 -0700 Subject: Go back to defaulting to 6-byte commands for MODE SENSE, since some drivers seem to be unhappy about the 10-byte version. The subsystem configuration can override this (eg USB or ide-scsi). --- drivers/scsi/scsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 5f9307658f4c..6720dd216fdc 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -646,7 +646,7 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED; sdev->use_10_for_rw = 1; - sdev->use_10_for_ms = 1; + sdev->use_10_for_ms = 0; if(sdev->host->hostt->slave_configure) sdev->host->hostt->slave_configure(sdev); -- cgit v1.2.3 From 9e008c3c401ea935ef32593ddeda0a4963fab6f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 4 Jul 2003 03:53:27 -0700 Subject: When forcing through a signal for some thread-synchronous event (ie SIGSEGV, SIGFPE etc that happens as a result of a trap as opposed to an external event), if the signal is blocked we will not invoce a signal handler, we will just kill the thread with the signal. 
This is equivalent to what we do in the SIG_IGN case: you cannot ignore or block synchronous signals, and if you try, we'll just have to kill you. We don't want to handle endless recursive faults, which the old behaviour easily led to if the stack was bad, for example. --- kernel/signal.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 78c4dfa0073c..4768ea5bab22 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -797,10 +797,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) int ret; spin_lock_irqsave(&t->sighand->siglock, flags); - if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) + if (sigismember(&t->blocked, sig) || t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) { t->sighand->action[sig-1].sa.sa_handler = SIG_DFL; - sigdelset(&t->blocked, sig); - recalc_sigpending_tsk(t); + sigdelset(&t->blocked, sig); + recalc_sigpending_tsk(t); + } ret = specific_send_sig_info(sig, info, t); spin_unlock_irqrestore(&t->sighand->siglock, flags); -- cgit v1.2.3 From c7aa953c75afbbd212e5b4e6f859b2d32f55b22e Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 4 Jul 2003 04:57:37 -0700 Subject: [PATCH] wrong pid in siginfo_t If a signal is sent via kill() or tkill() the kernel fills in the wrong PID value in the siginfo_t structure (obviously only if the handler has SA_SIGINFO set). POSIX specifies that the si_pid field is filled with the process ID, and in Linux parlance that's the "thread group" ID, not the thread ID. 
--- kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 4768ea5bab22..dbefbea7623e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2082,7 +2082,7 @@ sys_kill(int pid, int sig) info.si_signo = sig; info.si_errno = 0; info.si_code = SI_USER; - info.si_pid = current->pid; + info.si_pid = current->tgid; info.si_uid = current->uid; return kill_something_info(sig, &info, pid); @@ -2105,7 +2105,7 @@ sys_tkill(int pid, int sig) info.si_signo = sig; info.si_errno = 0; info.si_code = SI_TKILL; - info.si_pid = current->pid; + info.si_pid = current->tgid; info.si_uid = current->uid; read_lock(&tasklist_lock); -- cgit v1.2.3 From d8d90b60f9f123dbee861e7145e7ba508214b528 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:35:49 -0700 Subject: [PATCH] PCI domain scanning fix From: Matthew Wilcox ppc64 oopses on boot because pci_scan_bus_parented() is unexpectedly returning NULL. Change pci_scan_bus_parented() to correctly handle overlapping PCI bus numbers on different domains. 
--- drivers/pci/probe.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index af83b3936f6f..ccd50ca202f8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -637,12 +637,6 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, int bus, { struct pci_bus *b; - if (pci_find_bus(0, bus)) { - /* If we already got to this bus through a different bridge, ignore it */ - DBG("PCI: Bus %02x already known\n", bus); - return NULL; - } - b = pci_alloc_bus(); if (!b) return NULL; @@ -656,6 +650,14 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, int bus, b->sysdata = sysdata; b->ops = ops; + if (pci_find_bus(pci_domain_nr(b), bus)) { + /* If we already got to this bus through a different bridge, ignore it */ + DBG("PCI: Bus %02x already known\n", bus); + kfree(b->dev); + kfree(b); + return NULL; + } + list_add_tail(&b->node, &pci_root_buses); memset(b->dev,0,sizeof(*(b->dev))); -- cgit v1.2.3 From 3faa61fe2ece423aeda58d42f2b8c998cfb9fa3a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:35:55 -0700 Subject: [PATCH] ipc semaphore optimization From: "Chen, Kenneth W" This patch proposes a performance fix for the current IPC semaphore implementation. There are two shortcoming in the current implementation: try_atomic_semop() was called two times to wake up a blocked process, once from the update_queue() (executed from the process that wakes up the sleeping process) and once in the retry part of the blocked process (executed from the block process that gets woken up). A second issue is that when several sleeping processes that are eligible for wake up, they woke up in daisy chain formation and each one in turn to wake up next process in line. However, every time when a process wakes up, it start scans the wait queue from the beginning, not from where it was last scanned. 
This causes large number of unnecessary scanning of the wait queue under a situation of deep wait queue. Blocked processes come and go, but chances are there are still quite a few blocked processes sit at the beginning of that queue. What we are proposing here is to merge the portion of the code in the bottom part of sys_semtimedop() (code that gets executed when a sleeping process gets woken up) into update_queue() function. The benefit is two folds: (1) is to reduce redundant calls to try_atomic_semop() and (2) to increase efficiency of finding eligible processes to wake up and higher concurrency for multiple wake-ups. We have measured that this patch improves throughput for a large application significantly on a industry standard benchmark. This patch is relative to 2.5.72. Any feedback is very much appreciated. Some kernel profile data attached: Kernel profile before optimization: ----------------------------------------------- 0.05 0.14 40805/529060 sys_semop [133] 0.55 1.73 488255/529060 ia64_ret_from_syscall [2] [52] 2.5 0.59 1.88 529060 sys_semtimedop [52] 0.05 0.83 477766/817966 schedule_timeout [62] 0.34 0.46 529064/989340 update_queue [61] 0.14 0.00 1006740/6473086 try_atomic_semop [75] 0.06 0.00 529060/989336 ipcperms [149] ----------------------------------------------- 0.30 0.40 460276/989340 semctl_main [68] 0.34 0.46 529064/989340 sys_semtimedop [52] [61] 1.5 0.64 0.87 989340 update_queue [61] 0.75 0.00 5466346/6473086 try_atomic_semop [75] 0.01 0.11 477676/576698 wake_up_process [146] ----------------------------------------------- 0.14 0.00 1006740/6473086 sys_semtimedop [52] 0.75 0.00 5466346/6473086 update_queue [61] [75] 0.9 0.89 0.00 6473086 try_atomic_semop [75] ----------------------------------------------- Kernel profile with optimization: ----------------------------------------------- 0.03 0.05 26139/503178 sys_semop [155] 0.46 0.92 477039/503178 ia64_ret_from_syscall [2] [61] 1.2 0.48 0.97 503178 sys_semtimedop [61] 0.04 0.79 
470724/784394 schedule_timeout [62] 0.05 0.00 503178/3301773 try_atomic_semop [109] 0.05 0.00 503178/930934 ipcperms [149] 0.00 0.03 32454/460210 update_queue [99] ----------------------------------------------- 0.00 0.03 32454/460210 sys_semtimedop [61] 0.06 0.36 427756/460210 semctl_main [75] [99] 0.4 0.06 0.39 460210 update_queue [99] 0.30 0.00 2798595/3301773 try_atomic_semop [109] 0.00 0.09 470630/614097 wake_up_process [146] ----------------------------------------------- 0.05 0.00 503178/3301773 sys_semtimedop [61] 0.30 0.00 2798595/3301773 update_queue [99] [109] 0.3 0.35 0.00 3301773 try_atomic_semop [109] -----------------------------------------------=20 Both number of function calls to try_atomic_semop() and update_queue() are reduced by 50% as a result of the merge. Execution time of sys_semtimedop is reduced because of the reduction in the low level functions. --- include/linux/sem.h | 1 - ipc/sem.c | 95 +++++++++++++++++++++-------------------------------- 2 files changed, 38 insertions(+), 58 deletions(-) diff --git a/include/linux/sem.h b/include/linux/sem.h index 2821bc07f647..6e13e5efc163 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -109,7 +109,6 @@ struct sem_queue { int id; /* internal sem id */ struct sembuf * sops; /* array of pending operations */ int nsops; /* number of operations */ - int alter; /* operation will alter semaphore */ }; /* Each task has a list of undo requests. They are executed automatically diff --git a/ipc/sem.c b/ipc/sem.c index 07d9a2e054b7..d1a54864f753 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -49,6 +49,10 @@ * increase. If there are decrement operations in the operations * array we do the same as before. * + * With the incarnation of O(1) scheduler, it becomes unnecessary to perform + * check/retry algorithm for waking up blocked processes as the new scheduler + * is better at handling thread switch than the old one. 
+ * * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie * * SMP-threaded, sysctl's added @@ -258,8 +262,7 @@ static inline void remove_from_queue (struct sem_array * sma, */ static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, - int nsops, struct sem_undo *un, int pid, - int do_undo) + int nsops, struct sem_undo *un, int pid) { int result, sem_op; struct sembuf *sop; @@ -289,10 +292,6 @@ static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, curr->semval = result; } - if (do_undo) { - result = 0; - goto undo; - } sop--; while (sop >= sops) { sma->sem_base[sop->sem_num].sempid = pid; @@ -334,23 +333,14 @@ static void update_queue (struct sem_array * sma) for (q = sma->sem_pending; q; q = q->next) { - if (q->status == 1) - continue; /* this one was woken up before */ - error = try_atomic_semop(sma, q->sops, q->nsops, - q->undo, q->pid, q->alter); + q->undo, q->pid); /* Does q->sleeper still need to sleep? */ if (error <= 0) { - /* Found one, wake it up */ - wake_up_process(q->sleeper); - if (error == 0 && q->alter) { - /* if q-> alter let it self try */ - q->status = 1; - return; - } q->status = error; remove_from_queue(sma,q); + wake_up_process(q->sleeper); } } } @@ -1062,7 +1052,7 @@ retry_undos: if (error) goto out_unlock_free; - error = try_atomic_semop (sma, sops, nsops, un, current->pid, 0); + error = try_atomic_semop (sma, sops, nsops, un, current->pid); if (error <= 0) goto update; @@ -1075,55 +1065,46 @@ retry_undos: queue.nsops = nsops; queue.undo = un; queue.pid = current->pid; - queue.alter = decrease; queue.id = semid; if (alter) append_to_queue(sma ,&queue); else prepend_to_queue(sma ,&queue); - for (;;) { - queue.status = -EINTR; - queue.sleeper = current; - current->state = TASK_INTERRUPTIBLE; - sem_unlock(sma); + queue.status = -EINTR; + queue.sleeper = current; + current->state = TASK_INTERRUPTIBLE; + sem_unlock(sma); - if (timeout) - jiffies_left = schedule_timeout(jiffies_left); - else - schedule(); 
+ if (timeout) + jiffies_left = schedule_timeout(jiffies_left); + else + schedule(); - sma = sem_lock(semid); - if(sma==NULL) { - if(queue.prev != NULL) - BUG(); - error = -EIDRM; - goto out_free; - } - /* - * If queue.status == 1 we where woken up and - * have to retry else we simply return. - * If an interrupt occurred we have to clean up the - * queue - * - */ - if (queue.status == 1) - { - error = try_atomic_semop (sma, sops, nsops, un, - current->pid,0); - if (error <= 0) - break; - } else { - error = queue.status; - if (error == -EINTR && timeout && jiffies_left == 0) - error = -EAGAIN; - if (queue.prev) /* got Interrupt */ - break; - /* Everything done by update_queue */ - goto out_unlock_free; - } + sma = sem_lock(semid); + if(sma==NULL) { + if(queue.prev != NULL) + BUG(); + error = -EIDRM; + goto out_free; + } + + /* + * If queue.status != -EINTR we are woken up by another process + */ + error = queue.status; + if (queue.status != -EINTR) { + goto out_unlock_free; } + + /* + * If an interrupt occurred we have to clean up the queue + */ + if (timeout && jiffies_left == 0) + error = -EAGAIN; remove_from_queue(sma,&queue); + goto out_unlock_free; + update: if (alter) update_queue (sma); -- cgit v1.2.3 From 3abbd8ff39f3da75117a35ac50020818ff3ef7a6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:36:03 -0700 Subject: [PATCH] bring back the batch_requests function From: Nick Piggin The batch_requests function got lost during the merge of the dynamic request allocation patch. We need it for the anticipatory scheduler - when the number of threads exceeds the number of requests, the anticipated-upon task will undesirably sleep in get_request_wait(). And apparently some block devices which use small requests need it so they string a decent number together. Jens has acked this patch. 
--- drivers/block/ll_rw_blk.c | 34 ++++++++++++++++++++++++++++++---- include/linux/blkdev.h | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 0f11567e5277..dfd489ea0234 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -51,6 +51,11 @@ static int queue_nr_requests; unsigned long blk_max_low_pfn, blk_max_pfn; static wait_queue_head_t congestion_wqh[2]; +static inline int batch_requests(void) +{ + return min(BLKDEV_MAX_RQ / 8, 8); +} + /* * Return the threshold (number of free requests) at which the queue is * considered to be congested. It include a little hysteresis to keep the @@ -1180,6 +1185,8 @@ static int blk_init_free_list(request_queue_t *q) struct request_list *rl = &q->rq; rl->count[READ] = rl->count[WRITE] = 0; + init_waitqueue_head(&rl->wait[READ]); + init_waitqueue_head(&rl->wait[WRITE]); rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep); @@ -1325,18 +1332,33 @@ out: } /* - * No available requests for this queue, unplug the device. + * No available requests for this queue, unplug the device and wait for some + * requests to become available. */ static struct request *get_request_wait(request_queue_t *q, int rw) { + DEFINE_WAIT(wait); struct request *rq; generic_unplug_device(q); do { rq = get_request(q, rw, GFP_NOIO); - if (!rq) - blk_congestion_wait(rw, HZ / 50); + if (!rq) { + struct request_list *rl = &q->rq; + + prepare_to_wait_exclusive(&rl->wait[rw], &wait, + TASK_UNINTERRUPTIBLE); + /* + * If _all_ the requests were suddenly returned then + * no wakeup will be delivered. So now we're on the + * waitqueue, go check for that. 
+ */ + rq = get_request(q, rw, GFP_ATOMIC & ~__GFP_HIGH); + if (!rq) + io_schedule(); + finish_wait(&rl->wait[rw], &wait); + } } while (!rq); return rq; @@ -1498,8 +1520,12 @@ void __blk_put_request(request_queue_t *q, struct request *req) blk_free_request(q, req); rl->count[rw]--; - if ((BLKDEV_MAX_RQ - rl->count[rw]) >= queue_congestion_off_threshold()) + if ((BLKDEV_MAX_RQ - rl->count[rw]) >= + queue_congestion_off_threshold()) clear_queue_congested(q, rw); + if ((BLKDEV_MAX_RQ - rl->count[rw]) >= batch_requests() && + waitqueue_active(&rl->wait[rw])) + wake_up(&rl->wait[rw]); } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 786ea3563752..621a5b042a9c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -27,6 +27,7 @@ struct request_pm_state; struct request_list { int count[2]; mempool_t *rq_pool; + wait_queue_head_t wait[2]; }; /* -- cgit v1.2.3 From 33c664854c9c467f4c30fe038c2afa12cc126311 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:36:09 -0700 Subject: [PATCH] Create `kblockd' workqueue keventd is inappropriate for running block request queues because keventd itself can get blocked on disk I/O. Via call_usermodehelper()'s vfork and, presumably, GFP_KERNEL allocations. So create a new gang of kernel threads whose mandate is for running low-level disk operations. It must never block on disk IO, so any memory allocations should be GFP_NOIO. We mainly use it for running unplug operations from interrupt context. --- drivers/block/Makefile | 5 +++++ drivers/block/ll_rw_blk.c | 24 ++++++++++++++++++++++-- include/linux/blkdev.h | 4 ++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/block/Makefile b/drivers/block/Makefile index c723e8ecc584..67c567bc9308 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -8,6 +8,11 @@ # In the future, some of these should be built conditionally. 
# +# +# NOTE that ll_rw_blk.c must come early in linkage order - it starts the +# kblockd threads +# + obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o deadline-iosched.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index dfd489ea0234..3e68ceb9578c 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -48,9 +48,15 @@ static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; */ static int queue_nr_requests; -unsigned long blk_max_low_pfn, blk_max_pfn; static wait_queue_head_t congestion_wqh[2]; +/* + * Controlling structure to kblockd + */ +static struct workqueue_struct *kblockd_workqueue; + +unsigned long blk_max_low_pfn, blk_max_pfn; + static inline int batch_requests(void) { return min(BLKDEV_MAX_RQ / 8, 8); @@ -2308,10 +2314,24 @@ void blk_rq_prep_restart(struct request *rq) rq->current_nr_sectors = rq->hard_cur_sectors; } +int kblockd_schedule_work(struct work_struct *work) +{ + return queue_work(kblockd_workqueue, work); +} + +void kblockd_flush(void) +{ + flush_workqueue(kblockd_workqueue); +} + int __init blk_dev_init(void) { int i; + kblockd_workqueue = create_workqueue("kblockd"); + if (!kblockd_workqueue) + panic("Failed to create kblockd\n"); + request_cachep = kmem_cache_create("blkdev_requests", sizeof(struct request), 0, 0, NULL, NULL); if (!request_cachep) @@ -2331,7 +2351,7 @@ int __init blk_dev_init(void) for (i = 0; i < ARRAY_SIZE(congestion_wqh); i++) init_waitqueue_head(&congestion_wqh[i]); return 0; -}; +} EXPORT_SYMBOL(process_that_request_first); EXPORT_SYMBOL(end_that_request_first); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 621a5b042a9c..e97790517973 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,6 +561,10 @@ static inline void put_dev_sector(Sector p) page_cache_release(p.v); } +struct work_struct; +int kblockd_schedule_work(struct work_struct *work); +void kblockd_flush(void); 
+ #ifdef CONFIG_LBD # include # define sector_div(a, b) do_div(a, b) -- cgit v1.2.3 From 7d2483a936d9012f0a8ed9b61e191d6b827a66d2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:36:16 -0700 Subject: [PATCH] elv_may_queue() API function Introduces the elv_may_queue() predicate with which the IO scheduler may tell the generic request layer that we may add another request to this queue. It is used by the CFQ elevator. --- drivers/block/elevator.c | 10 ++++++++++ drivers/block/ll_rw_blk.c | 2 +- include/linux/elevator.h | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 00b56d022b0e..bf40a06781d5 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -361,6 +361,16 @@ void elv_put_request(request_queue_t *q, struct request *rq) e->elevator_put_req_fn(q, rq); } +int elv_may_queue(request_queue_t *q, int rw) +{ + elevator_t *e = &q->elevator; + + if (e->elevator_may_queue_fn) + return e->elevator_may_queue_fn(q, rw); + + return 1; +} + int elv_register_queue(struct gendisk *disk) { request_queue_t *q = disk->queue; diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 3e68ceb9578c..1debfebc2f57 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1294,7 +1294,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) struct request_list *rl = &q->rq; spin_lock_irq(q->queue_lock); - if (rl->count[rw] == BLKDEV_MAX_RQ) { + if (rl->count[rw] == BLKDEV_MAX_RQ || !elv_may_queue(q, rw)) { spin_unlock_irq(q->queue_lock); goto out; } diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 522e51609ef3..66bedb242218 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -15,6 +15,8 @@ typedef int (elevator_queue_empty_fn) (request_queue_t *); typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *); typedef struct request *(elevator_request_list_fn) 
(request_queue_t *, struct request *); typedef struct list_head *(elevator_get_sort_head_fn) (request_queue_t *, struct request *); +typedef int (elevator_may_queue_fn) (request_queue_t *, int); + typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); @@ -39,6 +41,8 @@ struct elevator_s elevator_set_req_fn *elevator_set_req_fn; elevator_put_req_fn *elevator_put_req_fn; + elevator_may_queue_fn *elevator_may_queue_fn; + elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; @@ -64,6 +68,7 @@ extern struct request *elv_former_request(request_queue_t *, struct request *); extern struct request *elv_latter_request(request_queue_t *, struct request *); extern int elv_register_queue(struct gendisk *); extern void elv_unregister_queue(struct gendisk *); +extern int elv_may_queue(request_queue_t *, int); extern int elv_set_request(request_queue_t *, struct request *, int); extern void elv_put_request(request_queue_t *, struct request *); -- cgit v1.2.3 From 104e6fdc6f35ea08e1c6ed03158b336b2e9983ed Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:36:23 -0700 Subject: [PATCH] elevator completion API From: Nick Piggin Introduces an elevator_completed_req() callback with which the generic queueing layer may tell an IO scheduler that a particular request has finished.
--- drivers/block/elevator.c | 9 +++++++++ drivers/block/ll_rw_blk.c | 2 ++ include/linux/elevator.h | 3 +++ 3 files changed, 14 insertions(+) diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index bf40a06781d5..406755724e03 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -371,6 +371,14 @@ int elv_may_queue(request_queue_t *q, int rw) return 1; } +void elv_completed_request(request_queue_t *q, struct request *rq) +{ + elevator_t *e = &q->elevator; + + if (e->elevator_completed_req_fn) + e->elevator_completed_req_fn(q, rq); +} + int elv_register_queue(struct gendisk *disk) { request_queue_t *q = disk->queue; @@ -418,5 +426,6 @@ EXPORT_SYMBOL(__elv_add_request); EXPORT_SYMBOL(elv_next_request); EXPORT_SYMBOL(elv_remove_request); EXPORT_SYMBOL(elv_queue_empty); +EXPORT_SYMBOL(elv_completed_request); EXPORT_SYMBOL(elevator_exit); EXPORT_SYMBOL(elevator_init); diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 1debfebc2f57..71750da0db6d 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1510,6 +1510,8 @@ void __blk_put_request(request_queue_t *q, struct request *req) if (unlikely(--req->ref_count)) return; + elv_completed_request(req->q, req); + req->rq_status = RQ_INACTIVE; req->q = NULL; req->rl = NULL; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 66bedb242218..07de69c1ef8a 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -15,6 +15,7 @@ typedef int (elevator_queue_empty_fn) (request_queue_t *); typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *); typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); typedef struct list_head *(elevator_get_sort_head_fn) (request_queue_t *, struct request *); +typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); typedef int (elevator_may_queue_fn) (request_queue_t *, int); typedef int (elevator_set_req_fn) 
(request_queue_t *, struct request *, int); @@ -34,6 +35,7 @@ struct elevator_s elevator_remove_req_fn *elevator_remove_req_fn; elevator_queue_empty_fn *elevator_queue_empty_fn; + elevator_completed_req_fn *elevator_completed_req_fn; elevator_request_list_fn *elevator_former_req_fn; elevator_request_list_fn *elevator_latter_req_fn; @@ -69,6 +71,7 @@ extern struct request *elv_latter_request(request_queue_t *, struct request *); extern int elv_register_queue(struct gendisk *); extern void elv_unregister_queue(struct gendisk *); extern int elv_may_queue(request_queue_t *, int); +extern void elv_completed_request(request_queue_t *, struct request *); extern int elv_set_request(request_queue_t *, struct request *, int); extern void elv_put_request(request_queue_t *, struct request *); -- cgit v1.2.3 From 97ff29c22ec3df25621561194692e7e945fcf489 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:36:30 -0700 Subject: [PATCH] anticipatory I/O scheduler From: Nick Piggin This is the core anticipatory IO scheduler. There are nearly 100 changesets in this and five months work. I really cannot describe it fully here. Major points: - It works by recognising that reads are dependent: we don't know where the next read will occur, but it's probably close-by the previous one. So once a read has completed we leave the disk idle, anticipating that a request for a nearby read will come in. - There is read batching and write batching logic. - when we're servicing a batch of writes we will refuse to seek away for a read for some tens of milliseconds. Then the write stream is preempted. - when we're servicing a batch of reads (via anticipation) we'll do that for some tens of milliseconds, then preempt. - There are request deadlines, for latency and fairness. The oldest outstanding request is examined at regular intervals. If this request is older than a specific deadline, it will be the next one dispatched. 
This gives a good fairness heuristic while being simple because processes tend to have localised IO. Just about all of the rest of the complexity involves an array of fixups which prevent most of the obvious failure modes with anticipation: trying to not leave the disk head pointlessly idle. Some of these algorithms are: - Process tracking. If the process whose read we are anticipating submits a write, abandon anticipation. - Process exit tracking. If the process whose read we are anticipating exits, abandon anticipation. - Process IO history. We accumulate statistical info on the process's recent IO patterns to aid in making decisions about how long to anticipate new reads. Currently thinktime and seek distance are tracked. Thinktime is the time between when a process's last request has completed and when it submits another one. Seek distance is simply the number of sectors between each read request. If either statistic becomes too high, then it isn't anticipated that the process will submit another read. The above all means that we need a per-process "io context". This is a fully refcounted structure. In this patch it is AS-only. Later we generalise it a little so other IO schedulers could use the same framework. - Requests are grouped as synchronous and asynchronous whereas deadline scheduler groups requests as reads and writes. This can provide better sync write performance, and may give better responsiveness with journalling filesystems (although we haven't done that yet). We currently detect synchronous writes by nastily setting PF_SYNCWRITE in current->flags. The plan is to remove this later, and to propagate the sync hint from writeback_control.sync_mode into bio->bi_flags thence into request->flags. Once that is done, direct-io needs to set the BIO sync hint as well. - There is also quite a bit of complexity gone into bashing TCQ into submission. Timing for a read batch is not started until the first read request actually completes.
A read batch also does not start until all outstanding writes have completed. AS is the default IO scheduler. deadline may be chosen by booting with "elevator=deadline". There are a few reasons for retaining deadline: - AS is often slower than deadline in random IO loads with large TCQ windows. The usual real world task here is OLTP database loads. - deadline is presumably more stable. - deadline is much simpler. The tunable per-queue entries under /sys/block/*/iosched/ are all in milliseconds: * read_expire Controls how long until a request becomes "expired". It also controls the interval between which expired requests are served, so set to 50, a request might take anywhere < 100ms to be serviced _if_ it is the next on the expired list. Obviously it can't make the disk go faster. Result is basically the timeslice a reader gets in the presence of other IO. 100*((seek time / read_expire) + 1) is very roughly the % streaming read efficiency your disk should get in the presence of multiple readers. * read_batch_expire Controls how much time a batch of reads is given before pending writes are served. Higher value is more efficient. Shouldn't really be below read_expire. * write_ versions of the above * antic_expire Controls the maximum amount of time we can anticipate a good read before giving up. Many other factors may cause anticipation to be stopped early, or some processes will not be "anticipated" at all. Should be a bit higher for big seek time devices though not a linear correspondence - most processes have only a few ms thinktime.
--- drivers/block/Makefile | 3 +- drivers/block/as-iosched.c | 1888 ++++++++++++++++++++++++++++++++++++++++++++ drivers/block/ll_rw_blk.c | 25 +- fs/buffer.c | 4 + fs/fs-writeback.c | 2 + include/linux/elevator.h | 5 + include/linux/sched.h | 5 + kernel/exit.c | 2 + kernel/fork.c | 1 + 9 files changed, 1932 insertions(+), 3 deletions(-) create mode 100644 drivers/block/as-iosched.c diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 67c567bc9308..4733ec79a6d0 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -13,7 +13,8 @@ # kblockd threads # -obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o deadline-iosched.o +obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o \ + deadline-iosched.o as-iosched.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c new file mode 100644 index 000000000000..e6af1f822630 --- /dev/null +++ b/drivers/block/as-iosched.c @@ -0,0 +1,1888 @@ +/* + * linux/drivers/block/as-iosched.c + * + * Anticipatory & deadline i/o scheduler. + * + * Copyright (C) 2002 Jens Axboe + * Nick Piggin + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define REQ_SYNC 1 +#define REQ_ASYNC 0 + +/* + * See Documentation/as-iosched.txt + */ + +/* + * max time before a read is submitted. + */ +#define default_read_expire (HZ / 20) + +/* + * ditto for writes, these limits are not hard, even + * if the disk is capable of satisfying them. + */ +#define default_write_expire (HZ / 5) + +/* + * read_batch_expire describes how long we will allow a stream of reads to + * persist before looking to see whether it is time to switch over to writes. + */ +#define default_read_batch_expire (HZ / 5) + +/* + * write_batch_expire describes how long we want a stream of writes to run for. 
+ * This is not a hard limit, but a target we set for the auto-tuning thingy. + * See, the problem is: we can send a lot of writes to disk cache / TCQ in + * a short amount of time... + */ +#define default_write_batch_expire (HZ / 20) + +/* + * max time we may wait to anticipate a read (default around 6ms) + */ +#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1) + +/* + * This is the per-process anticipatory I/O scheduler state. It is refcounted + * and kmalloc'ed. + * + * There is no locking protecting the contents of this structure! Pointers + * to a single as_io_context may appear in multiple queues at once. + */ + +/* + * Keep track of up to 20ms thinktimes. We can go as big as we like here, + * however huge values tend to interfere and not decay fast enough. A program + * might be in a non-io phase of operation. Waiting on user input for example, + * or doing a lengthy computation. A small penalty can be justified there, and + * will still catch out those processes that constantly have large thinktimes. 
+ */ +#define MAX_THINKTIME (HZ/50UL) + +/* Bits in as_io_context.state */ +enum as_io_states { + AS_TASK_RUNNING=0, /* Process has not exitted */ + AS_TASK_IORUNNING, /* Process has completed some IO */ +}; + +struct as_io_context { + atomic_t refcount; + pid_t pid; + unsigned long state; + atomic_t nr_queued; /* queued reads & sync writes */ + atomic_t nr_dispatched; /* number of requests gone to the drivers */ + + spinlock_t lock; + + /* IO History tracking */ + /* Thinktime */ + unsigned long last_end_request; + unsigned long ttime_total; + unsigned long ttime_samples; + unsigned long ttime_mean; + /* Layout pattern */ + long seek_samples; + sector_t last_request_pos; + sector_t seek_total; + sector_t seek_mean; +}; + +enum anticipation_status { + ANTIC_OFF=0, /* Not anticipating (normal operation) */ + ANTIC_WAIT_REQ, /* The last read has not yet completed */ + ANTIC_WAIT_NEXT, /* Currently anticipating a request vs + last read (which has completed) */ + ANTIC_FINISHED, /* Anticipating but have found a candidate + * or timed out */ +}; + +struct as_data { + /* + * run time data + */ + + struct request_queue *q; /* the "owner" queue */ + + /* + * requests (as_rq s) are present on both sort_list and fifo_list + */ + struct rb_root sort_list[2]; + struct list_head fifo_list[2]; + + struct as_rq *next_arq[2]; /* next in sort order */ + sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ + struct list_head *dispatch; /* driver dispatch queue */ + struct list_head *hash; /* request hash */ + unsigned long hash_valid_count; /* barrier hash count */ + unsigned long current_batch_expires; + unsigned long last_check_fifo[2]; + int changed_batch; + int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */ + int write_batch_count; /* max # of reqs in a write batch */ + int current_write_count; /* how many requests left this batch */ + int write_batch_idled; /* has the write batch gone idle? 
*/ + mempool_t *arq_pool; + + enum anticipation_status antic_status; + unsigned long antic_start; /* jiffies: when it started */ + struct timer_list antic_timer; /* anticipatory scheduling timer */ + struct work_struct antic_work; /* Deferred unplugging */ + struct as_io_context *as_io_context;/* Identify the expected process */ + int aic_finished; /* IO associated with as_io_context finished */ + int nr_dispatched; + + /* + * settings that change how the i/o scheduler behaves + */ + unsigned long fifo_expire[2]; + unsigned long batch_expire[2]; + unsigned long antic_expire; +}; + +#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo) + +/* + * per-request data. + */ +enum arq_state { + AS_RQ_NEW=0, /* New - not referenced and not on any lists */ + AS_RQ_QUEUED, /* In the request queue. It belongs to the + scheduler */ + AS_RQ_DISPATCHED, /* On the dispatch list. It belongs to the + driver now */ +}; + +struct as_rq { + /* + * rbtree index, key is the starting offset + */ + struct rb_node rb_node; + sector_t rb_key; + + struct request *request; + + struct as_io_context *as_io_context; /* The submitting task */ + + /* + * request hash, key is the ending offset (for back merge lookup) + */ + struct list_head hash; + unsigned long hash_valid_count; + + /* + * expire fifo + */ + struct list_head fifo; + unsigned long expires; + + int is_sync; + enum arq_state state; /* debug only */ +}; + +#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private) + +static kmem_cache_t *arq_pool; + +/* + * IO Context helper functions + */ +/* Debug */ +static atomic_t nr_as_io_requests = ATOMIC_INIT(0); + +static void put_as_io_context(struct as_io_context **paic) +{ + struct as_io_context *aic = *paic; + + if (aic == NULL) + return; + + BUG_ON(atomic_read(&aic->refcount) == 0); + + if (atomic_dec_and_test(&aic->refcount)) { + WARN_ON(atomic_read(&nr_as_io_requests) == 0); + atomic_dec(&nr_as_io_requests); + kfree(aic); + } +} + +/* Called by the exitting task */ +void 
exit_as_io_context(void) +{ + unsigned long flags; + struct as_io_context *aic; + + local_irq_save(flags); + aic = current->as_io_context; + if (aic) { + clear_bit(AS_TASK_RUNNING, &aic->state); + put_as_io_context(&aic); + current->as_io_context = NULL; + } + local_irq_restore(flags); +} + +/* + * If the current task has no IO context then create one and initialise it. + * If it does have a context, take a ref on it. + * + * This is always called in the context of the task which submitted the I/O. + * But weird things happen, so we disable local interrupts to ensure exclusive + * access to *current. + */ +static struct as_io_context *get_as_io_context(void) +{ + struct task_struct *tsk = current; + unsigned long flags; + struct as_io_context *ret; + + local_irq_save(flags); + ret = tsk->as_io_context; + if (ret == NULL) { + ret = kmalloc(sizeof(*ret), GFP_ATOMIC); + if (ret) { + atomic_inc(&nr_as_io_requests); + atomic_set(&ret->refcount, 1); + ret->pid = tsk->pid; + ret->state = 1 << AS_TASK_RUNNING; + atomic_set(&ret->nr_queued, 0); + atomic_set(&ret->nr_dispatched, 0); + spin_lock_init(&ret->lock); + ret->ttime_total = 0; + ret->ttime_samples = 0; + ret->ttime_mean = 0; + ret->seek_total = 0; + ret->seek_samples = 0; + ret->seek_mean = 0; + tsk->as_io_context = ret; + } + } + local_irq_restore(flags); + atomic_inc(&ret->refcount); + return ret; +} + +static void +copy_as_io_context(struct as_io_context **pdst, struct as_io_context **psrc) +{ + struct as_io_context *src = *psrc; + + if (src) { + BUG_ON(atomic_read(&src->refcount) == 0); + atomic_inc(&src->refcount); + put_as_io_context(pdst); + *pdst = src; + } +} + +static void +swap_as_io_context(struct as_io_context **aic1, struct as_io_context **aic2) +{ + struct as_io_context *temp; + temp = *aic1; + *aic1 = *aic2; + *aic2 = temp; +} + +/* + * the back merge hash support functions + */ +static const int as_hash_shift = 6; +#define AS_HASH_BLOCK(sec) ((sec) >> 3) +#define AS_HASH_FN(sec) 
(hash_long(AS_HASH_BLOCK((sec)), as_hash_shift)) +#define AS_HASH_ENTRIES (1 << as_hash_shift) +#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) +#define list_entry_hash(ptr) list_entry((ptr), struct as_rq, hash) +#define ON_HASH(arq) (arq)->hash_valid_count + +#define AS_INVALIDATE_HASH(ad) \ + do { \ + if (!++(ad)->hash_valid_count) \ + (ad)->hash_valid_count = 1; \ + } while (0) + +static inline void __as_del_arq_hash(struct as_rq *arq) +{ + arq->hash_valid_count = 0; + list_del_init(&arq->hash); +} + +static inline void as_del_arq_hash(struct as_rq *arq) +{ + if (ON_HASH(arq)) + __as_del_arq_hash(arq); +} + +static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq) +{ + as_del_arq_hash(arq); + + if (q->last_merge == &arq->request->queuelist) + q->last_merge = NULL; +} + +static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) +{ + struct request *rq = arq->request; + + BUG_ON(ON_HASH(arq)); + + arq->hash_valid_count = ad->hash_valid_count; + list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]); +} + +/* + * move hot entry to front of chain + */ +static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq) +{ + struct request *rq = arq->request; + struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))]; + + if (!ON_HASH(arq)) { + WARN_ON(1); + return; + } + + if (arq->hash.prev != head) { + list_del(&arq->hash); + list_add(&arq->hash, head); + } +} + +static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset) +{ + struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)]; + struct list_head *entry, *next = hash_list->next; + + while ((entry = next) != hash_list) { + struct as_rq *arq = list_entry_hash(entry); + struct request *__rq = arq->request; + + next = entry->next; + + BUG_ON(!ON_HASH(arq)); + + if (!rq_mergeable(__rq) + || arq->hash_valid_count != ad->hash_valid_count) { + __as_del_arq_hash(arq); + continue; + } + + if (rq_hash_key(__rq) == offset) + return __rq; + } + + 
return NULL; +} + +/* + * rb tree support functions + */ +#define RB_NONE (2) +#define RB_EMPTY(root) ((root)->rb_node == NULL) +#define ON_RB(node) ((node)->rb_color != RB_NONE) +#define RB_CLEAR(node) ((node)->rb_color = RB_NONE) +#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node) +#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync]) +#define rq_rb_key(rq) (rq)->sector + +/* + * as_find_first_arq finds the first (lowest sector numbered) request + * for the specified data_dir. Used to sweep back to the start of the disk + * (1-way elevator) after we process the last (highest sector) request. + */ +static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) +{ + struct rb_node *n = ad->sort_list[data_dir].rb_node; + + if (n == NULL) + return NULL; + + for (;;) { + if (n->rb_left == NULL) + return rb_entry_arq(n); + + n = n->rb_left; + } +} + +static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +{ + struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; + struct rb_node *parent = NULL; + struct as_rq *__arq; + + while (*p) { + parent = *p; + __arq = rb_entry_arq(parent); + + if (arq->rb_key < __arq->rb_key) + p = &(*p)->rb_left; + else if (arq->rb_key > __arq->rb_key) + p = &(*p)->rb_right; + else + return __arq; + } + + rb_link_node(&arq->rb_node, parent, p); + return 0; +} + +static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); +/* + * Add the request to the rb tree if it is unique. If there is an alias (an + * existing request against the same sector), which can happen when using + * direct IO, then move the alias to the dispatch list and then add the + * request. 
+ */ +static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +{ + struct as_rq *alias; + struct request *rq = arq->request; + + arq->rb_key = rq_rb_key(rq); + + /* This can be caused by direct IO */ + while ((alias = __as_add_arq_rb(ad, arq))) + as_move_to_dispatch(ad, alias); + + rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); +} + +static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) +{ + if (!ON_RB(&arq->rb_node)) { + WARN_ON(1); + return; + } + + rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); + RB_CLEAR(&arq->rb_node); +} + +static struct request * +as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) +{ + struct rb_node *n = ad->sort_list[data_dir].rb_node; + struct as_rq *arq; + + while (n) { + arq = rb_entry_arq(n); + + if (sector < arq->rb_key) + n = n->rb_left; + else if (sector > arq->rb_key) + n = n->rb_right; + else + return arq->request; + } + + return NULL; +} + +/* + * IO Scheduler proper + */ + +#define MAXBACK (1024 * 1024) /* + * Maximum distance the disk will go backward + * for a request. + */ + +/* + * as_choose_req selects the preferred one of two requests of the same data_dir + * ignoring time - eg. timeouts, which is the job of as_dispatch_request + */ +static struct as_rq * +as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) +{ + int data_dir; + sector_t last, s1, s2, d1, d2; + int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */ + const sector_t maxback = MAXBACK; + + if (arq1 == NULL || arq1 == arq2) + return arq2; + if (arq2 == NULL) + return arq1; + + data_dir = arq1->is_sync; + + last = ad->last_sector[data_dir]; + s1 = arq1->request->sector; + s2 = arq2->request->sector; + + BUG_ON(data_dir != arq2->is_sync); + + /* + * Strict one way elevator _except_ in the case where we allow + * short backward seeks which are biased as twice the cost of a + * similar forward seek. 
+ */ + if (s1 >= last) + d1 = s1 - last; + else if (s1+maxback >= last) + d1 = (last - s1)*2; + else { + r1_wrap = 1; + d1 = 0; /* shut up, gcc */ + } + + if (s2 >= last) + d2 = s2 - last; + else if (s2+maxback >= last) + d2 = (last - s2)*2; + else { + r2_wrap = 1; + d2 = 0; + } + + /* Found required data */ + if (!r1_wrap && r2_wrap) + return arq1; + else if (!r2_wrap && r1_wrap) + return arq2; + else if (r1_wrap && r2_wrap) { + /* both behind the head */ + if (s1 <= s2) + return arq1; + else + return arq2; + } + + /* Both requests in front of the head */ + if (d1 < d2) + return arq1; + else if (d2 < d1) + return arq2; + else { + if (s1 >= s2) + return arq1; + else + return arq2; + } +} + +/* + * as_find_next_arq finds the next request after @prev in elevator order. + * this with as_choose_req form the basis for how the scheduler chooses + * what request to process next. Anticipation works on top of this. + */ +static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last) +{ + const int data_dir = last->is_sync; + struct as_rq *ret; + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); + struct as_rq *arq_next, *arq_prev; + + BUG_ON(!ON_RB(&last->rb_node)); + + if (rbprev) + arq_prev = rb_entry_arq(rbprev); + else + arq_prev = NULL; + + if (rbnext) + arq_next = rb_entry_arq(rbnext); + else { + arq_next = as_find_first_arq(ad, data_dir); + if (arq_next == last) + arq_next = NULL; + } + + ret = as_choose_req(ad, arq_next, arq_prev); + + return ret; +} + +/* + * anticipatory scheduling functions follow + */ + +/* + * as_antic_expired tells us when we have anticipated too long. + * The funny "absolute difference" math on the elapsed time is to handle + * jiffy wraps, and disks which have been idle for 0x80000000 jiffies. 
+ */ +static int as_antic_expired(struct as_data *ad) +{ + long delta_jif; + + delta_jif = jiffies - ad->antic_start; + if (unlikely(delta_jif < 0)) + delta_jif = -delta_jif; + if (delta_jif < ad->antic_expire) + return 0; + + return 1; +} + +/* + * as_antic_waitnext starts anticipating that a nice request will soon be + * submitted. See also as_antic_waitreq + */ +static void as_antic_waitnext(struct as_data *ad) +{ + unsigned long timeout; + + BUG_ON(ad->antic_status != ANTIC_OFF + && ad->antic_status != ANTIC_WAIT_REQ); + + timeout = ad->antic_start + ad->antic_expire; + + mod_timer(&ad->antic_timer, timeout); + + ad->antic_status = ANTIC_WAIT_NEXT; +} + +/* + * as_antic_waitreq starts anticipating. We don't start timing the anticipation + * until the request that we're anticipating on has finished. This means we + * are timing from when the candidate process wakes up hopefully. + */ +static void as_antic_waitreq(struct as_data *ad) +{ + BUG_ON(ad->antic_status == ANTIC_FINISHED); + if (ad->antic_status == ANTIC_OFF) { + if (!ad->as_io_context || ad->aic_finished) + as_antic_waitnext(ad); + else + ad->antic_status = ANTIC_WAIT_REQ; + } +} + +/* + * This is called directly by the functions in this file to stop anticipation. + * We kill the timer and schedule a call to the request_fn asap. + */ +static void as_antic_stop(struct as_data *ad) +{ + int status = ad->antic_status; + + if (status == ANTIC_WAIT_REQ || status == ANTIC_WAIT_NEXT) { + if (status == ANTIC_WAIT_NEXT) + del_timer(&ad->antic_timer); + ad->antic_status = ANTIC_FINISHED; + /* see as_work_handler */ + kblockd_schedule_work(&ad->antic_work); + } +} + +/* + * as_antic_timeout is the timer function set by as_antic_waitnext. 
+ */ +static void as_antic_timeout(unsigned long data) +{ + struct request_queue *q = (struct request_queue *)data; + struct as_data *ad = q->elevator.elevator_data; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT) { + ad->antic_status = ANTIC_FINISHED; + kblockd_schedule_work(&ad->antic_work); + } + spin_unlock_irqrestore(q->queue_lock, flags); +} + +/* + * as_close_req decides if one request is considered "close" to the + * previous one issued. + */ +static int as_close_req(struct as_data *ad, struct as_rq *arq) +{ + unsigned long delay; /* milliseconds */ + sector_t last = ad->last_sector[ad->batch_data_dir]; + sector_t next = arq->request->sector; + sector_t delta; /* acceptable close offset (in sectors) */ + + if (ad->antic_status == ANTIC_OFF || !ad->aic_finished) + delay = 0; + else + delay = ((jiffies - ad->antic_start) * 1000) / HZ; + + if (delay <= 1) + delta = 64; + else if (delay <= 20 && delay <= ad->antic_expire) + delta = 64 << (delay-1); + else + return 1; + + return (last - (delta>>1) <= next) && (next <= last + delta); +} + +/* + * as_can_break_anticipation returns true if we have been anticipating this + * request. + * + * It also returns true if the process against which we are anticipating + * submits a write - that's presumably an fsync, O_SYNC write, etc. We want to + * dispatch it ASAP, because we know that application will not be submitting + * any new reads. + * + * If the task which has submitted the request has exitted, break anticipation. + * + * If this task has queued some other IO, do not enter enticipation. 
+ */ +static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) +{ + struct as_io_context *aic; + + if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, arq)) { + /* close request */ + return 1; + } + + if (ad->aic_finished && as_antic_expired(ad)) { + /* + * In this situation status should really be FINISHED, + * however the timer hasn't had the chance to run yet. + */ + return 1; + } + + aic = ad->as_io_context; + BUG_ON(!aic); + + if (arq && aic == arq->as_io_context) { + /* request from same process */ + return 1; + } + + if (!test_bit(AS_TASK_RUNNING, &aic->state)) { + /* process anticipated on has exitted */ + return 1; + } + + if (atomic_read(&aic->nr_queued) > 0) { + /* process has more requests queued */ + return 1; + } + + if (atomic_read(&aic->nr_dispatched) > 0) { + /* process has more requests dispatched */ + return 1; + } + + if (aic->ttime_mean > ad->antic_expire) { + /* the process thinks too much between requests */ + return 1; + } + + if (arq && aic->seek_samples) { + sector_t s; + if (ad->last_sector[REQ_SYNC] < arq->request->sector) + s = arq->request->sector - ad->last_sector[REQ_SYNC]; + else + s = ad->last_sector[REQ_SYNC] - arq->request->sector; + + if (aic->seek_mean > (s>>1)) { + /* this request is better than what we're expecting */ + return 1; + } + } + + return 0; +} + +/* + * as_can_anticipate indicates weather we should either run arq + * or keep anticipating a better request. + */ +static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) +{ + if (!ad->as_io_context) + /* + * Last request submitted was a write + */ + return 0; + + if (ad->antic_status == ANTIC_FINISHED) + /* + * Don't restart if we have just finished. Run the next request + */ + return 0; + + if (as_can_break_anticipation(ad, arq)) + /* + * This request is a good candidate. Don't keep anticipating, + * run it. + */ + return 0; + + /* + * OK from here, we haven't finished, and don't have a decent request! 
+ * Status is either ANTIC_OFF so start waiting, + * ANTIC_WAIT_REQ so continue waiting for request to finish + * or ANTIC_WAIT_NEXT so continue waiting for an acceptable request. + * + */ + + return 1; +} + +/* + * as_update_iohist keeps a decaying histogram of IO thinktimes, and + * updates @aic->ttime_mean based on that. It is called when a new + * request is queued. + */ +static void as_update_iohist(struct as_io_context *aic, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + int data_dir = arq->is_sync; + unsigned long thinktime; + sector_t seek_dist; + + if (aic == NULL) + return; + + if (data_dir == REQ_SYNC) { + spin_lock(&aic->lock); + + if (test_bit(AS_TASK_IORUNNING, &aic->state) + && !atomic_read(&aic->nr_queued) + && !atomic_read(&aic->nr_dispatched)) { + /* Calculate read -> read thinktime */ + thinktime = jiffies - aic->last_end_request; + thinktime = min(thinktime, MAX_THINKTIME-1); + /* fixed point: 1.0 == 1<<8 */ + aic->ttime_samples += 256; + aic->ttime_total += 256*thinktime; + if (aic->ttime_samples) + /* fixed point factor is cancelled here */ + aic->ttime_mean = (aic->ttime_total + 128) + / aic->ttime_samples; + aic->ttime_samples = (aic->ttime_samples>>1) + + (aic->ttime_samples>>2); + aic->ttime_total = (aic->ttime_total>>1) + + (aic->ttime_total>>2); + } + + /* Calculate read -> read seek distance */ + if (!aic->seek_samples) + seek_dist = 0; + else if (aic->last_request_pos < rq->sector) + seek_dist = rq->sector - aic->last_request_pos; + else + seek_dist = aic->last_request_pos - rq->sector; + + aic->last_request_pos = rq->sector + rq->nr_sectors; + + /* + * Don't allow the seek distance to get too large from the + * odd fragment, pagein, etc + */ + if (aic->seek_samples < 400) /* second&third seek */ + seek_dist = min(seek_dist, (aic->seek_mean * 4) + + 2*1024*1024); + else + seek_dist = min(seek_dist, (aic->seek_mean * 4) + + 2*1024*64); + + aic->seek_samples += 256; + aic->seek_total += 256*seek_dist; + if 
(aic->seek_samples) { + aic->seek_mean = aic->seek_total + 128; + do_div(aic->seek_mean, aic->seek_samples); + } + aic->seek_samples = (aic->seek_samples>>1) + + (aic->seek_samples>>2); + aic->seek_total = (aic->seek_total>>1) + + (aic->seek_total>>2); + + spin_unlock(&aic->lock); + } +} + +/* + * as_update_arq must be called whenever a request (arq) is added to + * the sort_list. This function keeps caches up to date, and checks if the + * request might be one we are "anticipating" + */ +static void as_update_arq(struct as_data *ad, struct as_rq *arq) +{ + const int data_dir = arq->is_sync; + + /* keep the next_arq cache up to date */ + ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]); + + /* + * have we been anticipating this request? + * or does it come from the same process as the one we are anticipating + * for? + */ + if (ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT) { + if (as_can_break_anticipation(ad, arq)) + as_antic_stop(ad); + } +} + +/* + * Gathers timings and resizes the write batch automatically: write_batch_count shrinks when the batch overran batch_expire[REQ_ASYNC] without idling, grows when it finished early with current_write_count at 0, and never drops below 1. File-local helper, hence static. + */ +static void update_write_batch(struct as_data *ad) +{ + unsigned long batch = ad->batch_expire[REQ_ASYNC]; + long write_time; + + write_time = (jiffies - ad->current_batch_expires) + batch; + if (write_time < 0) + write_time = 0; + + if (write_time > batch && !ad->write_batch_idled) { + if (write_time > batch * 3) + ad->write_batch_count /= 2; + else + ad->write_batch_count--; + } else if (write_time < batch && ad->current_write_count == 0) { + if (batch > write_time * 3) + ad->write_batch_count *= 2; + else + ad->write_batch_count++; + } + + if (ad->write_batch_count < 1) + ad->write_batch_count = 1; +} + +/* + * as_completed_request is to be called when a request has completed and + * returned something to the requesting process, be it an error or data.
+ */ +static void as_completed_request(request_queue_t *q, struct request *rq) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(rq); + struct as_io_context *aic = arq->as_io_context; + + if (unlikely(!blk_fs_request(rq))) { + WARN_ON(aic); + return; + } + + WARN_ON(blk_fs_request(rq) && arq->state == AS_RQ_NEW); + + if (arq->state != AS_RQ_DISPATCHED) + return; + + if (ad->changed_batch && ad->nr_dispatched == 1) { + kblockd_schedule_work(&ad->antic_work); + ad->changed_batch = 2; + } + ad->nr_dispatched--; + + /* + * Start counting the batch from when a request of that direction is + * actually serviced. This should help devices with big TCQ windows + * and writeback caches + */ + if (ad->batch_data_dir == REQ_SYNC && ad->changed_batch + && ad->batch_data_dir == arq->is_sync) { + update_write_batch(ad); + ad->current_batch_expires = jiffies + + ad->batch_expire[REQ_SYNC]; + ad->changed_batch = 0; + } + + if (!aic) + return; + + spin_lock(&aic->lock); + if (arq->is_sync == REQ_SYNC) { + set_bit(AS_TASK_IORUNNING, &aic->state); + aic->last_end_request = jiffies; + } + + if (ad->as_io_context == aic) { + ad->antic_start = jiffies; + ad->aic_finished = 1; + if (ad->antic_status == ANTIC_WAIT_REQ) { + /* + * We were waiting on this request, now anticipate + * the next one + */ + as_antic_waitnext(ad); + } + } + spin_unlock(&aic->lock); + + put_as_io_context(&arq->as_io_context); +} + +/* + * as_remove_queued_request removes a request from the pre dispatch queue + * without updating refcounts. It is expected the caller will drop the + * reference unless it replaces the request at some part of the elevator + * (i.e.
the dispatch queue) + */ +static void as_remove_queued_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + + if (!arq) + BUG(); + else { + const int data_dir = arq->is_sync; + struct as_data *ad = q->elevator.elevator_data; + + WARN_ON(arq->state != AS_RQ_QUEUED); + + if (arq->as_io_context) { + BUG_ON(!atomic_read(&arq->as_io_context->nr_queued)); + atomic_dec(&arq->as_io_context->nr_queued); + } + + /* + * Update the "next_arq" cache if we are about to remove its + * entry + */ + if (ad->next_arq[data_dir] == arq) + ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + + list_del_init(&arq->fifo); + as_remove_merge_hints(q, arq); + as_del_arq_rb(ad, arq); + } + +} + +/* + * as_remove_dispatched_request is called to remove a request which has gone + * to the dispatch list. + */ +static void as_remove_dispatched_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + struct as_io_context *aic; + + if (!arq) { + WARN_ON(1); + return; + } + + WARN_ON(arq->state != AS_RQ_DISPATCHED); + WARN_ON(ON_RB(&arq->rb_node)); + aic = arq->as_io_context; + if (aic) { + WARN_ON(!atomic_read(&aic->nr_dispatched)); + atomic_dec(&aic->nr_dispatched); + } +} +/* + * as_remove_request is called when a driver has finished with a request. + * This should only be called for dispatched requests, but for some reason + * on a POWER4 box running hwscan that does not hold, so queued requests are handled too. + */ +static void as_remove_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + + if (unlikely(!blk_fs_request(rq))) + return; + + if (!arq) { + WARN_ON(1); + return; + } + + if (ON_RB(&arq->rb_node)) + as_remove_queued_request(q, rq); + else + as_remove_dispatched_request(q, rq); +} + +/* + * as_fifo_expired returns 0 if there are no expired reads on the fifo, + * 1 otherwise. It is ratelimited so that we only perform the check once per + * `fifo_expire' interval.
Otherwise a large number of expired requests + * would create a hopeless seekstorm. + * + * See as_antic_expired comment. + */ +static int as_fifo_expired(struct as_data *ad, int adir) +{ + struct as_rq *arq; + long delta_jif; + + delta_jif = jiffies - ad->last_check_fifo[adir]; + if (unlikely(delta_jif < 0)) + delta_jif = -delta_jif; + if (delta_jif < ad->fifo_expire[adir]) + return 0; + + ad->last_check_fifo[adir] = jiffies; + + if (list_empty(&ad->fifo_list[adir])) + return 0; + + arq = list_entry_fifo(ad->fifo_list[adir].next); + + return time_after(jiffies, arq->expires); +} + +/* + * as_batch_expired returns true if the current batch has expired. A batch + * is a set of reads or a set of writes. + */ +static inline int as_batch_expired(struct as_data *ad) +{ + if (ad->changed_batch) + return 0; + + if (ad->batch_data_dir == REQ_SYNC) + /* TODO! add a check so a complete fifo gets written? */ + return time_after(jiffies, ad->current_batch_expires); + + return time_after(jiffies, ad->current_batch_expires) + || ad->current_write_count == 0; +} + +/* + * move an entry to dispatch queue + */ +static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) +{ + const int data_dir = arq->is_sync; + + BUG_ON(!ON_RB(&arq->rb_node)); + + as_antic_stop(ad); + ad->antic_status = ANTIC_OFF; + + /* + * This has to be set in order to be correctly updated by + * as_find_next_arq + */ + ad->last_sector[data_dir] = arq->request->sector + + arq->request->nr_sectors; + + ad->nr_dispatched++; + + if (data_dir == REQ_SYNC) { + /* In case we have to anticipate after this */ + copy_as_io_context(&ad->as_io_context, &arq->as_io_context); + } else { + if (ad->as_io_context) { + put_as_io_context(&ad->as_io_context); + ad->as_io_context = NULL; + } + + if (ad->current_write_count != 0) + ad->current_write_count--; + } + ad->aic_finished = 0; + + ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + + /* + * take it off the sort and fifo list, add to dispatch queue + */ + 
as_remove_queued_request(ad->q, arq->request); + list_add_tail(&arq->request->queuelist, ad->dispatch); + if (arq->as_io_context) + atomic_inc(&arq->as_io_context->nr_dispatched); + + WARN_ON(arq->state != AS_RQ_QUEUED); + arq->state = AS_RQ_DISPATCHED; +} + +/* + * as_dispatch_request selects the best request according to + * read/write expire, batch expire, etc, and moves it to the dispatch + * queue. Returns 1 if a request was found, 0 otherwise. + */ +static int as_dispatch_request(struct as_data *ad) +{ + struct as_rq *arq; + const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); + const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); + + /* Signal that the write batch was uncontended, so we can't time it */ + if (ad->batch_data_dir == REQ_ASYNC && !reads) { + if (ad->current_write_count == 0 || !writes) + ad->write_batch_idled = 1; + } + + if (!(reads || writes) + || ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT + || ad->changed_batch == 1) + return 0; + + if (!(reads && writes && as_batch_expired(ad)) ) { + /* + * batch is still running or no reads or no writes + */ + arq = ad->next_arq[ad->batch_data_dir]; + + if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { + if (as_fifo_expired(ad, REQ_SYNC)) + goto fifo_expired; + + if (as_can_anticipate(ad, arq)) { + as_antic_waitreq(ad); + return 0; + } + } + + if (arq) { + /* we have a "next request" */ + if (reads && !writes) + ad->current_batch_expires = + jiffies + ad->batch_expire[REQ_SYNC]; + goto dispatch_request; + } + } + + /* + * at this point we are not running a batch. 
select the appropriate + * data direction (read / write) + */ + + if (reads) { + BUG_ON(RB_EMPTY(&ad->sort_list[REQ_SYNC])); + + if (writes && ad->batch_data_dir == REQ_SYNC) + /* + * Last batch was a read, switch to writes + */ + goto dispatch_writes; + + if (ad->batch_data_dir == REQ_ASYNC) + ad->changed_batch = 1; + ad->batch_data_dir = REQ_SYNC; + arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); + ad->last_check_fifo[ad->batch_data_dir] = jiffies; + goto dispatch_request; + } + + /* + * the last batch was a read + */ + + if (writes) { +dispatch_writes: + BUG_ON(RB_EMPTY(&ad->sort_list[REQ_ASYNC])); + + if (ad->batch_data_dir == REQ_SYNC) + ad->changed_batch = 1; + ad->batch_data_dir = REQ_ASYNC; + ad->current_write_count = ad->write_batch_count; + ad->write_batch_idled = 0; + arq = ad->next_arq[ad->batch_data_dir]; + goto dispatch_request; + } + + BUG(); + return 0; + +dispatch_request: + /* + * If a request has expired, service it. + */ + + if (as_fifo_expired(ad, ad->batch_data_dir)) { +fifo_expired: + arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); + BUG_ON(arq == NULL); + } + + if (ad->changed_batch) { + if (ad->changed_batch == 1 && ad->nr_dispatched) + return 0; + if (ad->batch_data_dir == REQ_ASYNC) { + ad->current_batch_expires = jiffies + + ad->batch_expire[REQ_ASYNC]; + ad->changed_batch = 0; + } else + ad->changed_batch = 2; + arq->request->flags |= REQ_HARDBARRIER; + } + + /* + * arq is the selected appropriate request. 
+ */ + as_move_to_dispatch(ad, arq); + + return 1; +} + +static struct request *as_next_request(request_queue_t *q) +{ + struct as_data *ad = q->elevator.elevator_data; + struct request *rq = NULL; + + /* + * if there are still requests on the dispatch queue, grab the first + */ + if (!list_empty(ad->dispatch) || as_dispatch_request(ad)) + rq = list_entry_rq(ad->dispatch->next); + + return rq; +} + +/* + * add arq to rbtree and fifo + */ +static void as_add_request(struct as_data *ad, struct as_rq *arq) +{ + int data_dir; + + if (rq_data_dir(arq->request) == READ + || current->flags&PF_SYNCWRITE) + arq->is_sync = 1; + else + arq->is_sync = 0; + data_dir = arq->is_sync; + + arq->as_io_context = get_as_io_context(); + + if (arq->as_io_context) { + atomic_inc(&arq->as_io_context->nr_queued); + as_update_iohist(arq->as_io_context, arq->request); + } + + as_add_arq_rb(ad, arq); + + /* + * set expire time (only used for reads) and add to fifo list + */ + arq->expires = jiffies + ad->fifo_expire[data_dir]; + list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + arq->state = AS_RQ_QUEUED; + as_update_arq(ad, arq); /* keep state machine up to date */ +} + +static void +as_insert_request(request_queue_t *q, struct request *rq, + struct list_head *insert_here) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(rq); + + if (unlikely(rq->flags & REQ_HARDBARRIER)) { + AS_INVALIDATE_HASH(ad); + q->last_merge = NULL; + + while (ad->next_arq[REQ_SYNC]) + as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); + + while (ad->next_arq[REQ_ASYNC]) + as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); + } + + if (unlikely(!blk_fs_request(rq))) { + if (!insert_here) + insert_here = ad->dispatch->prev; + + list_add(&rq->queuelist, insert_here); + + /* Stop anticipating - let this request get through */ + if (!list_empty(ad->dispatch) + && (ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT)) + as_antic_stop(ad); + + return; + } + + if 
(rq_mergeable(rq)) { + as_add_arq_hash(ad, arq); + + if (!q->last_merge) + q->last_merge = &rq->queuelist; + } + + as_add_request(ad, arq); +} + +/* + * as_queue_empty tells us if there are requests left in the device. It may + * not be the case that a driver can get the next request even if the queue + * is not empty - it is used in the block layer to check for plugging and + * merging opportunities + */ +static int as_queue_empty(request_queue_t *q) +{ + struct as_data *ad = q->elevator.elevator_data; + + if (!list_empty(&ad->fifo_list[REQ_ASYNC]) + || !list_empty(&ad->fifo_list[REQ_SYNC]) + || !list_empty(ad->dispatch)) + return 0; + + return 1; +} + +static struct request * +as_former_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + struct rb_node *rbprev = rb_prev(&arq->rb_node); + struct request *ret = NULL; + + if (rbprev) + ret = rb_entry_arq(rbprev)->request; + + return ret; +} + +static struct request * +as_latter_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + struct rb_node *rbnext = rb_next(&arq->rb_node); + struct request *ret = NULL; + + if (rbnext) + ret = rb_entry_arq(rbnext)->request; + + return ret; +} + +static int +as_merge(request_queue_t *q, struct list_head **insert, struct bio *bio) +{ + struct as_data *ad = q->elevator.elevator_data; + sector_t rb_key = bio->bi_sector + bio_sectors(bio); + struct request *__rq; + int ret; + + /* + * try last_merge to avoid going to hash + */ + ret = elv_try_last_merge(q, bio); + if (ret != ELEVATOR_NO_MERGE) { + __rq = list_entry_rq(q->last_merge); + goto out_insert; + } + + /* + * see if the merge hash can satisfy a back merge + */ + __rq = as_find_arq_hash(ad, bio->bi_sector); + if (__rq) { + BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); + + if (elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_BACK_MERGE; + goto out; + } + } + + /* + * check for front merge + */ + __rq = as_find_arq_rb(ad, rb_key, 
bio_data_dir(bio)); + if (__rq) { + BUG_ON(rb_key != rq_rb_key(__rq)); + + if (elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_FRONT_MERGE; + goto out; + } + } + + return ELEVATOR_NO_MERGE; +out: + q->last_merge = &__rq->queuelist; +out_insert: + if (ret) + as_hot_arq_hash(ad, RQ_DATA(__rq)); + *insert = &__rq->queuelist; + return ret; +} + +static void as_merged_request(request_queue_t *q, struct request *req) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(req); + + /* + * hash always needs to be repositioned, key is end sector + */ + as_del_arq_hash(arq); + as_add_arq_hash(ad, arq); + + /* + * if the merge was a front merge, we need to reposition request + */ + if (rq_rb_key(req) != arq->rb_key) { + as_del_arq_rb(ad, arq); + as_add_arq_rb(ad, arq); + /* + * Note! At this stage of this and the next function, our next + * request may not be optimal - eg the request may have "grown" + * behind the disk head. We currently don't bother adjusting. + */ + } + + q->last_merge = &req->queuelist; +} + +static void +as_merged_requests(request_queue_t *q, struct request *req, + struct request *next) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(req); + struct as_rq *anext = RQ_DATA(next); + + BUG_ON(!arq); + BUG_ON(!anext); + + /* + * reposition arq (this is the merged request) in hash, and in rbtree + * in case of a front merge + */ + as_del_arq_hash(arq); + as_add_arq_hash(ad, arq); + + if (rq_rb_key(req) != arq->rb_key) { + as_del_arq_rb(ad, arq); + as_add_arq_rb(ad, arq); + } + + /* + * if anext expires before arq, assign its expire time to arq + * and move into anext position (anext will be deleted) in fifo + */ + if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) { + if (time_before(anext->expires, arq->expires)) { + list_move(&arq->fifo, &anext->fifo); + arq->expires = anext->expires; + /* + * Don't copy here but swap, because when anext is + * removed below, it must contain the unused 
context + */ + swap_as_io_context(&arq->as_io_context, + &anext->as_io_context); + } + } + + /* + * kill knowledge of next, this one is a goner + */ + as_remove_queued_request(q, next); + put_as_io_context(&anext->as_io_context); +} + +/* + * This is executed in a "deferred" process context, by kblockd. It calls the + * driver's request_fn so the driver can submit that request. + * + * IMPORTANT! This guy will reenter the elevator, so set up all queue global + * state before calling, and don't rely on any state over calls. + * + * FIXME! dispatch queue is not a queue at all! + */ +static void as_work_handler(void *data) +{ + struct request_queue *q = data; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (as_next_request(q)) + q->request_fn(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + +static void as_put_request(request_queue_t *q, struct request *rq) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(rq); + + if (!arq) { + WARN_ON(1); + return; + } + + mempool_free(arq, ad->arq_pool); + rq->elevator_private = NULL; +} + +static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); + + if (arq) { + RB_CLEAR(&arq->rb_node); + arq->request = rq; + arq->state = AS_RQ_NEW; + arq->as_io_context = NULL; + INIT_LIST_HEAD(&arq->hash); + arq->hash_valid_count = 0; + INIT_LIST_HEAD(&arq->fifo); + rq->elevator_private = arq; + return 0; + } + + return 1; +} + +static void as_exit(request_queue_t *q, elevator_t *e) +{ + struct as_data *ad = e->elevator_data; + + del_timer_sync(&ad->antic_timer); + kblockd_flush(); + + BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); + BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); + + mempool_destroy(ad->arq_pool); + put_as_io_context(&ad->as_io_context); + kfree(ad->hash); + kfree(ad); +} + +/* + * initialize elevator private data (as_data), and 
alloc a arq for + * each request on the free lists + */ +static int as_init(request_queue_t *q, elevator_t *e) +{ + struct as_data *ad; + int i; + + if (!arq_pool) + return -ENOMEM; + + ad = kmalloc(sizeof(*ad), GFP_KERNEL); + if (!ad) + return -ENOMEM; + memset(ad, 0, sizeof(*ad)); + + ad->q = q; /* Identify what queue the data belongs to */ + + ad->hash = kmalloc(sizeof(struct list_head)*AS_HASH_ENTRIES,GFP_KERNEL); + if (!ad->hash) { + kfree(ad); + return -ENOMEM; + } + + ad->arq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, arq_pool); + if (!ad->arq_pool) { + kfree(ad->hash); + kfree(ad); + return -ENOMEM; + } + + /* anticipatory scheduling helpers */ + ad->antic_timer.function = as_antic_timeout; + ad->antic_timer.data = (unsigned long)q; + init_timer(&ad->antic_timer); + INIT_WORK(&ad->antic_work, as_work_handler, q); + + for (i = 0; i < AS_HASH_ENTRIES; i++) + INIT_LIST_HEAD(&ad->hash[i]); + + INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); + INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); + ad->sort_list[REQ_SYNC] = RB_ROOT; + ad->sort_list[REQ_ASYNC] = RB_ROOT; + ad->dispatch = &q->queue_head; + ad->fifo_expire[REQ_SYNC] = default_read_expire; + ad->fifo_expire[REQ_ASYNC] = default_write_expire; + ad->hash_valid_count = 1; + ad->antic_expire = default_antic_expire; + ad->batch_expire[REQ_SYNC] = default_read_batch_expire; + ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; + e->elevator_data = ad; + + ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; + ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; + if (ad->write_batch_count < 2) + ad->write_batch_count = 2; + return 0; +} + +/* + * sysfs parts below + */ +struct as_fs_entry { + struct attribute attr; + ssize_t (*show)(struct as_data *, char *); + ssize_t (*store)(struct as_data *, const char *, size_t); +}; + +static ssize_t +as_var_show(unsigned int var, char *page) +{ + var = (var * 1000) / HZ; + return sprintf(page, "%d\n", var); +} + +static 
ssize_t +as_var_store(unsigned long *var, const char *page, size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (tmp != 0) { + tmp = (tmp * HZ) / 1000; + if (tmp == 0) + tmp = 1; + } + *var = tmp; + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR) \ +static ssize_t __FUNC(struct as_data *ad, char *page) \ +{ \ + return as_var_show(__VAR, (page)); \ +} +SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]); +SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]); +SHOW_FUNCTION(as_anticexpire_show, ad->antic_expire); +SHOW_FUNCTION(as_read_batchexpire_show, ad->batch_expire[REQ_SYNC]); +SHOW_FUNCTION(as_write_batchexpire_show, ad->batch_expire[REQ_ASYNC]); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ +static ssize_t __FUNC(struct as_data *ad, const char *page, size_t count) \ +{ \ + int ret = as_var_store(__PTR, (page), count); \ + if (*(__PTR) < (MIN)) \ + *(__PTR) = (MIN); \ + else if (*(__PTR) > (MAX)) \ + *(__PTR) = (MAX); \ + return ret; \ +} +STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); +STORE_FUNCTION(as_writeexpire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX); +STORE_FUNCTION(as_anticexpire_store, &ad->antic_expire, 0, INT_MAX); +STORE_FUNCTION(as_read_batchexpire_store, + &ad->batch_expire[REQ_SYNC], 0, INT_MAX); +STORE_FUNCTION(as_write_batchexpire_store, + &ad->batch_expire[REQ_ASYNC], 0, INT_MAX); +#undef STORE_FUNCTION + +static struct as_fs_entry as_readexpire_entry = { + .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, + .show = as_readexpire_show, + .store = as_readexpire_store, +}; +static struct as_fs_entry as_writeexpire_entry = { + .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR }, + .show = as_writeexpire_show, + .store = as_writeexpire_store, +}; +static struct as_fs_entry as_anticexpire_entry = { + .attr = {.name = "antic_expire", .mode = S_IRUGO | S_IWUSR }, + .show = 
as_anticexpire_show, + .store = as_anticexpire_store, +}; +static struct as_fs_entry as_read_batchexpire_entry = { + .attr = {.name = "read_batch_expire", .mode = S_IRUGO | S_IWUSR }, + .show = as_read_batchexpire_show, + .store = as_read_batchexpire_store, +}; +static struct as_fs_entry as_write_batchexpire_entry = { + .attr = {.name = "write_batch_expire", .mode = S_IRUGO | S_IWUSR }, + .show = as_write_batchexpire_show, + .store = as_write_batchexpire_store, +}; + +static struct attribute *default_attrs[] = { + &as_readexpire_entry.attr, + &as_writeexpire_entry.attr, + &as_anticexpire_entry.attr, + &as_read_batchexpire_entry.attr, + &as_write_batchexpire_entry.attr, + NULL, +}; + +#define to_as(atr) container_of((atr), struct as_fs_entry, attr) + +static ssize_t +as_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + struct as_fs_entry *entry = to_as(attr); + + if (!entry->show) + return 0; + + return entry->show(e->elevator_data, page); +} + +static ssize_t +as_attr_store(struct kobject *kobj, struct attribute *attr, + const char *page, size_t length) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + struct as_fs_entry *entry = to_as(attr); + + if (!entry->store) + return -EINVAL; + + return entry->store(e->elevator_data, page, length); +} + +static struct sysfs_ops as_sysfs_ops = { + .show = as_attr_show, + .store = as_attr_store, +}; + +struct kobj_type as_ktype = { + .sysfs_ops = &as_sysfs_ops, + .default_attrs = default_attrs, +}; + +static int __init as_slab_setup(void) +{ + arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq), + 0, 0, NULL, NULL); + + if (!arq_pool) + panic("as: can't init slab pool\n"); + + return 0; +} + +subsys_initcall(as_slab_setup); + +elevator_t iosched_as = { + .elevator_merge_fn = as_merge, + .elevator_merged_fn = as_merged_request, + .elevator_merge_req_fn = as_merged_requests, + .elevator_next_req_fn = as_next_request, + 
.elevator_add_req_fn = as_insert_request, + .elevator_remove_req_fn = as_remove_request, + .elevator_queue_empty_fn = as_queue_empty, + .elevator_completed_req_fn = as_completed_request, + .elevator_former_req_fn = as_former_request, + .elevator_latter_req_fn = as_latter_request, + .elevator_set_req_fn = as_set_request, + .elevator_put_req_fn = as_put_request, + .elevator_init_fn = as_init, + .elevator_exit_fn = as_exit, + + .elevator_ktype = &as_ktype, +}; + +EXPORT_SYMBOL(iosched_as); diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 71750da0db6d..be19601847b5 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1033,7 +1033,7 @@ static inline void __generic_unplug_device(request_queue_t *q) /* * was plugged, fire request_fn if queue has stuff to do */ - if (!elv_queue_empty(q)) + if (elv_next_request(q)) q->request_fn(q); } @@ -1204,6 +1204,18 @@ static int blk_init_free_list(request_queue_t *q) static int __make_request(request_queue_t *, struct bio *); +static elevator_t *chosen_elevator = &iosched_as; + +static int __init elevator_setup(char *str) +{ + if (!strcmp(str, "deadline")) + chosen_elevator = &iosched_deadline; + if (!strcmp(str, "as")) + chosen_elevator = &iosched_as; + return 1; +} +__setup("elevator=", elevator_setup); + /** * blk_init_queue - prepare a request queue for use with a block device * @q: The &request_queue_t to be initialised @@ -1235,11 +1247,20 @@ static int __make_request(request_queue_t *, struct bio *); int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock) { int ret; + static int printed; if (blk_init_free_list(q)) return -ENOMEM; - if ((ret = elevator_init(q, &iosched_deadline))) { + if (!printed) { + printed = 1; + if (chosen_elevator == &iosched_deadline) + printk("deadline elevator\n"); + else if (chosen_elevator == &iosched_as) + printk("anticipatory scheduling elevator\n"); + } + + if ((ret = elevator_init(q, chosen_elevator))) { blk_cleanup_queue(q); 
return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index f063200c5b66..994bfbc41e73 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -319,6 +319,7 @@ asmlinkage long sys_fsync(unsigned int fd) /* We need to protect against concurrent writers.. */ down(&inode->i_sem); + current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(inode->i_mapping); err = file->f_op->fsync(file, dentry, 0); if (!ret) @@ -326,6 +327,7 @@ asmlinkage long sys_fsync(unsigned int fd) err = filemap_fdatawait(inode->i_mapping); if (!ret) ret = err; + current->flags &= ~PF_SYNCWRITE; up(&inode->i_sem); out_putf: @@ -354,6 +356,7 @@ asmlinkage long sys_fdatasync(unsigned int fd) goto out_putf; down(&inode->i_sem); + current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(inode->i_mapping); err = file->f_op->fsync(file, dentry, 1); if (!ret) @@ -361,6 +364,7 @@ asmlinkage long sys_fdatasync(unsigned int fd) err = filemap_fdatawait(inode->i_mapping); if (!ret) ret = err; + current->flags &= ~PF_SYNCWRITE; up(&inode->i_sem); out_putf: diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 92682b02ff12..8732f30faa2b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -516,6 +516,7 @@ int generic_osync_inode(struct inode *inode, int what) int need_write_inode_now = 0; int err2; + current->flags |= PF_SYNCWRITE; if (what & OSYNC_DATA) err = filemap_fdatawrite(inode->i_mapping); if (what & (OSYNC_METADATA|OSYNC_DATA)) { @@ -528,6 +529,7 @@ int generic_osync_inode(struct inode *inode, int what) if (!err) err = err2; } + current->flags &= ~PF_SYNCWRITE; spin_lock(&inode_lock); if ((inode->i_state & I_DIRTY) && diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 07de69c1ef8a..d793bb97dd54 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -89,6 +89,11 @@ extern elevator_t elevator_noop; */ extern elevator_t iosched_deadline; +/* + * anticipatory I/O scheduler + */ +extern elevator_t iosched_as; + extern int elevator_init(request_queue_t *, elevator_t *); 
extern void elevator_exit(request_queue_t *); extern inline int elv_rq_merge_ok(struct request *, struct bio *); diff --git a/include/linux/sched.h b/include/linux/sched.h index ca97376901b0..e29f9606c2aa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -321,6 +321,8 @@ struct k_itimer { }; +struct as_io_context; /* Anticipatory scheduler */ +void exit_as_io_context(void); struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ @@ -450,6 +452,8 @@ struct task_struct { struct dentry *proc_dentry; struct backing_dev_info *backing_dev_info; + struct as_io_context *as_io_context; + unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ }; @@ -481,6 +485,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PF_KSWAPD 0x00040000 /* I am kswapd */ #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #define PF_LESS_THROTTLE 0x01000000 /* Throttle me less: I clena memory */ +#define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ #ifdef CONFIG_SMP extern int set_cpus_allowed(task_t *p, unsigned long new_mask); diff --git a/kernel/exit.c b/kernel/exit.c index 2f090213e481..8471381546af 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -682,6 +682,8 @@ NORET_TYPE void do_exit(long code) panic("Attempted to kill the idle task!"); if (unlikely(tsk->pid == 1)) panic("Attempted to kill init!"); + if (tsk->as_io_context) + exit_as_io_context(); tsk->flags |= PF_EXITING; del_timer_sync(&tsk->real_timer); diff --git a/kernel/fork.c b/kernel/fork.c index c17e05614c88..fcdc884cd894 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -864,6 +864,7 @@ struct task_struct *copy_process(unsigned long clone_flags, p->lock_depth = -1; /* -1 = no lock */ p->start_time = get_jiffies_64(); p->security = NULL; + p->as_io_context = NULL; retval = -ENOMEM; if ((retval = security_task_alloc(p))) -- cgit v1.2.3 From 179b68bbaadd8296c537ac11f0f5e825c188bfa8 Mon Sep 17 00:00:00 2001 From: 
Andrew Morton Date: Fri, 4 Jul 2003 19:36:37 -0700 Subject: [PATCH] Use kblockd for running request queues Using keventd for running request_fns is risky because keventd itself can block on disk I/O. Use the new kblockd kernel threads for the generic unplugging. --- drivers/block/ll_rw_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index be19601847b5..cdfe7d3697bc 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1068,7 +1068,7 @@ static void blk_unplug_timeout(unsigned long data) { request_queue_t *q = (request_queue_t *)data; - schedule_work(&q->unplug_work); + kblockd_schedule_work(&q->unplug_work); } /** @@ -1176,7 +1176,7 @@ void blk_cleanup_queue(request_queue_t * q) elevator_exit(q); del_timer_sync(&q->unplug_timer); - flush_scheduled_work(); + kblockd_flush(); mempool_destroy(rl->rq_pool); -- cgit v1.2.3 From ee66147bf85b90df796737381e057155b4bc4fe9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:36:44 -0700 Subject: [PATCH] per queue nr_requests From: Nick Piggin This gets rid of the global queue_nr_requests and usage of BLKDEV_MAX_RQ (the latter is now only used to set the queues' defaults). The queue depth becomes per-queue, controlled by a sysfs entry. 
--- drivers/block/elevator.c | 14 +--- drivers/block/genhd.c | 4 +- drivers/block/ll_rw_blk.c | 205 ++++++++++++++++++++++++++++++++++++++-------- include/linux/blkdev.h | 11 ++- include/linux/elevator.h | 4 +- 5 files changed, 188 insertions(+), 50 deletions(-) diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 406755724e03..89af76783943 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -379,17 +379,13 @@ void elv_completed_request(request_queue_t *q, struct request *rq) e->elevator_completed_req_fn(q, rq); } -int elv_register_queue(struct gendisk *disk) +int elv_register_queue(struct request_queue *q) { - request_queue_t *q = disk->queue; elevator_t *e; - if (!q) - return -ENXIO; - e = &q->elevator; - e->kobj.parent = kobject_get(&disk->kobj); + e->kobj.parent = kobject_get(&q->kobj); if (!e->kobj.parent) return -EBUSY; @@ -399,14 +395,12 @@ int elv_register_queue(struct gendisk *disk) return kobject_register(&e->kobj); } -void elv_unregister_queue(struct gendisk *disk) +void elv_unregister_queue(struct request_queue *q) { - request_queue_t *q = disk->queue; - if (q) { elevator_t * e = &q->elevator; kobject_unregister(&e->kobj); - kobject_put(&disk->kobj); + kobject_put(&q->kobj); } } diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 889b8753c29f..361aee8ab255 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -191,7 +191,7 @@ void add_disk(struct gendisk *disk) blk_register_region(MKDEV(disk->major, disk->first_minor), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); - elv_register_queue(disk); + blk_register_queue(disk); } EXPORT_SYMBOL(add_disk); @@ -199,7 +199,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ void unlink_gendisk(struct gendisk *disk) { - elv_unregister_queue(disk); + blk_unregister_queue(disk); blk_unregister_region(MKDEV(disk->major, disk->first_minor), disk->minors); } diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c 
index cdfe7d3697bc..b1248e542e5e 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -42,12 +42,6 @@ static kmem_cache_t *request_cachep; static LIST_HEAD(blk_plug_list); static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; -/* - * Number of requests per queue. This many for reads and for writes (twice - * this number, total). - */ -static int queue_nr_requests; - static wait_queue_head_t congestion_wqh[2]; /* @@ -57,9 +51,9 @@ static struct workqueue_struct *kblockd_workqueue; unsigned long blk_max_low_pfn, blk_max_pfn; -static inline int batch_requests(void) +static inline int batch_requests(struct request_queue *q) { - return min(BLKDEV_MAX_RQ / 8, 8); + return min(q->nr_requests / 8, 8UL); } /* @@ -67,11 +61,11 @@ static inline int batch_requests(void) * considered to be congested. It include a little hysteresis to keep the * context switch rate down. */ -static inline int queue_congestion_on_threshold(void) +static inline int queue_congestion_on_threshold(struct request_queue *q) { int ret; - ret = queue_nr_requests / 8 - 1; + ret = q->nr_requests / 8 - 1; if (ret < 0) ret = 1; return ret; @@ -80,13 +74,13 @@ static inline int queue_congestion_on_threshold(void) /* * The threshold at which a queue is considered to be uncongested */ -static inline int queue_congestion_off_threshold(void) +static inline int queue_congestion_off_threshold(struct request_queue *q) { int ret; - ret = queue_nr_requests / 8 + 1; - if (ret > queue_nr_requests) - ret = queue_nr_requests; + ret = q->nr_requests / 8 + 1; + if (ret > q->nr_requests) + ret = q->nr_requests; return ret; } @@ -199,6 +193,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) /* * set defaults */ + q->nr_requests = BLKDEV_MAX_RQ; q->max_phys_segments = MAX_PHYS_SEGMENTS; q->max_hw_segments = MAX_HW_SEGMENTS; q->make_request_fn = mfn; @@ -452,13 +447,15 @@ void blk_queue_free_tags(request_queue_t *q) q->queue_flags &= ~(1 << 
QUEUE_FLAG_QUEUED); } -static int init_tag_map(struct blk_queue_tag *tags, int depth) +static int +init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) { int bits, i; - if (depth > (queue_nr_requests*2)) { - depth = (queue_nr_requests*2); - printk(KERN_ERR "%s: adjusted depth to %d\n", __FUNCTION__, depth); + if (depth > q->nr_requests * 2) { + depth = q->nr_requests * 2; + printk(KERN_ERR "%s: adjusted depth to %d\n", + __FUNCTION__, depth); } tags->tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC); @@ -487,7 +484,6 @@ fail: return -ENOMEM; } - /** * blk_queue_init_tags - initialize the queue tag info * @q: the request queue for the device @@ -501,7 +497,7 @@ int blk_queue_init_tags(request_queue_t *q, int depth) if (!tags) goto fail; - if (init_tag_map(tags, depth)) + if (init_tag_map(q, tags, depth)) goto fail; INIT_LIST_HEAD(&tags->busy_list); @@ -551,7 +547,7 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth) tag_map = bqt->tag_map; max_depth = bqt->real_max_depth; - if (init_tag_map(bqt, new_depth)) + if (init_tag_map(q, bqt, new_depth)) return -ENOMEM; memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); @@ -1315,12 +1311,12 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) struct request_list *rl = &q->rq; spin_lock_irq(q->queue_lock); - if (rl->count[rw] == BLKDEV_MAX_RQ || !elv_may_queue(q, rw)) { + if (rl->count[rw] >= q->nr_requests || !elv_may_queue(q, rw)) { spin_unlock_irq(q->queue_lock); goto out; } rl->count[rw]++; - if ((BLKDEV_MAX_RQ - rl->count[rw]) < queue_congestion_on_threshold()) + if ((q->nr_requests - rl->count[rw]) < queue_congestion_on_threshold(q)) set_queue_congested(q, rw); spin_unlock_irq(q->queue_lock); @@ -1328,7 +1324,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) if (!rq) { spin_lock_irq(q->queue_lock); rl->count[rw]--; - if ((BLKDEV_MAX_RQ - rl->count[rw]) >= queue_congestion_off_threshold()) + if 
((q->nr_requests - rl->count[rw]) >= queue_congestion_off_threshold(q)) clear_queue_congested(q, rw); spin_unlock_irq(q->queue_lock); goto out; @@ -1549,10 +1545,10 @@ void __blk_put_request(request_queue_t *q, struct request *req) blk_free_request(q, req); rl->count[rw]--; - if ((BLKDEV_MAX_RQ - rl->count[rw]) >= - queue_congestion_off_threshold()) + if ((q->nr_requests - rl->count[rw]) >= + queue_congestion_off_threshold(q)) clear_queue_congested(q, rw); - if ((BLKDEV_MAX_RQ - rl->count[rw]) >= batch_requests() && + if ((q->nr_requests - rl->count[rw]) >= batch_requests(q) && waitqueue_active(&rl->wait[rw])) wake_up(&rl->wait[rw]); } @@ -2360,14 +2356,6 @@ int __init blk_dev_init(void) if (!request_cachep) panic("Can't create request pool slab cache\n"); - queue_nr_requests = BLKDEV_MAX_RQ; - - printk("block request queues:\n"); - printk(" %d/%d requests per read queue\n", BLKDEV_MIN_RQ, queue_nr_requests); - printk(" %d/%d requests per write queue\n", BLKDEV_MIN_RQ, queue_nr_requests); - printk(" enter congestion at %d\n", queue_congestion_on_threshold()); - printk(" exit congestion at %d\n", queue_congestion_off_threshold()); - blk_max_low_pfn = max_low_pfn; blk_max_pfn = max_pfn; @@ -2376,6 +2364,153 @@ int __init blk_dev_init(void) return 0; } +/* + * sysfs parts below + */ +struct queue_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct request_queue *, char *); + ssize_t (*store)(struct request_queue *, const char *, size_t); +}; + +static ssize_t +queue_var_show(unsigned int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t +queue_var_store(unsigned long *var, const char *page, size_t count) +{ + char *p = (char *) page; + + *var = simple_strtoul(p, &p, 10); + return count; +} + +static ssize_t queue_requests_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->nr_requests, (page)); +} + +static ssize_t +queue_requests_store(struct request_queue *q, const char *page, size_t count) +{ + struct 
request_list *rl = &q->rq; + + int ret = queue_var_store(&q->nr_requests, page, count); + if (q->nr_requests < BLKDEV_MIN_RQ) + q->nr_requests = BLKDEV_MIN_RQ; + + if ((q->nr_requests - rl->count[READ]) < + queue_congestion_on_threshold(q)) + set_queue_congested(q, READ); + else if ((q->nr_requests - rl->count[READ]) >= + queue_congestion_off_threshold(q)) + clear_queue_congested(q, READ); + + if ((q->nr_requests - rl->count[READ]) < + queue_congestion_on_threshold(q)) + set_queue_congested(q, READ); + else if ((q->nr_requests - rl->count[READ]) >= + queue_congestion_off_threshold(q)) + clear_queue_congested(q, READ); + + return ret; +} + +static struct queue_sysfs_entry queue_requests_entry = { + .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, + .show = queue_requests_show, + .store = queue_requests_store, +}; + +static struct attribute *default_attrs[] = { + &queue_requests_entry.attr, + NULL, +}; + +#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) + +static ssize_t +queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct queue_sysfs_entry *entry = to_queue(attr); + struct request_queue *q; + + q = container_of(kobj, struct request_queue, kobj); + if (!entry->show) + return 0; + + return entry->show(q, page); +} + +static ssize_t +queue_attr_store(struct kobject *kobj, struct attribute *attr, + const char *page, size_t length) +{ + struct queue_sysfs_entry *entry = to_queue(attr); + struct request_queue *q; + + q = container_of(kobj, struct request_queue, kobj); + if (!entry->store) + return -EINVAL; + + return entry->store(q, page, length); +} + +static struct sysfs_ops queue_sysfs_ops = { + .show = queue_attr_show, + .store = queue_attr_store, +}; + +struct kobj_type queue_ktype = { + .sysfs_ops = &queue_sysfs_ops, + .default_attrs = default_attrs, +}; + +int blk_register_queue(struct gendisk *disk) +{ + int ret; + + request_queue_t *q = disk->queue; + + if (!q) + return -ENXIO; + + 
q->kobj.parent = kobject_get(&disk->kobj); + if (!q->kobj.parent) + return -EBUSY; + + snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); + q->kobj.ktype = &queue_ktype; + + ret = kobject_register(&q->kobj); + if (ret < 0) + return ret; + + ret = elv_register_queue(q); + if (ret) { + kobject_unregister(&q->kobj); + return ret; + } + + return 0; +} + +void blk_unregister_queue(struct gendisk *disk) +{ + request_queue_t *q = disk->queue; + + if (q) { + elv_unregister_queue(q); + + kobject_unregister(&q->kobj); + kobject_put(&disk->kobj); + } +} + + EXPORT_SYMBOL(process_that_request_first); EXPORT_SYMBOL(end_that_request_first); EXPORT_SYMBOL(end_that_request_chunk); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e97790517973..4295d60bf661 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -22,7 +22,7 @@ typedef struct elevator_s elevator_t; struct request_pm_state; #define BLKDEV_MIN_RQ 4 -#define BLKDEV_MAX_RQ 128 +#define BLKDEV_MAX_RQ 128 /* Default maximum */ struct request_list { int count[2]; @@ -268,9 +268,16 @@ struct request_queue */ spinlock_t *queue_lock; + /* + * queue kobject + */ + struct kobject kobj; + /* * queue settings */ + unsigned long nr_requests; /* Max # of requests */ + unsigned short max_sectors; unsigned short max_phys_segments; unsigned short max_hw_segments; @@ -398,6 +405,8 @@ struct sec_size { unsigned block_size_bits; }; +extern int blk_register_queue(struct gendisk *disk); +extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); extern void generic_make_request(struct bio *bio); extern void blk_put_request(struct request *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d793bb97dd54..b0e70562be94 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -68,8 +68,8 @@ extern int elv_queue_empty(request_queue_t *); extern struct request *elv_next_request(struct request_queue *q); extern struct request 
*elv_former_request(request_queue_t *, struct request *); extern struct request *elv_latter_request(request_queue_t *, struct request *); -extern int elv_register_queue(struct gendisk *); -extern void elv_unregister_queue(struct gendisk *); +extern int elv_register_queue(request_queue_t *q); +extern void elv_unregister_queue(request_queue_t *q); extern int elv_may_queue(request_queue_t *, int); extern void elv_completed_request(request_queue_t *, struct request *); extern int elv_set_request(request_queue_t *, struct request *, int); -- cgit v1.2.3 From 4e83dc011fab1ab827a991fadccf581f541bf880 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:36:51 -0700 Subject: [PATCH] blk_congestion_wait threshold cleanup From: Nick Piggin Now that we are counting requests (not requests free), this patch changes the congested & batch watermarks to be more logical. Also a minor fix to the sysfs code. --- drivers/block/ll_rw_blk.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index b1248e542e5e..34cd5440d4ab 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -53,11 +53,11 @@ unsigned long blk_max_low_pfn, blk_max_pfn; static inline int batch_requests(struct request_queue *q) { - return min(q->nr_requests / 8, 8UL); + return q->nr_requests - min(q->nr_requests / 8, 8UL); } /* - * Return the threshold (number of free requests) at which the queue is + * Return the threshold (number of used requests) at which the queue is * considered to be congested. It include a little hysteresis to keep the * context switch rate down. 
*/ @@ -65,9 +65,11 @@ static inline int queue_congestion_on_threshold(struct request_queue *q) { int ret; - ret = q->nr_requests / 8 - 1; - if (ret < 0) - ret = 1; + ret = q->nr_requests - (q->nr_requests / 8) + 1; + + if (ret > q->nr_requests) + ret = q->nr_requests; + return ret; } @@ -78,9 +80,11 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) { int ret; - ret = q->nr_requests / 8 + 1; - if (ret > q->nr_requests) - ret = q->nr_requests; + ret = q->nr_requests - (q->nr_requests / 8) - 1; + + if (ret < 1) + ret = 1; + return ret; } @@ -1316,7 +1320,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) goto out; } rl->count[rw]++; - if ((q->nr_requests - rl->count[rw]) < queue_congestion_on_threshold(q)) + if (rl->count[rw] >= queue_congestion_on_threshold(q)) set_queue_congested(q, rw); spin_unlock_irq(q->queue_lock); @@ -1324,7 +1328,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) if (!rq) { spin_lock_irq(q->queue_lock); rl->count[rw]--; - if ((q->nr_requests - rl->count[rw]) >= queue_congestion_off_threshold(q)) + if (rl->count[rw] < queue_congestion_off_threshold(q)) clear_queue_congested(q, rw); spin_unlock_irq(q->queue_lock); goto out; @@ -1545,10 +1549,9 @@ void __blk_put_request(request_queue_t *q, struct request *req) blk_free_request(q, req); rl->count[rw]--; - if ((q->nr_requests - rl->count[rw]) >= - queue_congestion_off_threshold(q)) + if (rl->count[rw] < queue_congestion_off_threshold(q)) clear_queue_congested(q, rw); - if ((q->nr_requests - rl->count[rw]) >= batch_requests(q) && + if (rl->count[rw] < batch_requests(q) && waitqueue_active(&rl->wait[rw])) wake_up(&rl->wait[rw]); } @@ -2402,19 +2405,15 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) if (q->nr_requests < BLKDEV_MIN_RQ) q->nr_requests = BLKDEV_MIN_RQ; - if ((q->nr_requests - rl->count[READ]) < - queue_congestion_on_threshold(q)) + if (rl->count[READ] >= 
queue_congestion_on_threshold(q)) set_queue_congested(q, READ); - else if ((q->nr_requests - rl->count[READ]) >= - queue_congestion_off_threshold(q)) + else if (rl->count[READ] < queue_congestion_off_threshold(q)) clear_queue_congested(q, READ); - if ((q->nr_requests - rl->count[READ]) < - queue_congestion_on_threshold(q)) - set_queue_congested(q, READ); - else if ((q->nr_requests - rl->count[READ]) >= - queue_congestion_off_threshold(q)) - clear_queue_congested(q, READ); + if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) + set_queue_congested(q, WRITE); + else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) + clear_queue_congested(q, WRITE); return ret; } -- cgit v1.2.3 From 08f364136f8ebfd780d52960dd4834746190d98a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:36:59 -0700 Subject: [PATCH] allow the IO scheduler to pass an allocation hint to From: Nick Piggin This patch implements a hint so that AS can tell the request allocator to allocate a request even if there are none left (the accounting is quite flexible and easily handles overallocations). elv_may_queue semantics have changed from "the elevator does _not_ want another request allocated" to "the elevator _insists_ that another request is allocated". I couldn't see any harm ;) Now in practice, AS will only allow _1_ request over the limit, because as soon as the request is sent to AS, it stops anticipating. 
--- drivers/block/as-iosched.c | 15 +++++++++++++++ drivers/block/elevator.c | 2 +- drivers/block/ll_rw_blk.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c index e6af1f822630..2e5e64fb3b39 100644 --- a/drivers/block/as-iosched.c +++ b/drivers/block/as-iosched.c @@ -1641,6 +1641,20 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) return 1; } +static int as_may_queue(request_queue_t *q, int rw) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_io_context *aic; + if (ad->antic_status == ANTIC_WAIT_REQ || + ad->antic_status == ANTIC_WAIT_NEXT) { + aic = get_as_io_context(); + if (ad->as_io_context == aic) + return 1; + } + + return 0; +} + static void as_exit(request_queue_t *q, elevator_t *e) { struct as_data *ad = e->elevator_data; @@ -1879,6 +1893,7 @@ elevator_t iosched_as = { .elevator_latter_req_fn = as_latter_request, .elevator_set_req_fn = as_set_request, .elevator_put_req_fn = as_put_request, + .elevator_may_queue_fn = as_may_queue, .elevator_init_fn = as_init, .elevator_exit_fn = as_exit, diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 89af76783943..485561a037fc 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -368,7 +368,7 @@ int elv_may_queue(request_queue_t *q, int rw) if (e->elevator_may_queue_fn) return e->elevator_may_queue_fn(q, rw); - return 1; + return 0; } void elv_completed_request(request_queue_t *q, struct request *rq) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 34cd5440d4ab..add1bf6130f1 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1315,7 +1315,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) struct request_list *rl = &q->rq; spin_lock_irq(q->queue_lock); - if (rl->count[rw] >= q->nr_requests || !elv_may_queue(q, rw)) { + if (rl->count[rw] >= q->nr_requests && !elv_may_queue(q, rw)) 
{ spin_unlock_irq(q->queue_lock); goto out; } -- cgit v1.2.3 From f67198fbeb5ac1cf51f124791db1f8d1b7a04b85 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:37:05 -0700 Subject: [PATCH] handle OOM in get_request_wait() From: Nick Piggin If there are no requests in flight against the target device and get_request() fails, nothing will wake us up. Fix. --- drivers/block/ll_rw_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index add1bf6130f1..57daaf4aea9d 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1381,7 +1381,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw) * no wakeup will be delivered. So now we're on the * waitqueue, go check for that. */ - rq = get_request(q, rw, GFP_ATOMIC & ~__GFP_HIGH); + rq = get_request(q, rw, GFP_NOIO); if (!rq) io_schedule(); finish_wait(&rl->wait[rw], &wait); -- cgit v1.2.3 From 80af89ca709d4dfe41178abe29217a0fefa1af12 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:37:12 -0700 Subject: [PATCH] block batching fairness From: Nick Piggin This patch fixes the request batching fairness/starvation issue. It's not clear what is going on with 2.4, but it seems that it's a problem around this area. Anyway, previously: * request queue fills up * process 1 calls get_request, sleeps * a couple of requests are freed * process 2 calls get_request, proceeds * a couple of requests are freed * process 2 calls get_request... Now as unlikely as it seems, it could be a problem. It's a fairness problem that process 2 can skip ahead of process 1 anyway. With the patch: * request queue fills up * any process calling get_request will sleep * once the queue gets below the batch watermark, processes start being woken, and may allocate. This patch includes Chris Mason's fix to only clear queue_full when all tasks have been woken. Previously I think starvation and unfairness could still occur.
With this change to the blk-fair-batches patch, Chris is showing some much improved numbers for 2.4 - 170 ms max wait vs 2700ms without blk-fair-batches for a dbench 90 run. He didn't indicate how much difference his patch alone made, but it is an important fix I think. --- drivers/block/ll_rw_blk.c | 75 +++++++++++++++++++++++++++++++---------------- include/linux/blkdev.h | 26 ++++++++++++++++ 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 57daaf4aea9d..f7981c1fa3e6 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -53,7 +53,7 @@ unsigned long blk_max_low_pfn, blk_max_pfn; static inline int batch_requests(struct request_queue *q) { - return q->nr_requests - min(q->nr_requests / 8, 8UL); + return q->nr_requests - min(q->nr_requests / 8, 8UL) - 1; } /* @@ -1309,13 +1309,16 @@ static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask) /* * Get a free request, queue_lock must not be held */ -static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) +static struct request * +get_request(request_queue_t *q, int rw, int gfp_mask, int force) { struct request *rq = NULL; struct request_list *rl = &q->rq; spin_lock_irq(q->queue_lock); - if (rl->count[rw] >= q->nr_requests && !elv_may_queue(q, rw)) { + if (rl->count[rw] == q->nr_requests) + blk_set_queue_full(q, rw); + if (blk_queue_full(q, rw) && !force && !elv_may_queue(q, rw)) { spin_unlock_irq(q->queue_lock); goto out; } @@ -1330,6 +1333,14 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) rl->count[rw]--; if (rl->count[rw] < queue_congestion_off_threshold(q)) clear_queue_congested(q, rw); + + if (rl->count[rw] <= batch_requests(q)) { + if (waitqueue_active(&rl->wait[rw])) + wake_up(&rl->wait[rw]); + else + blk_clear_queue_full(q, rw); + } + spin_unlock_irq(q->queue_lock); goto out; } @@ -1366,26 +1377,22 @@ static struct request 
*get_request_wait(request_queue_t *q, int rw) { DEFINE_WAIT(wait); struct request *rq; + int waited = 0; generic_unplug_device(q); do { - rq = get_request(q, rw, GFP_NOIO); + struct request_list *rl = &q->rq; - if (!rq) { - struct request_list *rl = &q->rq; + prepare_to_wait_exclusive(&rl->wait[rw], &wait, + TASK_UNINTERRUPTIBLE); - prepare_to_wait_exclusive(&rl->wait[rw], &wait, - TASK_UNINTERRUPTIBLE); - /* - * If _all_ the requests were suddenly returned then - * no wakeup will be delivered. So now we're on the - * waitqueue, go check for that. - */ - rq = get_request(q, rw, GFP_NOIO); - if (!rq) - io_schedule(); - finish_wait(&rl->wait[rw], &wait); + rq = get_request(q, rw, GFP_NOIO, waited); + + if (!rq) { + io_schedule(); + waited = 1; } + finish_wait(&rl->wait[rw], &wait); } while (!rq); return rq; @@ -1397,10 +1404,10 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask) BUG_ON(rw != READ && rw != WRITE); - rq = get_request(q, rw, gfp_mask); - - if (!rq && (gfp_mask & __GFP_WAIT)) + if (gfp_mask & __GFP_WAIT) rq = get_request_wait(q, rw); + else + rq = get_request(q, rw, gfp_mask, 0); return rq; } @@ -1551,9 +1558,13 @@ void __blk_put_request(request_queue_t *q, struct request *req) rl->count[rw]--; if (rl->count[rw] < queue_congestion_off_threshold(q)) clear_queue_congested(q, rw); - if (rl->count[rw] < batch_requests(q) && - waitqueue_active(&rl->wait[rw])) - wake_up(&rl->wait[rw]); + + if (rl->count[rw] <= batch_requests(q)) { + if (waitqueue_active(&rl->wait[rw])) + wake_up(&rl->wait[rw]); + else + blk_clear_queue_full(q, rw); + } } } @@ -1796,7 +1807,7 @@ get_rq: freereq = NULL; } else { spin_unlock_irq(q->queue_lock); - if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) { + if ((freereq = get_request(q, rw, GFP_ATOMIC, 0)) == NULL) { /* * READA bit set */ @@ -1904,8 +1915,7 @@ static inline void blk_partition_remap(struct bio *bio) * bio happens to be merged with someone else, and may change bi_dev and * bi_sector for 
remaps as it sees fit. So the values of these fields * should NOT be depended on after the call to generic_make_request. - * - * */ + */ void generic_make_request(struct bio *bio) { request_queue_t *q; @@ -2415,6 +2425,19 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) clear_queue_congested(q, WRITE); + if (rl->count[READ] >= q->nr_requests) { + blk_set_queue_full(q, READ); + } else if (rl->count[READ] <= batch_requests(q)) { + blk_clear_queue_full(q, READ); + wake_up_all(&rl->wait[READ]); + } + + if (rl->count[WRITE] >= q->nr_requests) { + blk_set_queue_full(q, WRITE); + } else if (rl->count[WRITE] <= batch_requests(q)) { + blk_clear_queue_full(q, WRITE); + wake_up_all(&rl->wait[WRITE]); + } return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4295d60bf661..d3a8f6ecd806 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -307,6 +307,8 @@ struct request_queue #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ +#define QUEUE_FLAG_READFULL 3 /* write queue has been filled */ +#define QUEUE_FLAG_WRITEFULL 4 /* read queue has been filled */ #define blk_queue_plugged(q) !list_empty(&(q)->plug_list) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) @@ -322,6 +324,30 @@ struct request_queue #define rq_data_dir(rq) ((rq)->flags & 1) +static inline int blk_queue_full(struct request_queue *q, int rw) +{ + if (rw == READ) + return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); +} + +static inline void blk_set_queue_full(struct request_queue *q, int rw) +{ + if (rw == READ) + set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + else + set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); +} + +static inline void blk_clear_queue_full(struct 
request_queue *q, int rw) +{ + if (rw == READ) + clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + else + clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); +} + + /* * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may * it already be started by driver. -- cgit v1.2.3 From 16f88dbdbffa3dc52b959706e6a311a932b51ed6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:37:19 -0700 Subject: [PATCH] generic io contexts From: Nick Piggin Generalise the AS-specific per-process IO context so that other IO schedulers could use it. --- drivers/block/as-iosched.c | 254 +++++++++++++++++---------------------------- drivers/block/ll_rw_blk.c | 88 ++++++++++++++++ include/linux/blkdev.h | 44 ++++++++ include/linux/sched.h | 6 +- kernel/exit.c | 4 +- kernel/fork.c | 2 +- 6 files changed, 233 insertions(+), 165 deletions(-) diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c index 2e5e64fb3b39..d63c92dfcf96 100644 --- a/drivers/block/as-iosched.c +++ b/drivers/block/as-iosched.c @@ -59,14 +59,6 @@ */ #define default_antic_expire ((HZ / 150) ? HZ / 150 : 1) -/* - * This is the per-process anticipatory I/O scheduler state. It is refcounted - * and kmalloc'ed. - * - * There is no locking protecting the contents of this structure! Pointers - * to a single as_io_context may appear in multiple queues at once. - */ - /* * Keep track of up to 20ms thinktimes. We can go as big as we like here, * however huge values tend to interfere and not decay fast enough. 
A program @@ -82,28 +74,6 @@ enum as_io_states { AS_TASK_IORUNNING, /* Process has completed some IO */ }; -struct as_io_context { - atomic_t refcount; - pid_t pid; - unsigned long state; - atomic_t nr_queued; /* queued reads & sync writes */ - atomic_t nr_dispatched; /* number of requests gone to the drivers */ - - spinlock_t lock; - - /* IO History tracking */ - /* Thinktime */ - unsigned long last_end_request; - unsigned long ttime_total; - unsigned long ttime_samples; - unsigned long ttime_mean; - /* Layout pattern */ - long seek_samples; - sector_t last_request_pos; - sector_t seek_total; - sector_t seek_mean; -}; - enum anticipation_status { ANTIC_OFF=0, /* Not anticipating (normal operation) */ ANTIC_WAIT_REQ, /* The last read has not yet completed */ @@ -144,8 +114,8 @@ struct as_data { unsigned long antic_start; /* jiffies: when it started */ struct timer_list antic_timer; /* anticipatory scheduling timer */ struct work_struct antic_work; /* Deferred unplugging */ - struct as_io_context *as_io_context;/* Identify the expected process */ - int aic_finished; /* IO associated with as_io_context finished */ + struct io_context *io_context; /* Identify the expected process */ + int ioc_finished; /* IO associated with io_context is finished */ int nr_dispatched; /* @@ -178,7 +148,7 @@ struct as_rq { struct request *request; - struct as_io_context *as_io_context; /* The submitting task */ + struct io_context *io_context; /* The submitting task */ /* * request hash, key is the ending offset (for back merge lookup) @@ -206,99 +176,55 @@ static kmem_cache_t *arq_pool; /* Debug */ static atomic_t nr_as_io_requests = ATOMIC_INIT(0); -static void put_as_io_context(struct as_io_context **paic) +/* Called to deallocate the as_io_context */ +static void free_as_io_context(struct as_io_context *aic) { - struct as_io_context *aic = *paic; - - if (aic == NULL) - return; - - BUG_ON(atomic_read(&aic->refcount) == 0); - - if (atomic_dec_and_test(&aic->refcount)) { - 
WARN_ON(atomic_read(&nr_as_io_requests) == 0); - atomic_dec(&nr_as_io_requests); - kfree(aic); - } + atomic_dec(&nr_as_io_requests); + kfree(aic); } -/* Called by the exitting task */ -void exit_as_io_context(void) +/* Called when the task exits */ +static void exit_as_io_context(struct as_io_context *aic) { - unsigned long flags; - struct as_io_context *aic; - - local_irq_save(flags); - aic = current->as_io_context; - if (aic) { - clear_bit(AS_TASK_RUNNING, &aic->state); - put_as_io_context(&aic); - current->as_io_context = NULL; - } - local_irq_restore(flags); + clear_bit(AS_TASK_RUNNING, &aic->state); } -/* - * If the current task has no IO context then create one and initialise it. - * If it does have a context, take a ref on it. - * - * This is always called in the context of the task which submitted the I/O. - * But weird things happen, so we disable local interrupts to ensure exclusive - * access to *current. - */ -static struct as_io_context *get_as_io_context(void) +static struct as_io_context *alloc_as_io_context(void) { - struct task_struct *tsk = current; - unsigned long flags; struct as_io_context *ret; - local_irq_save(flags); - ret = tsk->as_io_context; - if (ret == NULL) { - ret = kmalloc(sizeof(*ret), GFP_ATOMIC); - if (ret) { - atomic_inc(&nr_as_io_requests); - atomic_set(&ret->refcount, 1); - ret->pid = tsk->pid; - ret->state = 1 << AS_TASK_RUNNING; - atomic_set(&ret->nr_queued, 0); - atomic_set(&ret->nr_dispatched, 0); - spin_lock_init(&ret->lock); - ret->ttime_total = 0; - ret->ttime_samples = 0; - ret->ttime_mean = 0; - ret->seek_total = 0; - ret->seek_samples = 0; - ret->seek_mean = 0; - tsk->as_io_context = ret; - } + ret = kmalloc(sizeof(*ret), GFP_ATOMIC); + if (ret) { + atomic_inc(&nr_as_io_requests); + ret->dtor = free_as_io_context; + ret->exit = exit_as_io_context; + ret->state = 1 << AS_TASK_RUNNING; + atomic_set(&ret->nr_queued, 0); + atomic_set(&ret->nr_dispatched, 0); + spin_lock_init(&ret->lock); + ret->ttime_total = 0; + 
ret->ttime_samples = 0; + ret->ttime_mean = 0; + ret->seek_total = 0; + ret->seek_samples = 0; + ret->seek_mean = 0; } - local_irq_restore(flags); - atomic_inc(&ret->refcount); + return ret; } -static void -copy_as_io_context(struct as_io_context **pdst, struct as_io_context **psrc) +/* + * If the current task has no AS IO context then create one and initialise it. + * Then take a ref on the task's io context and return it. + */ +static struct io_context *as_get_io_context(void) { - struct as_io_context *src = *psrc; - - if (src) { - BUG_ON(atomic_read(&src->refcount) == 0); - atomic_inc(&src->refcount); - put_as_io_context(pdst); - *pdst = src; - } + struct io_context *ioc = get_io_context(); + if (ioc && !ioc->aic) + ioc->aic = alloc_as_io_context(); + return ioc; } -static void -swap_as_io_context(struct as_io_context **aic1, struct as_io_context **aic2) -{ - struct as_io_context *temp; - temp = *aic1; - *aic1 = *aic2; - *aic2 = temp; -} /* * the back merge hash support functions @@ -662,7 +588,7 @@ static void as_antic_waitreq(struct as_data *ad) { BUG_ON(ad->antic_status == ANTIC_FINISHED); if (ad->antic_status == ANTIC_OFF) { - if (!ad->as_io_context || ad->aic_finished) + if (!ad->io_context || ad->ioc_finished) as_antic_waitnext(ad); else ad->antic_status = ANTIC_WAIT_REQ; @@ -715,7 +641,7 @@ static int as_close_req(struct as_data *ad, struct as_rq *arq) sector_t next = arq->request->sector; sector_t delta; /* acceptable close offset (in sectors) */ - if (ad->antic_status == ANTIC_OFF || !ad->aic_finished) + if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished) delay = 0; else delay = ((jiffies - ad->antic_start) * 1000) / HZ; @@ -745,6 +671,7 @@ static int as_close_req(struct as_data *ad, struct as_rq *arq) */ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) { + struct io_context *ioc; struct as_io_context *aic; if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, arq)) { @@ -752,7 +679,7 @@ static int 
as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) return 1; } - if (ad->aic_finished && as_antic_expired(ad)) { + if (ad->ioc_finished && as_antic_expired(ad)) { /* * In this situation status should really be FINISHED, * however the timer hasn't had the chance to run yet. @@ -760,14 +687,18 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) return 1; } - aic = ad->as_io_context; - BUG_ON(!aic); + ioc = ad->io_context; + BUG_ON(!ioc); - if (arq && aic == arq->as_io_context) { + if (arq && ioc == arq->io_context) { /* request from same process */ return 1; } + aic = ioc->aic; + if (!aic) + return 0; + if (!test_bit(AS_TASK_RUNNING, &aic->state)) { /* process anticipated on has exitted */ return 1; @@ -810,7 +741,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) */ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) { - if (!ad->as_io_context) + if (!ad->io_context) /* * Last request submitted was a write */ @@ -973,12 +904,10 @@ static void as_completed_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator.elevator_data; struct as_rq *arq = RQ_DATA(rq); - struct as_io_context *aic = arq->as_io_context; + struct as_io_context *aic; - if (unlikely(!blk_fs_request(rq))) { - WARN_ON(aic); + if (unlikely(!blk_fs_request(rq))) return; - } WARN_ON(blk_fs_request(rq) && arq->state == AS_RQ_NEW); @@ -1004,18 +933,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq) ad->changed_batch = 0; } - if (!aic) + if (!arq->io_context) return; - spin_lock(&aic->lock); - if (arq->is_sync == REQ_SYNC) { - set_bit(AS_TASK_IORUNNING, &aic->state); - aic->last_end_request = jiffies; - } - - if (ad->as_io_context == aic) { + if (ad->io_context == arq->io_context) { ad->antic_start = jiffies; - ad->aic_finished = 1; + ad->ioc_finished = 1; if (ad->antic_status == ANTIC_WAIT_REQ) { /* * We were waiting on this request, now anticipate @@ -1024,9 +947,19 
@@ static void as_completed_request(request_queue_t *q, struct request *rq) as_antic_waitnext(ad); } } + + aic = arq->io_context->aic; + if (!aic) + return; + + spin_lock(&aic->lock); + if (arq->is_sync == REQ_SYNC) { + set_bit(AS_TASK_IORUNNING, &aic->state); + aic->last_end_request = jiffies; + } spin_unlock(&aic->lock); - put_as_io_context(&arq->as_io_context); + put_io_context(arq->io_context); } /* @@ -1047,9 +980,9 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq) WARN_ON(arq->state != AS_RQ_QUEUED); - if (arq->as_io_context) { - BUG_ON(!atomic_read(&arq->as_io_context->nr_queued)); - atomic_dec(&arq->as_io_context->nr_queued); + if (arq->io_context && arq->io_context->aic) { + BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); + atomic_dec(&arq->io_context->aic->nr_queued); } /* @@ -1082,10 +1015,12 @@ static void as_remove_dispatched_request(request_queue_t *q, struct request *rq) WARN_ON(arq->state != AS_RQ_DISPATCHED); WARN_ON(ON_RB(&arq->rb_node)); - aic = arq->as_io_context; - if (aic) { - WARN_ON(!atomic_read(&aic->nr_dispatched)); - atomic_dec(&aic->nr_dispatched); + if (arq->io_context && arq->io_context->aic) { + aic = arq->io_context->aic; + if (aic) { + WARN_ON(!atomic_read(&aic->nr_dispatched)); + atomic_dec(&aic->nr_dispatched); + } } } /* @@ -1180,17 +1115,17 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) if (data_dir == REQ_SYNC) { /* In case we have to anticipate after this */ - copy_as_io_context(&ad->as_io_context, &arq->as_io_context); + copy_io_context(&ad->io_context, &arq->io_context); } else { - if (ad->as_io_context) { - put_as_io_context(&ad->as_io_context); - ad->as_io_context = NULL; + if (ad->io_context) { + put_io_context(ad->io_context); + ad->io_context = NULL; } if (ad->current_write_count != 0) ad->current_write_count--; } - ad->aic_finished = 0; + ad->ioc_finished = 0; ad->next_arq[data_dir] = as_find_next_arq(ad, arq); @@ -1199,8 +1134,8 @@ static void 
as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) */ as_remove_queued_request(ad->q, arq->request); list_add_tail(&arq->request->queuelist, ad->dispatch); - if (arq->as_io_context) - atomic_inc(&arq->as_io_context->nr_dispatched); + if (arq->io_context && arq->io_context->aic) + atomic_inc(&arq->io_context->aic->nr_dispatched); WARN_ON(arq->state != AS_RQ_QUEUED); arq->state = AS_RQ_DISPATCHED; @@ -1355,11 +1290,11 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq) arq->is_sync = 0; data_dir = arq->is_sync; - arq->as_io_context = get_as_io_context(); + arq->io_context = as_get_io_context(); - if (arq->as_io_context) { - atomic_inc(&arq->as_io_context->nr_queued); - as_update_iohist(arq->as_io_context, arq->request); + if (arq->io_context && arq->io_context->aic) { + atomic_inc(&arq->io_context->aic->nr_queued); + as_update_iohist(arq->io_context->aic, arq->request); } as_add_arq_rb(ad, arq); @@ -1575,8 +1510,7 @@ as_merged_requests(request_queue_t *q, struct request *req, * Don't copy here but swap, because when anext is * removed below, it must contain the unused context */ - swap_as_io_context(&arq->as_io_context, - &anext->as_io_context); + swap_io_context(&arq->io_context, &anext->io_context); } } @@ -1584,7 +1518,7 @@ as_merged_requests(request_queue_t *q, struct request *req, * kill knowledge of next, this one is a goner */ as_remove_queued_request(q, next); - put_as_io_context(&anext->as_io_context); + put_io_context(anext->io_context); } /* @@ -1630,7 +1564,7 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) RB_CLEAR(&arq->rb_node); arq->request = rq; arq->state = AS_RQ_NEW; - arq->as_io_context = NULL; + arq->io_context = NULL; INIT_LIST_HEAD(&arq->hash); arq->hash_valid_count = 0; INIT_LIST_HEAD(&arq->fifo); @@ -1643,16 +1577,18 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) static int as_may_queue(request_queue_t *q, int rw) { + int ret = 0; struct as_data *ad 
= q->elevator.elevator_data; - struct as_io_context *aic; + struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - aic = get_as_io_context(); - if (ad->as_io_context == aic) - return 1; + ioc = as_get_io_context(); + if (ad->io_context == ioc) + ret = 1; + put_io_context(ioc); } - return 0; + return ret; } static void as_exit(request_queue_t *q, elevator_t *e) @@ -1666,7 +1602,7 @@ static void as_exit(request_queue_t *q, elevator_t *e) BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); mempool_destroy(ad->arq_pool); - put_as_io_context(&ad->as_io_context); + put_io_context(ad->io_context); kfree(ad->hash); kfree(ad); } diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index f7981c1fa3e6..8f44b5690d9a 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1318,6 +1318,7 @@ get_request(request_queue_t *q, int rw, int gfp_mask, int force) spin_lock_irq(q->queue_lock); if (rl->count[rw] == q->nr_requests) blk_set_queue_full(q, rw); + if (blk_queue_full(q, rw) && !force && !elv_may_queue(q, rw)) { spin_unlock_irq(q->queue_lock); goto out; @@ -2377,6 +2378,93 @@ int __init blk_dev_init(void) return 0; } + +/* + * IO Context helper functions + */ +void put_io_context(struct io_context *ioc) +{ + if (ioc == NULL) + return; + + BUG_ON(atomic_read(&ioc->refcount) == 0); + + if (atomic_dec_and_test(&ioc->refcount)) { + if (ioc->aic && ioc->aic->dtor) + ioc->aic->dtor(ioc->aic); + kfree(ioc); + } +} + +/* Called by the exitting task */ +void exit_io_context(void) +{ + unsigned long flags; + struct io_context *ioc; + + local_irq_save(flags); + ioc = current->io_context; + if (ioc) { + if (ioc->aic && ioc->aic->exit) + ioc->aic->exit(ioc->aic); + put_io_context(ioc); + current->io_context = NULL; + } + local_irq_restore(flags); +} + +/* + * If the current task has no IO context then create one and initialise it. + * If it does have a context, take a ref on it. 
+ * + * This is always called in the context of the task which submitted the I/O. + * But weird things happen, so we disable local interrupts to ensure exclusive + * access to *current. + */ +struct io_context *get_io_context(void) +{ + struct task_struct *tsk = current; + unsigned long flags; + struct io_context *ret; + + local_irq_save(flags); + ret = tsk->io_context; + if (ret == NULL) { + ret = kmalloc(sizeof(*ret), GFP_ATOMIC); + if (ret) { + atomic_set(&ret->refcount, 1); + ret->pid = tsk->pid; + ret->aic = NULL; + tsk->io_context = ret; + } + } + local_irq_restore(flags); + atomic_inc(&ret->refcount); + return ret; +} + +void copy_io_context(struct io_context **pdst, struct io_context **psrc) +{ + struct io_context *src = *psrc; + struct io_context *dst = *pdst; + + if (src) { + BUG_ON(atomic_read(&src->refcount) == 0); + atomic_inc(&src->refcount); + put_io_context(dst); + *pdst = src; + } +} + +void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) +{ + struct io_context *temp; + temp = *ioc1; + *ioc1 = *ioc2; + *ioc2 = temp; +} + + /* * sysfs parts below */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d3a8f6ecd806..13116a7a7969 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,50 @@ struct request_pm_state; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ +/* + * This is the per-process anticipatory I/O scheduler state. 
+ */ +struct as_io_context { + spinlock_t lock; + + void (*dtor)(struct as_io_context *aic); /* destructor */ + void (*exit)(struct as_io_context *aic); /* called on task exit */ + + unsigned long state; + atomic_t nr_queued; /* queued reads & sync writes */ + atomic_t nr_dispatched; /* number of requests gone to the drivers */ + + /* IO History tracking */ + /* Thinktime */ + unsigned long last_end_request; + unsigned long ttime_total; + unsigned long ttime_samples; + unsigned long ttime_mean; + /* Layout pattern */ + long seek_samples; + sector_t last_request_pos; + sector_t seek_total; + sector_t seek_mean; +}; + +/* + * This is the per-process I/O subsystem state. It is refcounted and + * kmalloc'ed. Currently all fields are modified in process io context + * (apart from the atomic refcount), so require no locking. + */ +struct io_context { + atomic_t refcount; + pid_t pid; + + struct as_io_context *aic; +}; + +void put_io_context(struct io_context *ioc); +void exit_io_context(void); +struct io_context *get_io_context(void); +void copy_io_context(struct io_context **pdst, struct io_context **psrc); +void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); + struct request_list { int count[2]; mempool_t *rq_pool; diff --git a/include/linux/sched.h b/include/linux/sched.h index e29f9606c2aa..750f2a12cada 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -321,8 +321,8 @@ struct k_itimer { }; -struct as_io_context; /* Anticipatory scheduler */ -void exit_as_io_context(void); +struct io_context; /* See blkdev.h */ +void exit_io_context(void); struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ @@ -452,7 +452,7 @@ struct task_struct { struct dentry *proc_dentry; struct backing_dev_info *backing_dev_info; - struct as_io_context *as_io_context; + struct io_context *io_context; unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. 
*/ diff --git a/kernel/exit.c b/kernel/exit.c index 8471381546af..ebc839b645a7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -682,8 +682,8 @@ NORET_TYPE void do_exit(long code) panic("Attempted to kill the idle task!"); if (unlikely(tsk->pid == 1)) panic("Attempted to kill init!"); - if (tsk->as_io_context) - exit_as_io_context(); + if (tsk->io_context) + exit_io_context(); tsk->flags |= PF_EXITING; del_timer_sync(&tsk->real_timer); diff --git a/kernel/fork.c b/kernel/fork.c index fcdc884cd894..96ce3385cc75 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -864,7 +864,7 @@ struct task_struct *copy_process(unsigned long clone_flags, p->lock_depth = -1; /* -1 = no lock */ p->start_time = get_jiffies_64(); p->security = NULL; - p->as_io_context = NULL; + p->io_context = NULL; retval = -ENOMEM; if ((retval = security_task_alloc(p))) -- cgit v1.2.3 From 930805a244eaadb5aefbc08b558db72136128388 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:37:26 -0700 Subject: [PATCH] block request batching From: Nick Piggin The following patch gets batching working how it should be. After a process is woken up, it is allowed to allocate up to 32 requests for 20ms. It does not stop other processes submitting requests if it isn't submitting though. This should allow fewer context switches, and allow batches of requests from each process to be sent to the io scheduler instead of 1 request from each process. tiobench sequential writes are more than tripled, random writes are nearly doubled over mm1. In earlier tests I generally saw better CPU efficiency but it doesn't show here. There is still debug to be taken out. It's also only on UP.
Avg Maximum Lat% Lat% CPU Identifier Rate (CPU%) Latency Latency >2s >10s Eff ------------------- ------ --------- ---------- ------- ------ ---- -2.5.71-mm1 11.13 3.783% 46.10 24668.01 0.84 0.02 294 +2.5.71-mm1 13.21 4.489% 37.37 5691.66 0.76 0.00 294 Random Reads ------------------- ------ --------- ---------- ------- ------ ---- -2.5.71-mm1 0.97 0.582% 519.86 6444.66 11.93 0.00 167 +2.5.71-mm1 1.01 0.604% 484.59 6604.93 10.73 0.00 167 Sequential Writes ------------------- ------ --------- ---------- ------- ------ ---- -2.5.71-mm1 4.85 4.456% 77.80 99359.39 0.18 0.13 109 +2.5.71-mm1 14.11 14.19% 10.07 22805.47 0.09 0.04 99 Random Writes ------------------- ------ --------- ---------- ------- ------ ---- -2.5.71-mm1 0.46 0.371% 14.48 6173.90 0.23 0.00 125 +2.5.71-mm1 0.86 0.744% 24.08 8753.66 0.31 0.00 115 It decreases context switch rate on IBM's 8-way on ext2 tiobench 64 threads from ~2500/s to ~140/s on their regression tests. --- drivers/block/ll_rw_blk.c | 132 ++++++++++++++++++++++++++++++---------------- include/linux/blkdev.h | 6 +++ 2 files changed, 94 insertions(+), 44 deletions(-) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 8f44b5690d9a..633266ee8c87 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -51,10 +51,11 @@ static struct workqueue_struct *kblockd_workqueue; unsigned long blk_max_low_pfn, blk_max_pfn; -static inline int batch_requests(struct request_queue *q) -{ - return q->nr_requests - min(q->nr_requests / 8, 8UL) - 1; -} +/* Amount of time in which a process may batch requests */ +#define BLK_BATCH_TIME (HZ/50UL) + +/* Number of requests a "batching" process may submit */ +#define BLK_BATCH_REQ 32 /* * Return the threshold (number of used requests) at which the queue is @@ -1305,24 +1306,76 @@ static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask) return NULL; } +/* + * ioc_batching returns true if the ioc is a valid batching request and + * should be given 
priority access to a request. + */ +static inline int ioc_batching(struct io_context *ioc) +{ + if (!ioc) + return 0; + + return ioc->nr_batch_requests == BLK_BATCH_REQ || + (ioc->nr_batch_requests > 0 + && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); +} + +/* + * ioc_set_batching sets ioc to be a new "batcher" if it is not one + */ +void ioc_set_batching(struct io_context *ioc) +{ + if (!ioc || ioc_batching(ioc)) + return; + + ioc->nr_batch_requests = BLK_BATCH_REQ; + ioc->last_waited = jiffies; +} + +/* + * A request has just been released. Account for it, update the full and + * congestion status, wake up any waiters. Called under q->queue_lock. + */ +static void freed_request(request_queue_t *q, int rw) +{ + struct request_list *rl = &q->rq; + + rl->count[rw]--; + if (rl->count[rw] < queue_congestion_off_threshold(q)) + clear_queue_congested(q, rw); + if (rl->count[rw]+1 <= q->nr_requests) { + smp_mb(); + if (waitqueue_active(&rl->wait[rw])) + wake_up(&rl->wait[rw]); + if (!waitqueue_active(&rl->wait[rw])) + blk_clear_queue_full(q, rw); + } +} + #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* * Get a free request, queue_lock must not be held */ -static struct request * -get_request(request_queue_t *q, int rw, int gfp_mask, int force) +static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) { struct request *rq = NULL; struct request_list *rl = &q->rq; + struct io_context *ioc = get_io_context(); spin_lock_irq(q->queue_lock); - if (rl->count[rw] == q->nr_requests) - blk_set_queue_full(q, rw); + if (rl->count[rw]+1 >= q->nr_requests) { + if (!blk_queue_full(q, rw)) { + ioc_set_batching(ioc); + blk_set_queue_full(q, rw); + } + } - if (blk_queue_full(q, rw) && !force && !elv_may_queue(q, rw)) { + if (blk_queue_full(q, rw) + && !ioc_batching(ioc) && !elv_may_queue(q, rw)) { spin_unlock_irq(q->queue_lock); goto out; } + rl->count[rw]++; if (rl->count[rw] >= queue_congestion_on_threshold(q)) 
set_queue_congested(q, rw); @@ -1331,20 +1384,13 @@ get_request(request_queue_t *q, int rw, int gfp_mask, int force) rq = blk_alloc_request(q, gfp_mask); if (!rq) { spin_lock_irq(q->queue_lock); - rl->count[rw]--; - if (rl->count[rw] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, rw); - - if (rl->count[rw] <= batch_requests(q)) { - if (waitqueue_active(&rl->wait[rw])) - wake_up(&rl->wait[rw]); - else - blk_clear_queue_full(q, rw); - } - + freed_request(q, rw); spin_unlock_irq(q->queue_lock); goto out; } + + if (ioc_batching(ioc)) + ioc->nr_batch_requests--; INIT_LIST_HEAD(&rq->queuelist); @@ -1367,6 +1413,7 @@ get_request(request_queue_t *q, int rw, int gfp_mask, int force) rq->sense = NULL; out: + put_io_context(ioc); return rq; } @@ -1378,7 +1425,6 @@ static struct request *get_request_wait(request_queue_t *q, int rw) { DEFINE_WAIT(wait); struct request *rq; - int waited = 0; generic_unplug_device(q); do { @@ -1387,11 +1433,15 @@ static struct request *get_request_wait(request_queue_t *q, int rw) prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE); - rq = get_request(q, rw, GFP_NOIO, waited); + rq = get_request(q, rw, GFP_NOIO); if (!rq) { + struct io_context *ioc; + io_schedule(); - waited = 1; + ioc = get_io_context(); + ioc_set_batching(ioc); + put_io_context(ioc); } finish_wait(&rl->wait[rw], &wait); } while (!rq); @@ -1408,7 +1458,7 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask) if (gfp_mask & __GFP_WAIT) rq = get_request_wait(q, rw); else - rq = get_request(q, rw, gfp_mask, 0); + rq = get_request(q, rw, gfp_mask); return rq; } @@ -1555,17 +1605,7 @@ void __blk_put_request(request_queue_t *q, struct request *req) BUG_ON(!list_empty(&req->queuelist)); blk_free_request(q, req); - - rl->count[rw]--; - if (rl->count[rw] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, rw); - - if (rl->count[rw] <= batch_requests(q)) { - if (waitqueue_active(&rl->wait[rw])) - wake_up(&rl->wait[rw]); - 
else - blk_clear_queue_full(q, rw); - } + freed_request(q, rw); } } @@ -1808,7 +1848,7 @@ get_rq: freereq = NULL; } else { spin_unlock_irq(q->queue_lock); - if ((freereq = get_request(q, rw, GFP_ATOMIC, 0)) == NULL) { + if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) { /* * READA bit set */ @@ -1852,13 +1892,12 @@ out: __blk_put_request(q, freereq); if (blk_queue_plugged(q)) { - int nr_queued = q->rq.count[0] + q->rq.count[1]; + int nr_queued = q->rq.count[READ] + q->rq.count[WRITE]; if (nr_queued == q->unplug_thresh) __generic_unplug_device(q); } spin_unlock_irq(q->queue_lock); - return 0; end_io: @@ -1866,7 +1905,6 @@ end_io: return 0; } - /* * If bio->bi_dev is a partition, remap the location */ @@ -2378,6 +2416,7 @@ int __init blk_dev_init(void) return 0; } +static atomic_t nr_io_contexts = ATOMIC_INIT(0); /* * IO Context helper functions @@ -2393,6 +2432,7 @@ void put_io_context(struct io_context *ioc) if (ioc->aic && ioc->aic->dtor) ioc->aic->dtor(ioc->aic); kfree(ioc); + atomic_dec(&nr_io_contexts); } } @@ -2409,7 +2449,8 @@ void exit_io_context(void) ioc->aic->exit(ioc->aic); put_io_context(ioc); current->io_context = NULL; - } + } else + WARN_ON(1); local_irq_restore(flags); } @@ -2432,8 +2473,11 @@ struct io_context *get_io_context(void) if (ret == NULL) { ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (ret) { + atomic_inc(&nr_io_contexts); atomic_set(&ret->refcount, 1); ret->pid = tsk->pid; + ret->last_waited = jiffies; /* doesn't matter... 
*/ + ret->nr_batch_requests = 0; /* because this is 0 */ ret->aic = NULL; tsk->io_context = ret; } @@ -2515,16 +2559,16 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) if (rl->count[READ] >= q->nr_requests) { blk_set_queue_full(q, READ); - } else if (rl->count[READ] <= batch_requests(q)) { + } else if (rl->count[READ]+1 <= q->nr_requests) { blk_clear_queue_full(q, READ); - wake_up_all(&rl->wait[READ]); + wake_up(&rl->wait[READ]); } if (rl->count[WRITE] >= q->nr_requests) { blk_set_queue_full(q, WRITE); - } else if (rl->count[WRITE] <= batch_requests(q)) { + } else if (rl->count[WRITE]+1 <= q->nr_requests) { blk_clear_queue_full(q, WRITE); - wake_up_all(&rl->wait[WRITE]); + wake_up(&rl->wait[WRITE]); } return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 13116a7a7969..69178ca80d7d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -59,6 +59,12 @@ struct io_context { atomic_t refcount; pid_t pid; + /* + * For request batching + */ + unsigned long last_waited; /* Time last woken after wait for request */ + int nr_batch_requests; /* Number of requests left in the batch */ + struct as_io_context *aic; }; -- cgit v1.2.3 From 07581dd2bdd67146d13a61ca6506c6c8b694666a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:37:34 -0700 Subject: [PATCH] get_io_context fixes - pass gfp_flags to get_io_context(): not all callers are forced to use GFP_ATOMIC(). - fix locking in get_io_context(): bump the refcount while in the exclusive region. - don't go oops in get_io_context() if the kmalloc failed. - in as_get_io_context(): fail the whole thing if we were unable to allocate the AS-specific part.
- as_remove_queued_request() cleanup --- drivers/block/as-iosched.c | 50 ++++++++++++++++++++++------------------------ drivers/block/ll_rw_blk.c | 9 +++++---- include/linux/blkdev.h | 2 +- 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c index d63c92dfcf96..b19289348fb0 100644 --- a/drivers/block/as-iosched.c +++ b/drivers/block/as-iosched.c @@ -219,13 +219,17 @@ static struct as_io_context *alloc_as_io_context(void) */ static struct io_context *as_get_io_context(void) { - struct io_context *ioc = get_io_context(); - if (ioc && !ioc->aic) + struct io_context *ioc = get_io_context(GFP_ATOMIC); + if (ioc && !ioc->aic) { ioc->aic = alloc_as_io_context(); + if (!ioc->aic) { + put_io_context(ioc); + ioc = NULL; + } + } return ioc; } - /* * the back merge hash support functions */ @@ -971,32 +975,26 @@ static void as_completed_request(request_queue_t *q, struct request *rq) static void as_remove_queued_request(request_queue_t *q, struct request *rq) { struct as_rq *arq = RQ_DATA(rq); + const int data_dir = arq->is_sync; + struct as_data *ad = q->elevator.elevator_data; - if (!arq) - BUG(); - else { - const int data_dir = arq->is_sync; - struct as_data *ad = q->elevator.elevator_data; - - WARN_ON(arq->state != AS_RQ_QUEUED); - - if (arq->io_context && arq->io_context->aic) { - BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); - atomic_dec(&arq->io_context->aic->nr_queued); - } - - /* - * Update the "next_arq" cache if we are about to remove its - * entry - */ - if (ad->next_arq[data_dir] == arq) - ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + WARN_ON(arq->state != AS_RQ_QUEUED); - list_del_init(&arq->fifo); - as_remove_merge_hints(q, arq); - as_del_arq_rb(ad, arq); + if (arq->io_context && arq->io_context->aic) { + BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); + atomic_dec(&arq->io_context->aic->nr_queued); } + /* + * Update the "next_arq" cache if we are about to remove its + 
* entry + */ + if (ad->next_arq[data_dir] == arq) + ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + + list_del_init(&arq->fifo); + as_remove_merge_hints(q, arq); + as_del_arq_rb(ad, arq); } /* @@ -1292,7 +1290,7 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq) arq->io_context = as_get_io_context(); - if (arq->io_context && arq->io_context->aic) { + if (arq->io_context) { atomic_inc(&arq->io_context->aic->nr_queued); as_update_iohist(arq->io_context->aic, arq->request); } diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 633266ee8c87..13cc6073bb47 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1360,7 +1360,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) { struct request *rq = NULL; struct request_list *rl = &q->rq; - struct io_context *ioc = get_io_context(); + struct io_context *ioc = get_io_context(gfp_mask); spin_lock_irq(q->queue_lock); if (rl->count[rw]+1 >= q->nr_requests) { @@ -1439,7 +1439,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw) struct io_context *ioc; io_schedule(); - ioc = get_io_context(); + ioc = get_io_context(GFP_NOIO); ioc_set_batching(ioc); put_io_context(ioc); } @@ -2462,7 +2462,7 @@ void exit_io_context(void) * But weird things happen, so we disable local interrupts to ensure exclusive * access to *current. 
*/ -struct io_context *get_io_context(void) +struct io_context *get_io_context(int gfp_flags) { struct task_struct *tsk = current; unsigned long flags; @@ -2482,8 +2482,9 @@ struct io_context *get_io_context(void) tsk->io_context = ret; } } + if (ret) + atomic_inc(&ret->refcount); local_irq_restore(flags); - atomic_inc(&ret->refcount); return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 69178ca80d7d..2e7f92aa1dc2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -70,7 +70,7 @@ struct io_context { void put_io_context(struct io_context *ioc); void exit_io_context(void); -struct io_context *get_io_context(void); +struct io_context *get_io_context(int gfp_flags); void copy_io_context(struct io_context **pdst, struct io_context **psrc); void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); -- cgit v1.2.3 From e34b0f533d3da5bdff2360c306e4255fed3cbebd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:37:40 -0700 Subject: [PATCH] block allocation comments From: Nick Piggin Add some comments to the request allocation code. --- drivers/block/ll_rw_blk.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 13cc6073bb47..f03a77be82b6 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1315,13 +1315,21 @@ static inline int ioc_batching(struct io_context *ioc) if (!ioc) return 0; + /* + * Make sure the process is able to allocate at least 1 request + * even if the batch times out, otherwise we could theoretically + * lose wakeups. + */ return ioc->nr_batch_requests == BLK_BATCH_REQ || (ioc->nr_batch_requests > 0 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); } /* - * ioc_set_batching sets ioc to be a new "batcher" if it is not one + * ioc_set_batching sets ioc to be a new "batcher" if it is not one. 
This + * will cause the process to be a "batcher" on all queues in the system. This + * is the behaviour we want though - once it gets a wakeup it should be given + * a nice run. */ void ioc_set_batching(struct io_context *ioc) { @@ -1364,6 +1372,12 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) spin_lock_irq(q->queue_lock); if (rl->count[rw]+1 >= q->nr_requests) { + /* + * The queue will fill after this allocation, so set it as + * full, and mark this process as "batching". This process + * will be allowed to complete a batch of requests, others + * will be blocked. + */ if (!blk_queue_full(q, rw)) { ioc_set_batching(ioc); blk_set_queue_full(q, rw); @@ -1372,6 +1386,10 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) if (blk_queue_full(q, rw) && !ioc_batching(ioc) && !elv_may_queue(q, rw)) { + /* + * The queue is full and the allocating process is not a + * "batcher", and not exempted by the IO scheduler + */ spin_unlock_irq(q->queue_lock); goto out; } @@ -1383,6 +1401,13 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) rq = blk_alloc_request(q, gfp_mask); if (!rq) { + /* + * Allocation failed presumably due to memory. Undo anything + * we might have messed up. + * + * Allocating task should really be put onto the front of the + * wait queue, but this is pretty rare. + */ spin_lock_irq(q->queue_lock); freed_request(q, rw); spin_unlock_irq(q->queue_lock); @@ -1439,6 +1464,13 @@ static struct request *get_request_wait(request_queue_t *q, int rw) struct io_context *ioc; io_schedule(); + + /* + * After sleeping, we become a "batching" process and + * will be able to allocate at least one request, and + * up to a big batch of them for a small period time. 
+ * See ioc_batching, ioc_set_batching + */ ioc = get_io_context(GFP_NOIO); ioc_set_batching(ioc); put_io_context(ioc); -- cgit v1.2.3 From 12c1bf07056e1cb01f1ae2f23846780e04cb65c7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:37:46 -0700 Subject: [PATCH] after exec_mmap(), exec cannot fail If de_thread() fails in flush_old_exec() then we try to fail the execve(). That is a bad move, because exec_mmap() has already switched the current process over to the new mm. The new process is not yet sufficiently set up to handle the error and the kernel doublefaults and dies. exec_mmap() is the point of no return. Change flush_old_exec() to call de_thread() before running exec_mmap() so the execing program sees the error. I added fault injection to both de_thread() and exec_mmap() - everything now survives OK. --- fs/exec.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 4f37deb79e00..3d9730f93e08 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -759,12 +759,6 @@ int flush_old_exec(struct linux_binprm * bprm) char * name; int i, ch, retval; - /* - * Release all of the old mmap stuff - */ - retval = exec_mmap(bprm->mm); - if (retval) - goto out; /* * Make sure we have a private signal table and that * we are unassociated from the previous thread group. @@ -773,6 +767,13 @@ int flush_old_exec(struct linux_binprm * bprm) if (retval) goto out; + /* + * Release all of the old mmap stuff + */ + retval = exec_mmap(bprm->mm); + if (retval) + goto out; + bprm->mm = NULL; /* We're using it now */ /* This is the point of no return */ -- cgit v1.2.3 From 1c630a8df793686088620aa88913e3cb9b635353 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:37:54 -0700 Subject: [PATCH] bootmem.c cleanups From: Davide Libenzi - Remove a couple of impossible debug checks (unsigneds cannot be negative!) - If __alloc_bootmem_core() fails with a goal and unaligned node_boot_start it'll loop forever.
--- mm/bootmem.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mm/bootmem.c b/mm/bootmem.c index db4aff37a1a9..48f286bb780a 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -84,10 +84,6 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add if (!size) BUG(); - if (sidx < 0) - BUG(); - if (eidx < 0) - BUG(); if (sidx >= eidx) BUG(); if ((addr >> PAGE_SHIFT) >= bdata->node_low_pfn) @@ -202,7 +198,7 @@ restart_scan: ; } - if (preferred) { + if (preferred > offset) { preferred = offset; goto restart_scan; } -- cgit v1.2.3 From 0d98604b2849f0449b15bf0cb90654e949db4cb8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:38:00 -0700 Subject: [PATCH] epoll: microoptimisations From: Davide Libenzi - Inline eventpoll_release() so that __fput() does not need to call in epoll code if the file itself is not registered inside an epoll fd - Add inclusion due __u32 and __u64 usage - Fix debug printf that would otherwise panic if enabled with the new epoll code --- fs/eventpoll.c | 30 +++++++++--------------------- include/linux/eventpoll.h | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b17cdae0109c..d6fef13ef5e0 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -443,28 +443,16 @@ void eventpoll_init_file(struct file *file) /* - * This is called from inside fs/file_table.c:__fput() to unlink files - * from the eventpoll interface. We need to have this facility to cleanup - * correctly files that are closed without being removed from the eventpoll - * interface. + * This is called from eventpoll_release() to unlink files from the eventpoll + * interface. We need to have this facility to cleanup correctly files that are + * closed without being removed from the eventpoll interface. 
*/ -void eventpoll_release(struct file *file) +void eventpoll_release_file(struct file *file) { struct list_head *lsthead = &file->f_ep_links; struct eventpoll *ep; struct epitem *epi; - /* - * Fast check to avoid the get/release of the semaphore. Since - * we're doing this outside the semaphore lock, it might return - * false negatives, but we don't care. It'll help in 99.99% of cases - * to avoid the semaphore lock. False positives simply cannot happen - * because the file in on the way to be removed and nobody ( but - * eventpoll ) has still a reference to this file. - */ - if (list_empty(lsthead)) - return; - /* * We don't want to get "file->f_ep_lock" because it is not * necessary. It is not necessary because we're in the "struct file" @@ -541,7 +529,7 @@ eexit_1: /* * The following function implement the controller interface for the eventpoll * file that enable the insertion/removal/change of file descriptors inside - * the interest set. It rapresents the kernel part of the user spcae epoll_ctl(2). + * the interest set. It rapresents the kernel part of the user space epoll_ctl(2). 
*/ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) { @@ -551,8 +539,8 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event *even struct epitem *epi; struct epoll_event epds; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %u)\n", - current, epfd, op, fd, event->events)); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", + current, epfd, op, fd, event)); error = -EFAULT; if (copy_from_user(&epds, event, sizeof(struct epoll_event))) @@ -633,8 +621,8 @@ eexit_3: eexit_2: fput(file); eexit_1: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %u) = %d\n", - current, epfd, op, fd, event->events, error)); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", + current, epfd, op, fd, event, error)); return error; } diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 60f8cadb1f50..f89acbe8183a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -14,6 +14,8 @@ #ifndef _LINUX_EVENTPOLL_H #define _LINUX_EVENTPOLL_H +#include + /* Valid opcodes to issue to sys_epoll_ctl() */ #define EPOLL_CTL_ADD 1 @@ -55,8 +57,37 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event *events, int maxeven /* Used to initialize the epoll bits inside the "struct file" */ void eventpoll_init_file(struct file *file); -/* Used in fs/file_table.c:__fput() to unlink files from the eventpoll interface */ -void eventpoll_release(struct file *file); +/* Used to release the epoll bits inside the "struct file" */ +void eventpoll_release_file(struct file *file); + +/* + * This is called from inside fs/file_table.c:__fput() to unlink files + * from the eventpoll interface. We need to have this facility to cleanup + * correctly files that are closed without being removed from the eventpoll + * interface. 
+ */ +static inline void eventpoll_release(struct file *file) +{ + + /* + * Fast check to avoid the get/release of the semaphore. Since + * we're doing this outside the semaphore lock, it might return + * false negatives, but we don't care. It'll help in 99.99% of cases + * to avoid the semaphore lock. False positives simply cannot happen + * because the file in on the way to be removed and nobody ( but + * eventpoll ) has still a reference to this file. + */ + if (likely(list_empty(&file->f_ep_links))) + return; + + /* + * The file is being closed while it is still linked to an epoll + * descriptor. We need to handle this by correctly unlinking it + * from its containers. + */ + eventpoll_release_file(file); +} + #else -- cgit v1.2.3 From 7fc4c64b2794116ff84ca732342dd20b6588879a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:38:06 -0700 Subject: [PATCH] fix current->user->__count leak From: Arvind Kandhare When switch_uid is called, the reference count of the new user is incremented twice. I think the increment in the switch_uid is done because of the reparent_to_init() function which does not increase the __count for root user. But if switch_uid is called from any other function, the reference count is already incremented by the caller by calling alloc_uid for the new user. Hence the count is incremented twice. The user struct will not be deleted even when there are no processes holding a reference count for it. This does not cause any problem currently because nothing is dependent on timely deletion of the user struct. --- kernel/exit.c | 1 + kernel/user.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/exit.c b/kernel/exit.c index ebc839b645a7..c52fc310cb16 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -230,6 +230,7 @@ void reparent_to_init(void) /* signals? 
*/ security_task_reparent_to_init(current); memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim))); + atomic_inc(&(INIT_USER->__count)); switch_uid(INIT_USER); write_unlock_irq(&tasklist_lock); diff --git a/kernel/user.c b/kernel/user.c index 592680d8cc68..86bd412b85da 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -126,7 +126,6 @@ void switch_uid(struct user_struct *new_user) * we should be checking for it. -DaveM */ old_user = current->user; - atomic_inc(&new_user->__count); atomic_inc(&new_user->processes); atomic_dec(&old_user->processes); current->user = new_user; -- cgit v1.2.3 From 090a3c7bfc5fc7904fc9862e87c56ca8d60af721 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:38:13 -0700 Subject: [PATCH] MTD build fix for old gcc's From: junkio@cox.net Sigh. Is there a gcc option to tell it to not accept this incompatible C99 extension? --- drivers/mtd/mtd_blkdevs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 3b0c9c469ea3..d15606605f81 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -211,9 +211,10 @@ static int blktrans_ioctl(struct inode *inode, struct file *file, case HDIO_GETGEO: if (tr->getgeo) { struct hd_geometry g; + int ret; memset(&g, 0, sizeof(g)); - int ret = tr->getgeo(dev, &g); + ret = tr->getgeo(dev, &g); if (ret) return ret; -- cgit v1.2.3 From 92be328beb50704570ffec52b6b3ab2e8f378904 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:38:20 -0700 Subject: [PATCH] fix rfcomm oops From: ilmari@ilmari.org (Dagfinn Ilmari Mannsaker) It turns out that net/bluetooth/rfcomm/sock.c (and net/bluetooth/hci_sock.c) had been left out when net_proto_family gained an owner field, here's a patch that fixes them both. 
--- net/bluetooth/hci_sock.c | 1 + net/bluetooth/rfcomm/sock.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2040a73db165..1953785a0cea 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -632,6 +632,7 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, struct net_proto_family hci_sock_family_ops = { .family = PF_BLUETOOTH, + .owner = THIS_MODULE, .create = hci_sock_create, }; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 2a3eff6b4372..e0aafca1b78d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -878,6 +878,7 @@ static struct proto_ops rfcomm_sock_ops = { static struct net_proto_family rfcomm_sock_family_ops = { .family = PF_BLUETOOTH, + .owner = THIS_MODULE, .create = rfcomm_sock_create }; -- cgit v1.2.3 From 71ac7ef2bb70c31c4970cb1a916b76faf9ca8341 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2003 19:38:27 -0700 Subject: [PATCH] i2o_scsi build fix i2o_scsi.c now needs pci.h. 
--- drivers/message/i2o/i2o_scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index 9cccfba214c5..abb054dc30e0 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 2d3160cc8a22fd2105210a8a76dd41e976428b77 Mon Sep 17 00:00:00 2001 From: Krzysztof Halasa Date: Fri, 4 Jul 2003 20:17:46 -0700 Subject: [PATCH] C99 initializers in hdlc_generic.c --- drivers/net/wan/hdlc_generic.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c index f003d03844d3..fd0b3dfeeb19 100644 --- a/drivers/net/wan/hdlc_generic.c +++ b/drivers/net/wan/hdlc_generic.c @@ -177,11 +177,8 @@ EXPORT_SYMBOL(unregister_hdlc_device); struct packet_type hdlc_packet_type= { - __constant_htons(ETH_P_HDLC), - NULL, - hdlc_rcv, - NULL, - NULL + .type = __constant_htons(ETH_P_HDLC), + .func = hdlc_rcv, }; -- cgit v1.2.3 From b5d16cb5fd5443a82b486d8468a8e91bddd1a88a Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 4 Jul 2003 21:19:58 -0700 Subject: Signing fixes part 3 --- fs/cifs/cifsencrypt.c | 30 +++++++++++++++++++++++------- fs/cifs/cifsproto.h | 2 +- fs/cifs/transport.c | 6 +++--- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index e7c468f1812d..69a083a16311 100755 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -23,28 +23,34 @@ #include "cifspdu.h" #include "cifsglob.h" #include "cifs_debug.h" +#include "md5.h" /* Calculate and return the CIFS signature based on the mac key and the smb pdu */ -/* the eight byte signature must be allocated by the caller. 
*/ +/* the 16 byte signature must be allocated by the caller */ +/* Note we only use the 1st eight bytes */ /* Note that the smb header signature field on input contains the sequence number before this function is called */ -static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, const char * mac_key, char * signature) +static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, const char * key, char * signature) { + struct MD5Context context; if((cifs_pdu == NULL) || (signature == NULL)) return -EINVAL; - /* MD5(mac_key, text) */ - /* return 1st eight bytes in signature */ - + MD5Init(&context); + MD5Update(&context,key,CIFS_SESSION_KEY_SIZE); + MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); + MD5Final(signature,&context); + cifs_dump_mem("signature: ",signature,16); /* BB remove BB */ return 0; } -int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct cifsSesInfo * ses) +int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct cifsSesInfo * ses, + __u32 * pexpected_response_sequence_number) { int rc = 0; - char smb_signature[8]; + char smb_signature[20]; /* BB remember to initialize sequence number elsewhere and initialize mac_signing key elsewhere BB */ /* BB remember to add code to save expected sequence number in midQ entry BB */ @@ -55,8 +61,14 @@ int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct cifsSesInfo * ses) if((le32_to_cpu(cifs_pdu->Flags2) & SMBFLG2_SECURITY_SIGNATURE) == 0) return rc; + write_lock(&GlobalMid_Lock); cifs_pdu->Signature.Sequence.SequenceNumber = ses->sequence_number; cifs_pdu->Signature.Sequence.Reserved = 0; + + *pexpected_response_sequence_number = ses->sequence_number++; + ses->sequence_number++; + write_unlock(&GlobalMid_Lock); + rc = cifs_calculate_signature(cifs_pdu, ses->mac_signing_key,smb_signature); if(rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); @@ -78,6 +90,10 @@ int cifs_verify_signature(const struct smb_hdr * cifs_pdu, const char * mac_key, /* BB what if signatures are 
supposed to be on for session but server does not send one? BB */ /* BB also do not verify oplock breaks for signature */ + + /* Do not need to verify session setups with signature "BSRSPYL " */ + if(memcmp(cifs_pdu->Signature.SecuritySignature,"BSRSPYL ",8)==0) + cFYI(1,("dummy signature received for smb command 0x%x",cifs_pdu->Command)); return rc; } diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index ba9d0c64c0e2..e2520cc49e35 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -225,7 +225,7 @@ extern void tconInfoFree(struct cifsTconInfo *); extern int cifs_demultiplex_thread(struct TCP_Server_Info *); extern int cifs_reconnect(struct TCP_Server_Info *server); -extern int cifs_sign_smb(struct smb_hdr *, struct cifsSesInfo *); +extern int cifs_sign_smb(struct smb_hdr *, struct cifsSesInfo *,__u32 *); extern int cifs_verify_signature(const struct smb_hdr *, const char * mac_key, __u32 expected_sequence_number); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 62d6136f293a..65f96baccb4c 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -200,7 +200,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, if (in_buf->smb_buf_length > 12) in_buf->Flags2 = cpu_to_le16(in_buf->Flags2); - rc = cifs_sign_smb(in_buf, ses); + rc = cifs_sign_smb(in_buf, ses, &midQ->sequence_number); midQ->midState = MID_REQUEST_SUBMITTED; rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, @@ -250,8 +250,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, receive_len + 4 /* include 4 byte RFC1001 header */ ); -/* int cifs_verify_signature(out_buf, ses->mac_signing_key, - __u32 expected_sequence_number); */ +rc = cifs_verify_signature(out_buf, ses->mac_signing_key,midQ->sequence_number); /* BB fix BB */ + dump_smb(out_buf, 92); /* convert the length into a more usable form */ out_buf->smb_buf_length = -- cgit v1.2.3 From 99fb27c6a6e7abe50938baa79c429143fbfc8bb9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: 
Sat, 5 Jul 2003 04:08:19 -0700 Subject: [PATCH] Improve mmap readaround This tweaks the mmap read-ahead behaviour so that the prefaulting is largely pointless. - double the minimum readaround chunksize in page_cache_readaround(). - when a seek is detected, collapse the window more slowly. --- mm/readahead.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index d6fef1a3c124..ed9ca357a9a5 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -363,9 +363,9 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, } else { /* * A miss - lseek, pagefault, pread, etc. Shrink the readahead - * window by 25%. + * window. */ - ra->next_size -= ra->next_size / 4 + 2; + ra->next_size -= 2; } if ((long)ra->next_size > (long)max) @@ -447,7 +447,7 @@ page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra, struct file *filp, unsigned long offset) { if (ra->next_size != -1UL) { - const unsigned long min = get_min_readahead(ra) * 2; + const unsigned long min = get_min_readahead(ra) * 4; unsigned long target; unsigned long backward; -- cgit v1.2.3 From e4c67754e590e4e14f8167065b205708963e81cb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sat, 5 Jul 2003 20:39:12 -0700 Subject: [PATCH] enable device mapper in compat layer The compat ioctls for device mapper were not being enabled due to an incorrect config option. 
--- include/linux/compat_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 076b1b268c7d..3fb888cc0c34 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -117,7 +117,7 @@ COMPATIBLE_IOCTL(START_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY_RO) COMPATIBLE_IOCTL(RESTART_ARRAY_RW) -#ifdef CONFIG_DM +#ifdef CONFIG_BLK_DEV_DM /* DM */ COMPATIBLE_IOCTL(DM_VERSION) COMPATIBLE_IOCTL(DM_REMOVE_ALL) -- cgit v1.2.3 From 16cde048d6de5405bdfade3b0d496c412b6f6401 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 5 Jul 2003 22:33:35 -0700 Subject: [PATCH] New maintainter for nbd I no longer have the time/interest in nbd, and Paul agreed to take it over. --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 73073266f7ad..c4bb8d3ae788 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1265,8 +1265,8 @@ L: linux-hams@vger.kernel.org S: Maintained NETWORK BLOCK DEVICE -P: Pavel Machek -M: pavel@atrey.karlin.mff.cuni.cz +P: Paul Clements +M: Paul.Clements@steeleye.com S: Maintained NETWORK DEVICE DRIVERS -- cgit v1.2.3 From db15ad85a4dad503d6c34bd2e8ec09bdb2c0b659 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 5 Jul 2003 22:33:43 -0700 Subject: [PATCH] fix IDE init oops on PowerMac From Mikael Petterson: Booting kernel 2.5.74 on a PowerMac with CONFIG_BLK_DEV_IDE_PMAC=y results in an oops during IDE init, and the box then reboots. The patch below updates drivers/ide/ppc/pmac.c to also set up the hwif->ide_dma_queued_off and hwif->ide_dma_queued_on function pointers, which fixes the oops. Tested on my ancient PM4400. 
--- drivers/ide/ppc/pmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index f86304224663..8cdcb9a0a736 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1514,6 +1514,8 @@ pmac_ide_setup_dma(struct device_node *np, int ix) ide_hwifs[ix].ide_dma_timeout = &__ide_dma_timeout; ide_hwifs[ix].ide_dma_retune = &__ide_dma_retune; ide_hwifs[ix].ide_dma_lostirq = &pmac_ide_dma_lostirq; + ide_hwifs[ix].ide_dma_queued_on = &__ide_dma_queued_on; + ide_hwifs[ix].ide_dma_queued_off = &__ide_dma_queued_off; #ifdef CONFIG_BLK_DEV_IDEDMA_PMAC_AUTO if (!noautodma) -- cgit v1.2.3 From 47dba81236079324ef08cbba96481f0762cad5ff Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Sat, 5 Jul 2003 22:58:02 -0700 Subject: [PATCH] synclink.c update Fix arbitration between net open and tty open. Cleanup unused local resulting from latest tty changes. --- drivers/char/synclink.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index b68901640a19..ccb3804e4977 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -1,7 +1,7 @@ /* * linux/drivers/char/synclink.c * - * $Id: synclink.c,v 4.9 2003/05/06 21:18:51 paulkf Exp $ + * $Id: synclink.c,v 4.12 2003/06/18 15:29:32 paulkf Exp $ * * Device driver for Microgate SyncLink ISA and PCI * high speed multiprotocol serial adapters. 
@@ -910,7 +910,7 @@ MODULE_PARM(txdmabufs,"1-" __MODULE_STRING(MAX_TOTAL_DEVICES) "i"); MODULE_PARM(txholdbufs,"1-" __MODULE_STRING(MAX_TOTAL_DEVICES) "i"); static char *driver_name = "SyncLink serial driver"; -static char *driver_version = "$Revision: 4.9 $"; +static char *driver_version = "$Revision: 4.12 $"; static int synclink_init_one (struct pci_dev *dev, const struct pci_device_id *ent); @@ -3170,14 +3170,17 @@ static void mgsl_close(struct tty_struct *tty, struct file * filp) { struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data; - if (!info || mgsl_paranoia_check(info, tty->name, "mgsl_close")) + if (mgsl_paranoia_check(info, tty->name, "mgsl_close")) return; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgsl_close(%s) entry, count=%d\n", __FILE__,__LINE__, info->device_name, info->count); - if (!info->count || tty_hung_up_p(filp)) + if (!info->count) + return; + + if (tty_hung_up_p(filp)) goto cleanup; if ((tty->count == 1) && (info->count != 1)) { @@ -3493,16 +3496,11 @@ static int mgsl_open(struct tty_struct *tty, struct file * filp) info = mgsl_device_list; while(info && info->line != line) info = info->next_device; - if ( !info ){ - printk("%s(%d):Can't find specified device on open (line=%d)\n", - __FILE__,__LINE__,line); + if (mgsl_paranoia_check(info, tty->name, "mgsl_open")) return -ENODEV; - } tty->driver_data = info; info->tty = tty; - if (mgsl_paranoia_check(info, tty->name, "mgsl_open")) - return -ENODEV; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgsl_open(%s), old ref count = %d\n", @@ -3562,6 +3560,8 @@ static int mgsl_open(struct tty_struct *tty, struct file * filp) cleanup: if (retval) { + if (tty->count == 1) + info->tty = 0; /* tty layer will release tty struct */ if(info->count) info->count--; } @@ -4461,7 +4461,6 @@ static struct tty_operations mgsl_ops = { int mgsl_init_tty(void); int mgsl_init_tty() { - struct mgsl_struct *info; serial_driver = alloc_tty_driver(mgsl_device_count); if (!serial_driver) 
return -ENOMEM; -- cgit v1.2.3 From d4188a2633197ebe7bb08c6c8665d1fe0c56be47 Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Sat, 5 Jul 2003 22:58:09 -0700 Subject: [PATCH] synclinkmp.c update Fix arbitration between net open and tty open. Clean up unused locals resulting from latest tty changes. --- drivers/char/synclinkmp.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 99e5a937be51..46c23f3f1e68 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -1,5 +1,5 @@ /* - * $Id: synclinkmp.c,v 4.8 2003/04/21 17:46:55 paulkf Exp $ + * $Id: synclinkmp.c,v 4.12 2003/06/18 15:29:33 paulkf Exp $ * * Device driver for Microgate SyncLink Multiport * high speed multiprotocol serial adapter. @@ -481,7 +481,6 @@ static int break_on_load=0; * assigned major number. May be forced as module parameter. */ static int ttymajor=0; -static int cuamajor=0; /* * Array of user specified options for ISA adapters. 
@@ -492,13 +491,12 @@ static int dosyncppp[MAX_DEVICES] = {0,}; MODULE_PARM(break_on_load,"i"); MODULE_PARM(ttymajor,"i"); -MODULE_PARM(cuamajor,"i"); MODULE_PARM(debug_level,"i"); MODULE_PARM(maxframe,"1-" __MODULE_STRING(MAX_DEVICES) "i"); MODULE_PARM(dosyncppp,"1-" __MODULE_STRING(MAX_DEVICES) "i"); static char *driver_name = "SyncLink MultiPort driver"; -static char *driver_version = "$Revision: 4.8 $"; +static char *driver_version = "$Revision: 4.12 $"; static int synclinkmp_init_one(struct pci_dev *dev,const struct pci_device_id *ent); static void synclinkmp_remove_one(struct pci_dev *dev); @@ -739,12 +737,8 @@ static int open(struct tty_struct *tty, struct file *filp) info = synclinkmp_device_list; while(info && info->line != line) info = info->next_device; - if ( !info ){ - printk("%s(%d):%s Can't find specified device on open (line=%d)\n", - __FILE__,__LINE__,info->device_name,line); + if (sanity_check(info, tty->name, "open")) return -ENODEV; - } - if ( info->init_error ) { printk("%s(%d):%s device is not allocated, init error=%d\n", __FILE__,__LINE__,info->device_name,info->init_error); @@ -753,8 +747,6 @@ static int open(struct tty_struct *tty, struct file *filp) tty->driver_data = info; info->tty = tty; - if (sanity_check(info, tty->name, "open")) - return -ENODEV; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):%s open(), old ref count = %d\n", @@ -802,6 +794,8 @@ static int open(struct tty_struct *tty, struct file *filp) cleanup: if (retval) { + if (tty->count == 1) + info->tty = 0; /* tty layer will release tty struct */ if(info->count) info->count--; } @@ -816,14 +810,17 @@ static void close(struct tty_struct *tty, struct file *filp) { SLMP_INFO * info = (SLMP_INFO *)tty->driver_data; - if (!info || sanity_check(info, tty->name, "close")) + if (sanity_check(info, tty->name, "close")) return; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):%s close() entry, count=%d\n", __FILE__,__LINE__, info->device_name, info->count); - if (!info->count 
|| tty_hung_up_p(filp)) + if (!info->count) + return; + + if (tty_hung_up_p(filp)) goto cleanup; if ((tty->count == 1) && (info->count != 1)) { @@ -3775,8 +3772,6 @@ static struct tty_operations ops = { static int __init synclinkmp_init(void) { - SLMP_INFO *info; - if (break_on_load) { synclinkmp_get_text_ptr(); BREAKPOINT(); -- cgit v1.2.3 From a6a6977c72a8382ef4daf85585fd438e58c7aa4a Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Sat, 5 Jul 2003 22:58:17 -0700 Subject: [PATCH] synclink_cs.c update Fix arbitration between net open and tty open. Cleanup missed bits of CUA device removal changes. --- drivers/char/pcmcia/synclink_cs.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 115a16feb0d1..980c1b7dc8ec 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -1,7 +1,7 @@ /* * linux/drivers/char/pcmcia/synclink_cs.c * - * $Id: synclink_cs.c,v 4.10 2003/05/13 16:06:03 paulkf Exp $ + * $Id: synclink_cs.c,v 4.13 2003/06/18 15:29:32 paulkf Exp $ * * Device driver for Microgate SyncLink PC Card * multiprotocol serial adapter. @@ -467,7 +467,6 @@ static int break_on_load=0; * assigned major number. May be forced as module parameter. 
*/ static int ttymajor=0; -static int cuamajor=0; static int debug_level = 0; static int maxframe[MAX_DEVICE_COUNT] = {0,}; @@ -485,7 +484,6 @@ MODULE_PARM(irq_list, "1-4i"); MODULE_PARM(break_on_load,"i"); MODULE_PARM(ttymajor,"i"); -MODULE_PARM(cuamajor,"i"); MODULE_PARM(debug_level,"i"); MODULE_PARM(maxframe,"1-" __MODULE_STRING(MAX_DEVICE_COUNT) "i"); MODULE_PARM(dosyncppp,"1-" __MODULE_STRING(MAX_DEVICE_COUNT) "i"); @@ -493,7 +491,7 @@ MODULE_PARM(dosyncppp,"1-" __MODULE_STRING(MAX_DEVICE_COUNT) "i"); MODULE_LICENSE("GPL"); static char *driver_name = "SyncLink PC Card driver"; -static char *driver_version = "$Revision: 4.10 $"; +static char *driver_version = "$Revision: 4.13 $"; static struct tty_driver *serial_driver; @@ -1290,7 +1288,7 @@ void dcd_change(MGSLPC_INFO *info) (info->serial_signals & SerialSignal_DCD) ? "on" : "off"); if (info->serial_signals & SerialSignal_DCD) wake_up_interruptible(&info->open_wait); - else if (!(info->flags & ASYNC_CALLOUT_NOHUP)) { + else { if (debug_level >= DEBUG_LEVEL_ISR) printk("doing serial hangup..."); if (info->tty) @@ -2538,14 +2536,17 @@ static void mgslpc_close(struct tty_struct *tty, struct file * filp) { MGSLPC_INFO * info = (MGSLPC_INFO *)tty->driver_data; - if (!info || mgslpc_paranoia_check(info, tty->name, "mgslpc_close")) + if (mgslpc_paranoia_check(info, tty->name, "mgslpc_close")) return; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgslpc_close(%s) entry, count=%d\n", __FILE__,__LINE__, info->device_name, info->count); - if (!info->count || tty_hung_up_p(filp)) + if (!info->count) + return; + + if (tty_hung_up_p(filp)) goto cleanup; if ((tty->count == 1) && (info->count != 1)) { @@ -2822,16 +2823,11 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp) info = mgslpc_device_list; while(info && info->line != line) info = info->next_device; - if ( !info ){ - printk("%s(%d):Can't find specified device on open (line=%d)\n", - __FILE__,__LINE__,line); + if (mgslpc_paranoia_check(info, 
tty->name, "mgslpc_open")) return -ENODEV; - } tty->driver_data = info; info->tty = tty; - if (mgslpc_paranoia_check(info, tty->name, "mgslpc_open")) - return -ENODEV; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgslpc_open(%s), old ref count = %d\n", @@ -2879,6 +2875,8 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp) cleanup: if (retval) { + if (tty->count == 1) + info->tty = 0; /* tty layer will release tty struct */ if(info->count) info->count--; } -- cgit v1.2.3 From f0a8aa740a24500b3379396ace6737722d0de1d4 Mon Sep 17 00:00:00 2001 From: Bernardo Innocenti Date: Sat, 5 Jul 2003 22:58:25 -0700 Subject: [PATCH] Fix do_div() for all architectures This offers a generic do_div64() that actually does the right thing, unlike some architectures that "optimized" the 64-by-32 divide into just a 32-bit divide. Both ppc and sh were already providing an assembly optimized __div64_32(). I called my function the same, so that their optimized versions will automatically override mine in lib.a. I've only tested extensively on m68knommu (uClinux) and made sure generated code is reasonably short. Should be ok also on parisc, since it's the same algorithm they were using before. - add generic C implementations of the do_div() for 32bit and 64bit archs in asm-generic/div64.h; - add generic library support function __div64_32() to handle the full 64/32 case on 32bit archs; - kill multiple copies of generic do_div() in architecture specific subdirs. 
Most copies were either buggy or not doing what they were supposed to do; - ensure all surviving instances of do_div() have their parameters correctly parenthesized to avoid funny side-effects; --- include/asm-alpha/div64.h | 15 +----------- include/asm-arm26/div64.h | 15 +----------- include/asm-cris/div64.h | 17 +------------ include/asm-generic/div64.h | 53 +++++++++++++++++++++++++++++++++++++++++ include/asm-h8300/div64.h | 14 +---------- include/asm-ia64/div64.h | 21 +---------------- include/asm-m68k/div64.h | 9 ------- include/asm-m68knommu/div64.h | 14 +---------- include/asm-mips64/div64.h | 19 +-------------- include/asm-parisc/div64.h | 55 +------------------------------------------ include/asm-ppc/div64.h | 24 +------------------ include/asm-ppc64/div64.h | 19 +-------------- include/asm-s390/div64.h | 8 +------ include/asm-sh/div64.h | 21 +---------------- include/asm-sparc/div64.h | 12 +--------- include/asm-sparc64/div64.h | 15 +----------- include/asm-v850/div64.h | 12 +--------- include/asm-x86_64/div64.h | 15 +----------- lib/Makefile | 2 +- lib/div64.c | 45 +++++++++++++++++++++++++++++++++++ 20 files changed, 115 insertions(+), 290 deletions(-) create mode 100644 include/asm-generic/div64.h create mode 100644 lib/div64.c diff --git a/include/asm-alpha/div64.h b/include/asm-alpha/div64.h index 080dcd480805..6cd978cefb28 100644 --- a/include/asm-alpha/div64.h +++ b/include/asm-alpha/div64.h @@ -1,14 +1 @@ -#ifndef __ALPHA_DIV64 -#define __ALPHA_DIV64 - -/* - * Hey, we're already 64-bit, no - * need to play games.. 
- */ -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) (n)) % (unsigned) (base); \ - (n) = ((unsigned long) (n)) / (unsigned) (base); \ - __res; }) - -#endif +#include diff --git a/include/asm-arm26/div64.h b/include/asm-arm26/div64.h index 27fec4ee6aed..6cd978cefb28 100644 --- a/include/asm-arm26/div64.h +++ b/include/asm-arm26/div64.h @@ -1,14 +1 @@ -#ifndef __ASM_ARM_DIV64 -#define __ASM_ARM_DIV64 - -/* We're not 64-bit, but... */ -#define do_div(n,base) \ -({ \ - int __res; \ - __res = ((unsigned long)n) % (unsigned int)base; \ - n = ((unsigned long)n) / (unsigned int)base; \ - __res; \ -}) - -#endif - +#include diff --git a/include/asm-cris/div64.h b/include/asm-cris/div64.h index bf33c2e8a04d..6cd978cefb28 100644 --- a/include/asm-cris/div64.h +++ b/include/asm-cris/div64.h @@ -1,16 +1 @@ -#ifndef __ASM_CRIS_DIV64 -#define __ASM_CRIS_DIV64 - -/* copy from asm-arm */ - -/* We're not 64-bit, but... */ -#define do_div(n,base) \ -({ \ - int __res; \ - __res = ((unsigned long)n) % (unsigned int)base; \ - n = ((unsigned long)n) / (unsigned int)base; \ - __res; \ -}) - -#endif - +#include diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h new file mode 100644 index 000000000000..292b5a840431 --- /dev/null +++ b/include/asm-generic/div64.h @@ -0,0 +1,53 @@ +#ifndef _ASM_GENERIC_DIV64_H +#define _ASM_GENERIC_DIV64_H +/* + * Copyright (C) 2003 Bernardo Innocenti + * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h + * + * The semantics of do_div() are: + * + * uint32_t do_div(uint64_t *n, uint32_t base) + * { + * uint32_t remainder = *n % base; + * *n = *n / base; + * return remainder; + * } + * + * NOTE: macro parameter n is evaluated multiple times, + * beware of side effects! 
+ */ + +#include + +#if BITS_PER_LONG == 64 + +# define do_div(n,base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + __rem = ((uint64_t)(n)) % __base; \ + (n) = ((uint64_t)(n)) / __base; \ + __rem; \ + }) + +#elif BITS_PER_LONG == 32 + +extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor); + +# define do_div(n,base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + if (((n) >> 32) == 0) { \ + __rem = (uint32_t)(n) % __base; \ + (n) = (uint32_t)(n) / __base; \ + } else \ + __rem = __div64_32(&(n), __base); \ + __rem; \ + }) + +#else /* BITS_PER_LONG == ?? */ + +# error do_div() does not yet support the C64 + +#endif /* BITS_PER_LONG */ + +#endif /* _ASM_GENERIC_DIV64_H */ diff --git a/include/asm-h8300/div64.h b/include/asm-h8300/div64.h index df5634def9dc..6cd978cefb28 100644 --- a/include/asm-h8300/div64.h +++ b/include/asm-h8300/div64.h @@ -1,13 +1 @@ -#ifndef H8300_DIV64_H -#define H8300_DIV64_H - -/* n = n / base; return rem; */ - -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) n) % (unsigned) base; \ - n = ((unsigned long) n) / (unsigned) base; \ - __res; \ -}) - -#endif /* _H8300_DIV64_H */ +#include diff --git a/include/asm-ia64/div64.h b/include/asm-ia64/div64.h index 08c03f672041..6cd978cefb28 100644 --- a/include/asm-ia64/div64.h +++ b/include/asm-ia64/div64.h @@ -1,20 +1 @@ -#ifndef _ASM_IA64_DIV64_H -#define _ASM_IA64_DIV64_H - -/* - * Copyright (C) 1999 Hewlett-Packard Co - * Copyright (C) 1999 David Mosberger-Tang - * - * vsprintf uses this to divide a 64-bit integer N by a small integer BASE. - * This is incredibly hard on IA-64... 
- */ - -#define do_div(n,base) \ -({ \ - int _res; \ - _res = ((unsigned long) (n)) % (unsigned) (base); \ - (n) = ((unsigned long) (n)) / (unsigned) (base); \ - _res; \ -}) - -#endif /* _ASM_IA64_DIV64_H */ +#include diff --git a/include/asm-m68k/div64.h b/include/asm-m68k/div64.h index 16bf1f88d40c..9f65de1a2480 100644 --- a/include/asm-m68k/div64.h +++ b/include/asm-m68k/div64.h @@ -3,7 +3,6 @@ /* n = n / base; return rem; */ -#if 1 #define do_div(n, base) ({ \ union { \ unsigned long n32[2]; \ @@ -23,13 +22,5 @@ (n) = __n.n64; \ __rem; \ }) -#else -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) n) % (unsigned) base; \ - n = ((unsigned long) n) / (unsigned) base; \ - __res; \ -}) -#endif #endif /* _M68K_DIV64_H */ diff --git a/include/asm-m68knommu/div64.h b/include/asm-m68knommu/div64.h index b41200162d8b..6cd978cefb28 100644 --- a/include/asm-m68knommu/div64.h +++ b/include/asm-m68knommu/div64.h @@ -1,13 +1 @@ -#ifndef _M68KNOMMU_DIV64_H -#define _M68KNOMMU_DIV64_H - -/* n = n / base; return rem; */ - -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) n) % (unsigned) base; \ - n = ((unsigned long) n) / (unsigned) base; \ - __res; \ -}) - -#endif /* _M68K_DIV64_H */ +#include diff --git a/include/asm-mips64/div64.h b/include/asm-mips64/div64.h index 3ec1fadcfd7e..1147123d4d82 100644 --- a/include/asm-mips64/div64.h +++ b/include/asm-mips64/div64.h @@ -27,23 +27,6 @@ (res) = __quot; \ __mod; }) -/* - * Hey, we're already 64-bit, no - * need to play games.. 
- */ -#define do_div(n, base) ({ \ - unsigned long __quot; \ - unsigned int __mod; \ - unsigned long __div; \ - unsigned int __base; \ - \ - __div = (n); \ - __base = (base); \ - \ - __mod = __div % __base; \ - __quot = __div / __base; \ - \ - (n) = __quot; \ - __mod; }) +#include #endif /* _ASM_DIV64_H */ diff --git a/include/asm-parisc/div64.h b/include/asm-parisc/div64.h index e86e35e8a272..6cd978cefb28 100644 --- a/include/asm-parisc/div64.h +++ b/include/asm-parisc/div64.h @@ -1,54 +1 @@ -#ifndef __ASM_PARISC_DIV64 -#define __ASM_PARISC_DIV64 - -#ifdef __LP64__ - -/* - * Copyright (C) 1999 Hewlett-Packard Co - * Copyright (C) 1999 David Mosberger-Tang - * - * vsprintf uses this to divide a 64-bit integer N by a small integer BASE. - * This is incredibly hard on IA-64 and HPPA - */ - -#define do_div(n,base) \ -({ \ - int _res; \ - _res = ((unsigned long) (n)) % (unsigned) (base); \ - (n) = ((unsigned long) (n)) / (unsigned) (base); \ - _res; \ -}) - -#else -/* - * unsigned long long division. Yuck Yuck! What is Linux coming to? 
- * This is 100% disgusting - */ -#define do_div(n,base) \ -({ \ - unsigned long __low, __low2, __high, __rem; \ - __low = (n) & 0xffffffff; \ - __high = (n) >> 32; \ - if (__high) { \ - __rem = __high % (unsigned long)base; \ - __high = __high / (unsigned long)base; \ - __low2 = __low >> 16; \ - __low2 += __rem << 16; \ - __rem = __low2 % (unsigned long)base; \ - __low2 = __low2 / (unsigned long)base; \ - __low = __low & 0xffff; \ - __low += __rem << 16; \ - __rem = __low % (unsigned long)base; \ - __low = __low / (unsigned long)base; \ - n = __low + ((long long)__low2 << 16) + \ - ((long long) __high << 32); \ - } else { \ - __rem = __low % (unsigned long)base; \ - n = (__low / (unsigned long)base); \ - } \ - __rem; \ -}) -#endif - -#endif - +#include diff --git a/include/asm-ppc/div64.h b/include/asm-ppc/div64.h index ec3ae5bcb3de..6cd978cefb28 100644 --- a/include/asm-ppc/div64.h +++ b/include/asm-ppc/div64.h @@ -1,23 +1 @@ -#ifndef __PPC_DIV64 -#define __PPC_DIV64 - -#include - -extern u32 __div64_32(u64 *dividend, u32 div); - -#define do_div(n, div) ({ \ - u64 __n = (n); \ - u32 __d = (div); \ - u32 __q, __r; \ - if ((__n >> 32) == 0) { \ - __q = (u32)__n / __d; \ - __r = (u32)__n - __q * __d; \ - (n) = __q; \ - } else { \ - __r = __div64_32(&__n, __d); \ - (n) = __n; \ - } \ - __r; \ -}) - -#endif +#include diff --git a/include/asm-ppc64/div64.h b/include/asm-ppc64/div64.h index 22ba87033456..6cd978cefb28 100644 --- a/include/asm-ppc64/div64.h +++ b/include/asm-ppc64/div64.h @@ -1,18 +1 @@ -#ifndef __PPC_DIV64 -#define __PPC_DIV64 - -/* Copyright 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) (n)) % (unsigned) (base); \ - (n) = ((unsigned long) (n)) / (unsigned) (base); \ - __res; }) - -#endif +#include diff --git a/include/asm-s390/div64.h b/include/asm-s390/div64.h index 79b5f06fa174..0c5f739832f1 100644 --- a/include/asm-s390/div64.h +++ b/include/asm-s390/div64.h @@ -43,13 +43,7 @@ }) #else /* __s390x__ */ - -#define do_div(n,base) ({ \ -int __res; \ -__res = ((unsigned long) n) % (unsigned) base; \ -n = ((unsigned long) n) / (unsigned) base; \ -__res; }) - +#include #endif /* __s390x__ */ #endif diff --git a/include/asm-sh/div64.h b/include/asm-sh/div64.h index dd4665af4fb8..6cd978cefb28 100644 --- a/include/asm-sh/div64.h +++ b/include/asm-sh/div64.h @@ -1,20 +1 @@ -#ifndef __ASM_SH_DIV64 -#define __ASM_SH_DIV64 - -extern u64 __div64_32(u64 n, u32 d); - -#define do_div(n,base) ({ \ -u64 __n = (n), __q; \ -u32 __base = (base); \ -u32 __res; \ -if ((__n >> 32) == 0) { \ - __res = ((unsigned long) __n) % (unsigned) __base; \ - (n) = ((unsigned long) __n) / (unsigned) __base; \ -} else { \ - __q = __div64_32(__n, __base); \ - __res = __n - __q * __base; \ - (n) = __q; \ -} \ -__res; }) - -#endif /* __ASM_SH_DIV64 */ +#include diff --git a/include/asm-sparc/div64.h b/include/asm-sparc/div64.h index 167260a9c953..6cd978cefb28 100644 --- a/include/asm-sparc/div64.h +++ b/include/asm-sparc/div64.h @@ -1,11 +1 @@ -#ifndef __SPARC_DIV64 -#define __SPARC_DIV64 - -/* We're not 64-bit, but... 
*/ -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) n) % (unsigned) base; \ - n = ((unsigned long) n) / (unsigned) base; \ - __res; }) - -#endif /* __SPARC_DIV64 */ +#include diff --git a/include/asm-sparc64/div64.h b/include/asm-sparc64/div64.h index a36d2443d209..6cd978cefb28 100644 --- a/include/asm-sparc64/div64.h +++ b/include/asm-sparc64/div64.h @@ -1,14 +1 @@ -#ifndef __SPARC64_DIV64 -#define __SPARC64_DIV64 - -/* - * Hey, we're already 64-bit, no - * need to play games.. - */ -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) n) % (unsigned) base; \ - n = ((unsigned long) n) / (unsigned) base; \ - __res; }) - -#endif /* __SPARC64_DIV64 */ +#include diff --git a/include/asm-v850/div64.h b/include/asm-v850/div64.h index 165518b4623c..6cd978cefb28 100644 --- a/include/asm-v850/div64.h +++ b/include/asm-v850/div64.h @@ -1,11 +1 @@ -#ifndef __V850_DIV64_H__ -#define __V850_DIV64_H__ - -/* We're not 64-bit, but... */ -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) n) % (unsigned) base; \ - n = ((unsigned long) n) / (unsigned) base; \ - __res; }) - -#endif /* __V850_DIV64_H__ */ +#include diff --git a/include/asm-x86_64/div64.h b/include/asm-x86_64/div64.h index 2c94d0762fe0..6cd978cefb28 100644 --- a/include/asm-x86_64/div64.h +++ b/include/asm-x86_64/div64.h @@ -1,14 +1 @@ -#ifndef __X86_64_DIV64 -#define __X86_64_DIV64 - -/* - * Hey, we're already 64-bit, no - * need to play games.. 
- */ -#define do_div(n,base) ({ \ - int __res; \ - __res = ((unsigned long) (n)) % (unsigned) (base); \ - (n) = ((unsigned long) (n)) / (unsigned) (base); \ - __res; }) - -#endif +#include diff --git a/lib/Makefile b/lib/Makefile index 9121869155a6..91e7b30d3ca0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -5,7 +5,7 @@ lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ - kobject.o idr.o + kobject.o idr.o div64.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o diff --git a/lib/div64.c b/lib/div64.c new file mode 100644 index 000000000000..eab47437f182 --- /dev/null +++ b/lib/div64.c @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2003 Bernardo Innocenti + * + * Based on former do_div() implementation from asm-parisc/div64.h: + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang + * + * + * Generic C version of 64bit/32bit division and modulo, with + * 64bit result and 32bit remainder. + * + * The fast case for (n>>32 == 0) is handled inline by do_div(). + * + * Code generated for this function might be very inefficient + * for some CPUs. div64_32() can be overridden by linking arch-specific + * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S. 
+ */ + +#include +#include + +uint32_t __div64_32(uint64_t *n, uint32_t base) +{ + uint32_t low, low2, high, rem; + + low = *n & 0xffffffff; + high = *n >> 32; + rem = high % (uint32_t)base; + high = high / (uint32_t)base; + low2 = low >> 16; + low2 += rem << 16; + rem = low2 % (uint32_t)base; + low2 = low2 / (uint32_t)base; + low = low & 0xffff; + low += rem << 16; + rem = low % (uint32_t)base; + low = low / (uint32_t)base; + + *n = low + + ((uint64_t)low2 << 16) + + ((uint64_t)high << 32); + + return rem; +} + -- cgit v1.2.3 From e939c913081c23c84e77fc1e5a480b1eead393ee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 5 Jul 2003 22:58:33 -0700 Subject: [PATCH] another timer overflow thing in internal_add_timer() we simply leave the timer pending forever if the expiry is in more than 0xffffffff jiffies. This means more than 48 days on e.g. ia64 - which is not an unrealistic timeout. IIRC crond is happy to use extremely large timeouts. It's better to time out early (if you can call 48 days "early") than to not time out at all.
--- kernel/timer.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/timer.c b/kernel/timer.c index 7bce7a7cb2c2..3995425e44a5 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -126,13 +126,17 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) * or you set a timer to go off in the past */ vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); - } else if (idx <= 0xffffffffUL) { - int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - vec = base->tv5.vec + i; } else { - /* Can only get here on architectures with 64-bit jiffies */ - INIT_LIST_HEAD(&timer->entry); - return; + int i; + /* If the timeout is larger than 0xffffffff on 64-bit + * architectures then we use the maximum timeout: + */ + if (idx > 0xffffffffUL) { + idx = 0xffffffffUL; + expires = idx + base->timer_jiffies; + } + i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; + vec = base->tv5.vec + i; } /* * Timers are FIFO: -- cgit v1.2.3 From 82a333fa1948869322f32a67223ea8d0ae9ad8ba Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 5 Jul 2003 23:23:55 -0700 Subject: Simplify and speed up mmap read-around handling This improves cold-cache program startup noticeably for me, and simplifies the read-ahead logic at the same time. The rules for read-ahead are: - if the vma is marked random, we just do the regular one-page case. Obvious. - if the vma is marked "linear access", we use the regular readahead code. No change in behaviour there (well, we also only consider it a _miss_ if it was marked linear access - the "readahead" and "readaround" things are now totally independent of each other) - otherwise, we look at how many hits/misses we've had for this particular file open for mmap, and if we've had noticeably more misses than hits, we don't bother with read-around. 
In particular, this means that the "real" read-ahead logic literally only needs to worry about finding sequential accesses, and does not have to worry about the common executable mmap access patterns that have very different behaviour. Some constant tweaking may be a good idea. --- include/linux/fs.h | 2 ++ include/linux/mm.h | 4 ---- mm/filemap.c | 62 +++++++++++++++++++++++++++++++++--------------------- mm/readahead.c | 31 --------------------------- 4 files changed, 40 insertions(+), 59 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a5f305101c5..77dd4b13dc43 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -420,6 +420,8 @@ struct file_ra_state { unsigned long ahead_start; /* Ahead window */ unsigned long ahead_size; unsigned long ra_pages; /* Maximum readahead window */ + unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ + unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ }; struct file { diff --git a/include/linux/mm.h b/include/linux/mm.h index d75f64725853..858914b2dbd3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -571,10 +571,6 @@ void page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *filp, unsigned long offset); -void page_cache_readaround(struct address_space *mapping, - struct file_ra_state *ra, - struct file *filp, - unsigned long offset); void handle_ra_miss(struct address_space *mapping, struct file_ra_state *ra, pgoff_t offset); unsigned long max_sane_readahead(unsigned long nr); diff --git a/mm/filemap.c b/mm/filemap.c index 1352d59d2ee4..f9623a9fecc6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -925,6 +925,9 @@ static int page_cache_read(struct file * file, unsigned long offset) return error == -EEXIST ? 0 : error; } +#define MMAP_READAROUND (16UL) +#define MMAP_LOTSAMISS (100) + /* * filemap_nopage() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault.
@@ -942,19 +945,19 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address struct inode *inode = mapping->host; struct page *page; unsigned long size, pgoff, endoff; - int did_readahead; + int did_readaround = 0; pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; retry_all: - /* - * An external ptracer can access pages that normally aren't - * accessible.. - */ size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if ((pgoff >= size) && (area->vm_mm == current->mm)) - return NULL; + if (pgoff >= size) + goto outside_data_content; + + /* If we don't want any read-ahead, don't bother */ + if (VM_RandomReadHint(area)) + goto no_cached_page; /* * The "size" of the file, as far as mmap is concerned, isn't bigger @@ -963,25 +966,14 @@ retry_all: if (size > endoff) size = endoff; - did_readahead = 0; - /* * The readahead code wants to be told about each and every page * so it can build and shrink its windows appropriately + * + * For sequential accesses, we use the generic readahead logic. */ - if (VM_SequentialReadHint(area)) { - did_readahead = 1; + if (VM_SequentialReadHint(area)) page_cache_readahead(mapping, ra, file, pgoff); - } - - /* - * If the offset is outside the mapping size we're off the end - * of a privately mapped file, so we need to map a zero page. - */ - if ((pgoff < size) && !VM_RandomReadHint(area)) { - did_readahead = 1; - page_cache_readaround(mapping, ra, file, pgoff); - } /* * Do we have something in the page cache already? @@ -989,13 +981,27 @@ retry_all: retry_find: page = find_get_page(mapping, pgoff); if (!page) { - if (did_readahead) { + if (VM_SequentialReadHint(area)) { handle_ra_miss(mapping, ra, pgoff); - did_readahead = 0; + goto no_cached_page; } - goto no_cached_page; + ra->mmap_miss++; + + /* + * Do we miss much more than hit in this file? If so, + * stop bothering with read-ahead. 
It will only hurt. + */ + if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS) + goto no_cached_page; + + did_readaround = 1; + do_page_cache_readahead(mapping, file, pgoff & ~(MMAP_READAROUND-1), MMAP_READAROUND); + goto retry_find; } + if (!did_readaround) + ra->mmap_hit++; + /* * Ok, found a page in the page cache, now we need to check * that it's up-to-date. @@ -1010,6 +1016,14 @@ success: mark_page_accessed(page); return page; +outside_data_content: + /* + * An external ptracer can access pages that normally aren't + * accessible.. + */ + if (area->vm_mm == current->mm) + return NULL; + /* Fall through to the non-read-ahead case */ no_cached_page: /* * We're only likely to ever get here if MADV_RANDOM is in diff --git a/mm/readahead.c b/mm/readahead.c index ed9ca357a9a5..179ba48d5e5c 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -437,37 +437,6 @@ out: return; } -/* - * For mmap reads (typically executables) the access pattern is fairly random, - * but somewhat ascending. So readaround favours pages beyond the target one. - * We also boost the window size, as it can easily shrink due to misses. - */ -void -page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra, - struct file *filp, unsigned long offset) -{ - if (ra->next_size != -1UL) { - const unsigned long min = get_min_readahead(ra) * 4; - unsigned long target; - unsigned long backward; - - /* - * If next_size is zero then leave it alone, because that's a - * readahead startup state. 
- */ - if (ra->next_size && ra->next_size < min) - ra->next_size = min; - - target = offset; - backward = ra->next_size / 4; - - if (backward > target) - target = 0; - else - target -= backward; - page_cache_readahead(mapping, ra, filp, target); - } -} /* * handle_ra_miss() is called when it is known that a page which should have -- cgit v1.2.3 From fd83c5ed3fb9587a926cc51f9970e3ad09405304 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 03:19:51 -0700 Subject: [PATCH] selection of boot parameters at configure time for Motorola 5282 targets Allow setting boot time parameters at configuration for Motorola 5282 targets. --- arch/m68knommu/platform/5282/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68knommu/platform/5282/config.c b/arch/m68knommu/platform/5282/config.c index 9548cf3a4268..c3fc68c8af66 100644 --- a/arch/m68knommu/platform/5282/config.c +++ b/arch/m68knommu/platform/5282/config.c @@ -62,7 +62,7 @@ void config_BSP(char *commandp, int size) { mcf_disableall(); -#if defined(CONFIG_BOOTPARAM) +#ifdef CONFIG_BOOTPARAM strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); commandp[size-1] = 0; #else -- cgit v1.2.3 From bb47ba3b29c4b46fdd0584808466083d9e6db801 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 03:20:00 -0700 Subject: [PATCH] conditional ROMfs copy for Motorola M5307C3 board Conditionally copy the ROMfs filesystem on the Motorola M5307C3 target board only if using a ROMfs. 
--- arch/m68knommu/platform/5307/MOTOROLA/crt0_ram.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/m68knommu/platform/5307/MOTOROLA/crt0_ram.S b/arch/m68knommu/platform/5307/MOTOROLA/crt0_ram.S index 070c0e8002d2..1fff8b293b77 100644 --- a/arch/m68knommu/platform/5307/MOTOROLA/crt0_ram.S +++ b/arch/m68knommu/platform/5307/MOTOROLA/crt0_ram.S @@ -110,6 +110,8 @@ _start: movc %d0,%CACR nop + +#ifdef CONFIG_ROMFS_FS /* * Move ROM filesystem above bss :-) */ @@ -131,6 +133,12 @@ _copy_romfs: cmp.l %a0, %a2 /* Check if at end */ bne _copy_romfs +#else /* CONFIG_ROMFS_FS */ + lea.l _ebss, %a1 + move.l %a1, _ramstart +#endif /* CONFIG_ROMFS_FS */ + + /* * Zero out the bss region. */ -- cgit v1.2.3 From abba592563a249f6a1033fd641e5e385a494716c Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 03:20:14 -0700 Subject: [PATCH] force PAGE_SIZE to be an unsigned long Force PAGE_SIZE for the m68knommu architecture to be an unsigned long. This makes it consistent with all other architectures and cleans up a load of compiler warnings. --- include/asm-m68knommu/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-m68knommu/page.h b/include/asm-m68knommu/page.h index 7defa6391f0b..0d063a97f1c7 100644 --- a/include/asm-m68knommu/page.h +++ b/include/asm-m68knommu/page.h @@ -6,7 +6,7 @@ /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT (12) -#define PAGE_SIZE (4096) +#define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #ifdef __KERNEL__ -- cgit v1.2.3 From 5b0a7205c8c0eb559aa768eaae0e76606b72019e Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 03:20:31 -0700 Subject: [PATCH] remove unused register from clobber list in down_trylock() Remove "%d0" register from clobber list of down_trylock() for m68knommu. It is not used by the asm code here at all. 
--- include/asm-m68knommu/semaphore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-m68knommu/semaphore.h b/include/asm-m68knommu/semaphore.h index f6cfb9f49001..4720a09e6191 100644 --- a/include/asm-m68knommu/semaphore.h +++ b/include/asm-m68knommu/semaphore.h @@ -145,7 +145,7 @@ extern inline int down_trylock(struct semaphore * sem) ".previous" : "=d" (result) : "a" (sem1) - : "%d0", "memory"); + : "memory"); return result; } -- cgit v1.2.3 From ca6abe4c0fe908470c049c355af97479bb59b3b5 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 03:20:56 -0700 Subject: [PATCH] simplify access_ok() for all m68knommu targets Unify access_ok for all m68knommu targets. All targets use the common linker script and have common end symbols. So now we can just use a simple check. --- include/asm-m68knommu/uaccess.h | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/include/asm-m68knommu/uaccess.h b/include/asm-m68knommu/uaccess.h index 6ff1747b02e0..9f890ed45b4f 100644 --- a/include/asm-m68knommu/uaccess.h +++ b/include/asm-m68knommu/uaccess.h @@ -13,21 +13,14 @@ #define VERIFY_READ 0 #define VERIFY_WRITE 1 -/* We let the MMU do all checking */ -extern inline int access_ok(int type, const void * addr, unsigned long size) +#define access_ok(type,addr,size) _access_ok((unsigned long)(addr),(size)) + +static inline int _access_ok(unsigned long addr, unsigned long size) { -#define RANGE_CHECK_OK(addr, size, lower, upper) \ - (((addr) >= (lower)) && (((addr) + (size)) < (upper))) - -#ifdef CONFIG_COLDFIRE - extern unsigned long _ramend; - return(RANGE_CHECK_OK((unsigned long) addr, size, 0L, _ramend) || - (is_in_rom((unsigned long) addr) && - is_in_rom((unsigned long) addr + size))); -#else - /* DAVIDM - this could be restricted a lot more */ - return(RANGE_CHECK_OK((unsigned long)addr, size, 0, 0x10f00000)); -#endif + extern unsigned long memory_start, memory_end; + + return (((addr >= memory_start) 
&& (addr+size < memory_end)) || + (is_in_rom(addr) && is_in_rom(addr+size))); } extern inline int verify_area(int type, const void * addr, unsigned long size) -- cgit v1.2.3 From bd792b1704f44a9d9d83486f00967cbcba68405c Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 6 Jul 2003 04:24:48 -0700 Subject: Signing fixes part 4 of 4 --- fs/cifs/cifsencrypt.c | 57 +++++++++++++++++++++++++++++++++++++++++++-------- fs/cifs/cifsglob.h | 2 +- fs/cifs/cifsproto.h | 1 + fs/cifs/connect.c | 11 +++++++--- fs/cifs/transport.c | 9 ++++++-- 5 files changed, 66 insertions(+), 14 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 69a083a16311..db3226e62720 100755 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -30,6 +30,9 @@ /* Note we only use the 1st eight bytes */ /* Note that the smb header signature field on input contains the sequence number before this function is called */ + +extern void mdfour(unsigned char *out, unsigned char *in, int n); +extern void E_md4hash(const unsigned char *passwd, unsigned char *p16); static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, const char * key, char * signature) { @@ -39,10 +42,9 @@ static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, const char return -EINVAL; MD5Init(&context); - MD5Update(&context,key,CIFS_SESSION_KEY_SIZE); + MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16); MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); MD5Final(signature,&context); - cifs_dump_mem("signature: ",signature,16); /* BB remove BB */ return 0; } @@ -62,7 +64,7 @@ int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct cifsSesInfo * ses, return rc; write_lock(&GlobalMid_Lock); - cifs_pdu->Signature.Sequence.SequenceNumber = ses->sequence_number; + cifs_pdu->Signature.Sequence.SequenceNumber = cpu_to_le32(ses->sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; *pexpected_response_sequence_number = ses->sequence_number++; @@ -78,15 +80,19 @@ int 
cifs_sign_smb(struct smb_hdr * cifs_pdu, struct cifsSesInfo * ses, return rc; } -int cifs_verify_signature(const struct smb_hdr * cifs_pdu, const char * mac_key, +int cifs_verify_signature(struct smb_hdr * cifs_pdu, const char * mac_key, __u32 expected_sequence_number) { - unsigned int rc = 0; + unsigned int rc; + char server_response_sig[8]; + char what_we_think_sig_should_be[20]; if((cifs_pdu == NULL) || (mac_key == NULL)) return -EINVAL; - - /* BB no need to verify negprot or if flag is not on for session (or for frame?? */ + + if (cifs_pdu->Command == SMB_COM_NEGOTIATE) + return 0; + /* BB what if signatures are supposed to be on for session but server does not send one? BB */ /* BB also do not verify oplock breaks for signature */ @@ -95,5 +101,40 @@ int cifs_verify_signature(const struct smb_hdr * cifs_pdu, const char * mac_key, if(memcmp(cifs_pdu->Signature.SecuritySignature,"BSRSPYL ",8)==0) cFYI(1,("dummy signature received for smb command 0x%x",cifs_pdu->Command)); - return rc; + expected_sequence_number = cpu_to_le32(expected_sequence_number); + + /* save off the origiginal signature so we can modify the smb and check + its signature against what the server sent */ + memcpy(server_response_sig,cifs_pdu->Signature.SecuritySignature,8); + + cifs_pdu->Signature.Sequence.SequenceNumber = expected_sequence_number; + cifs_pdu->Signature.Sequence.Reserved = 0; + + rc = cifs_calculate_signature(cifs_pdu, mac_key, + what_we_think_sig_should_be); + + if(rc) + return rc; + + +/* cifs_dump_mem("what we think it should be: ",what_we_think_sig_should_be,16); */ + + if(memcmp(server_response_sig, what_we_think_sig_should_be, 8)) + return -EACCES; + else + return 0; + +} + +/* We fill in key by putting in 40 byte array which was allocated by caller */ +int cifs_calculate_mac_key(char * key, const char * rn, const char * password) +{ + char temp_key[16]; + if ((key == NULL) || (rn == NULL) || (password == NULL)) + return -EINVAL; + + E_md4hash(password, temp_key); /* BB 
may have to do another md4 of it */ + mdfour(key,temp_key,16); + memcpy(key+16,rn, CIFS_SESSION_KEY_SIZE); + return 0; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f79a0695fb36..5b0ae3bc20cb 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -151,7 +151,7 @@ struct cifsSesInfo { enum statusEnum status; __u32 sequence_number; /* needed for CIFS PDU signature */ __u16 ipc_tid; /* special tid for connection to IPC share */ - char mac_signing_key[CIFS_SESSION_KEY_SIZE]; + char mac_signing_key[CIFS_SESSION_KEY_SIZE + 16]; char *serverOS; /* name of operating system underlying the server */ char *serverNOS; /* name of network operating system that the server is running */ char *serverDomain; /* security realm of server */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e2520cc49e35..5b9dded835ec 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -228,6 +228,7 @@ extern int cifs_reconnect(struct TCP_Server_Info *server); extern int cifs_sign_smb(struct smb_hdr *, struct cifsSesInfo *,__u32 *); extern int cifs_verify_signature(const struct smb_hdr *, const char * mac_key, __u32 expected_sequence_number); +extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass); /* BB routines below not implemented yet BB */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index bd4c4e9a309f..f861fd7c5e4c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -200,7 +200,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) continue; } pdu_length = 4 + ntohl(smb_buffer->smb_buf_length); - cFYI(1, ("Peek length rcvd: %d with smb length: %d", length, pdu_length)); /* BB */ + cFYI(1, ("Peek length rcvd: %d with smb length: %d", length, pdu_length)); temp = (char *) smb_buffer; if (length > 3) { @@ -332,8 +332,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) kfree(server); } else /* BB need to more gracefully handle the rare negative session response case because response will be still outstanding 
*/ - cERROR(1, ("There are still active MIDs in queue and we are exiting but we can not delete mid_q_entries or TCP_Server_Info structure due to pending requests MEMORY LEAK!!")); /* BB wake up waitors, and/or wait and/or free stale mids and try again? BB */ -/* BB Need to fix bug in error path above - perhaps wait until smb requests + cERROR(1, ("Active MIDs in queue while exiting - can not delete mid_q_entries or TCP_Server_Info structure due to pending requests MEMORY LEAK!!")); + /* BB wake up waitors, and/or wait and/or free stale mids and try again? BB */ + /* BB Need to fix bug in error path above - perhaps wait until smb requests time out and then free the tcp per server struct BB */ read_unlock(&GlobalSMBSeslock); @@ -672,6 +673,8 @@ int setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, struct nls_tab SMBNTencrypt(pSesInfo->password_with_pad, pSesInfo->server->cryptKey,ntlm_session_key); + /* BB add call to save MAC key here BB */ + /* for better security the weaker lanman hash not sent in AuthSessSetup so why bother calculating it */ /* toUpper(nls_info, @@ -690,6 +693,8 @@ int setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, struct nls_tab SMBNTencrypt(pSesInfo->password_with_pad, pSesInfo->server->cryptKey, ntlm_session_key); + + cifs_calculate_mac_key(pSesInfo->mac_signing_key, ntlm_session_key, pSesInfo->password_with_pad); rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 65f96baccb4c..5d976743f720 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -250,12 +250,17 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, receive_len + 4 /* include 4 byte RFC1001 header */ ); -rc = cifs_verify_signature(out_buf, ses->mac_signing_key,midQ->sequence_number); /* BB fix BB */ - dump_smb(out_buf, 92); /* convert the length into a more usable form */ out_buf->smb_buf_length = be32_to_cpu(out_buf->smb_buf_length); + 
if((out_buf->smb_buf_length > 24) && + (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { + rc = cifs_verify_signature(out_buf, ses->mac_signing_key,midQ->sequence_number); /* BB fix BB */ + if(rc) + cFYI(1,("Unexpected signature received from server")); + } + if (out_buf->smb_buf_length > 12) out_buf->Flags2 = le16_to_cpu(out_buf->Flags2); if (out_buf->smb_buf_length > 28) -- cgit v1.2.3 From 3d97dc2d349e6630bced9ced2ca7d0c7b52e49bc Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 05:24:13 -0700 Subject: [PATCH] shared library support for MMUless binfmt_flat loader This patch adds shared library support to the MMUless application loader, binfmt_flat. This is not new, it is a forward port from the same support in 2.4.x kernels with MMUless support, and has been running for well over a year now. The code support is conditionally compiled on CONFIG_BINFMT_SHARED_FLAT. This change also abstracts a bit more architecture dependent code into the separate flat.h includes. Basically relocations within an application also carry a tag to identify what they refer to (this code or which shared library). This is patched as before at load/run-time with an appropriate address.
--- fs/binfmt_flat.c | 507 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 364 insertions(+), 143 deletions(-) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 1c7699cea914..4cdc1b8c5020 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -1,15 +1,17 @@ +/****************************************************************************/ /* * linux/fs/binfmt_flat.c * - * Copyright (C) 2000, 2001 Lineo, by David McCullough + * Copyright (C) 2000-2003 David McCullough * Copyright (C) 2002 Greg Ungerer - * + * Copyright (C) 2002 SnapGear, by Paul Dale + * Copyright (C) 2000, 2001 Lineo, by David McCullough * based heavily on: * * linux/fs/binfmt_aout.c: * Copyright (C) 1991, 1992, 1996 Linus Torvalds * linux/fs/binfmt_flat.c for 2.0 kernel - * Copyright (C) 1998 Kenneth Albanowski + * Copyright (C) 1998 Kenneth Albanowski * JAN/99 -- coded full program relocation (gerg@snapgear.com) */ @@ -42,27 +44,69 @@ #include #include -#undef DEBUG +/****************************************************************************/ + +#if 0 +#define DEBUG 1 +#endif + #ifdef DEBUG -#define DBG_FLT(a...) printk(##a) +#define DBG_FLT(a...) printk(a) #else #define DBG_FLT(a...) #endif +#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ +#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ + +struct lib_info { + struct { + unsigned long start_code; /* Start of text segment */ + unsigned long start_data; /* Start of data segment */ + unsigned long start_brk; /* End of data segment */ + unsigned long text_len; /* Length of text segment */ + unsigned long entry; /* Start address for this module */ + unsigned long build_date; /* When this one was compiled */ + short loaded; /* Has this library been loaded? 
*/ + } lib_list[MAX_SHARED_LIBS]; +}; + +#ifdef CONFIG_BINFMT_SHARED_FLAT +static int load_flat_shared_library(int id, struct lib_info *p); +#endif + static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); -static int load_flat_library(struct file*); +static int flat_core_dump(long signr, struct pt_regs * regs, struct file *file); + extern void dump_thread(struct pt_regs *, struct user *); static struct linux_binfmt flat_format = { - NULL, THIS_MODULE, load_flat_binary, load_flat_library, NULL, PAGE_SIZE + .module = THIS_MODULE, + .load_binary = load_flat_binary, + .core_dump = flat_core_dump, + .min_coredump = PAGE_SIZE }; +/****************************************************************************/ +/* + * Routine writes a core dump image in the current directory. + * Currently only a stub-function. + */ + +static int flat_core_dump(long signr, struct pt_regs * regs, struct file *file) +{ + printk("Process %s:%d received signr %d and should have core dumped\n", + current->comm, current->pid, (int) signr); + return(1); +} +/****************************************************************************/ /* * create_flat_tables() parses the env- and arg-strings in new user * memory and creates the pointer tables from them, and puts their * addresses on the "stack", returning the new stack pointer value. 
*/ + static unsigned long create_flat_tables( unsigned long pp, struct linux_binprm * bprm) @@ -80,10 +124,13 @@ static unsigned long create_flat_tables( envp = sp; sp -= argc+1; argv = sp; + + flat_stack_align(sp); if (flat_argvp_envp_on_stack()) { - put_user((unsigned long) envp, --sp); - put_user((unsigned long) argv, --sp); + --sp; put_user((unsigned long) envp, sp); + --sp; put_user((unsigned long) argv, sp); } + put_user(argc,--sp); current->mm->arg_start = (unsigned long) p; while (argc-->0) { @@ -105,6 +152,7 @@ static unsigned long create_flat_tables( return (unsigned long)sp; } +/****************************************************************************/ #ifdef CONFIG_BINFMT_ZFLAT @@ -183,7 +231,7 @@ static int decompress_exec( if (buf[3] & EXTRA_FIELD) { ret += 2 + buf[10] + (buf[11] << 8); if (unlikely(LBUFSIZE == ret)) { - DBG_FLAT("binfmt_flat: buffer overflow (EXTRA)?\n"); + DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n"); return -ENOEXEC; } } @@ -191,7 +239,7 @@ static int decompress_exec( for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) ; if (unlikely(LBUFSIZE == ret)) { - DBG_FLAT("binfmt_flat: buffer overflow (ORIG_NAME)?\n"); + DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n"); return -ENOEXEC; } } @@ -199,7 +247,7 @@ static int decompress_exec( for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) ; if (unlikely(LBUFSIZE == ret)) { - DBG_FLAT("binfmt_flat: buffer overflow (COMMENT)?\n"); + DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n"); return -ENOEXEC; } } @@ -243,48 +291,78 @@ static int decompress_exec( #endif /* CONFIG_BINFMT_ZFLAT */ +/****************************************************************************/ static unsigned long -calc_reloc(unsigned long r, unsigned long text_len) +calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) { unsigned long addr; - - if (r > current->mm->start_brk - current->mm->start_data + text_len) { - printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x), killing!\n", - 
(int) r,(int)(current->mm->start_brk-current->mm->start_code)); - send_sig(SIGSEGV, current, 0); - return(current->mm->start_brk); /* return something safe to write to */ + int id; + unsigned long start_brk; + unsigned long start_data; + unsigned long text_len; + unsigned long start_code; + +#ifdef CONFIG_BINFMT_SHARED_FLAT + if (r == 0) + id = curid; /* Relocs of 0 are always self referring */ + else { + id = (r >> 24) & 0xff; /* Find ID for this reloc */ + r &= 0x00ffffff; /* Trim ID off here */ + } + if (id >= MAX_SHARED_LIBS) { + printk("BINFMT_FLAT: reference 0x%x to shared library %d", + (unsigned) r, id); + goto failed; + } + if (curid != id) { + if (internalp) { + printk("BINFMT_FLAT: reloc address 0x%x not in same module " + "(%d != %d)", (unsigned) r, curid, id); + goto failed; + } else if ( ! p->lib_list[id].loaded && + load_flat_shared_library(id, p) > (unsigned long) -4096) { + printk("BINFMT_FLAT: failed to load library %d", id); + goto failed; + } + /* Check versioning information (i.e. 
time stamps) */ + if (p->lib_list[id].build_date && p->lib_list[curid].build_date && + p->lib_list[curid].build_date < p->lib_list[id].build_date) { + printk("BINFMT_FLAT: library %d is younger than %d", id, curid); + goto failed; + } } +#else + id = 0; +#endif - if (r < text_len) { - /* In text segment */ - return r + current->mm->start_code; + start_brk = p->lib_list[id].start_brk; + start_data = p->lib_list[id].start_data; + start_code = p->lib_list[id].start_code; + text_len = p->lib_list[id].text_len; + + if (!flat_reloc_valid(r, start_brk - start_data + text_len)) { + printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)", + (int) r,(int)(start_brk-start_code),(int)text_len); + goto failed; } - /* - * we allow inclusive ranges here so that programs may do things - * like reference the end of data (_end) without failing these tests - */ - addr = r - text_len + current->mm->start_data; - if (addr >= current->mm->start_code && - addr <= current->mm->start_code + text_len) - return(addr); - - if (addr >= current->mm->start_data && - addr <= current->mm->start_brk) - return(addr); - - printk("BINFMT_FLAT: reloc addr outside text/data 0x%x " - "code(0x%x - 0x%x) data(0x%x - 0x%x) killing\n", (int) addr, - (int) current->mm->start_code, - (int) (current->mm->start_code + text_len), - (int) current->mm->start_data, - (int) current->mm->start_brk); + if (r < text_len) /* In text segment */ + addr = r + start_code; + else /* In data segment */ + addr = r - text_len + start_data; + + /* Range checked already above so doing the range tests is redundant...*/ + return(addr); + +failed: + printk(", killing %s!\n", current->comm); send_sig(SIGSEGV, current, 0); - return(current->mm->start_brk); /* return something safe to write to */ + return RELOC_FAILED; } +/****************************************************************************/ void old_reloc(unsigned long rl) { @@ -327,26 +405,22 @@ void old_reloc(unsigned long rl) #endif } 
+/****************************************************************************/ -/* - * These are the functions used to load flat style executables and shared - * libraries. There is no binary dependent code anywhere else. - */ - -static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) +static int load_flat_file(struct linux_binprm * bprm, + struct lib_info *libinfo, int id, unsigned long *extra_stack) { struct flat_hdr * hdr; unsigned long textpos = 0, datapos = 0, result; + unsigned long realdatastart = 0; unsigned long text_len, data_len, bss_len, stack_len, flags; - unsigned long memp = 0, memkasked = 0; /* for finding the brk area */ + unsigned long memp = 0; /* for finding the brk area */ unsigned long extra, rlim; - unsigned long p = bprm->p; unsigned long *reloc = 0, *rp; struct inode *inode; int i, rev, relocs = 0; loff_t fpos; - - DBG_FLT("BINFMT_FLAT: Loading file: %x\n", bprm->file); + unsigned long start_code, end_code; hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ inode = bprm->file->f_dentry->d_inode; @@ -355,41 +429,42 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) data_len = ntohl(hdr->data_end) - ntohl(hdr->data_start); bss_len = ntohl(hdr->bss_end) - ntohl(hdr->data_end); stack_len = ntohl(hdr->stack_size); + if (extra_stack) { + stack_len += *extra_stack; + *extra_stack = stack_len; + } relocs = ntohl(hdr->reloc_count); flags = ntohl(hdr->flags); rev = ntohl(hdr->rev); - /* - * We have to add the size of our arguments to our stack size - * otherwise it's too easy for users to create stack overflows - * by passing in a huge argument list. And yes, we have to be - * pedantic and include space for the argv/envp array as it may have - * a lot of entries. 
- */ - #define TOP_OF_ARGS (PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *)) - stack_len += TOP_OF_ARGS - bprm->p; /* the strings */ - stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ - stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ + if (flags & FLAT_FLAG_KTRACE) + printk("BINFMT_FLAT: Loading file: %s\n", bprm->filename); if (strncmp(hdr->magic, "bFLT", 4) || (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION)) { /* * because a lot of people do not manage to produce good * flat binaries, we leave this printk to help them realise - * the problem. We only print the error if it's - * not a script file. + * the problem. We only print the error if its not a script file */ if (strncmp(hdr->magic, "#!", 2)) printk("BINFMT_FLAT: bad magic/rev (0x%x, need 0x%x)\n", rev, (int) FLAT_VERSION); return -ENOEXEC; } + + /* Don't allow old format executables to use shared libraries */ + if (rev == OLD_FLAT_VERSION && id != 0) { + printk("BINFMT_FLAT: shared libraries are not available before rev 0x%x\n", + (int) FLAT_VERSION); + return -ENOEXEC; + } /* * fix up the flags for the older format, there were all kinds * of endian hacks, this only works for the simple cases */ - if (rev == OLD_FLAT_VERSION && flags) + if (rev == OLD_FLAT_VERSION && flat_old_ram_flag(flags)) flags = FLAT_FLAG_RAM; #ifndef CONFIG_BINFMT_ZFLAT @@ -411,15 +486,22 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) return -ENOMEM; /* Flush all traces of the currently running executable */ - result = flush_old_exec(bprm); - if (result) - return result; + if (id == 0) { + result = flush_old_exec(bprm); + if (result) + return result; + + /* OK, This is the point of no return */ + set_personality(PER_LINUX); + } - /* OK, This is the point of no return */ - set_personality(PER_LINUX); + /* + * calculate the extra space we need to map in + */ + extra = max(bss_len + stack_len, relocs * sizeof(unsigned long)); /* - * there are a couple of cases here, the 
separate code/data + * there are a couple of cases here, the seperate code/data * case, and then the fully copied to RAM case which lumps * it all together. */ @@ -437,26 +519,27 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (!textpos) textpos = (unsigned long) -ENOMEM; printk("Unable to mmap process text, errno %d\n", (int)-textpos); + return(textpos); } - extra = max(bss_len + stack_len, relocs * sizeof(unsigned long)), - down_write(&current->mm->mmap_sem); - datapos = do_mmap(0, 0, data_len + extra, + realdatastart = do_mmap(0, 0, data_len + extra + + MAX_SHARED_LIBS * sizeof(unsigned long), PROT_READ|PROT_WRITE|PROT_EXEC, 0, 0); up_write(&current->mm->mmap_sem); - if (datapos == 0 || datapos >= (unsigned long)-4096) { - if (!datapos) - datapos = (unsigned long) -ENOMEM; + if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { + if (!realdatastart) + realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", (int)-datapos); do_munmap(current->mm, textpos, text_len); - return datapos; + return realdatastart; } + datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n", - data_len + bss_len + stack_len, datapos); + (int)(data_len + bss_len + stack_len), (int)datapos); fpos = ntohl(hdr->data_start); #ifdef CONFIG_BINFMT_ZFLAT @@ -466,30 +549,24 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) } else #endif { - result = bprm->file->f_op->read(bprm->file, - (char *) datapos, data_len + extra, &fpos); + result = bprm->file->f_op->read(bprm->file, (char *) datapos, + data_len + (relocs * sizeof(unsigned long)), &fpos); } if (result >= (unsigned long)-4096) { printk("Unable to read data+bss, errno %d\n", (int)-result); do_munmap(current->mm, textpos, text_len); - do_munmap(current->mm, datapos, data_len + extra); + do_munmap(current->mm, realdatastart, data_len + extra); return
result; } reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len)); - memp = datapos; - memkasked = data_len + extra; + memp = realdatastart; } else { - /* - * calculate the extra space we need to map in - */ - - extra = max(bss_len + stack_len, relocs * sizeof(unsigned long)), - down_write(&current->mm->mmap_sem); - textpos = do_mmap(0, 0, text_len + data_len + extra, + textpos = do_mmap(0, 0, text_len + data_len + extra + + MAX_SHARED_LIBS * sizeof(unsigned long), PROT_READ | PROT_EXEC | PROT_WRITE, 0, 0); up_write(&current->mm->mmap_sem); if (!textpos || textpos >= (unsigned long) -4096) { @@ -497,12 +574,14 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) textpos = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process text/data, errno %d\n", (int)-textpos); + return(textpos); } - datapos = textpos + ntohl (hdr->data_start); - reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start)); + realdatastart = textpos + ntohl(hdr->data_start); + datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); + reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + + MAX_SHARED_LIBS * sizeof(unsigned long)); memp = textpos; - memkasked = text_len + data_len + extra; #ifdef CONFIG_BINFMT_ZFLAT /* @@ -514,6 +593,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) (text_len + data_len + (relocs * sizeof(unsigned long)) - sizeof (struct flat_hdr)), 0); + memmove((void *) datapos, (void *) realdatastart, + data_len + (relocs * sizeof(unsigned long))); } else if (flags & FLAT_FLAG_GZDATA) { fpos = 0; result = bprm->file->f_op->read(bprm->file, @@ -527,40 +608,64 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) { fpos = 0; result = bprm->file->f_op->read(bprm->file, - (char *) textpos, text_len + data_len + extra, &fpos); + (char *) textpos, text_len, &fpos); + if (result < (unsigned long) -4096) { + fpos = ntohl(hdr->data_start); + result =
bprm->file->f_op->read(bprm->file, (char *) datapos, + data_len + (relocs * sizeof(unsigned long)), &fpos); + } } if (result >= (unsigned long)-4096) { printk("Unable to read code+data+bss, errno %d\n",(int)-result); - do_munmap(current->mm, textpos, text_len + data_len + extra); + do_munmap(current->mm, textpos, text_len + data_len + extra + + MAX_SHARED_LIBS * sizeof(unsigned long)); return result; } } - DBG_FLT("Mapping is %x, Entry point is %x, data_start is %x\n", - textpos, ntohl(hdr->entry), ntohl(hdr->data_start)); - - current->mm->start_code = textpos + sizeof (struct flat_hdr); - current->mm->end_code = textpos + text_len; - current->mm->start_data = datapos; - current->mm->end_data = datapos + data_len; - /* - * set up the brk stuff (uses any slack left in data/bss/stack allocation - * We put the brk after the bss (between the bss and stack) like other - * platforms. - */ - current->mm->start_brk = datapos + data_len + bss_len; - current->mm->brk = (current->mm->start_brk + 3) & ~3; - current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; - current->mm->rss = 0; + if (flags & FLAT_FLAG_KTRACE) + printk("Mapping is %x, Entry point is %x, data_start is %x\n", + (int)textpos, 0x00ffffff&ntohl(hdr->entry), ntohl(hdr->data_start)); + + /* The main program needs a little extra setup in the task structure */ + start_code = textpos + sizeof (struct flat_hdr); + end_code = textpos + text_len; + if (id == 0) { + current->mm->start_code = start_code; + current->mm->end_code = end_code; + current->mm->start_data = datapos; + current->mm->end_data = datapos + data_len; + /* + * set up the brk stuff, uses any slack left in data/bss/stack + * allocation. We put the brk after the bss (between the bss + * and stack) like other platforms. 
+ */ + current->mm->start_brk = datapos + data_len + bss_len; + current->mm->brk = (current->mm->start_brk + 3) & ~3; + current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; + current->mm->rss = 0; + } - DBG_FLT("Load %s: TEXT=%x-%x DATA=%x-%x BSS=%x-%x\n", - bprm->filename, - (int) current->mm->start_code, (int) current->mm->end_code, - (int) current->mm->start_data, (int) current->mm->end_data, - (int) current->mm->end_data, (int) current->mm->brk); + if (flags & FLAT_FLAG_KTRACE) + printk("%s %s: TEXT=%x-%x DATA=%x-%x BSS=%x-%x\n", + id ? "Lib" : "Load", bprm->filename, + (int) start_code, (int) end_code, + (int) datapos, + (int) (datapos + data_len), + (int) (datapos + data_len), + (int) (((datapos + data_len + bss_len) + 3) & ~3)); text_len -= sizeof(struct flat_hdr); /* the real code len */ + /* Store the current module values into the global library structure */ + libinfo->lib_list[id].start_code = start_code; + libinfo->lib_list[id].start_data = datapos; + libinfo->lib_list[id].start_brk = datapos + data_len + bss_len; + libinfo->lib_list[id].text_len = text_len; + libinfo->lib_list[id].loaded = 1; + libinfo->lib_list[id].entry = (0x00ffffff & ntohl(hdr->entry)) + textpos; + libinfo->lib_list[id].build_date = ntohl(hdr->build_date); + /* * We just load the allocations into some temporary memory to * help simplify all this mumbo jumbo @@ -573,10 +678,16 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) * really an offset into the image which contains an offset into the * image. 
*/ - if (flags & FLAT_FLAG_GOTPIC) { - for (rp = (unsigned long *)datapos; *rp != 0xffffffff; rp++) - *rp = calc_reloc(*rp, text_len); + for (rp = (unsigned long *)datapos; *rp != 0xffffffff; rp++) { + unsigned long addr; + if (*rp) { + addr = calc_reloc(*rp, libinfo, id, 0); + if (addr == RELOC_FAILED) + return -ENOEXEC; + *rp = addr; + } + } } /* @@ -590,63 +701,172 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) * reference to be statically initialised to _stext (I've moved * __start to address 4 so that is okay). */ - if (rev > OLD_FLAT_VERSION) { for (i=0; i < relocs; i++) { - unsigned long addr; + unsigned long addr, relval; /* Get the address of the pointer to be relocated (of course, the address has to be relocated first). */ - rp = (unsigned long *) calc_reloc(ntohl(reloc[i]), text_len); + relval = ntohl(reloc[i]); + addr = flat_get_relocate_addr(relval); + rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1); + if (rp == (unsigned long *)RELOC_FAILED) + return -ENOEXEC; /* Get the pointer's value. */ - addr = get_unaligned (rp); - + addr = flat_get_addr_from_rp(rp, relval); if (addr != 0) { /* * Do the relocation. PIC relocs in the data section are * already in target order */ - addr = calc_reloc( - (flags & FLAT_FLAG_GOTPIC) ? addr : ntohl(addr), - text_len); + if ((flags & FLAT_FLAG_GOTPIC) == 0) + addr = ntohl(addr); + addr = calc_reloc(addr, libinfo, id, 0); + if (addr == RELOC_FAILED) + return -ENOEXEC; + /* Write back the relocated pointer. 
*/ - put_unaligned (addr, rp); + flat_put_addr_at_rp(rp, addr, relval); } } } else { for (i=0; i < relocs; i++) old_reloc(ntohl(reloc[i])); } + + flush_icache_range(start_code, end_code); /* zero the BSS, BRK and stack areas */ memset((void*)(datapos + data_len), 0, bss_len + - (current->mm->context.end_brk - current->mm->start_brk) + + (memp + ksize((void *) memp) - stack_len - /* end brk */ + libinfo->lib_list[id].start_brk) + /* start brk */ stack_len); + return 0; +} + + +/****************************************************************************/ +#ifdef CONFIG_BINFMT_SHARED_FLAT + +/* + * Load a shared library into memory. The library gets its own data + * segment (including bss) but not argv/argc/environ. + */ + +static int load_flat_shared_library(int id, struct lib_info *libs) +{ + struct linux_binprm bprm; + int res; + char buf[16]; + + /* Create the file name */ + sprintf(buf, "/lib/lib%d.so", id); + + /* Open the file up */ + bprm.filename = buf; + bprm.file = open_exec(bprm.filename); + res = PTR_ERR(bprm.file); + if (IS_ERR(bprm.file)) + return res; + + res = prepare_binprm(&bprm); + + if (res <= (unsigned long)-4096) + res = load_flat_file(&bprm, libs, id, NULL); + if (bprm.file) { + allow_write_access(bprm.file); + fput(bprm.file); + bprm.file = NULL; + } + return(res); +} + +#endif /* CONFIG_BINFMT_SHARED_FLAT */ +/****************************************************************************/ + +/* + * These are the functions used to load flat style executables and shared + * libraries. There is no binary dependent code anywhere else. 
+ */ + +static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) +{ + struct lib_info libinfo; + unsigned long p = bprm->p; + unsigned long stack_len; + unsigned long start_addr; + unsigned long *sp; + int res; + int i, j; + + memset(&libinfo, 0, sizeof(libinfo)); + /* + * We have to add the size of our arguments to our stack size + * otherwise it's too easy for users to create stack overflows + * by passing in a huge argument list. And yes, we have to be + * pedantic and include space for the argv/envp array as it may have + * a lot of entries. + */ +#define TOP_OF_ARGS (PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *)) + stack_len = TOP_OF_ARGS - bprm->p; /* the strings */ + stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ + stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ + + + res = load_flat_file(bprm, &libinfo, 0, &stack_len); + if (res > (unsigned long)-4096) + return res; + + /* Update data segment pointers for all libraries */ + for (i=0; iflags &= ~PF_FORKNOEXEC; - flush_icache_range(current->mm->start_code, current->mm->end_code); - set_binfmt(&flat_format); p = ((current->mm->context.end_brk + stack_len + 3) & ~3) - 4; - DBG_FLT("p=%x\n", p); + DBG_FLT("p=%x\n", (int)p); /* copy the arg pages onto the stack, this could be more efficient :-) */ for (i = TOP_OF_ARGS - 1; i >= bprm->p; i--) * (char *) --p = ((char *) page_address(bprm->page[i/PAGE_SIZE]))[i % PAGE_SIZE]; - current->mm->start_stack = (unsigned long) create_flat_tables(p, bprm); + sp = (unsigned long *) create_flat_tables(p, bprm); + + /* Fake some return addresses to ensure the call chain will + * initialise library in order for us. We are required to call + * lib 1 first, then 2, ... and finally the main program (id 0). 
+ */ + start_addr = libinfo.lib_list[0].entry; + +#ifdef CONFIG_BINFMT_SHARED_FLAT + for (i = MAX_SHARED_LIBS-1; i>0; i--) { + if (libinfo.lib_list[i].loaded) { + /* Push previos first to call address */ + --sp; put_user(start_addr, sp); + start_addr = libinfo.lib_list[i].entry; + } + } +#endif + + /* Stash our initial stack pointer into the mm structure */ + current->mm->start_stack = (unsigned long )sp; + DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", - regs, textpos + ntohl(hdr->entry), current->mm->start_stack); - start_thread(regs, - textpos + ntohl(hdr->entry), - current->mm->start_stack); + (int)regs, (int)start_addr, (int)current->mm->start_stack); + + start_thread(regs, start_addr, current->mm->start_stack); if (current->ptrace & PT_PTRACED) send_sig(SIGTRAP, current, 0); @@ -654,10 +874,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) return 0; } -static int load_flat_library(struct file *file) -{ - return(-ENOEXEC); -} +/****************************************************************************/ static int __init init_flat_binfmt(void) { @@ -669,5 +886,9 @@ static void __exit exit_flat_binfmt(void) unregister_binfmt(&flat_format); } +/****************************************************************************/ + module_init(init_flat_binfmt); module_exit(exit_flat_binfmt); + +/****************************************************************************/ -- cgit v1.2.3 From 2253b09ed84acda70967c6851326c0cb8fa40143 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 05:24:20 -0700 Subject: [PATCH] flat loader H8/300 specific support abstracted Architecture specific flat loader code for H8/300 moved into its own H8/300 flat.h header. 
--- include/asm-h8300/flat.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/include/asm-h8300/flat.h b/include/asm-h8300/flat.h index ac3f50610f54..61d9aaf7e0b2 100644 --- a/include/asm-h8300/flat.h +++ b/include/asm-h8300/flat.h @@ -5,6 +5,21 @@ #ifndef __H8300_FLAT_H__ #define __H8300_FLAT_H__ -#define flat_argvp_envp_on_stack() 1 +#define flat_stack_align(sp) /* nothing needed */ +#define flat_argvp_envp_on_stack() 1 +#define flat_old_ram_flag(flags) 1 +#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) + +/* + * on the H8 a couple of the relocations have an instruction in the + * top byte. As there can only be 24bits of address space, we just + * always preserve that 8bits at the top, when it isn't an instruction + * is is 0 (davidm@snapgear.com) + */ + +#define flat_get_relocate_addr(rel) (rel) +#define flat_get_addr_from_rp(rp, relval) (get_unaligned(rp) & 0x0ffffff) +#define flat_put_addr_at_rp(rp, addr, rel) \ + put_unaligned (((*(char *)(rp)) << 24) | ((addr) & 0x00ffffff), rp) #endif /* __H8300_FLAT_H__ */ -- cgit v1.2.3 From cb04237beb18b54fa8d3183aacea49270369a671 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 05:24:52 -0700 Subject: [PATCH] flat loader m68knommu specific support abstracted Architecture specific flat loader code for m68knommu moved into its own m68knommu flat.h header. Part of the shared library flat loader update. 
--- include/asm-m68knommu/flat.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/asm-m68knommu/flat.h b/include/asm-m68knommu/flat.h index fe805aaa33df..73712f276cc0 100644 --- a/include/asm-m68knommu/flat.h +++ b/include/asm-m68knommu/flat.h @@ -5,6 +5,12 @@ #ifndef __M68KNOMMU_FLAT_H__ #define __M68KNOMMU_FLAT_H__ -#define flat_argvp_envp_on_stack() 1 +#define flat_stack_align(sp) /* nothing needed */ +#define flat_argvp_envp_on_stack() 1 +#define flat_old_ram_flag(flags) (flags) +#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) +#define flat_get_addr_from_rp(rp, relval) get_unaligned(rp) +#define flat_put_addr_at_rp(rp, val, relval) put_unaligned(val,rp) +#define flat_get_relocate_addr(rel) (rel) #endif /* __M68KNOMMU_FLAT_H__ */ -- cgit v1.2.3 From 40d98fd3062713d859922203d990a0384bdb6dd6 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 05:25:03 -0700 Subject: [PATCH] flat loader v850 specific support abstracted Architecture specific flat loader code for v850 moved into its own v850 flat.h header. This patch also adds support for a number of relocation cases that need to be handled at load time. Most of this code is originally from Miles Bader . --- include/asm-v850/flat.h | 117 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 3 deletions(-) diff --git a/include/asm-v850/flat.h b/include/asm-v850/flat.h index 94a42a41e843..17106c894ab7 100644 --- a/include/asm-v850/flat.h +++ b/include/asm-v850/flat.h @@ -1,8 +1,8 @@ /* * include/asm-v850/flat.h -- uClinux flat-format executables * - * Copyright (C) 2002 NEC Corporation - * Copyright (C) 2002 Miles Bader + * Copyright (C) 2002,03 NEC Electronics Corporation + * Copyright (C) 2002,03 Miles Bader * * This file is subject to the terms and conditions of the GNU General * Public License.
See the file COPYING in the main directory of this @@ -14,6 +14,117 @@ #ifndef __V850_FLAT_H__ #define __V850_FLAT_H__ -#define flat_argvp_envp_on_stack() 0 +/* The amount by which a relocation can exceed the program image limits + without being regarded as an error. On the v850, the relocations of + some base-pointers can be offset by 0x8000 (to allow better usage of the + space offered by 16-bit signed offsets -- in most cases the offsets used + with such a base-pointer will be negative). */ + +#define flat_reloc_valid(reloc, size) ((reloc) <= (size + 0x8000)) + +#define flat_stack_align(sp) /* nothing needed */ +#define flat_argvp_envp_on_stack() 0 +#define flat_old_ram_flag(flags) (flags) + +/* We store the type of relocation in the top 4 bits of the `relval.' */ + +/* Convert a relocation entry into an address. */ +static inline unsigned long +flat_get_relocate_addr (unsigned long relval) +{ + return relval & 0x0fffffff; /* Mask out top 4-bits */ +} + +#define flat_v850_get_reloc_type(relval) ((relval) >> 28) + +#define FLAT_V850_R_32 0 /* Normal 32-bit reloc */ +#define FLAT_V850_R_HI16S_LO15 1 /* High 16-bits + signed 15-bit low field */ +#define FLAT_V850_R_HI16S_LO16 2 /* High 16-bits + signed 16-bit low field */ + +/* Extract the address to be relocated from the symbol reference at RP; + RELVAL is the raw relocation-table entry from which RP is derived. + For the v850, RP should always be half-word aligned. */ +static inline unsigned long flat_get_addr_from_rp (unsigned long *rp, + unsigned long relval) +{ + short *srp = (short *)rp; + + switch (flat_v850_get_reloc_type (relval)) + { + case FLAT_V850_R_32: + /* Simple 32-bit address. */ + return srp[0] | (srp[1] << 16); + + case FLAT_V850_R_HI16S_LO16: + /* The high and low halves of the address are in the 16 + bits at RP, and the 2nd word of the 32-bit instruction + following that, respectively. 
The low half is _signed_ + so we have to sign-extend it and add it to the upper + half instead of simply or-ing them together. + + Unlike most relocated address, this one is stored in + native (little-endian) byte-order to avoid problems with + trashing the low-order bit, so we have to convert to + network-byte-order before returning, as that's what the + caller expects. */ + return htonl ((srp[0] << 16) + srp[2]); + + case FLAT_V850_R_HI16S_LO15: + /* The high and low halves of the address are in the 16 + bits at RP, and the upper 15 bits of the 2nd word of the + 32-bit instruction following that, respectively. The + low half is _signed_ so we have to sign-extend it and + add it to the upper half instead of simply or-ing them + together. The lowest bit is always zero. + + Unlike most relocated address, this one is stored in + native (little-endian) byte-order to avoid problems with + trashing the low-order bit, so we have to convert to + network-byte-order before returning, as that's what the + caller expects. */ + return htonl ((srp[0] << 16) + (srp[2] & ~0x1)); + + default: + return ~0; /* bogus value */ + } +} + +/* Insert the address ADDR into the symbol reference at RP; + RELVAL is the raw relocation-table entry from which RP is derived. + For the v850, RP should always be half-word aligned. */ +static inline void flat_put_addr_at_rp (unsigned long *rp, unsigned long addr, + unsigned long relval) +{ + short *srp = (short *)rp; + + switch (flat_v850_get_reloc_type (relval)) { + case FLAT_V850_R_32: + /* Simple 32-bit address. */ + srp[0] = addr & 0xFFFF; + srp[1] = (addr >> 16); + break; + + case FLAT_V850_R_HI16S_LO16: + /* The high and low halves of the address are in the 16 + bits at RP, and the 2nd word of the 32-bit instruction + following that, respectively. The low half is _signed_ + so we must carry its sign bit to the upper half before + writing the upper half. 
*/ + srp[0] = (addr >> 16) + ((addr >> 15) & 0x1); + srp[2] = addr & 0xFFFF; + break; + + case FLAT_V850_R_HI16S_LO15: + /* The high and low halves of the address are in the 16 + bits at RP, and the upper 15 bits of the 2nd word of the + 32-bit instruction following that, respectively. The + low half is _signed_ so we must carry its sign bit to + the upper half before writing the upper half. The + lowest bit we preserve from the existing instruction. */ + srp[0] = (addr >> 16) + ((addr >> 15) & 0x1); + srp[2] = (addr & 0xFFFE) | (srp[2] & 0x1); + break; + } +} #endif /* __V850_FLAT_H__ */ -- cgit v1.2.3 From 1e8bb0c57f53e35bef53e8588f46d8daaf9c2188 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Jul 2003 05:28:36 -0700 Subject: Fix several broken macros to get the "private" field of a seq-file in the networking code. From YOSHIFUJI Hideaki --- net/ipv4/igmp.c | 4 ++-- net/ipv4/raw.c | 2 +- net/ipv6/anycast.c | 2 +- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/mcast.c | 4 ++-- net/ipv6/raw.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9dd6fa44045f..0f7515214803 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2099,7 +2099,7 @@ struct igmp_mc_iter_state { struct in_device *in_dev; }; -#define igmp_mc_seq_private(seq) ((struct igmp_mc_iter_state *)&seq->private) +#define igmp_mc_seq_private(seq) ((struct igmp_mc_iter_state *)(seq)->private) static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) { @@ -2254,7 +2254,7 @@ struct igmp_mcf_iter_state { struct ip_mc_list *im; }; -#define igmp_mcf_seq_private(seq) ((struct igmp_mcf_iter_state *)&seq->private) +#define igmp_mcf_seq_private(seq) ((struct igmp_mcf_iter_state *)(seq)->private) static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index cb939b4fad05..76dc609a793b 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -687,7 +687,7 @@ struct raw_iter_state { 
int bucket; }; -#define raw_seq_private(seq) ((struct raw_iter_state *)&seq->private) +#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private) static struct sock *raw_get_first(struct seq_file *seq) { diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index ff77920a52c3..1dc5b9da2e2b 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -441,7 +441,7 @@ struct ac6_iter_state { struct inet6_dev *idev; }; -#define ac6_seq_private(seq) ((struct ac6_iter_state *)&seq->private) +#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7211838a4546..c26b90ab5717 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -559,7 +559,7 @@ struct ip6fl_iter_state { int bucket; }; -#define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)&(seq)->private) +#define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private) static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f286ae429a2c..829832275533 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2045,7 +2045,7 @@ struct igmp6_mc_iter_state { struct inet6_dev *idev; }; -#define igmp6_mc_seq_private(seq) ((struct igmp6_mc_iter_state *)&seq->private) +#define igmp6_mc_seq_private(seq) ((struct igmp6_mc_iter_state *)(seq)->private) static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) { @@ -2185,7 +2185,7 @@ struct igmp6_mcf_iter_state { struct ifmcaddr6 *im; }; -#define igmp6_mcf_seq_private(seq) ((struct igmp6_mcf_iter_state *)&seq->private) +#define igmp6_mcf_seq_private(seq) ((struct igmp6_mcf_iter_state *)(seq)->private) static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d1b44f4b8134..da0977558fc2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ 
-913,7 +913,7 @@ struct raw6_iter_state { int bucket; }; -#define raw6_seq_private(seq) ((struct raw6_iter_state *)&seq->private) +#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private) static struct sock *raw6_get_first(struct seq_file *seq) { -- cgit v1.2.3 From 8ffcb67a9b54e5cf37ecc90b01f906b5f83920fb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:40:14 -0700 Subject: [PATCH] use task_cpu() not ->thread_info->cpu in sched.c From: Mikael Pettersson This patch fixes two p->thread_info->cpu occurrences in kernel/sched.c to use the task_cpu(p) macro instead, which is optimised on UP. Although one of the occurrences is under #ifdef CONFIG_SMP, it's bad style to use the raw non-optimisable form in non-arch code. --- kernel/sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index bb552059577d..4f3d1fa42669 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -508,8 +508,8 @@ repeat_lock_task: } #ifdef CONFIG_SMP else - if (unlikely(kick) && task_running(rq, p) && (p->thread_info->cpu != smp_processor_id())) - smp_send_reschedule(p->thread_info->cpu); + if (unlikely(kick) && task_running(rq, p) && (task_cpu(p) != smp_processor_id())) + smp_send_reschedule(task_cpu(p)); #endif p->state = TASK_RUNNING; } @@ -1332,7 +1332,7 @@ pick_next_task: switch_tasks: prefetch(next); clear_tsk_need_resched(prev); - RCU_qsctr(prev->thread_info->cpu)++; + RCU_qsctr(task_cpu(prev))++; if (likely(prev != next)) { rq->nr_switches++; -- cgit v1.2.3 From 114e04f27a009b3ecb17b00ab51a367c3007097c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:40:21 -0700 Subject: [PATCH] misc fixes - xfs printk warning fix (dev_t is ulong on ppc64) - unused var in serial_remove() (Daniele Bellucci ) --- drivers/serial/8250_cs.c | 2 +- fs/xfs/linux/xfs_super.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/serial/8250_cs.c b/drivers/serial/8250_cs.c index 
b1e012cb5d3d..a70c3ddfadfa 100644 --- a/drivers/serial/8250_cs.c +++ b/drivers/serial/8250_cs.c @@ -133,7 +133,7 @@ static dev_link_t *dev_list = NULL; static void serial_remove(dev_link_t *link) { struct serial_info *info = link->priv; - int i, ret; + int i; link->state &= ~DEV_PRESENT; diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c index 0df0d915865f..d482cb69fade 100644 --- a/fs/xfs/linux/xfs_super.c +++ b/fs/xfs/linux/xfs_super.c @@ -265,8 +265,8 @@ xfs_setsize_buftarg( if (set_blocksize(btp->pbr_bdev, sectorsize)) { printk(KERN_WARNING - "XFS: Cannot set_blocksize to %u on device 0x%x\n", - sectorsize, btp->pbr_dev); + "XFS: Cannot set_blocksize to %u on device 0x%lx\n", + sectorsize, (unsigned long)btp->pbr_dev); } } -- cgit v1.2.3 From 0b453423a125f0d02e7339912ed6fc2321420263 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:40:27 -0700 Subject: [PATCH] breadahead() tweaks - use ll_rw_block(). - use READA - export it to modules. --- fs/buffer.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 994bfbc41e73..d3fbedea7a1a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1454,24 +1454,13 @@ EXPORT_SYMBOL(__getblk); /* * Do async read-ahead on a buffer.. */ -void -__breadahead(struct block_device *bdev, sector_t block, int size) +void __breadahead(struct block_device *bdev, sector_t block, int size) { struct buffer_head *bh = __getblk(bdev, block, size); - if (!test_set_buffer_locked(bh)) { - if (!buffer_uptodate(bh)) { - /* - * This eats the bh count from __getblk() and - * unlocks when the read is done. 
- */ - bh->b_end_io = end_buffer_io_sync; - submit_bh(READ, bh); - return; - } - unlock_buffer(bh); - } + ll_rw_block(READA, 1, &bh); brelse(bh); } +EXPORT_SYMBOL(__breadahead); /** * __bread() - reads a specified block and returns the bh -- cgit v1.2.3 From c3087712a6489b05cfaa2e5360e3f5b111eab56e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:40:35 -0700 Subject: [PATCH] proc_attr_lookup() fix From: Daniele Bellucci proc_attr_lookup() was missed out in Trond's conversion. (It is behind CONFIG_SECURITY). --- fs/proc/base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 2c8d50e98d48..485ff692e87f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1245,7 +1245,9 @@ static struct file_operations proc_attr_operations = { .readdir = proc_attr_readdir, }; -static struct dentry *proc_attr_lookup(struct inode *dir, struct dentry *dentry){ +static struct dentry *proc_attr_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ return proc_pident_lookup(dir, dentry, attr_stuff); } -- cgit v1.2.3 From 574dd5965a0bfeb60d46de8b3b97a8d78784f145 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:40:42 -0700 Subject: [PATCH] xattr: cleanups From: Andreas Gruenbacher * Various minor cleanups and simplifications in the extended attributes and acl code. * Use a smarter shortcut rule in ext[23]_permission(): If the mask contains permissions that are not also contained in the group file mode permission bits, those permissions can never be granted by an acl. (The previous shortcut rule was more coarse.)
--- fs/ext2/acl.c | 85 +++++++++++++++++++++++++++++++++---------------------- fs/ext2/xattr.c | 55 +++++++++++++++++------------------- fs/ext3/acl.c | 88 ++++++++++++++++++++++++++++++++++----------------------- fs/ext3/xattr.c | 49 +++++++++++++++----------------- 4 files changed, 153 insertions(+), 124 deletions(-) diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 4db56bdd8fe5..bc95b66ff023 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -1,7 +1,7 @@ /* * linux/fs/ext2/acl.c * - * Copyright (C) 2001 by Andreas Gruenbacher, + * Copyright (C) 2001-2003 Andreas Gruenbacher, */ #include @@ -19,7 +19,7 @@ static struct posix_acl * ext2_acl_from_disk(const void *value, size_t size) { const char *end = (char *)value + size; - int n, count; + size_t n, count; struct posix_acl *acl; if (!value) @@ -85,7 +85,7 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) { ext2_acl_header *ext_acl; char *e; - int n; + size_t n; *size = ext2_acl_size(acl->a_count); ext_acl = (ext2_acl_header *)kmalloc(sizeof(ext2_acl_header) + @@ -130,10 +130,11 @@ fail: static struct posix_acl * ext2_get_acl(struct inode *inode, int type) { + const size_t max_size = ext2_acl_size(EXT2_ACL_MAX_ENTRIES); + struct ext2_inode_inode *ei = EXT2_I(inode); int name_index; char *value; - struct posix_acl *acl, **p_acl; - const size_t size = ext2_acl_size(EXT2_ACL_MAX_ENTRIES); + struct posix_acl *acl; int retval; if (!test_opt(inode->i_sb, POSIX_ACL)) @@ -141,36 +142,43 @@ ext2_get_acl(struct inode *inode, int type) switch(type) { case ACL_TYPE_ACCESS: - p_acl = &EXT2_I(inode)->i_acl; + if (ei->i_acl != EXT2_ACL_NOT_CACHED) + return posix_acl_dup(ei->i_acl); name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - p_acl = &EXT2_I(inode)->i_default_acl; + if (ei->i_default_acl != EXT2_ACL_NOT_CACHED) + return posix_acl_dup(ei->i_default_acl); name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; break; default: return ERR_PTR(-EINVAL); } - if (*p_acl != EXT2_ACL_NOT_CACHED) - 
return posix_acl_dup(*p_acl); - value = kmalloc(size, GFP_KERNEL); + value = kmalloc(max_size, GFP_KERNEL); if (!value) return ERR_PTR(-ENOMEM); - retval = ext2_xattr_get(inode, name_index, "", value, size); - - if (retval == -ENODATA || retval == -ENOSYS) - *p_acl = acl = NULL; - else if (retval < 0) - acl = ERR_PTR(retval); - else { + retval = ext2_xattr_get(inode, name_index, "", value, max_size); + acl = ERR_PTR(retval); + if (retval >= 0) acl = ext2_acl_from_disk(value, retval); - if (!IS_ERR(acl)) - *p_acl = posix_acl_dup(acl); - } + else if (retval == -ENODATA || retval == -ENOSYS) + acl = NULL; kfree(value); + + if (!IS_ERR(acl)) { + switch(type) { + case ACL_TYPE_ACCESS: + ei->i_acl = posix_acl_dup(acl); + break; + + case ACL_TYPE_DEFAULT: + ei->i_default_acl = posix_acl_dup(acl); + break; + } + } return acl; } @@ -180,9 +188,9 @@ ext2_get_acl(struct inode *inode, int type) static int ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) { + struct ext2_inode_info *ei = EXT2_I(inode); int name_index; void *value = NULL; - struct posix_acl **p_acl; size_t size; int error; @@ -194,7 +202,6 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) switch(type) { case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - p_acl = &EXT2_I(inode)->i_acl; if (acl) { mode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); @@ -211,7 +218,6 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_DEFAULT: name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; - p_acl = &EXT2_I(inode)->i_default_acl; if (!S_ISDIR(inode->i_mode)) return acl ? 
-EACCES : 0; break; @@ -232,9 +238,19 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) if (value) kfree(value); if (!error) { - if (*p_acl && *p_acl != EXT2_ACL_NOT_CACHED) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); + switch(type) { + case ACL_TYPE_ACCESS: + if (ei->i_acl != EXT2_ACL_NOT_CACHED) + posix_acl_release(ei->i_acl); + ei->i_acl = posix_acl_dup(acl); + break; + + case ACL_TYPE_DEFAULT: + if (ei->i_default_acl != EXT2_ACL_NOT_CACHED) + posix_acl_release(ei->i_default_acl); + ei->i_default_acl = posix_acl_dup(acl); + break; + } } return error; } @@ -254,11 +270,13 @@ __ext2_permission(struct inode *inode, int mask, int lock) if (current->fsuid == inode->i_uid) { mode >>= 6; } else if (test_opt(inode->i_sb, POSIX_ACL)) { - /* ACL can't contain additional permissions if - the ACL_MASK entry is 0 */ - if (!(mode & S_IRWXG)) + struct ext2_inode_info *ei = EXT2_I(inode); + + /* The access ACL cannot grant access if the group class + permission bits don't contain all requested permissions. 
*/ + if (((mode >> 3) & mask & S_IRWXO) != mask) goto check_groups; - if (EXT2_I(inode)->i_acl == EXT2_ACL_NOT_CACHED) { + if (ei->i_acl == EXT2_ACL_NOT_CACHED) { struct posix_acl *acl; if (lock) { @@ -271,12 +289,11 @@ __ext2_permission(struct inode *inode, int mask, int lock) if (IS_ERR(acl)) return PTR_ERR(acl); posix_acl_release(acl); - if (EXT2_I(inode)->i_acl == EXT2_ACL_NOT_CACHED) + if (ei->i_acl == EXT2_ACL_NOT_CACHED) return -EIO; } - if (EXT2_I(inode)->i_acl) { - int error = posix_acl_permission(inode, - EXT2_I(inode)->i_acl, mask); + if (ei->i_acl) { + int error = posix_acl_permission(inode, ei->i_acl,mask); if (error == -EACCES) goto check_capabilities; return error; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index ed2d1d4e6671..64edd0e25ee3 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -1,7 +1,7 @@ /* * linux/fs/ext2/xattr.c * - * Copyright (C) 2001 by Andreas Gruenbacher, + * Copyright (C) 2001-2003 Andreas Gruenbacher * * Fix by Harrison Xing . * Extended attributes for symlinks and special files added per @@ -83,8 +83,9 @@ EXPORT_SYMBOL(ext2_xattr_set); } while (0) # define ea_bdebug(bh, f...) 
do { \ char b[BDEVNAME_SIZE]; \ - printk(KERN_DEBUG "block %s:%ld: ", \ - bdevname(bh->b_bdev, b), bh->b_blocknr); \ + printk(KERN_DEBUG "block %s:%lu: ", \ + bdevname(bh->b_bdev, b), \ + (unsigned long) bh->b_blocknr); \ printk(f); \ printk("\n"); \ } while (0) @@ -196,7 +197,6 @@ ext2_xattr_handler(int name_index) * Inode operation getxattr() * * dentry->d_inode->i_sem down - * BKL held [before 2.5.x] */ ssize_t ext2_getxattr(struct dentry *dentry, const char *name, @@ -215,7 +215,6 @@ ext2_getxattr(struct dentry *dentry, const char *name, * Inode operation listxattr() * * dentry->d_inode->i_sem down - * BKL held [before 2.5.x] */ ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) @@ -227,7 +226,6 @@ ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) * Inode operation setxattr() * * dentry->d_inode->i_sem down - * BKL held [before 2.5.x] */ int ext2_setxattr(struct dentry *dentry, const char *name, @@ -248,7 +246,6 @@ ext2_setxattr(struct dentry *dentry, const char *name, * Inode operation removexattr() * * dentry->d_inode->i_sem down - * BKL held [before 2.5.x] */ int ext2_removexattr(struct dentry *dentry, const char *name) @@ -278,9 +275,9 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, { struct buffer_head *bh = NULL; struct ext2_xattr_entry *entry; - unsigned int size; + size_t name_len, size; char *end; - int name_len, error; + int error; ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); @@ -376,7 +373,7 @@ ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) { struct buffer_head *bh = NULL; struct ext2_xattr_entry *entry; - unsigned int size = 0; + size_t size = 0; char *buf, *end; int error; @@ -482,8 +479,8 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, struct buffer_head *bh = NULL; struct ext2_xattr_header *header = NULL; struct ext2_xattr_entry *here, *last; - unsigned int name_len; - int 
min_offs = sb->s_blocksize, not_found = 1, free, error; + size_t name_len, free, min_offs = sb->s_blocksize; + int not_found = 1, error; char *end; /* @@ -540,7 +537,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", if ((char *)next >= end) goto bad_block; if (!here->e_value_block && here->e_value_size) { - int offs = le16_to_cpu(here->e_value_offs); + size_t offs = le16_to_cpu(here->e_value_offs); if (offs < min_offs) min_offs = offs; } @@ -560,7 +557,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", if ((char *)next >= end) goto bad_block; if (!last->e_value_block && last->e_value_size) { - int offs = le16_to_cpu(last->e_value_offs); + size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) min_offs = offs; } @@ -584,25 +581,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set", error = 0; if (value == NULL) goto cleanup; - else - free -= EXT2_XATTR_LEN(name_len); } else { /* Request to create an existing attribute? */ error = -EEXIST; if (flags & XATTR_CREATE) goto cleanup; if (!here->e_value_block && here->e_value_size) { - unsigned int size = le32_to_cpu(here->e_value_size); + size_t size = le32_to_cpu(here->e_value_size); if (le16_to_cpu(here->e_value_offs) + size > sb->s_blocksize || size > sb->s_blocksize) goto bad_block; free += EXT2_XATTR_SIZE(size); } + free += EXT2_XATTR_LEN(name_len); } - free -= EXT2_XATTR_SIZE(value_len); error = -ENOSPC; - if (free < 0) + if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len)) goto cleanup; /* Here we know that we can set the new attribute. */ @@ -640,8 +635,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set", if (not_found) { /* Insert the new name. */ - int size = EXT2_XATTR_LEN(name_len); - int rest = (char *)last - (char *)here; + size_t size = EXT2_XATTR_LEN(name_len); + size_t rest = (char *)last - (char *)here; memmove((char *)here + size, here, rest); memset(here, 0, size); here->e_name_index = name_index; @@ -651,7 +646,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* Remove the old value. 
*/ if (!here->e_value_block && here->e_value_size) { char *first_val = (char *)header + min_offs; - int offs = le16_to_cpu(here->e_value_offs); + size_t offs = le16_to_cpu(here->e_value_offs); char *val = (char *)header + offs; size_t size = EXT2_XATTR_SIZE( le32_to_cpu(here->e_value_size)); @@ -663,7 +658,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* Adjust all value offsets. */ last = ENTRY(header+1); while (!IS_LAST_ENTRY(last)) { - int o = le16_to_cpu(last->e_value_offs); + size_t o = le16_to_cpu(last->e_value_offs); if (!last->e_value_block && o < offs) last->e_value_offs = cpu_to_le16(o + size); @@ -678,7 +673,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", goto cleanup; } else { /* Remove the old name. */ - int size = EXT2_XATTR_LEN(name_len); + size_t size = EXT2_XATTR_LEN(name_len); last = ENTRY((char *)last - size); memmove(here, (char*)here + size, (char*)last - (char*)here); @@ -732,9 +727,9 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, * The old block will be released after updating * the inode. */ - ea_bdebug(new_bh, "%s block %ld", + ea_bdebug(new_bh, "%s block %lu", (old_bh == new_bh) ? 
"keeping" : "reusing", - new_bh->b_blocknr); + (unsigned long) new_bh->b_blocknr); error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) @@ -751,8 +746,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, ext2_xattr_cache_insert(new_bh); } else { /* We need to allocate a new block */ - int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + - EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); + int goal = le32_to_cpu(EXT2_SB(sb)->s_es-> + s_first_data_block) + + EXT2_I(inode)->i_block_group * + EXT2_BLOCKS_PER_GROUP(sb); int block = ext2_new_block(inode, goal, 0, 0, &error); if (error) goto cleanup; @@ -857,8 +854,8 @@ ext2_xattr_delete_inode(struct inode *inode) if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ext2_xattr_cache_remove(bh); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); + get_bh(bh); bforget(bh); - bh = NULL; } else { HDR(bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(bh)->h_refcount) - 1); diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index d29f14efb253..94b6f477f1d8 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -1,7 +1,7 @@ /* * linux/fs/ext3/acl.c * - * Copyright (C) 2001 by Andreas Gruenbacher, + * Copyright (C) 2001-2003 Andreas Gruenbacher, */ #include @@ -20,7 +20,7 @@ static struct posix_acl * ext3_acl_from_disk(const void *value, size_t size) { const char *end = (char *)value + size; - int n, count; + size_t n, count; struct posix_acl *acl; if (!value) @@ -86,7 +86,7 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) { ext3_acl_header *ext_acl; char *e; - int n; + size_t n; *size = ext3_acl_size(acl->a_count); ext_acl = (ext3_acl_header *)kmalloc(sizeof(ext3_acl_header) + @@ -133,10 +133,11 @@ fail: static struct posix_acl * ext3_get_acl(struct inode *inode, int type) { + const size_t max_size = ext3_acl_size(EXT3_ACL_MAX_ENTRIES); + struct ext3_inode_info *ei = EXT3_I(inode); int name_index; char *value; - struct posix_acl *acl, **p_acl; - const size_t size = 
ext3_acl_size(EXT3_ACL_MAX_ENTRIES); + struct posix_acl *acl; int retval; if (!test_opt(inode->i_sb, POSIX_ACL)) @@ -144,36 +145,43 @@ ext3_get_acl(struct inode *inode, int type) switch(type) { case ACL_TYPE_ACCESS: - p_acl = &EXT3_I(inode)->i_acl; + if (ei->i_acl != EXT3_ACL_NOT_CACHED) + return posix_acl_dup(ei->i_acl); name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - p_acl = &EXT3_I(inode)->i_default_acl; + if (ei->i_default_acl != EXT3_ACL_NOT_CACHED) + return posix_acl_dup(ei->i_default_acl); name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; break; default: return ERR_PTR(-EINVAL); } - if (*p_acl != EXT3_ACL_NOT_CACHED) - return posix_acl_dup(*p_acl); - value = kmalloc(size, GFP_KERNEL); + value = kmalloc(max_size, GFP_KERNEL); if (!value) return ERR_PTR(-ENOMEM); - retval = ext3_xattr_get(inode, name_index, "", value, size); - - if (retval == -ENODATA || retval == -ENOSYS) - *p_acl = acl = NULL; - else if (retval < 0) - acl = ERR_PTR(retval); - else { + retval = ext3_xattr_get(inode, name_index, "", value, max_size); + acl = ERR_PTR(retval); + if (retval > 0) acl = ext3_acl_from_disk(value, retval); - if (!IS_ERR(acl)) - *p_acl = posix_acl_dup(acl); - } + else if (retval == -ENODATA || retval == -ENOSYS) + acl = NULL; kfree(value); + + if (!IS_ERR(acl)) { + switch(type) { + case ACL_TYPE_ACCESS: + ei->i_acl = posix_acl_dup(acl); + break; + + case ACL_TYPE_DEFAULT: + ei->i_default_acl = posix_acl_dup(acl); + break; + } + } return acl; } @@ -186,9 +194,9 @@ static int ext3_set_acl(handle_t *handle, struct inode *inode, int type, struct posix_acl *acl) { + struct ext3_inode_info *ei = EXT3_I(inode); int name_index; void *value = NULL; - struct posix_acl **p_acl; size_t size; int error; @@ -198,7 +206,6 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, switch(type) { case ACL_TYPE_ACCESS: name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; - p_acl = &EXT3_I(inode)->i_acl; if (acl) { mode_t mode = inode->i_mode; error = 
posix_acl_equiv_mode(acl, &mode); @@ -215,7 +222,6 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_DEFAULT: name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; - p_acl = &EXT3_I(inode)->i_default_acl; if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; break; @@ -231,14 +237,25 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, return (int)PTR_ERR(value); } - error = ext3_xattr_set_handle(handle, inode, name_index, "", value, size, 0); + error = ext3_xattr_set_handle(handle, inode, name_index, "", + value, size, 0); if (value) kfree(value); if (!error) { - if (*p_acl && *p_acl != EXT3_ACL_NOT_CACHED) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); + switch(type) { + case ACL_TYPE_ACCESS: + if (ei->i_acl != EXT3_ACL_NOT_CACHED) + posix_acl_release(ei->i_acl); + ei->i_acl = posix_acl_dup(acl); + break; + + case ACL_TYPE_DEFAULT: + if (ei->i_default_acl != EXT3_ACL_NOT_CACHED) + posix_acl_release(ei->i_default_acl); + ei->i_default_acl = posix_acl_dup(acl); + break; + } } return error; } @@ -258,11 +275,13 @@ __ext3_permission(struct inode *inode, int mask, int lock) if (current->fsuid == inode->i_uid) { mode >>= 6; } else if (test_opt(inode->i_sb, POSIX_ACL)) { - /* ACL can't contain additional permissions if - the ACL_MASK entry is 0 */ - if (!(mode & S_IRWXG)) + struct ext3_inode_info *ei = EXT3_I(inode); + + /* The access ACL cannot grant access if the group class + permission bits don't contain all requested permissions. 
*/ + if (((mode >> 3) & mask & S_IRWXO) != mask) goto check_groups; - if (EXT3_I(inode)->i_acl == EXT3_ACL_NOT_CACHED) { + if (ei->i_acl == EXT3_ACL_NOT_CACHED) { struct posix_acl *acl; if (lock) { @@ -275,12 +294,11 @@ __ext3_permission(struct inode *inode, int mask, int lock) if (IS_ERR(acl)) return PTR_ERR(acl); posix_acl_release(acl); - if (EXT3_I(inode)->i_acl == EXT3_ACL_NOT_CACHED) + if (ei->i_acl == EXT3_ACL_NOT_CACHED) return -EIO; } - if (EXT3_I(inode)->i_acl) { - int error = posix_acl_permission(inode, - EXT3_I(inode)->i_acl, mask); + if (ei->i_acl) { + int error = posix_acl_permission(inode, ei->i_acl,mask); if (error == -EACCES) goto check_capabilities; return error; diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 066316459d83..5069ec2c5b62 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -1,7 +1,7 @@ /* * linux/fs/ext3/xattr.c * - * Copyright (C) 2001 by Andreas Gruenbacher, + * Copyright (C) 2001-2003 Andreas Gruenbacher, * * Fix by Harrison Xing . * Ext3 code with a lot of help from Eric Jarman . @@ -63,8 +63,6 @@ #include "xattr.h" #include "acl.h" -#define EXT3_EA_USER "user." - #define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) #define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) #define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) @@ -79,8 +77,9 @@ } while (0) # define ea_bdebug(bh, f...) 
do { \ char b[BDEVNAME_SIZE]; \ - printk(KERN_DEBUG "block %s:%ld: ", \ - bdevname(bh->b_bdev, b), bh->b_blocknr); \ + printk(KERN_DEBUG "block %s:%lu: ", \ + bdevname(bh->b_bdev, b), \ + (unsigned long) bh->b_blocknr); \ printk(f); \ printk("\n"); \ } while (0) @@ -271,9 +270,9 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name, { struct buffer_head *bh = NULL; struct ext3_xattr_entry *entry; - unsigned int size; + size_t name_len, size; char *end; - int name_len, error; + int error; ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); @@ -369,7 +368,7 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) { struct buffer_head *bh = NULL; struct ext3_xattr_entry *entry; - unsigned int size = 0; + size_t size = 0; char *buf, *end; int error; @@ -478,8 +477,8 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, struct buffer_head *bh = NULL; struct ext3_xattr_header *header = NULL; struct ext3_xattr_entry *here, *last; - unsigned int name_len; - int min_offs = sb->s_blocksize, not_found = 1, free, error; + size_t name_len, free, min_offs = sb->s_blocksize; + int not_found = 1, error; char *end; /* @@ -536,7 +535,7 @@ bad_block: ext3_error(sb, "ext3_xattr_set", if ((char *)next >= end) goto bad_block; if (!here->e_value_block && here->e_value_size) { - int offs = le16_to_cpu(here->e_value_offs); + size_t offs = le16_to_cpu(here->e_value_offs); if (offs < min_offs) min_offs = offs; } @@ -556,7 +555,7 @@ bad_block: ext3_error(sb, "ext3_xattr_set", if ((char *)next >= end) goto bad_block; if (!last->e_value_block && last->e_value_size) { - int offs = le16_to_cpu(last->e_value_offs); + size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) min_offs = offs; } @@ -580,25 +579,23 @@ bad_block: ext3_error(sb, "ext3_xattr_set", error = 0; if (value == NULL) goto cleanup; - else - free -= EXT3_XATTR_LEN(name_len); } else { /* Request to create an 
existing attribute? */ error = -EEXIST; if (flags & XATTR_CREATE) goto cleanup; if (!here->e_value_block && here->e_value_size) { - unsigned int size = le32_to_cpu(here->e_value_size); + size_t size = le32_to_cpu(here->e_value_size); if (le16_to_cpu(here->e_value_offs) + size > sb->s_blocksize || size > sb->s_blocksize) goto bad_block; free += EXT3_XATTR_SIZE(size); } + free += EXT3_XATTR_LEN(name_len); } - free -= EXT3_XATTR_SIZE(value_len); error = -ENOSPC; - if (free < 0) + if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(value_len)) goto cleanup; /* Here we know that we can set the new attribute. */ @@ -639,8 +636,8 @@ bad_block: ext3_error(sb, "ext3_xattr_set", if (not_found) { /* Insert the new name. */ - int size = EXT3_XATTR_LEN(name_len); - int rest = (char *)last - (char *)here; + size_t size = EXT3_XATTR_LEN(name_len); + size_t rest = (char *)last - (char *)here; memmove((char *)here + size, here, rest); memset(here, 0, size); here->e_name_index = name_index; @@ -650,7 +647,7 @@ bad_block: ext3_error(sb, "ext3_xattr_set", /* Remove the old value. */ if (!here->e_value_block && here->e_value_size) { char *first_val = (char *)header + min_offs; - int offs = le16_to_cpu(here->e_value_offs); + size_t offs = le16_to_cpu(here->e_value_offs); char *val = (char *)header + offs; size_t size = EXT3_XATTR_SIZE( le32_to_cpu(here->e_value_size)); @@ -662,7 +659,7 @@ bad_block: ext3_error(sb, "ext3_xattr_set", /* Adjust all value offsets. */ last = ENTRY(header+1); while (!IS_LAST_ENTRY(last)) { - int o = le16_to_cpu(last->e_value_offs); + size_t o = le16_to_cpu(last->e_value_offs); if (!last->e_value_block && o < offs) last->e_value_offs = cpu_to_le16(o + size); @@ -678,7 +675,7 @@ bad_block: ext3_error(sb, "ext3_xattr_set", goto cleanup; } else { /* Remove the old name. 
*/ - int size = EXT3_XATTR_LEN(name_len); + size_t size = EXT3_XATTR_LEN(name_len); last = ENTRY((char *)last - size); memmove(here, (char*)here + size, (char*)last - (char*)here); @@ -733,9 +730,9 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode, * The old block will be released after updating * the inode. */ - ea_bdebug(new_bh, "%s block %ld", + ea_bdebug(new_bh, "%s block %lu", (old_bh == new_bh) ? "keeping" : "reusing", - new_bh->b_blocknr); + (unsigned long) new_bh->b_blocknr); error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) @@ -759,7 +756,7 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode, EXT3_SB(sb)->s_es->s_first_data_block) + EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); - int block = ext3_new_block(handle, + int block = ext3_new_block(handle, inode, goal, 0, 0, &error); if (error) goto cleanup; @@ -895,8 +892,8 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ext3_xattr_cache_remove(bh); ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); + get_bh(bh); ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); - bh = NULL; } else { HDR(bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(bh)->h_refcount) - 1); -- cgit v1.2.3 From 3edb027feea281c11a7ef5e56bcf3489878d3056 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:40:50 -0700 Subject: [PATCH] xattr: blockdev inode selection fix From: Andreas Gruenbacher The inode->i_bdev field is not the same as inode->i_sb->s_bdev or bh->b_bdev. We must compare inode->i_sb->s_bdev with bh->b_bdev, or else equal extended attribute blocks will not be found.
--- fs/ext2/xattr.c | 5 +++-- fs/ext3/xattr.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 64edd0e25ee3..248f5076f322 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -973,7 +973,8 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) if (!header->h_hash) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); - ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_bdev, hash); + ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, + inode->i_sb->s_bdev, hash); while (ce) { struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); @@ -993,7 +994,7 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) return bh; } brelse(bh); - ce = mb_cache_entry_find_next(ce, 0, inode->i_bdev, hash); + ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); } return NULL; } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 5069ec2c5b62..578cfad5f6bf 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -1010,7 +1010,8 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) if (!header->h_hash) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); - ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_bdev, hash); + ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, + inode->i_sb->s_bdev, hash); while (ce) { struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); @@ -1030,7 +1031,7 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) return bh; } brelse(bh); - ce = mb_cache_entry_find_next(ce, 0, inode->i_bdev, hash); + ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); } return NULL; } -- cgit v1.2.3 From a39afa31c59075459f1b0f89d9e6e3446d0ff6a2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:40:57 -0700 Subject: [PATCH] xattr: update-in-place optimisation From: 
Andreas Gruenbacher It is common to update extended attributes without changing the value's length. This patch optimizes this case. In addition to that, the current code tries to recognize early when extended attribute blocks become empty. This optimization is not of significant value, so this patch removes it, and moves the empty block test further down. --- fs/ext2/xattr.c | 42 ++++++++++++++++++++++++++---------------- fs/ext3/xattr.c | 43 ++++++++++++++++++++++++++----------------- 2 files changed, 52 insertions(+), 33 deletions(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 248f5076f322..2dc9c65452a2 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -643,13 +643,24 @@ bad_block: ext2_error(sb, "ext2_xattr_set", here->e_name_len = name_len; memcpy(here->e_name, name, name_len); } else { - /* Remove the old value. */ if (!here->e_value_block && here->e_value_size) { char *first_val = (char *)header + min_offs; size_t offs = le16_to_cpu(here->e_value_offs); char *val = (char *)header + offs; size_t size = EXT2_XATTR_SIZE( le32_to_cpu(here->e_value_size)); + + if (size == EXT2_XATTR_SIZE(value_len)) { + /* The old and the new value have the same + size. Just replace. */ + here->e_value_size = cpu_to_le32(value_len); + memset(val + size - EXT2_XATTR_PAD, 0, + EXT2_XATTR_PAD); /* Clear pad bytes. */ + memcpy(val, value, value_len); + goto skip_replace; + } + + /* Remove the old value. */ memmove(first_val + size, first_val, val - first_val); memset(first_val, 0, size); here->e_value_offs = 0; @@ -666,19 +677,12 @@ bad_block: ext2_error(sb, "ext2_xattr_set", } } if (value == NULL) { - /* Remove this attribute. */ - if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { - /* This block is now empty. */ - error = ext2_xattr_set2(inode, bh, NULL); - goto cleanup; - } else { - /* Remove the old name. 
*/ - size_t size = EXT2_XATTR_LEN(name_len); - last = ENTRY((char *)last - size); - memmove(here, (char*)here + size, - (char*)last - (char*)here); - memset(last, 0, size); - } + /* Remove the old name. */ + size_t size = EXT2_XATTR_LEN(name_len); + last = ENTRY((char *)last - size); + memmove(here, (char*)here + size, + (char*)last - (char*)here); + memset(last, 0, size); } } @@ -695,9 +699,15 @@ bad_block: ext2_error(sb, "ext2_xattr_set", memcpy(val, value, value_len); } } - ext2_xattr_rehash(header, here); - error = ext2_xattr_set2(inode, bh, header); +skip_replace: + if (IS_LAST_ENTRY(ENTRY(header+1))) { + /* This block is now empty. */ + error = ext2_xattr_set2(inode, bh, NULL); + } else { + ext2_xattr_rehash(header, here); + error = ext2_xattr_set2(inode, bh, header); + } cleanup: brelse(bh); diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 578cfad5f6bf..0db74f942296 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -644,13 +644,24 @@ bad_block: ext3_error(sb, "ext3_xattr_set", here->e_name_len = name_len; memcpy(here->e_name, name, name_len); } else { - /* Remove the old value. */ if (!here->e_value_block && here->e_value_size) { char *first_val = (char *)header + min_offs; size_t offs = le16_to_cpu(here->e_value_offs); char *val = (char *)header + offs; size_t size = EXT3_XATTR_SIZE( le32_to_cpu(here->e_value_size)); + + if (size == EXT3_XATTR_SIZE(value_len)) { + /* The old and the new value have the same + size. Just replace. */ + here->e_value_size = cpu_to_le32(value_len); + memset(val + size - EXT3_XATTR_PAD, 0, + EXT3_XATTR_PAD); /* Clear pad bytes. */ + memcpy(val, value, value_len); + goto skip_replace; + } + + /* Remove the old value. */ memmove(first_val + size, first_val, val - first_val); memset(first_val, 0, size); here->e_value_offs = 0; @@ -667,20 +678,12 @@ bad_block: ext3_error(sb, "ext3_xattr_set", } } if (value == NULL) { - /* Remove this attribute. */ - if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { - /* This block is now empty. 
*/ - error = ext3_xattr_set_handle2(handle, inode, - bh, NULL); - goto cleanup; - } else { - /* Remove the old name. */ - size_t size = EXT3_XATTR_LEN(name_len); - last = ENTRY((char *)last - size); - memmove(here, (char*)here + size, - (char*)last - (char*)here); - memset(last, 0, size); - } + /* Remove the old name. */ + size_t size = EXT3_XATTR_LEN(name_len); + last = ENTRY((char *)last - size); + memmove(here, (char*)here + size, + (char*)last - (char*)here); + memset(last, 0, size); } } @@ -697,9 +700,15 @@ bad_block: ext3_error(sb, "ext3_xattr_set", memcpy(val, value, value_len); } } - ext3_xattr_rehash(header, here); - error = ext3_xattr_set_handle2(handle, inode, bh, header); +skip_replace: + if (IS_LAST_ENTRY(ENTRY(header+1))) { + /* This block is now empty. */ + error = ext3_xattr_set_handle2(handle, inode, bh, NULL); + } else { + ext3_xattr_rehash(header, here); + error = ext3_xattr_set_handle2(handle, inode, bh, header); + } cleanup: brelse(bh); -- cgit v1.2.3 From 430cab6d8dc4b79ad691c1d50d7ef217777e2593 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:41:05 -0700 Subject: [PATCH] xattrr: preparation for fine-grained locking From: Andreas Gruenbacher Andrew Morton found that there is lock contention between extended attribute operations (like reading ACLs, which `ls -l' needs to do) and other operations on the same files. This is due to the fact that all extended attribute syscalls take inode->i_sem before calling into the filesystem code. To fix this problem, this patch no longer takes inode->i_sem in the getxattr and listxattr syscalls, and moves the lock taking code into the file systems. (Another patch improves the locking strategy in ext2 and ext3.) 
--- Documentation/filesystems/Locking | 4 ++-- fs/ext2/xattr.c | 15 +++++++++++++-- fs/ext3/xattr.c | 15 +++++++++++++-- fs/jfs/xattr.c | 22 ++++++++++++++++++++-- fs/xattr.c | 4 ---- fs/xfs/linux/xfs_iops.c | 34 ++++++++++++++++++++++++++++++++-- 6 files changed, 80 insertions(+), 14 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 56d482903188..969c8726f208 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -68,8 +68,8 @@ setattr: yes permission: no getattr: no setxattr: yes -getxattr: yes -listxattr: yes +getxattr: no +listxattr: no removexattr: yes Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_sem on victim. diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 2dc9c65452a2..aa29871da68e 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -204,11 +204,16 @@ ext2_getxattr(struct dentry *dentry, const char *name, { struct ext2_xattr_handler *handler; struct inode *inode = dentry->d_inode; + ssize_t error; handler = ext2_xattr_resolve_name(&name); if (!handler) return -EOPNOTSUPP; - return handler->get(inode, name, buffer, size); + down(&inode->i_sem); + error = handler->get(inode, name, buffer, size); + up(&inode->i_sem); + + return error; } /* @@ -219,7 +224,13 @@ ext2_getxattr(struct dentry *dentry, const char *name, ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) { - return ext2_xattr_list(dentry->d_inode, buffer, size); + ssize_t error; + + down(&dentry->d_inode->i_sem); + error = ext2_xattr_list(dentry->d_inode, buffer, size); + up(&dentry->d_inode->i_sem); + + return error; } /* diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 0db74f942296..b89f8be46f0f 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -199,11 +199,16 @@ ext3_getxattr(struct dentry *dentry, const char *name, { struct ext3_xattr_handler *handler; struct inode *inode = dentry->d_inode; + ssize_t error; handler = ext3_xattr_resolve_name(&name); if 
(!handler) return -EOPNOTSUPP; - return handler->get(inode, name, buffer, size); + down(&inode->i_sem); + error = handler->get(inode, name, buffer, size); + up(&inode->i_sem); + + return error; } /* @@ -214,7 +219,13 @@ ext3_getxattr(struct dentry *dentry, const char *name, ssize_t ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) { - return ext3_xattr_list(dentry->d_inode, buffer, size); + ssize_t error; + + down(&dentry->d_inode->i_sem); + error = ext3_xattr_list(dentry->d_inode, buffer, size); + up(&dentry->d_inode->i_sem); + + return error; } /* diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 4ae1b0ffaf05..79126b49c813 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -964,10 +964,17 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, size_t buf_size) { - return __jfs_getxattr(dentry->d_inode, name, data, buf_size); + int err; + + down(&dentry->d_inode->i_sem); + err = __jfs_getxattr(dentry->d_inode, name, data, buf_size); + up(&dentry->d_inode->i_sem); + + return err; } -ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) +static ssize_t __jfs_listxattr(struct dentry * dentry, char *data, + size_t buf_size) { struct inode *inode = dentry->d_inode; char *buffer; @@ -1013,6 +1020,17 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) return size; } +ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) +{ + int err; + + down(&dentry->d_inode->i_sem); + err = __jfs_listxattr(dentry, data, buf_size); + up(&dentry->d_inode->i_sem); + + return err; +} + int jfs_removexattr(struct dentry *dentry, const char *name) { return __jfs_setxattr(dentry->d_inode, name, 0, 0, XATTR_REPLACE); diff --git a/fs/xattr.c b/fs/xattr.c index 2a2b6d53af1e..37d2a109eef7 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -160,9 +160,7 @@ getxattr(struct dentry *d, char *name, void *value, size_t size) error = 
security_inode_getxattr(d, kname); if (error) goto out; - down(&d->d_inode->i_sem); error = d->d_inode->i_op->getxattr(d, kname, kvalue, size); - up(&d->d_inode->i_sem); } if (kvalue && error > 0) @@ -233,9 +231,7 @@ listxattr(struct dentry *d, char *list, size_t size) error = security_inode_listxattr(d); if (error) goto out; - down(&d->d_inode->i_sem); error = d->d_inode->i_op->listxattr(d, klist, size); - up(&d->d_inode->i_sem); } if (klist && error > 0) diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c index 8a90e5495a3a..59f520390143 100644 --- a/fs/xfs/linux/xfs_iops.c +++ b/fs/xfs/linux/xfs_iops.c @@ -642,7 +642,7 @@ linvfs_setxattr( } STATIC ssize_t -linvfs_getxattr( +__linvfs_getxattr( struct dentry *dentry, const char *name, void *data, @@ -697,9 +697,24 @@ linvfs_getxattr( return -EOPNOTSUPP; } +STATIC ssize_t +linvfs_getxattr( + struct dentry *dentry, + const char *name, + void *data, + size_t size) +{ + int error; + + down(&dentry->d_inode->i_sem); + error = __linvfs_getxattr(dentry, name, data, size); + up(&dentry->d_inode->i_sem); + + return error; +} STATIC ssize_t -linvfs_listxattr( +__linvfs_listxattr( struct dentry *dentry, char *data, size_t size) @@ -741,6 +756,21 @@ linvfs_listxattr( return result; } +STATIC ssize_t +linvfs_listxattr( + struct dentry *dentry, + char *data, + size_t size) +{ + int error; + + down(&dentry->d_inode->i_sem); + error = __linvfs_listxattr(dentry, data, size); + up(&dentry->d_inode->i_sem); + + return error; +} + STATIC int linvfs_removexattr( struct dentry *dentry, -- cgit v1.2.3 From 6abc05cce8b06b9c986fb2bbd83e6fa9888ab1be Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:41:12 -0700 Subject: [PATCH] xattr: fine-grained locking From: Andreas Gruenbacher This patch removes the dependency on i_sem in the getxattr and listxattr iops of ext2 and ext3. In addition, the global ext[23]_xattr semaphores go away. 
Instead of i_sem and the global semaphore, mutual exclusion is now ensured by per-inode xattr semaphores, and by locking the buffers before modifying them. The detailed locking strategy is described in comments in fs/ext[23]/xattr.c. Due to this change it is no longer necessary to take i_sem in ext[23]_permission() for retrieving acls, so the ext[23]_permission_locked() functions go away. Additionally, the patch fixes a race condition in ext[23]_permission: Accessing inode->i_acl was protected by the BKL in 2.4; in 2.5 there no longer is such protection. Instead, inode->i_acl (and inode->i_default_acl) are now accessed under inode->i_lock. (This could be replaced by RCU in the future.) In the ext3 extended attribute code, a new ugliness results from locking at the buffer head level: The buffer lock must be held between testing if an xattr block can be modified and the actual modification to prevent races from happening. Before a block can be modified, ext3_journal_get_write_access() must be called. But this requires an unlocked buffer, so I call ext3_journal_get_write_access() before locking the buffer. If it turns out that the buffer cannot be modified, journal_release_buffer() is called. Calling ext3_journal_get_write_access after the test but while the buffer is still locked would be much better.
--- fs/ext2/acl.c | 104 +++++++++++------------- fs/ext2/acl.h | 1 - fs/ext2/ext2.h | 10 +++ fs/ext2/super.c | 3 + fs/ext2/xattr.c | 155 ++++++++++++++++++----------------- fs/ext2/xattr_user.c | 12 --- fs/ext3/acl.c | 99 +++++++++++------------ fs/ext3/acl.h | 1 - fs/ext3/super.c | 3 + fs/ext3/xattr.c | 201 +++++++++++++++++++++++++--------------------- fs/ext3/xattr_user.c | 12 --- include/linux/ext3_fs_i.h | 10 +++ 12 files changed, 311 insertions(+), 300 deletions(-) diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index bc95b66ff023..0df165f8ee01 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -124,14 +124,38 @@ fail: return ERR_PTR(-EINVAL); } +static inline struct posix_acl * +ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT2_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT2_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT2_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + /* - * inode->i_sem: down + * inode->i_sem: don't care */ static struct posix_acl * ext2_get_acl(struct inode *inode, int type) { const size_t max_size = ext2_acl_size(EXT2_ACL_MAX_ENTRIES); - struct ext2_inode_inode *ei = EXT2_I(inode); + struct ext2_inode_info *ei = EXT2_I(inode); int name_index; char *value; struct posix_acl *acl; @@ -142,14 +166,16 @@ ext2_get_acl(struct inode *inode, int type) switch(type) { case ACL_TYPE_ACCESS: - if (ei->i_acl != EXT2_ACL_NOT_CACHED) - return posix_acl_dup(ei->i_acl); + acl = ext2_iget_acl(inode, &ei->i_acl); + if (acl != EXT2_ACL_NOT_CACHED) + return acl; name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - if (ei->i_default_acl != EXT2_ACL_NOT_CACHED) - return posix_acl_dup(ei->i_default_acl); + 
acl = ext2_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT2_ACL_NOT_CACHED) + return acl; name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; break; @@ -171,11 +197,11 @@ ext2_get_acl(struct inode *inode, int type) if (!IS_ERR(acl)) { switch(type) { case ACL_TYPE_ACCESS: - ei->i_acl = posix_acl_dup(acl); + ext2_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - ei->i_default_acl = posix_acl_dup(acl); + ext2_iset_acl(inode, &ei->i_default_acl, acl); break; } } @@ -240,23 +266,24 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) if (!error) { switch(type) { case ACL_TYPE_ACCESS: - if (ei->i_acl != EXT2_ACL_NOT_CACHED) - posix_acl_release(ei->i_acl); - ei->i_acl = posix_acl_dup(acl); + ext2_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - if (ei->i_default_acl != EXT2_ACL_NOT_CACHED) - posix_acl_release(ei->i_default_acl); - ei->i_default_acl = posix_acl_dup(acl); + ext2_iset_acl(inode, &ei->i_default_acl, acl); break; } } return error; } -static int -__ext2_permission(struct inode *inode, int mask, int lock) +/* + * Inode operation permission(). + * + * inode->i_sem: don't care + */ +int +ext2_permission(struct inode *inode, int mask, struct nameidata *nd) { int mode = inode->i_mode; @@ -270,30 +297,16 @@ __ext2_permission(struct inode *inode, int mask, int lock) if (current->fsuid == inode->i_uid) { mode >>= 6; } else if (test_opt(inode->i_sb, POSIX_ACL)) { - struct ext2_inode_info *ei = EXT2_I(inode); + struct posix_acl *acl; /* The access ACL cannot grant access if the group class permission bits don't contain all requested permissions. 
*/ if (((mode >> 3) & mask & S_IRWXO) != mask) goto check_groups; - if (ei->i_acl == EXT2_ACL_NOT_CACHED) { - struct posix_acl *acl; - - if (lock) { - down(&inode->i_sem); - acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); - up(&inode->i_sem); - } else - acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); - - if (IS_ERR(acl)) - return PTR_ERR(acl); + acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); - if (ei->i_acl == EXT2_ACL_NOT_CACHED) - return -EIO; - } - if (ei->i_acl) { - int error = posix_acl_permission(inode, ei->i_acl,mask); if (error == -EACCES) goto check_capabilities; return error; @@ -319,33 +332,11 @@ check_capabilities: return -EACCES; } -/* - * Inode operation permission(). - * - * inode->i_sem: up - * BKL held [before 2.5.x] - */ -int -ext2_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return __ext2_permission(inode, mask, 1); -} - -/* - * Used internally if i_sem is already down. - */ -int -ext2_permission_locked(struct inode *inode, int mask) -{ - return __ext2_permission(inode, mask, 0); -} - /* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * * dir->i_sem: down * inode->i_sem: up (access to inode is still exclusive) - * BKL held [before 2.5.x] */ int ext2_init_acl(struct inode *inode, struct inode *dir) @@ -405,7 +396,6 @@ cleanup: * file mode. 
* * inode->i_sem: down - * BKL held [before 2.5.x] */ int ext2_acl_chmod(struct inode *inode) diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 2e0560130b63..01937daf1168 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -60,7 +60,6 @@ static inline int ext2_acl_count(size_t size) /* acl.c */ extern int ext2_permission (struct inode *, int, struct nameidata *); -extern int ext2_permission_locked (struct inode *, int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 610695289845..67f704ab1258 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -41,6 +41,16 @@ struct ext2_inode_info { __u32 i_prealloc_block; __u32 i_prealloc_count; __u32 i_dir_start_lookup; +#ifdef CONFIG_EXT2_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_sem even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. 
+ */ + struct rw_semaphore xattr_sem; +#endif #ifdef CONFIG_EXT2_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index c4604187f186..14b8cca47277 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -177,6 +177,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { rwlock_init(&ei->i_meta_lock); +#ifdef CONFIG_EXT2_FS_XATTR + init_rwsem(&ei->xattr_sem); +#endif inode_init_once(&ei->vfs_inode); } } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index aa29871da68e..f1334adc62ed 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -42,13 +42,12 @@ * * Locking strategy * ---------------- - * The VFS already holds the BKL and the inode->i_sem semaphore when any of - * the xattr inode operations are called, so we are guaranteed that only one - * processes accesses extended attributes of an inode at any time. - * - * For writing we also grab the ext2_xattr_sem semaphore. This ensures that - * only a single process is modifying an extended attribute block, even - * if the block is shared among inodes. + * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count will change. Multiple writers to an EA block are synchronized + * by the bh lock. No more than a single bh lock is held at any time + * to avoid deadlocks. */ #include @@ -57,7 +56,7 @@ #include #include #include -#include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -105,15 +104,6 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *, struct ext2_xattr_entry *); static struct mb_cache *ext2_xattr_cache; - -/* - * If a file system does not share extended attributes among inodes, - * we should not need the ext2_xattr_sem semaphore. 
However, the - * filesystem may still contain shared blocks, so we always take - * the lock. - */ - -static DECLARE_MUTEX(ext2_xattr_sem); static struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; static rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; @@ -196,7 +186,7 @@ ext2_xattr_handler(int name_index) /* * Inode operation getxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext2_getxattr(struct dentry *dentry, const char *name, @@ -204,39 +194,28 @@ ext2_getxattr(struct dentry *dentry, const char *name, { struct ext2_xattr_handler *handler; struct inode *inode = dentry->d_inode; - ssize_t error; handler = ext2_xattr_resolve_name(&name); if (!handler) return -EOPNOTSUPP; - down(&inode->i_sem); - error = handler->get(inode, name, buffer, size); - up(&inode->i_sem); - - return error; + return handler->get(inode, name, buffer, size); } /* * Inode operation listxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) { - ssize_t error; - - down(&dentry->d_inode->i_sem); - error = ext2_xattr_list(dentry->d_inode, buffer, size); - up(&dentry->d_inode->i_sem); - - return error; + return ext2_xattr_list(dentry->d_inode, buffer, size); } /* * Inode operation setxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext2_setxattr(struct dentry *dentry, const char *name, @@ -256,7 +235,7 @@ ext2_setxattr(struct dentry *dentry, const char *name, /* * Inode operation removexattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext2_removexattr(struct dentry *dentry, const char *name) @@ -295,12 +274,15 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, if (name == NULL) return -EINVAL; + down_read(&EXT2_I(inode)->xattr_sem); + error = -ENODATA; if (!EXT2_I(inode)->i_file_acl) - return -ENODATA; + goto cleanup; ea_idebug(inode, "reading block 
%d", EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -365,6 +347,7 @@ found: cleanup: brelse(bh); + up_read(&EXT2_I(inode)->xattr_sem); return error; } @@ -391,12 +374,15 @@ ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); + down_read(&EXT2_I(inode)->xattr_sem); + error = 0; if (!EXT2_I(inode)->i_file_acl) - return 0; + goto cleanup; ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -449,6 +435,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", cleanup: brelse(bh); + up_read(&EXT2_I(inode)->xattr_sem); return error; } @@ -520,8 +507,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, name_len = strlen(name); if (name_len > 255 || value_len > sb->s_blocksize) return -ERANGE; - down(&ext2_xattr_sem); - + down_write(&EXT2_I(inode)->xattr_sem); if (EXT2_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. */ bh = sb_bread(sb, EXT2_I(inode)->i_file_acl); @@ -614,12 +600,16 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* Here we know that we can set the new attribute. */ if (header) { + /* assert(header == HDR(bh)); */ + lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { ea_bdebug(bh, "modifying in-place"); ext2_xattr_cache_remove(bh); + /* keep the buffer locked while modifying it. 
*/ } else { int offset; + unlock_buffer(bh); ea_bdebug(bh, "cloning"); header = kmalloc(bh->b_size, GFP_KERNEL); error = -ENOMEM; @@ -644,6 +634,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set", last = here = ENTRY(header+1); } + /* Iff we are modifying the block in-place, bh is locked here. */ + if (not_found) { /* Insert the new name. */ size_t size = EXT2_XATTR_LEN(name_len); @@ -714,9 +706,13 @@ bad_block: ext2_error(sb, "ext2_xattr_set", skip_replace: if (IS_LAST_ENTRY(ENTRY(header+1))) { /* This block is now empty. */ + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext2_xattr_set2(inode, bh, NULL); } else { ext2_xattr_rehash(header, here); + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext2_xattr_set2(inode, bh, header); } @@ -724,7 +720,7 @@ cleanup: brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); - up(&ext2_xattr_sem); + up_write(&EXT2_I(inode)->xattr_sem); return error; } @@ -744,24 +740,28 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, new_bh = ext2_xattr_cache_find(inode, header); if (new_bh) { /* - * We found an identical block in the cache. - * The old block will be released after updating - * the inode. + * We found an identical block in the cache. The + * block returned is locked. The old block will + * be released after updating the inode. */ ea_bdebug(new_bh, "%s block %lu", (old_bh == new_bh) ? "keeping" : "reusing", (unsigned long) new_bh->b_blocknr); error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + unlock_buffer(new_bh); goto cleanup; + } HDR(new_bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ea_bdebug(new_bh, "refcount now=%d", le32_to_cpu(HDR(new_bh)->h_refcount)); + unlock_buffer(new_bh); } else if (old_bh && header == HDR(old_bh)) { - /* Keep this block. */ + /* Keep this block. 
No need to lock the block as we + don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); ext2_xattr_cache_insert(new_bh); @@ -812,12 +812,11 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, error = 0; if (old_bh && old_bh != new_bh) { /* - * If there was an old block, and we are not still using it, - * we now release the old block. - */ - unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); - - if (refcount == 1) { + * If there was an old block and we are no longer using it, + * release the old block. + */ + lock_buffer(old_bh); + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); ext2_free_blocks(inode, old_bh->b_blocknr, 1); @@ -827,12 +826,14 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, bforget(old_bh); } else { /* Decrement the refcount only. */ - refcount--; - HDR(old_bh)->h_refcount = cpu_to_le32(refcount); + HDR(old_bh)->h_refcount = cpu_to_le32( + le32_to_cpu(HDR(old_bh)->h_refcount) - 1); DQUOT_FREE_BLOCK(inode, 1); mark_buffer_dirty(old_bh); - ea_bdebug(old_bh, "refcount now=%d", refcount); + ea_bdebug(old_bh, "refcount now=%d", + le32_to_cpu(HDR(old_bh)->h_refcount)); } + unlock_buffer(old_bh); } cleanup: @@ -850,12 +851,11 @@ cleanup: void ext2_xattr_delete_inode(struct inode *inode) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; + down_write(&EXT2_I(inode)->xattr_sem); if (!EXT2_I(inode)->i_file_acl) - return; - down(&ext2_xattr_sem); - + goto cleanup; bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); if (!bh) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", @@ -871,7 +871,7 @@ ext2_xattr_delete_inode(struct inode *inode) EXT2_I(inode)->i_file_acl); goto cleanup; } - ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + lock_buffer(bh); if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ext2_xattr_cache_remove(bh); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); @@ -885,11 +885,13 @@ 
ext2_xattr_delete_inode(struct inode *inode) sync_dirty_buffer(bh); DQUOT_FREE_BLOCK(inode, 1); } + ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + unlock_buffer(bh); EXT2_I(inode)->i_file_acl = 0; cleanup: brelse(bh); - up(&ext2_xattr_sem); + up_write(&EXT2_I(inode)->xattr_sem); } /* @@ -982,8 +984,8 @@ ext2_xattr_cmp(struct ext2_xattr_header *header1, * * Find an identical extended attribute block. * - * Returns a pointer to the block found, or NULL if such a block was - * not found or an error occurred. + * Returns a locked buffer head to the block found, or NULL if such + * a block was not found or an error occurred. */ static struct buffer_head * ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) @@ -1003,18 +1005,23 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) ext2_error(inode->i_sb, "ext2_xattr_cache_find", "inode %ld: block %ld read error", inode->i_ino, (unsigned long) ce->e_block); - } else if (le32_to_cpu(HDR(bh)->h_refcount) > - EXT2_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %ld refcount %d>%d", - (unsigned long) ce->e_block, - le32_to_cpu(HDR(bh)->h_refcount), - EXT2_XATTR_REFCOUNT_MAX); - } else if (!ext2_xattr_cmp(header, HDR(bh))) { - ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); - mb_cache_entry_release(ce); - return bh; + } else { + lock_buffer(bh); + if (le32_to_cpu(HDR(bh)->h_refcount) > + EXT2_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %ld refcount %d>%d", + (unsigned long) ce->e_block, + le32_to_cpu(HDR(bh)->h_refcount), + EXT2_XATTR_REFCOUNT_MAX); + } else if (!ext2_xattr_cmp(header, HDR(bh))) { + ea_bdebug(bh, "b_count=%d", + atomic_read(&(bh->b_count))); + mb_cache_entry_release(ce); + return bh; + } + unlock_buffer(bh); + brelse(bh); } - brelse(bh); ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); } return NULL; diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index fc0ec86f4928..be1558761064 100644 --- 
a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -11,10 +11,6 @@ #include "ext2.h" #include "xattr.h" -#ifdef CONFIG_EXT2_FS_POSIX_ACL -# include "acl.h" -#endif - #define XATTR_USER_PREFIX "user." static size_t @@ -44,11 +40,7 @@ ext2_xattr_user_get(struct inode *inode, const char *name, return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - error = ext2_permission_locked(inode, MAY_READ); -#else error = permission(inode, MAY_READ, NULL); -#endif if (error) return error; @@ -68,11 +60,7 @@ ext2_xattr_user_set(struct inode *inode, const char *name, if ( !S_ISREG(inode->i_mode) && (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) return -EPERM; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - error = ext2_permission_locked(inode, MAY_WRITE); -#else error = permission(inode, MAY_WRITE, NULL); -#endif if (error) return error; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 94b6f477f1d8..2416e214280f 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -125,10 +125,34 @@ fail: return ERR_PTR(-EINVAL); } +static inline struct posix_acl * +ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT3_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT3_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT3_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + /* * Inode operation get_posix_acl(). 
* - * inode->i_sem: down + * inode->i_sem: don't care */ static struct posix_acl * ext3_get_acl(struct inode *inode, int type) @@ -145,14 +169,16 @@ ext3_get_acl(struct inode *inode, int type) switch(type) { case ACL_TYPE_ACCESS: - if (ei->i_acl != EXT3_ACL_NOT_CACHED) - return posix_acl_dup(ei->i_acl); + acl = ext3_iget_acl(inode, &ei->i_acl); + if (acl != EXT3_ACL_NOT_CACHED) + return acl; name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - if (ei->i_default_acl != EXT3_ACL_NOT_CACHED) - return posix_acl_dup(ei->i_default_acl); + acl = ext3_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT3_ACL_NOT_CACHED) + return acl; name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; break; @@ -174,11 +200,11 @@ ext3_get_acl(struct inode *inode, int type) if (!IS_ERR(acl)) { switch(type) { case ACL_TYPE_ACCESS: - ei->i_acl = posix_acl_dup(acl); + ext3_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - ei->i_default_acl = posix_acl_dup(acl); + ext3_iset_acl(inode, &ei->i_default_acl, acl); break; } } @@ -245,23 +271,24 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, if (!error) { switch(type) { case ACL_TYPE_ACCESS: - if (ei->i_acl != EXT3_ACL_NOT_CACHED) - posix_acl_release(ei->i_acl); - ei->i_acl = posix_acl_dup(acl); + ext3_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - if (ei->i_default_acl != EXT3_ACL_NOT_CACHED) - posix_acl_release(ei->i_default_acl); - ei->i_default_acl = posix_acl_dup(acl); + ext3_iset_acl(inode, &ei->i_default_acl, acl); break; } } return error; } -static int -__ext3_permission(struct inode *inode, int mask, int lock) +/* + * Inode operation permission(). 
+ * + * inode->i_sem: don't care + */ +int +ext3_permission(struct inode *inode, int mask, struct nameidata *nd) { int mode = inode->i_mode; @@ -275,30 +302,16 @@ __ext3_permission(struct inode *inode, int mask, int lock) if (current->fsuid == inode->i_uid) { mode >>= 6; } else if (test_opt(inode->i_sb, POSIX_ACL)) { - struct ext3_inode_info *ei = EXT3_I(inode); + struct posix_acl *acl; /* The access ACL cannot grant access if the group class permission bits don't contain all requested permissions. */ if (((mode >> 3) & mask & S_IRWXO) != mask) goto check_groups; - if (ei->i_acl == EXT3_ACL_NOT_CACHED) { - struct posix_acl *acl; - - if (lock) { - down(&inode->i_sem); - acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); - up(&inode->i_sem); - } else - acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); - - if (IS_ERR(acl)) - return PTR_ERR(acl); + acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); - if (ei->i_acl == EXT3_ACL_NOT_CACHED) - return -EIO; - } - if (ei->i_acl) { - int error = posix_acl_permission(inode, ei->i_acl,mask); if (error == -EACCES) goto check_capabilities; return error; @@ -324,26 +337,6 @@ check_capabilities: return -EACCES; } -/* - * Inode operation permission(). - * - * inode->i_sem: up - */ -int -ext3_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return __ext3_permission(inode, mask, 1); -} - -/* - * Used internally if i_sem is already down. - */ -int -ext3_permission_locked(struct inode *inode, int mask) -{ - return __ext3_permission(inode, mask, 0); -} - /* * Initialize the ACLs of a new inode. Called from ext3_new_inode. 
* diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 6aaef97a5fc3..9d9d9d2e4a0c 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -60,7 +60,6 @@ static inline int ext3_acl_count(size_t size) /* acl.c */ extern int ext3_permission (struct inode *, int, struct nameidata *); -extern int ext3_permission_locked (struct inode *, int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index d84dddf2959b..04f3c6d04b7f 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -519,6 +519,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_orphan); +#ifdef CONFIG_EXT3_FS_XATTR + init_rwsem(&ei->xattr_sem); +#endif init_rwsem(&ei->truncate_sem); inode_init_once(&ei->vfs_inode); } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index b89f8be46f0f..6fbda077bdbe 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -43,13 +43,12 @@ * * Locking strategy * ---------------- - * The VFS holdsinode->i_sem semaphore when any of the xattr inode - * operations are called, so we are guaranteed that only one - * processes accesses extended attributes of an inode at any time. - * - * For writing we also grab the ext3_xattr_sem semaphore. This ensures that - * only a single process is modifying an extended attribute block, even - * if the block is shared among inodes. + * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count will change. Multiple writers to an EA block are synchronized + * by the bh lock. No more than a single bh lock is held at any time + * to avoid deadlocks. 
*/ #include @@ -59,7 +58,7 @@ #include #include #include -#include +#include #include "xattr.h" #include "acl.h" @@ -93,22 +92,14 @@ static int ext3_xattr_set_handle2(handle_t *, struct inode *, struct ext3_xattr_header *); static int ext3_xattr_cache_insert(struct buffer_head *); -static struct buffer_head *ext3_xattr_cache_find(struct inode *, - struct ext3_xattr_header *); +static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *, + struct ext3_xattr_header *, + int *); static void ext3_xattr_cache_remove(struct buffer_head *); static void ext3_xattr_rehash(struct ext3_xattr_header *, struct ext3_xattr_entry *); static struct mb_cache *ext3_xattr_cache; - -/* - * If a file system does not share extended attributes among inodes, - * we should not need the ext3_xattr_sem semaphore. However, the - * filesystem may still contain shared blocks, so we always take - * the lock. - */ - -static DECLARE_MUTEX(ext3_xattr_sem); static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; @@ -191,7 +182,7 @@ ext3_xattr_handler(int name_index) /* * Inode operation getxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext3_getxattr(struct dentry *dentry, const char *name, @@ -199,39 +190,28 @@ ext3_getxattr(struct dentry *dentry, const char *name, { struct ext3_xattr_handler *handler; struct inode *inode = dentry->d_inode; - ssize_t error; handler = ext3_xattr_resolve_name(&name); if (!handler) return -EOPNOTSUPP; - down(&inode->i_sem); - error = handler->get(inode, name, buffer, size); - up(&inode->i_sem); - - return error; + return handler->get(inode, name, buffer, size); } /* * Inode operation listxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) { - ssize_t error; - - down(&dentry->d_inode->i_sem); - error = ext3_xattr_list(dentry->d_inode, 
buffer, size); - up(&dentry->d_inode->i_sem); - - return error; + return ext3_xattr_list(dentry->d_inode, buffer, size); } /* * Inode operation setxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext3_setxattr(struct dentry *dentry, const char *name, @@ -251,7 +231,7 @@ ext3_setxattr(struct dentry *dentry, const char *name, /* * Inode operation removexattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext3_removexattr(struct dentry *dentry, const char *name) @@ -290,12 +270,15 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name, if (name == NULL) return -EINVAL; + down_read(&EXT3_I(inode)->xattr_sem); + error = -ENODATA; if (!EXT3_I(inode)->i_file_acl) - return -ENODATA; + goto cleanup; ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -360,6 +343,7 @@ found: cleanup: brelse(bh); + up_read(&EXT3_I(inode)->xattr_sem); return error; } @@ -386,12 +370,15 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); + down_read(&EXT3_I(inode)->xattr_sem); + error = 0; if (!EXT3_I(inode)->i_file_acl) - return 0; + goto cleanup; ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -444,6 +431,7 @@ bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", cleanup: brelse(bh); + up_read(&EXT3_I(inode)->xattr_sem); return error; } @@ -459,11 +447,12 @@ static void 
ext3_xattr_update_super_block(handle_t *handle, return; lock_super(sb); - ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - EXT3_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); - sb->s_dirt = 1; - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { + EXT3_SB(sb)->s_es->s_feature_compat |= + cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); + sb->s_dirt = 1; + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + } unlock_super(sb); } @@ -518,8 +507,7 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, name_len = strlen(name); if (name_len > 255 || value_len > sb->s_blocksize) return -ERANGE; - down(&ext3_xattr_sem); - + down_write(&EXT3_I(inode)->xattr_sem); if (EXT3_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. */ bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); @@ -612,15 +600,28 @@ bad_block: ext3_error(sb, "ext3_xattr_set", /* Here we know that we can set the new attribute. */ if (header) { + int credits = 0; + + /* assert(header == HDR(bh)); */ + if (header->h_refcount != cpu_to_le32(1)) + goto skip_get_write_access; + /* ext3_journal_get_write_access() requires an unlocked bh, + which complicates things here. */ + error = ext3_journal_get_write_access_credits(handle, bh, + &credits); + if (error) + goto cleanup; + lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { ea_bdebug(bh, "modifying in-place"); ext3_xattr_cache_remove(bh); - error = ext3_journal_get_write_access(handle, bh); - if (error) - goto cleanup; + /* keep the buffer locked while modifying it. 
*/ } else { int offset; + unlock_buffer(bh); + journal_release_buffer(handle, bh, credits); + skip_get_write_access: ea_bdebug(bh, "cloning"); header = kmalloc(bh->b_size, GFP_KERNEL); error = -ENOMEM; @@ -645,6 +646,8 @@ bad_block: ext3_error(sb, "ext3_xattr_set", last = here = ENTRY(header+1); } + /* Iff we are modifying the block in-place, bh is locked here. */ + if (not_found) { /* Insert the new name. */ size_t size = EXT3_XATTR_LEN(name_len); @@ -715,9 +718,13 @@ bad_block: ext3_error(sb, "ext3_xattr_set", skip_replace: if (IS_LAST_ENTRY(ENTRY(header+1))) { /* This block is now empty. */ + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext3_xattr_set_handle2(handle, inode, bh, NULL); } else { ext3_xattr_rehash(header, here); + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext3_xattr_set_handle2(handle, inode, bh, header); } @@ -725,7 +732,7 @@ cleanup: brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); - up(&ext3_xattr_sem); + up_write(&EXT3_I(inode)->xattr_sem); return error; } @@ -740,33 +747,34 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode, { struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; - int error; + int credits = 0, error; if (header) { - new_bh = ext3_xattr_cache_find(inode, header); + new_bh = ext3_xattr_cache_find(handle, inode, header, &credits); if (new_bh) { /* - * We found an identical block in the cache. - * The old block will be released after updating - * the inode. + * We found an identical block in the cache. The + * block returned is locked. The old block will + * be released after updating the inode. */ ea_bdebug(new_bh, "%s block %lu", (old_bh == new_bh) ? 
"keeping" : "reusing", (unsigned long) new_bh->b_blocknr); error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; - - error = ext3_journal_get_write_access(handle, new_bh); - if (error) + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + unlock_buffer(new_bh); + journal_release_buffer(handle, new_bh, credits); goto cleanup; + } HDR(new_bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ea_bdebug(new_bh, "refcount now=%d", le32_to_cpu(HDR(new_bh)->h_refcount)); + unlock_buffer(new_bh); } else if (old_bh && header == HDR(old_bh)) { - /* Keep this block. */ + /* Keep this block. No need to lock the block as we + * don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); ext3_xattr_cache_insert(new_bh); @@ -817,15 +825,14 @@ getblk_failed: error = 0; if (old_bh && old_bh != new_bh) { /* - * If there was an old block, and we are not still using it, - * we now release the old block. + * If there was an old block, and we are no longer using it, + * release the old block. */ - unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); - error = ext3_journal_get_write_access(handle, old_bh); if (error) goto cleanup; - if (refcount == 1) { + lock_buffer(old_bh); + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); @@ -837,12 +844,14 @@ getblk_failed: ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); } else { /* Decrement the refcount only. 
*/ - refcount--; - HDR(old_bh)->h_refcount = cpu_to_le32(refcount); + HDR(old_bh)->h_refcount = cpu_to_le32( + le32_to_cpu(HDR(old_bh)->h_refcount) - 1); DQUOT_FREE_BLOCK(inode, 1); ext3_journal_dirty_metadata(handle, old_bh); - ea_bdebug(old_bh, "refcount now=%d", refcount); + ea_bdebug(old_bh, "refcount now=%d", + le32_to_cpu(HDR(old_bh)->h_refcount)); } + unlock_buffer(old_bh); } cleanup: @@ -886,12 +895,11 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name, void ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; + down_write(&EXT3_I(inode)->xattr_sem); if (!EXT3_I(inode)->i_file_acl) - return; - down(&ext3_xattr_sem); - + goto cleanup; bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); if (!bh) { ext3_error(inode->i_sb, "ext3_xattr_delete_inode", @@ -899,7 +907,6 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) EXT3_I(inode)->i_file_acl); goto cleanup; } - ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || HDR(bh)->h_blocks != cpu_to_le32(1)) { ext3_error(inode->i_sb, "ext3_xattr_delete_inode", @@ -907,8 +914,9 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) EXT3_I(inode)->i_file_acl); goto cleanup; } - ext3_journal_get_write_access(handle, bh); - ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + if (ext3_journal_get_write_access(handle, bh) != 0) + goto cleanup; + lock_buffer(bh); if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ext3_xattr_cache_remove(bh); ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); @@ -922,11 +930,13 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) handle->h_sync = 1; DQUOT_FREE_BLOCK(inode, 1); } + ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + unlock_buffer(bh); EXT3_I(inode)->i_file_acl = 0; cleanup: brelse(bh); - up(&ext3_xattr_sem); + 
up_write(&EXT3_I(inode)->xattr_sem); } /* @@ -1022,7 +1032,8 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1, * not found or an error occurred. */ static struct buffer_head * -ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) +ext3_xattr_cache_find(handle_t *handle, struct inode *inode, + struct ext3_xattr_header *header, int *credits) { __u32 hash = le32_to_cpu(header->h_hash); struct mb_cache_entry *ce; @@ -1039,18 +1050,28 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) ext3_error(inode->i_sb, "ext3_xattr_cache_find", "inode %ld: block %ld read error", inode->i_ino, (unsigned long) ce->e_block); - } else if (le32_to_cpu(HDR(bh)->h_refcount) > - EXT3_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %ld refcount %d>%d", - (unsigned long) ce->e_block, - le32_to_cpu(HDR(bh)->h_refcount), - EXT3_XATTR_REFCOUNT_MAX); - } else if (!ext3_xattr_cmp(header, HDR(bh))) { - ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); - mb_cache_entry_release(ce); - return bh; + } else { + /* ext3_journal_get_write_access() requires an unlocked + * bh, which complicates things here. 
*/ + if (ext3_journal_get_write_access_credits(handle, bh, + credits) != 0) + return NULL; + lock_buffer(bh); + if (le32_to_cpu(HDR(bh)->h_refcount) > + EXT3_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %ld refcount %d>%d", + (unsigned long) ce->e_block, + le32_to_cpu(HDR(bh)->h_refcount), + EXT3_XATTR_REFCOUNT_MAX); + } else if (!ext3_xattr_cmp(header, HDR(bh))) { + mb_cache_entry_release(ce); + /* buffer will be unlocked by caller */ + return bh; + } + unlock_buffer(bh); + journal_release_buffer(handle, bh, *credits); + brelse(bh); } - brelse(bh); ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); } return NULL; diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index b8c789e60fa0..84877afff67a 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -13,10 +13,6 @@ #include #include "xattr.h" -#ifdef CONFIG_EXT3_FS_POSIX_ACL -# include "acl.h" -#endif - #define XATTR_USER_PREFIX "user." static size_t @@ -46,11 +42,7 @@ ext3_xattr_user_get(struct inode *inode, const char *name, return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - error = ext3_permission_locked(inode, MAY_READ); -#else error = permission(inode, MAY_READ, NULL); -#endif if (error) return error; @@ -70,11 +62,7 @@ ext3_xattr_user_set(struct inode *inode, const char *name, if ( !S_ISREG(inode->i_mode) && (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) return -EPERM; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - error = ext3_permission_locked(inode, MAY_WRITE); -#else error = permission(inode, MAY_WRITE, NULL); -#endif if (error) return error; diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index 1a6a6c5922f7..1181cfae7142 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -62,6 +62,16 @@ struct ext3_inode_info { __u32 i_prealloc_count; #endif __u32 i_dir_start_lookup; +#ifdef CONFIG_EXT3_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. 
Taking i_sem even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif #ifdef CONFIG_EXT3_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; -- cgit v1.2.3 From c132f1432948dc91fd7d03845dfcd918d8b732c4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:41:19 -0700 Subject: [PATCH] Module autoloading for quota From: Jan Kara This implements autoloading of quota modules. --- fs/dquot.c | 17 +++++++++++++++-- include/linux/quota.h | 10 ++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/dquot.c b/fs/dquot.c index 34f9417ed71a..9b3a6aae6c5f 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -74,6 +74,7 @@ #include #include #include +#include #include @@ -96,6 +97,7 @@ spinlock_t dq_data_lock = SPIN_LOCK_UNLOCKED; static char *quotatypes[] = INITQFNAMES; static struct quota_format_type *quota_formats; /* List of registered formats */ +static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; int register_quota_format(struct quota_format_type *fmt) { @@ -123,8 +125,19 @@ static struct quota_format_type *find_quota_format(int id) spin_lock(&dq_list_lock); for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next); - if (actqf && !try_module_get(actqf->qf_owner)) - actqf = NULL; + if (!actqf || !try_module_get(actqf->qf_owner)) { + int qm; + + for (qm = 0; module_names[qm].qm_fmt_id && module_names[qm].qm_fmt_id != id; qm++); + if (!module_names[qm].qm_fmt_id || request_module(module_names[qm].qm_mod_name)) { + actqf = NULL; + goto out; + } + for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next); + if (actqf && !try_module_get(actqf->qf_owner)) + actqf = NULL; + } +out: spin_unlock(&dq_list_lock); return actqf; } diff --git a/include/linux/quota.h b/include/linux/quota.h index 
77d017472dc7..fbf2d2b2a5be 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -306,6 +306,16 @@ int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); void init_dquot_operations(struct dquot_operations *fsdqops); +struct quota_module_name { + int qm_fmt_id; + char *qm_mod_name; +}; + +#define INIT_QUOTA_MODULE_NAMES {\ + {QFMT_VFS_OLD, "quota_v1"},\ + {QFMT_VFS_V0, "quota_v2"},\ + {0, NULL}} + #else # /* nodep */ include -- cgit v1.2.3 From d029f790d8ed6d277ef47eb1e1f99cdbc3f11017 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:41:27 -0700 Subject: [PATCH] display bootserver in /proc/net/pnp From: "lode leroy" I would like to submit a trivial enhancement to display the ip address of the bootserver in /proc/net/pnp This aids me in developing a diskless linux root image to know where it comes from... --- net/ipv4/ipconfig.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 706b6e8a7bcb..7e1e4d749905 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1115,6 +1115,8 @@ static int pnp_get_info(char *buffer, char **start, "nameserver %u.%u.%u.%u\n", NIPQUAD(ic_nameservers[i])); } + len += sprintf(buffer + len, "bootserver %u.%u.%u.%u\n", + NIPQUAD(ic_servaddr)); if (offset > len) offset = len; -- cgit v1.2.3 From d24d1d3abedcd64a9fd90e602bbd45d73b9c0de0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 6 Jul 2003 05:41:34 -0700 Subject: [PATCH] BSD accounting speedup From: Ingo Molnar Most distributions turn on process accounting - but even the common 'accounting is off' case is horrible SMP-scalability-wise: it accesses a global spinlock during every sys_exit() call, which bounces like mad on SMP (and NUMA) systems. (i also got rid of the unused return code.) 
--- include/linux/acct.h | 2 +- kernel/acct.c | 29 +++++++++++++++++++---------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/linux/acct.h b/include/linux/acct.h index 0b4ae8298a0e..69389c4af8e4 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -78,7 +78,7 @@ struct acct #ifdef CONFIG_BSD_PROCESS_ACCT struct super_block; extern void acct_auto_close(struct super_block *sb); -extern int acct_process(long exitcode); +extern void acct_process(long exitcode); #else #define acct_auto_close(x) do { } while (0) #define acct_process(x) do { } while (0) diff --git a/kernel/acct.c b/kernel/acct.c index e63095525ac2..028e310bd15f 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -394,17 +394,26 @@ static void do_acct_process(long exitcode, struct file *file) /* * acct_process - now just a wrapper around do_acct_process */ -int acct_process(long exitcode) +void acct_process(long exitcode) { struct file *file = NULL; + + /* + * accelerate the common fastpath: + */ + if (!acct_globals.file) + return; + spin_lock(&acct_globals.lock); - if (acct_globals.file) { - file = acct_globals.file; - get_file(file); - spin_unlock(&acct_globals.lock); - do_acct_process(exitcode, file); - fput(file); - } else - spin_unlock(&acct_globals.lock); - return 0; + file = acct_globals.file; + if (!file) + goto out_unlock; + + get_file(file); + spin_unlock(&acct_globals.lock); + do_acct_process(exitcode, file); + fput(file); + +out_unlock: + spin_unlock(&acct_globals.lock); } -- cgit v1.2.3 From 020a24469f928fe463553296cf72e69b1f57257f Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 05:58:51 -0700 Subject: [PATCH] conditional ROMfs copy for Cleopatra/5307 board Conditionally copy the ROMfs filesystem on the Cleopatra/5307 target board only if using a ROMfs. 
--- arch/m68knommu/platform/5307/CLEOPATRA/crt0_ram.S | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/m68knommu/platform/5307/CLEOPATRA/crt0_ram.S b/arch/m68knommu/platform/5307/CLEOPATRA/crt0_ram.S index a87ee519a60d..809b9ca2bdf0 100644 --- a/arch/m68knommu/platform/5307/CLEOPATRA/crt0_ram.S +++ b/arch/m68knommu/platform/5307/CLEOPATRA/crt0_ram.S @@ -127,6 +127,7 @@ _start: movc %d0,%CACR +#ifdef CONFIG_ROMFS_FS /* * Move ROM filesystem above bss :-) */ @@ -148,6 +149,12 @@ _copy_romfs: cmp.l %a0, %a2 /* Check if at end */ bne _copy_romfs +#else /* CONFIG_ROMFS_FS */ + lea.l _ebss, %a1 + move.l %a1, _ramstart +#endif /* CONFIG_ROMFS_FS */ + + /* * Zero out the bss region. */ -- cgit v1.2.3 From 4947a1a3029d9092524bc980da7cb024bb7c6c77 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 6 Jul 2003 08:36:41 -0700 Subject: Fix statfs failure due to invalid value for ffree --- fs/cifs/CHANGES | 7 ++++++- fs/cifs/cifsfs.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index ee8682efbada..a7aa89abfd5b 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,7 +1,12 @@ Version 0.81 ------------ Finish up CIFS packet digital signing for the default -NTLM security case. +NTLM security case. This should help Windows 2003 +network interoperability since it is common for +packet signing to be required now. Fix statfs (stat -f) +which recently started returning errors due to +invalid value (-1 instead of 0) being set in the +struct kstatfs f_ffiles field. 
Version 0.80 ----------- diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 2201681095ca..60e4629b1d67 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -165,7 +165,7 @@ cifs_statfs(struct super_block *sb, struct kstatfs *buf) able to support more than this, but best to be safe since Win2k and others can not handle very long filenames */ buf->f_files = 0; /* undefined */ - buf->f_ffree = -1; /* unlimited */ + buf->f_ffree = 0; /* unlimited */ rc = CIFSSMBQFSInfo(xid, pTcon, buf, cifs_sb->local_nls); -- cgit v1.2.3 From 395b6e1dc40be8a1515f577f3c9f52508d965c96 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 6 Jul 2003 22:51:59 -0700 Subject: [PATCH] Double unlock in BSD accounting speedup patch doh - double unlock in the acct-is-on path. Noticed by Aneesh Kumar K.V --- kernel/acct.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/acct.c b/kernel/acct.c index 028e310bd15f..0009dfb25dcb 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -406,14 +406,13 @@ void acct_process(long exitcode) spin_lock(&acct_globals.lock); file = acct_globals.file; - if (!file) - goto out_unlock; - + if (unlikely(!file)) { + spin_unlock(&acct_globals.lock); + return; + } get_file(file); spin_unlock(&acct_globals.lock); + do_acct_process(exitcode, file); fput(file); - -out_unlock: - spin_unlock(&acct_globals.lock); } -- cgit v1.2.3 From 4c397a46724a0d9331cbbcc3f025c35c1fa6c08e Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 23:00:04 -0700 Subject: [PATCH] .no .romvec section for DragonEngine/68328 target A couple of minor fixes for the 68328 interrupt setup code. 
- don't define the .romvec section for DragonEngine build - print newline at end of spurious interrupt count in show_interrupts() --- arch/m68knommu/platform/68328/ints.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68knommu/platform/68328/ints.c b/arch/m68knommu/platform/68328/ints.c index 2d864431dd42..290fdd07b7b0 100644 --- a/arch/m68knommu/platform/68328/ints.c +++ b/arch/m68knommu/platform/68328/ints.c @@ -81,6 +81,7 @@ unsigned int local_irq_count[NR_CPUS]; /* irq node variables for the 32 (potential) on chip sources */ static irq_node_t int_irq_list[NR_IRQS]; +#if !defined(CONFIG_DRAGEN2) asm (" .global _start, __ramend .section .romvec @@ -102,6 +103,7 @@ e_vectors: ignore: rte "); +#endif /* * This function should be called during kernel startup to initialize @@ -204,7 +206,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, " "); seq_printf(p, "%s\n", int_irq_list[i].devname); } - seq_printf(p, " : %10u spurious", num_spurious); + seq_printf(p, " : %10u spurious\n", num_spurious); return 0; } -- cgit v1.2.3 From a7eec8d9269e8c879e216bfcc11a5b5fa333c250 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 23:00:12 -0700 Subject: [PATCH] define shared lib limits for flat loader This patch includes the last pieces of the flat loader shared library support. Define the shared lib limit and implement a flag for doing kernel level tracing. --- include/linux/flat.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/include/linux/flat.h b/include/linux/flat.h index 7643a3f9d3f5..ec56852e2659 100644 --- a/include/linux/flat.h +++ b/include/linux/flat.h @@ -1,7 +1,7 @@ - -/* Copyright (C) 1998 Kenneth Albanowski - * The Silver Hammer Group, Ltd. - * Copyright (C) 2002 David McCullough +/* + * Copyright (C) 2002-2003 David McCullough + * Copyright (C) 1998 Kenneth Albanowski + * The Silver Hammer Group, Ltd.
* * This file provides the definitions and structures needed to * support uClinux flat-format executables. @@ -10,10 +10,18 @@ #ifndef _LINUX_FLAT_H #define _LINUX_FLAT_H +#ifdef __KERNEL__ #include +#endif #define FLAT_VERSION 0x00000004L +#ifdef CONFIG_BINFMT_SHARED_FLAT +#define MAX_SHARED_LIBS (4) +#else +#define MAX_SHARED_LIBS (1) +#endif + /* * To make everything easier to port and manage cross platform * development, all fields are in network byte order. @@ -46,8 +54,10 @@ struct flat_hdr { #define FLAT_FLAG_GOTPIC 0x0002 /* program is PIC with GOT */ #define FLAT_FLAG_GZIP 0x0004 /* all but the header is compressed */ #define FLAT_FLAG_GZDATA 0x0008 /* only data/relocs are compressed (for XIP) */ +#define FLAT_FLAG_KTRACE 0x0010 /* output useful kernel trace for debugging */ +#ifdef __KERNEL__ /* so systems without linux headers can compile the apps */ /* * While it would be nice to keep this header clean, users of older * tools still need this support in the kernel. So this section is @@ -85,4 +95,6 @@ typedef union { } reloc; } flat_v2_reloc_t; +#endif /* __KERNEL__ */ + #endif /* _LINUX_FLAT_H */ -- cgit v1.2.3 From 6543adbd85f82e5e566d24e8bd542779ec246a4c Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 23:00:19 -0700 Subject: [PATCH] cleanup show_process_blocks() for non-mmu targets Clean up show_process_blocks() loop for non-mmu targets. 
--- mm/nommu.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/mm/nommu.c b/mm/nommu.c index 018262482d5a..5595fa7054f8 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -250,23 +250,16 @@ static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flag #ifdef DEBUG static void show_process_blocks(void) { - struct mm_tblock_struct * tblock, *tmp; - + struct mm_tblock_struct *tblock; + printk("Process blocks %d:", current->pid); - - tmp = current->mm->context.tblock; - while (tmp) { - printk(" %p: %p", tmp, tmp->rblock); - if (tmp->rblock) - printk(" (%d @%p #%d)", kobjsize(tmp->rblock->kblock), - tmp->rblock->kblock, tmp->rblock->refcount); - if (tmp->next) - printk(" ->"); - else - printk("."); - tmp = tmp->next; + + for (tblock = &current->mm->context.tblock; tblock; tblock = tblock->next) { + printk(" %p: %p", tblock, tblock->rblock); + if (tblock->rblock) + printk(" (%d @%p #%d)", kobjsize(tblock->rblock->kblock), tblock->rblock->kblock, tblock->rblock->refcount); + printk(tblock->next ? " ->" : ".\n"); } - printk("\n"); } #endif /* DEBUG */ @@ -355,7 +348,7 @@ unsigned long do_mmap_pgoff( error = file->f_op->mmap(file, &vma); #ifdef DEBUG - printk("mmap mmap returned %d /%x\n", error, vma.vm_start); + printk("f_op->mmap() returned %d/%lx\n", error, vma.vm_start); #endif if (!error) return vma.vm_start; -- cgit v1.2.3 From 18da0cad253d29657e4be8d3510122986edf299d Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 23:00:27 -0700 Subject: [PATCH] define raw read/write for m68knommu io access Define the raw read and write access macros for m68knommu. These are used by MTD drivers in particular.
--- include/asm-m68knommu/io.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/asm-m68knommu/io.h b/include/asm-m68knommu/io.h index d0dbd4c796c4..f42645e4a2d2 100644 --- a/include/asm-m68knommu/io.h +++ b/include/asm-m68knommu/io.h @@ -43,6 +43,13 @@ static inline unsigned int _swapl(volatile unsigned long v) #define writew(b,addr) (void)((*(volatile unsigned short *) (addr)) = (b)) #define writel(b,addr) (void)((*(volatile unsigned int *) (addr)) = (b)) +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + static inline void io_outsb(unsigned int addr, void *buf, int len) { volatile unsigned char *ap = (volatile unsigned char *) addr; @@ -100,7 +107,7 @@ static inline void io_insl(unsigned int addr, void *buf, int len) #define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c)) #define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c)) -#define inb(addr) readb(addr) +#define inb(addr) readb(addr) #define inw(addr) readw(addr) #define inl(addr) readl(addr) #define outb(x,addr) ((void) writeb(x,addr)) -- cgit v1.2.3 From 163feb5818080346b1e819ca8f8fcd68eac5e593 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Sun, 6 Jul 2003 23:00:33 -0700 Subject: [PATCH] remove 68360 specific trap init call No longer need the 68360 specific trap init call. The generic interrupt/trap code is now set up to do this itself.
--- arch/m68knommu/platform/68360/config.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/m68knommu/platform/68360/config.c b/arch/m68knommu/platform/68360/config.c index 6172044d0349..701303874109 100644 --- a/arch/m68knommu/platform/68360/config.c +++ b/arch/m68knommu/platform/68360/config.c @@ -205,9 +205,4 @@ void config_BSP(char *command, int len) mach_hwclk = NULL; mach_set_clock_mmss = NULL; mach_reset = BSP_reset; - - //Kendrick's Change - mach_trap_init = M68360_init_IRQ; - - config_M68360_irq(); } -- cgit v1.2.3 From b993be7e4517f328fd6bd8bcea2f038c894a292e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 6 Jul 2003 23:01:42 -0700 Subject: [PATCH] Make kstat_this_cpu in terms of __get_cpu_var and use it kstat_this_cpu() is defined in terms of per_cpu instead of __get_cpu_var. This patch changes that, and uses it everywhere appropriate. The sched.c change puts it in a local variable, which helps gcc generate better code. --- arch/i386/kernel/irq.c | 3 +-- include/linux/kernel_stat.h | 3 ++- kernel/sched.c | 13 +++++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 6e0ff0a46b5c..314d47e8ec17 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -416,7 +416,6 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs) * handled by some other CPU. 
(or is disabled) */ int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */ - int cpu = smp_processor_id(); irq_desc_t *desc = irq_desc + irq; struct irqaction * action; unsigned int status; @@ -437,7 +436,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs) } } #endif - kstat_cpu(cpu).irqs[irq]++; + kstat_this_cpu.irqs[irq]++; spin_lock(&desc->lock); desc->handler->ack(irq); /* diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 88425e94cdc5..9971827a3c4b 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -31,7 +31,8 @@ struct kernel_stat { DECLARE_PER_CPU(struct kernel_stat, kstat); #define kstat_cpu(cpu) per_cpu(kstat, cpu) -#define kstat_this_cpu kstat_cpu(smp_processor_id()) +/* Must have preemption disabled for this to be meaningful. */ +#define kstat_this_cpu __get_cpu_var(kstat) extern unsigned long nr_context_switches(void); diff --git a/kernel/sched.c b/kernel/sched.c index 4f3d1fa42669..a4305315afda 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1175,6 +1175,7 @@ DEFINE_PER_CPU(struct kernel_stat, kstat) = { { 0 } }; void scheduler_tick(int user_ticks, int sys_ticks) { int cpu = smp_processor_id(); + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; runqueue_t *rq = this_rq(); task_t *p = current; @@ -1184,19 +1185,19 @@ void scheduler_tick(int user_ticks, int sys_ticks) if (p == rq->idle) { /* note: this timer irq context must be accounted for as well */ if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET) - kstat_cpu(cpu).cpustat.system += sys_ticks; + cpustat->system += sys_ticks; else if (atomic_read(&rq->nr_iowait) > 0) - kstat_cpu(cpu).cpustat.iowait += sys_ticks; + cpustat->iowait += sys_ticks; else - kstat_cpu(cpu).cpustat.idle += sys_ticks; + cpustat->idle += sys_ticks; rebalance_tick(rq, 1); return; } if (TASK_NICE(p) > 0) - kstat_cpu(cpu).cpustat.nice += user_ticks; + cpustat->nice += user_ticks; else - kstat_cpu(cpu).cpustat.user += user_ticks; - 
kstat_cpu(cpu).cpustat.system += sys_ticks; + cpustat->user += user_ticks; + cpustat->system += sys_ticks; /* Task might have expired already, but not scheduled off yet */ if (p->array != rq->active) { -- cgit v1.2.3 From 8a6879c603dc4ea40f89fb1bda8f2b5039e19396 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 6 Jul 2003 23:01:50 -0700 Subject: [PATCH] switch_mm and enter_lazy_tlb: remove cpu arg switch_mm and enter_lazy_tlb take a CPU arg, which is always smp_processor_id(). This is misleading, and pointless if they use per-cpu variables or other optimizations. gcc will eliminate redundant smp_processor_id() (in inline functions) anyway. This removes that arg from all the architectures. --- arch/cris/mm/tlb.c | 2 +- arch/i386/kernel/cpu/common.c | 2 +- arch/parisc/kernel/smp.c | 2 +- arch/s390/kernel/setup.c | 2 +- arch/um/kernel/process_kern.c | 3 ++- arch/x86_64/kernel/setup64.c | 2 +- fs/aio.c | 2 +- include/asm-alpha/machvec.h | 2 +- include/asm-alpha/mmu_context.h | 15 ++++++++------- include/asm-arm/mmu_context.h | 4 ++-- include/asm-arm26/mmu_context.h | 4 ++-- include/asm-cris/mmu_context.h | 6 +++--- include/asm-h8300/mmu_context.h | 4 ++-- include/asm-i386/mmu_context.h | 11 ++++++++--- include/asm-ia64/mmu_context.h | 4 ++-- include/asm-m68k/mmu_context.h | 6 +++--- include/asm-m68knommu/mmu_context.h | 4 ++-- include/asm-mips/mmu_context.h | 5 +++-- include/asm-mips64/mmu_context.h | 2 +- include/asm-parisc/mmu_context.h | 6 +++--- include/asm-ppc/mmu_context.h | 4 ++-- include/asm-ppc64/mmu_context.h | 8 ++++---- include/asm-s390/mmu_context.h | 8 ++++---- include/asm-sh/mmu_context.h | 10 +++++----- include/asm-sparc/mmu_context.h | 8 ++++---- include/asm-sparc64/mmu_context.h | 6 +++--- include/asm-um/mmu_context.h | 6 ++++-- include/asm-v850/mmu_context.h | 4 ++-- include/asm-x86_64/mmu_context.h | 9 +++++---- kernel/exit.c | 2 +- kernel/sched.c | 6 +++--- 31 files changed, 85 insertions(+), 74 deletions(-) diff --git a/arch/cris/mm/tlb.c 
b/arch/cris/mm/tlb.c index f5a97c9799fd..d31dc25b0070 100644 --- a/arch/cris/mm/tlb.c +++ b/arch/cris/mm/tlb.c @@ -283,7 +283,7 @@ get_mmu_context(struct mm_struct *mm) void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, int cpu) + struct task_struct *tsk) { /* make sure we have a context */ diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 5e579ede103c..99167880abbd 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -495,7 +495,7 @@ void __init cpu_init (void) current->active_mm = &init_mm; if (current->mm) BUG(); - enter_lazy_tlb(&init_mm, current, cpu); + enter_lazy_tlb(&init_mm, current); load_esp0(t, thread->esp0); set_tss_desc(cpu,t); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index edf4e669ecfc..ce1094414dea 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -456,7 +456,7 @@ smp_cpu_init(int cpunum) current->active_mm = &init_mm; if(current->mm) BUG(); - enter_lazy_tlb(&init_mm, current, cpunum); + enter_lazy_tlb(&init_mm, current); init_IRQ(); /* make sure no IRQ's are enabled or pending */ } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1c43b3d40787..a4f84f8c6bcc 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -107,7 +107,7 @@ void __devinit cpu_init (void) current->active_mm = &init_mm; if (current->mm) BUG(); - enter_lazy_tlb(&init_mm, current, nr); + enter_lazy_tlb(&init_mm, current); } /* diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 9f0fba361776..18c53d71658e 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -113,8 +113,9 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) } void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, unsigned cpu) + struct task_struct *tsk) { + unsigned cpu = smp_processor_id(); if (prev != next) clear_bit(cpu, 
&prev->cpu_vm_mask); set_bit(cpu, &next->cpu_vm_mask); diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 6b71af1d1060..82a75fa6b67e 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -288,7 +288,7 @@ void __init cpu_init (void) me->active_mm = &init_mm; if (me->mm) BUG(); - enter_lazy_tlb(&init_mm, me, cpu); + enter_lazy_tlb(&init_mm, me); set_tss_desc(cpu, t); load_TR_desc(); diff --git a/fs/aio.c b/fs/aio.c index ccbdeede79ff..58983f34337f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -552,7 +552,7 @@ static void unuse_mm(struct mm_struct *mm) { current->mm = NULL; /* active_mm is still 'mm' */ - enter_lazy_tlb(mm, current, smp_processor_id()); + enter_lazy_tlb(mm, current); } /* Run on kevent's context. FIXME: needs to be per-cpu and warn if an diff --git a/include/asm-alpha/machvec.h b/include/asm-alpha/machvec.h index 6d1b408e3ed5..f09f71909aa6 100644 --- a/include/asm-alpha/machvec.h +++ b/include/asm-alpha/machvec.h @@ -68,7 +68,7 @@ struct alpha_machine_vector int (*mv_is_ioaddr)(unsigned long); void (*mv_switch_mm)(struct mm_struct *, struct mm_struct *, - struct task_struct *, long); + struct task_struct *); void (*mv_activate_mm)(struct mm_struct *, struct mm_struct *); void (*mv_flush_tlb_current)(struct mm_struct *); diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index a087254ea38a..3ae6408acaed 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h @@ -130,11 +130,12 @@ __get_new_mm_context(struct mm_struct *mm, long cpu) __EXTERN_INLINE void ev5_switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, - struct task_struct *next, long cpu) + struct task_struct *next) { /* Check if our ASN is of an older version, and thus invalid. 
*/ unsigned long asn; unsigned long mmc; + long cpu = smp_processor_id(); #ifdef CONFIG_SMP cpu_data[cpu].asn_lock = 1; @@ -159,7 +160,7 @@ ev5_switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, __EXTERN_INLINE void ev4_switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, - struct task_struct *next, long cpu) + struct task_struct *next) { /* As described, ASN's are broken for TLB usage. But we can optimize for switching between threads -- if the mm is @@ -174,7 +175,7 @@ ev4_switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, /* Do continue to allocate ASNs, because we can still use them to avoid flushing the icache. */ - ev5_switch_mm(prev_mm, next_mm, next, cpu); + ev5_switch_mm(prev_mm, next_mm, next); } extern void __load_new_mm_context(struct mm_struct *); @@ -212,14 +213,14 @@ ev4_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm) #define deactivate_mm(tsk,mm) do { } while (0) #ifdef CONFIG_ALPHA_GENERIC -# define switch_mm(a,b,c,d) alpha_mv.mv_switch_mm((a),(b),(c),(d)) +# define switch_mm(a,b,c) alpha_mv.mv_switch_mm((a),(b),(c)) # define activate_mm(x,y) alpha_mv.mv_activate_mm((x),(y)) #else # ifdef CONFIG_ALPHA_EV4 -# define switch_mm(a,b,c,d) ev4_switch_mm((a),(b),(c),(d)) +# define switch_mm(a,b,c) ev4_switch_mm((a),(b),(c)) # define activate_mm(x,y) ev4_activate_mm((x),(y)) # else -# define switch_mm(a,b,c,d) ev5_switch_mm((a),(b),(c),(d)) +# define switch_mm(a,b,c) ev5_switch_mm((a),(b),(c)) # define activate_mm(x,y) ev5_activate_mm((x),(y)) # endif #endif @@ -245,7 +246,7 @@ destroy_context(struct mm_struct *mm) } static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { tsk->thread_info->pcb.ptbr = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h index 3b3b473c668f..e0340f5fbf32 100644 --- 
a/include/asm-arm/mmu_context.h +++ b/include/asm-arm/mmu_context.h @@ -28,7 +28,7 @@ * tsk->mm will be NULL */ static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -40,7 +40,7 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) */ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, unsigned int cpu) + struct task_struct *tsk) { if (prev != next) { cpu_switch_mm(next->pgd, next); diff --git a/include/asm-arm26/mmu_context.h b/include/asm-arm26/mmu_context.h index 88b7b4f8f210..1a929bfe5c3a 100644 --- a/include/asm-arm26/mmu_context.h +++ b/include/asm-arm26/mmu_context.h @@ -26,7 +26,7 @@ * tsk->mm will be NULL */ static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -36,7 +36,7 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) */ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, unsigned int cpu) + struct task_struct *tsk) { cpu_switch_mm(next->pgd, next); } diff --git a/include/asm-cris/mmu_context.h b/include/asm-cris/mmu_context.h index 6a6ea71a85cd..f9308c5bbd99 100644 --- a/include/asm-cris/mmu_context.h +++ b/include/asm-cris/mmu_context.h @@ -5,11 +5,11 @@ extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); extern void get_mmu_context(struct mm_struct *mm); extern void destroy_context(struct mm_struct *mm); extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, int cpu); + struct task_struct *tsk); #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm((prev),(next),NULL,smp_processor_id()) +#define activate_mm(prev,next) switch_mm((prev),(next),NULL) /* current active pgd - this 
is similar to other processors pgd * registers like cr3 on the i386 @@ -17,7 +17,7 @@ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, extern volatile pgd_t *current_pgd; /* defined in arch/cris/mm/fault.c */ -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } diff --git a/include/asm-h8300/mmu_context.h b/include/asm-h8300/mmu_context.h index ffdf723191cf..23b555b7b4b9 100644 --- a/include/asm-h8300/mmu_context.h +++ b/include/asm-h8300/mmu_context.h @@ -6,7 +6,7 @@ #include #include -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -20,7 +20,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) #define destroy_context(mm) do { } while(0) #define deactivate_mm(tsk,mm) do { } while(0) -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { } diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 14b1fa1ebef8..938fc1364344 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -14,16 +14,21 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP + unsigned cpu = smp_processor_id(); if (cpu_tlbstate[cpu].state == TLBSTATE_OK) cpu_tlbstate[cpu].state = TLBSTATE_LAZY; #endif } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) +static 
inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) { + int cpu = smp_processor_id(); + if (likely(prev != next)) { /* stop flush ipis for the previous mm */ clear_bit(cpu, &prev->cpu_vm_mask); @@ -62,6 +67,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, str asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) #define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL,smp_processor_id()) + switch_mm((prev),(next),NULL) #endif diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index a614a1dbbb61..dee1cd007f5a 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -71,7 +71,7 @@ DECLARE_PER_CPU(u8, ia64_need_tlb_flush); extern void wrap_mmu_context (struct mm_struct *mm); static inline void -enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk) { } @@ -198,7 +198,7 @@ activate_mm (struct mm_struct *prev, struct mm_struct *next) activate_context(next); } -#define switch_mm(prev_mm,next_mm,next_task,cpu) activate_mm(prev_mm, next_mm) +#define switch_mm(prev_mm,next_mm,next_task) activate_mm(prev_mm, next_mm) # endif /* ! 
__ASSEMBLY__ */ #endif /* _ASM_IA64_MMU_CONTEXT_H */ diff --git a/include/asm-m68k/mmu_context.h b/include/asm-m68k/mmu_context.h index 04cfa101eb70..4983fb7b6a0c 100644 --- a/include/asm-m68k/mmu_context.h +++ b/include/asm-m68k/mmu_context.h @@ -3,7 +3,7 @@ #include -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -79,7 +79,7 @@ extern inline void switch_mm_0460(struct mm_struct *mm) asm volatile (".chip 68k"); } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { if (prev != next) { if (CPU_IS_020_OR_030) @@ -137,7 +137,7 @@ static inline void activate_context(struct mm_struct *mm) sun3_put_context(mm->context); } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { activate_context(tsk->mm); } diff --git a/include/asm-m68knommu/mmu_context.h b/include/asm-m68knommu/mmu_context.h index a4286176513c..9bc0fd49b8aa 100644 --- a/include/asm-m68knommu/mmu_context.h +++ b/include/asm-m68knommu/mmu_context.h @@ -6,7 +6,7 @@ #include #include -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -19,7 +19,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) #define destroy_context(mm) do { } while(0) -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { } diff --git 
a/include/asm-mips/mmu_context.h b/include/asm-mips/mmu_context.h index cf19cd768f95..8d2153f323ef 100644 --- a/include/asm-mips/mmu_context.h +++ b/include/asm-mips/mmu_context.h @@ -49,7 +49,7 @@ extern unsigned long pgd_current[]; #define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & ASID_MASK) #define asid_cache(cpu) (cpu_data[cpu].asid_cache) -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -92,9 +92,10 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, unsigned cpu) + struct task_struct *tsk) { unsigned long flags; + unsigned cpu = smp_processor_id(); local_irq_save(flags); diff --git a/include/asm-mips64/mmu_context.h b/include/asm-mips64/mmu_context.h index 107e2459ccd0..b7ca8c085452 100644 --- a/include/asm-mips64/mmu_context.h +++ b/include/asm-mips64/mmu_context.h @@ -40,7 +40,7 @@ extern unsigned long pgd_current[]; #define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & ASID_MASK) #define asid_cache(cpu) (cpu_data[cpu].asid_cache) -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } diff --git a/include/asm-parisc/mmu_context.h b/include/asm-parisc/mmu_context.h index 1ff9e5f0582c..6327156282f2 100644 --- a/include/asm-parisc/mmu_context.h +++ b/include/asm-parisc/mmu_context.h @@ -6,7 +6,7 @@ #include #include -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -43,7 +43,7 @@ static inline void load_context(mm_context_t context) #endif } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, 
unsigned cpu) +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { if (prev != next) { @@ -69,6 +69,6 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) if (next->context == 0) next->context = alloc_sid(); - switch_mm(prev,next,current,0); + switch_mm(prev,next,current); } #endif diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h index 131269863309..1e8176b2d122 100644 --- a/include/asm-ppc/mmu_context.h +++ b/include/asm-ppc/mmu_context.h @@ -48,7 +48,7 @@ -- Dan */ -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -153,7 +153,7 @@ static inline void destroy_context(struct mm_struct *mm) } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, int cpu) + struct task_struct *tsk) { tsk->thread.pgdir = next->pgd; get_mmu_context(next); diff --git a/include/asm-ppc64/mmu_context.h b/include/asm-ppc64/mmu_context.h index 1014f086e5e7..a84ee5812e9f 100644 --- a/include/asm-ppc64/mmu_context.h +++ b/include/asm-ppc64/mmu_context.h @@ -56,7 +56,7 @@ struct mmu_context_queue_t { extern struct mmu_context_queue_t mmu_context_queue; static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -140,10 +140,10 @@ extern void flush_stab(struct task_struct *tsk, struct mm_struct *mm); */ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, int cpu) + struct task_struct *tsk) { flush_stab(tsk, next); - set_bit(cpu, &next->cpu_vm_mask); + set_bit(smp_processor_id(), &next->cpu_vm_mask); } #define deactivate_mm(tsk,mm) do { } while (0) @@ -153,7 +153,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, * the context for the new mm 
so we see the new mappings. */ #define activate_mm(active_mm, mm) \ - switch_mm(active_mm, mm, current, smp_processor_id()); + switch_mm(active_mm, mm, current); #define VSID_RANDOMIZER 42470972311 #define VSID_MASK 0xfffffffff diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index 87be0aab7028..0e63fd521893 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h @@ -17,12 +17,12 @@ #define destroy_context(mm) flush_tlb_mm(mm) static inline void enter_lazy_tlb(struct mm_struct *mm, - struct task_struct *tsk, unsigned cpu) + struct task_struct *tsk) { } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, unsigned cpu) + struct task_struct *tsk) { unsigned long pgd; @@ -42,7 +42,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, : : "m" (pgd) ); #endif /* __s390x__ */ } - set_bit(cpu, &next->cpu_vm_mask); + set_bit(smp_processor_id(), &next->cpu_vm_mask); } #define deactivate_mm(tsk,mm) do { } while (0) @@ -50,7 +50,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, extern inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current, smp_processor_id()); + switch_mm(prev, next, current); } #endif diff --git a/include/asm-sh/mmu_context.h b/include/asm-sh/mmu_context.h index 17775400498b..c956ddea556b 100644 --- a/include/asm-sh/mmu_context.h +++ b/include/asm-sh/mmu_context.h @@ -129,7 +129,7 @@ static __inline__ void activate_context(struct mm_struct *mm) (Currently not used) */ static __inline__ void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, unsigned int cpu) + struct task_struct *tsk) { if (likely(prev != next)) { unsigned long __pgdir = (unsigned long)next->pgd; @@ -144,10 +144,10 @@ static __inline__ void switch_mm(struct mm_struct *prev, #define deactivate_mm(tsk,mm) do { } while (0) #define activate_mm(prev, 
next) \ - switch_mm((prev),(next),NULL,smp_processor_id()) + switch_mm((prev),(next),NULL) static __inline__ void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } #else /* !CONFIG_MMU */ @@ -157,10 +157,10 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) #define set_asid(asid) do { } while (0) #define get_asid() (0) #define activate_context(mm) do { } while (0) -#define switch_mm(prev,next,tsk,cpu) do { } while (0) +#define switch_mm(prev,next,tsk) do { } while (0) #define deactivate_mm(tsk,mm) do { } while (0) #define activate_mm(prev,next) do { } while (0) -#define enter_lazy_tlb(mm,tsk,cpu) do { } while (0) +#define enter_lazy_tlb(mm,tsk) do { } while (0) #endif /* CONFIG_MMU */ #if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4) diff --git a/include/asm-sparc/mmu_context.h b/include/asm-sparc/mmu_context.h index f386a8f4bbe8..ed1e01d04d21 100644 --- a/include/asm-sparc/mmu_context.h +++ b/include/asm-sparc/mmu_context.h @@ -5,7 +5,7 @@ #ifndef __ASSEMBLY__ -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -26,14 +26,14 @@ BTFIXUPDEF_CALL(void, destroy_context, struct mm_struct *) #define destroy_context(mm) BTFIXUP_CALL(destroy_context)(mm) /* Switch the current MM context. */ -BTFIXUPDEF_CALL(void, switch_mm, struct mm_struct *, struct mm_struct *, struct task_struct *, int) +BTFIXUPDEF_CALL(void, switch_mm, struct mm_struct *, struct mm_struct *, struct task_struct *) -#define switch_mm(old_mm, mm, tsk, cpu) BTFIXUP_CALL(switch_mm)(old_mm, mm, tsk, cpu) +#define switch_mm(old_mm, mm, tsk) BTFIXUP_CALL(switch_mm)(old_mm, mm, tsk) #define deactivate_mm(tsk,mm) do { } while (0) /* Activate a new MM instance for the current task. 
*/ -#define activate_mm(active_mm, mm) switch_mm((active_mm), (mm), NULL, smp_processor_id()) +#define activate_mm(active_mm, mm) switch_mm((active_mm), (mm), NULL) #endif /* !(__ASSEMBLY__) */ diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index ab9eedf22860..292757aa3176 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -27,7 +27,7 @@ #include #include -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } @@ -106,7 +106,7 @@ do { \ extern void __flush_tlb_mm(unsigned long, unsigned long); /* Switch the current MM context. */ -static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk, int cpu) +static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { unsigned long ctx_valid; @@ -125,7 +125,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str } { - unsigned long vm_mask = (1UL << cpu); + unsigned long vm_mask = (1UL << smp_processor_id()); /* Even if (mm == old_mm) we _must_ check * the cpu_vm_mask. 
If we do not we could diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h index 14ca8b2a4628..4ddffc1a7832 100644 --- a/include/asm-um/mmu_context.h +++ b/include/asm-um/mmu_context.h @@ -21,8 +21,10 @@ static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) extern void switch_mm_skas(int mm_fd); static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, unsigned cpu) + struct task_struct *tsk) { + unsigned cpu = smp_processor_id(); + if(prev != next){ clear_bit(cpu, &prev->cpu_vm_mask); set_bit(cpu, &next->cpu_vm_mask); @@ -33,7 +35,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } static inline void enter_lazy_tlb(struct mm_struct *mm, - struct task_struct *tsk, unsigned cpu) + struct task_struct *tsk) { } diff --git a/include/asm-v850/mmu_context.h b/include/asm-v850/mmu_context.h index 24301a46a92e..f521c8050d3c 100644 --- a/include/asm-v850/mmu_context.h +++ b/include/asm-v850/mmu_context.h @@ -3,9 +3,9 @@ #define destroy_context(mm) ((void)0) #define init_new_context(tsk,mm) 0 -#define switch_mm(prev,next,tsk,cpu) ((void)0) +#define switch_mm(prev,next,tsk) ((void)0) #define deactivate_mm(tsk,mm) do { } while (0) #define activate_mm(prev,next) ((void)0) -#define enter_lazy_tlb(mm,tsk,cpu) ((void)0) +#define enter_lazy_tlb(mm,tsk) ((void)0) #endif /* __V850_MMU_CONTEXT_H__ */ diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h index 41fc9820ee94..8f80f157035e 100644 --- a/include/asm-x86_64/mmu_context.h +++ b/include/asm-x86_64/mmu_context.h @@ -17,20 +17,21 @@ void destroy_context(struct mm_struct *mm); #ifdef CONFIG_SMP -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { if (read_pda(mmu_state) == TLBSTATE_OK) write_pda(mmu_state, TLBSTATE_LAZY); } #else -static inline void 
enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } #endif static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk, unsigned cpu) + struct task_struct *tsk) { + unsigned cpu = smp_processor_id(); if (likely(prev != next)) { /* stop flush ipis for the previous mm */ clear_bit(cpu, &prev->cpu_vm_mask); @@ -68,7 +69,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } while(0) #define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL,smp_processor_id()) + switch_mm((prev),(next),NULL) #endif diff --git a/kernel/exit.c b/kernel/exit.c index c52fc310cb16..7792bb1268ff 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -443,7 +443,7 @@ static inline void __exit_mm(struct task_struct * tsk) /* more a memory barrier than a real lock */ task_lock(tsk); tsk->mm = NULL; - enter_lazy_tlb(mm, current, smp_processor_id()); + enter_lazy_tlb(mm, current); task_unlock(tsk); mmput(mm); } diff --git a/kernel/sched.c b/kernel/sched.c index a4305315afda..556c5cdbb9c2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -646,9 +646,9 @@ static inline task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next if (unlikely(!mm)) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); - enter_lazy_tlb(oldmm, next, smp_processor_id()); + enter_lazy_tlb(oldmm, next); } else - switch_mm(oldmm, mm, next, smp_processor_id()); + switch_mm(oldmm, mm, next); if (unlikely(!prev->mm)) { prev->active_mm = NULL; @@ -2528,7 +2528,7 @@ void __init sched_init(void) * The boot idle thread does lazy MMU switching as well: */ atomic_inc(&init_mm.mm_count); - enter_lazy_tlb(&init_mm, current, smp_processor_id()); + enter_lazy_tlb(&init_mm, current); } #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP -- cgit v1.2.3 From 74ad37557e248ac4d6227ea5d7b986003fb6fb63 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 6 Jul 
2003 23:04:09 -0700 Subject: [PATCH] Compile fix and cleanup for macserial driver This adds a declaration that the macserial driver needs in order to compile correctly, and removes some old SERIAL_DO_RESTART junk which isn't used (SERIAL_DO_RESTART is never defined in this driver) and which I think is incorrect anyway, since it looks to me like it would potentially return an ERESTARTSYS error without a signal pending. --- drivers/macintosh/macserial.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/macintosh/macserial.c b/drivers/macintosh/macserial.c index 1c087bfc9343..9f4b7cdd2f68 100644 --- a/drivers/macintosh/macserial.c +++ b/drivers/macintosh/macserial.c @@ -76,6 +76,8 @@ static struct pmu_sleep_notifier serial_sleep_notifier = { in the order we want. */ #define RECOVERY_DELAY eieio() +static struct tty_driver *serial_driver; + struct mac_zschannel zs_channels[NUM_CHANNELS]; struct mac_serial zs_soft[NUM_CHANNELS]; @@ -2093,12 +2095,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp, */ if (info->flags & ZILOG_CLOSING) { interruptible_sleep_on(&info->close_wait); -#ifdef SERIAL_DO_RESTART - return ((info->flags & ZILOG_HUP_NOTIFY) ? - -EAGAIN : -ERESTARTSYS); -#else return -EAGAIN; -#endif } /* @@ -2139,14 +2136,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp, set_current_state(TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp) || !(info->flags & ZILOG_INITIALIZED)) { -#ifdef SERIAL_DO_RESTART - if (info->flags & ZILOG_HUP_NOTIFY) - retval = -EAGAIN; - else - retval = -ERESTARTSYS; -#else retval = -EAGAIN; -#endif break; } if (!(info->flags & ZILOG_CLOSING) && @@ -2222,12 +2212,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp) (info->flags & ZILOG_CLOSING)) { if (info->flags & ZILOG_CLOSING) interruptible_sleep_on(&info->close_wait); -#ifdef SERIAL_DO_RESTART - return ((info->flags & ZILOG_HUP_NOTIFY) ? 
- -EAGAIN : -ERESTARTSYS); -#else return -EAGAIN; -#endif } /* -- cgit v1.2.3 From d62f5562b1a1ba6a081700f887876b3e2aaba1de Mon Sep 17 00:00:00 2001 From: Bruno Ducrot Date: Sun, 6 Jul 2003 23:04:55 -0700 Subject: [PATCH] powernow-k7 typo fix Due to a typo in powernow-k7.c, the values that correspond to the CPU core multiplier and the VID are swapped when we step up in frequency. --- arch/i386/kernel/cpu/cpufreq/powernow-k7.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 91a8a25f635a..36181c7a0c64 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -268,8 +268,8 @@ static void change_speed (unsigned int index) change_VID(vid); } else { /* Going up, so change VID first */ - change_VID(fid); - change_FID(vid); + change_VID(vid); + change_FID(fid); } -- cgit v1.2.3 From bd824b9fb179ceba690d683a79eeb6f0f0ff5f06 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Sun, 6 Jul 2003 23:29:28 -0700 Subject: [PATCH] ARM26 architecture update --- MAINTAINERS | 15 ++ arch/arm26/Config.help | 387 ---------------------------------------- arch/arm26/Kconfig | 156 ++-------------- arch/arm26/config.in | 151 ---------------- arch/arm26/kernel/Makefile | 2 +- arch/arm26/kernel/arch.c | 30 ---- arch/arm26/kernel/asm-offsets.c | 2 +- arch/arm26/kernel/compat.c | 2 +- arch/arm26/kernel/dma.c | 2 +- arch/arm26/kernel/ecard.c | 20 +-- arch/arm26/kernel/irq.c | 9 +- arch/arm26/kernel/process.c | 4 +- arch/arm26/kernel/setup.c | 73 +++----- arch/arm26/kernel/traps.c | 7 +- arch/arm26/lib/Makefile | 15 +- arch/arm26/machine/Makefile | 2 +- arch/arm26/machine/arch.c | 36 ---- arch/arm26/machine/irq.c | 4 +- arch/arm26/mm/init.c | 2 +- arch/arm26/mm/mm-memc.c | 2 - include/asm-arm26/arch.h | 62 ------- include/asm-arm26/bug.h | 11 +- include/asm-arm26/bugs.h | 2 +- include/asm-arm26/ecard.h | 3 +
include/asm-arm26/mach-types.h | 1 + include/asm-arm26/pgalloc.h | 4 +- include/asm-arm26/pgtable.h | 2 +- include/asm-arm26/statfs.h | 21 +-- include/asm-arm26/thread_info.h | 2 +- 29 files changed, 105 insertions(+), 924 deletions(-) delete mode 100644 arch/arm26/Config.help delete mode 100644 arch/arm26/config.in delete mode 100644 arch/arm26/kernel/arch.c delete mode 100644 arch/arm26/machine/arch.c delete mode 100644 include/asm-arm26/arch.h diff --git a/MAINTAINERS b/MAINTAINERS index c4bb8d3ae788..6590dd21b509 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -245,6 +245,21 @@ M: jschlst@samba.org L: linux-atalk@lists.netspace.org S: Maintained +ARM26 ARCHITECTURE +P: Ian Molton +M: spyro@f2s.com +S: Maintained + +ARM26/ARCHIMEDES +P: Ian Molton +M: spyro@f2s.com +S: Maintained + +ARM26/A5000 +P: John Appleby +M: john@dnsworld.co.uk +S: Maintained + ARM MFM AND FLOPPY DRIVERS P: Dave Gilbert M: linux@treblig.org diff --git a/arch/arm26/Config.help b/arch/arm26/Config.help deleted file mode 100644 index a2a7f13d4eff..000000000000 --- a/arch/arm26/Config.help +++ /dev/null @@ -1,387 +0,0 @@ -CONFIG_ARM - The ARM series is a line of low-power-consumption RISC chip designs - licensed by ARM ltd and targeted at embedded applications. - -CONFIG_IDE - If you say Y here, your kernel will be able to manage low cost mass - storage units such as ATA/(E)IDE and ATAPI units. The most common - cases are IDE hard drives and ATAPI CD-ROM drives. - - If your system is pure SCSI and doesn't use these interfaces, you - can say N here. - - Integrated Disk Electronics (IDE aka ATA-1) is a connecting standard - for mass storage units such as hard disks. It was designed by - Western Digital and Compaq Computer in 1984. It was then named - ST506. Quite a number of disks use the IDE interface. - - AT Attachment (ATA) is the superset of the IDE specifications. - ST506 was also called ATA-1. - - Fast-IDE is ATA-2 (also named Fast ATA), Enhanced IDE (EIDE) is - ATA-3. 
It provides support for larger disks (up to 8.4GB by means of - the LBA standard), more disks (4 instead of 2) and for other mass - storage units such as tapes and cdrom. UDMA/33 (aka UltraDMA/33) is - ATA-4 and provides faster (and more CPU friendly) transfer modes - than previous PIO (Programmed processor Input/Output) from previous - ATA/IDE standards by means of fast DMA controllers. - - ATA Packet Interface (ATAPI) is a protocol used by EIDE tape and - CD-ROM drives, similar in many respects to the SCSI protocol. - - SMART IDE (Self Monitoring, Analysis and Reporting Technology) was - designed in order to prevent data corruption and disk crash by - detecting pre hardware failure conditions (heat, access time, and - the like...). Disks built since June 1995 may follow this standard. - The kernel itself don't manage this; however there are quite a - number of user programs such as smart that can query the status of - SMART parameters disk. - - If you want to compile this driver as a module ( = code which can be - inserted in and removed from the running kernel whenever you want), - say M here and read . The module - will be called ide.o. - - For further information, please read . - - If unsure, say Y. - -CONFIG_ISA - Find out whether you have ISA slots on your motherboard. ISA is the - name of a bus system, i.e. the way the CPU talks to the other stuff - inside your box. Other bus systems are PCI, EISA, MicroChannel - (MCA) or VESA. ISA is an older system, now being displaced by PCI; - newer boards don't support it. If you have ISA, say Y, otherwise N. - -CONFIG_PREEMPT - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. 
Say N if you are unsure. - -CONFIG_MCA - MicroChannel Architecture is found in some IBM PS/2 machines and - laptops. It is a bus system similar to PCI or ISA. See - (and especially the web page given - there) before attempting to build an MCA bus kernel. - -CONFIG_EISA - The Extended Industry Standard Architecture (EISA) bus was - developed as an open alternative to the IBM MicroChannel bus. - - The EISA bus provided some of the features of the IBM MicroChannel - bus while maintaining backward compatibility with cards made for - the older ISA bus. The EISA bus saw limited use between 1988 and - 1995 when it was made obsolete by the PCI bus. - - Say Y here if you are building a kernel for an EISA-based machine. - - Otherwise, say N. - -CONFIG_HOTPLUG - Say Y here if you want to plug devices into your computer while - the system is running, and be able to use them quickly. In many - cases, the devices can likewise be unplugged at any time too. - - One well known example of this is PCMCIA- or PC-cards, credit-card - size devices such as network cards, modems or hard drives which are - plugged into slots found on all modern laptop computers. Another - example, used on modern desktops as well as laptops, is USB. - - Enable HOTPLUG and KMOD, and build a modular kernel. Get agent - software (at ) and install it. - Then your kernel will automatically call out to a user mode "policy - agent" (/sbin/hotplug) to load modules and set up software needed - to use devices as you hotplug them. - -CONFIG_KCORE_ELF - If you enabled support for /proc file system then the file - /proc/kcore will contain the kernel core image. This can be used - in gdb: - - $ cd /usr/src/linux ; gdb vmlinux /proc/kcore - - You have two choices here: ELF and A.OUT. Selecting ELF will make - /proc/kcore appear in ELF core format as defined by the Executable - and Linking Format specification. 
Selecting A.OUT will choose the - old "a.out" format which may be necessary for some old versions - of binutils or on some architectures. - - This is especially useful if you have compiled the kernel with the - "-g" option to preserve debugging information. It is mainly used - for examining kernel data structures on the live kernel so if you - don't understand what this means or are not a kernel hacker, just - leave it at its default value ELF. - -CONFIG_KCORE_AOUT - Not necessary unless you're using a very out-of-date binutils - version. You probably want KCORE_ELF. - -CONFIG_BINFMT_ELF - ELF (Executable and Linkable Format) is a format for libraries and - executables used across different architectures and operating - systems. Saying Y here will enable your kernel to run ELF binaries - and enlarge it by about 13 KB. ELF support under Linux has now all - but replaced the traditional Linux a.out formats (QMAGIC and ZMAGIC) - because it is portable (this does *not* mean that you will be able - to run executables from different architectures or operating systems - however) and makes building run-time libraries very easy. Many new - executables are distributed solely in ELF format. You definitely - want to say Y here. - - Information about ELF is contained in the ELF HOWTO available from - . - - If you find that after upgrading from Linux kernel 1.2 and saying Y - here, you still can't run any ELF binaries (they just crash), then - you'll have to install the newest ELF runtime libraries, including - ld.so (check the file for location and - latest version). - - If you want to compile this as a module ( = code which can be - inserted in and removed from the running kernel whenever you want), - say M here and read . The module - will be called binfmt_elf.o. Saying M or N here is dangerous because - some crucial programs on your system might be in ELF format. 
- -CONFIG_BINFMT_AOUT - A.out (Assembler.OUTput) is a set of formats for libraries and - executables used in the earliest versions of UNIX. Linux used the - a.out formats QMAGIC and ZMAGIC until they were replaced with the - ELF format. - - As more and more programs are converted to ELF, the use for a.out - will gradually diminish. If you disable this option it will reduce - your kernel by one page. This is not much and by itself does not - warrant removing support. However its removal is a good idea if you - wish to ensure that absolutely none of your programs will use this - older executable format. If you don't know what to answer at this - point then answer Y. If someone told you "You need a kernel with - QMAGIC support" then you'll have to say Y here. You may answer M to - compile a.out support as a module and later load the module when you - want to use a program or library in a.out format. The module will be - called binfmt_aout.o. Saying M or N here is dangerous though, - because some crucial programs on your system might still be in A.OUT - format. - -CONFIG_BINFMT_MISC - If you say Y here, it will be possible to plug wrapper-driven binary - formats into the kernel. You will like this especially when you use - programs that need an interpreter to run like Java, Python or - Emacs-Lisp. It's also useful if you often run DOS executables under - the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO, available from - ). Once you have - registered such a binary class with the kernel, you can start one of - those programs simply by typing in its name at a shell prompt; Linux - will automatically feed it to the correct interpreter. - - You can do other nice things, too. Read the file - to learn how to use this - feature, and for information about how - to include Java support. - - You must say Y to "/proc file system support" (CONFIG_PROC_FS) to - use this part of the kernel. 
- - You may say M here for module support and later load the module when - you have use for it; the module is called binfmt_misc.o. If you - don't know what to answer at this point, say Y. - -CONFIG_SCSI - If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or - any other SCSI device under Linux, say Y and make sure that you know - the name of your SCSI host adapter (the card inside your computer - that "speaks" the SCSI protocol, also called SCSI controller), - because you will be asked for it. - - You also need to say Y here if you want support for the parallel - port version of the 100 MB IOMEGA ZIP drive. - - This driver is also available as a module ( = code which can be - inserted in and removed from the running kernel whenever you want). - The module will be called scsi_mod.o. If you want to compile it as - a module, say M here and read and - . However, do not compile this as a - module if your root file system (the one containing the directory /) - is located on a SCSI device. - -CONFIG_NETDEVICES - You can say N here if you don't intend to connect your Linux box to - any other computer at all or if all your connections will be over a - telephone line with a modem either via UUCP (UUCP is a protocol to - forward mail and news between unix hosts over telephone lines; read - the UUCP-HOWTO, available from - ) or dialing up a shell - account or a BBS, even using term (term is a program which gives you - almost full Internet connectivity if you have a regular dial up - shell account on some Internet connected Unix computer. Read - ). 
- - You'll have to say Y if your computer contains a network card that - you want to use under Linux (make sure you know its name because you - will be asked for it and read the Ethernet-HOWTO (especially if you - plan to use more than one network card under Linux)) or if you want - to use SLIP (Serial Line Internet Protocol is the protocol used to - send Internet traffic over telephone lines or null modem cables) or - CSLIP (compressed SLIP) or PPP (Point to Point Protocol, a better - and newer replacement for SLIP) or PLIP (Parallel Line Internet - Protocol is mainly used to create a mini network by connecting the - parallel ports of two local machines) or AX.25/KISS (protocol for - sending Internet traffic over amateur radio links). - - Make sure to read the NET-3-HOWTO. Eventually, you will have to read - Olaf Kirch's excellent and free book "Network Administrator's - Guide", to be found in . If - unsure, say Y. - -CONFIG_MAGIC_SYSRQ - If you say Y here, you will have some control over the system even - if the system crashes for example during kernel debugging (e.g., you - will be able to flush the buffer cache to disk, reboot the system - immediately or dump some status information). This is accomplished - by pressing various keys while holding SysRq (Alt+PrintScreen). It - also works on a serial console (on PC hardware at least), if you - send a BREAK and then within 5 seconds a command keypress. The - keys are documented in . Don't say Y - unless you really know what this hack does. - -CONFIG_ARCH_ARCA5K - This selects support for 'ARM26' CPUs (ARM 2 and 3) - -CONFIG_ARCH_A5K - Say Y here to to support the Acorn A5000. Linux can support the - internal IDE disk and CD-ROM interface, serial and parallel port, - and the floppy drive. Note that on some A5000s the floppy is - plugged into the wrong socket on the motherboard. - -CONFIG_ARCH_ARC - The Acorn Archimedes was an personal computer based on an 8MHz ARM2 - processor, released in 1987. 
It supported 512K of RAM and 2 800K - floppy disks. Picture and more detailed specifications at - . - -CONFIG_PAGESIZE_16 - Say Y here if your Archimedes or A5000 system has only 2MB of - memory, otherwise say N. The resulting kernel will not run on a - machine with 4MB of memory. - -CONFIG_FPE_NWFPE - Say Y to include the NWFPE floating point emulator in the kernel. - This is necessary to run most binaries. Linux does not currently - support floating point hardware so you need to say Y here even if - your machine has an FPA or floating point co-processor podule. - - It is also possible to say M to build the emulator as a module - (nwfpe.o) or indeed to leave it out altogether. However, unless you - know what you are doing this can easily render your machine - unbootable. Saying Y is the safe option. - - You may say N here if you are going to load the Acorn FPEmulator - early in the bootup. - -CONFIG_FPE_FASTFPE - Say Y here to include the FAST floating point emulator in the kernel. - This is an experimental much faster emulator which now also has full - precision for the mantissa. It does not support any exceptions. - It is very simple, and approximately 3-6 times faster than NWFPE. - - It should be sufficient for most programs. It may be not suitable - for scientific calculations, but you have to check this for yourself. - If you do not feel you need a faster FP emulation you should better - choose NWFPE. - - It is also possible to say M to build the emulator as a module - (fastfpe.o). But keep in mind that you should only load the FP - emulator early in the bootup. You should never change from NWFPE to - FASTFPE or vice versa in an active system! - -CONFIG_DEBUG_ERRORS - This option controls verbose debugging information which can be - printed when the kernel detects an internal error. This debugging - information is useful to kernel hackers when tracking down problems, - but mostly meaningless to other people. 
It's safe to say Y unless - you are concerned with the code size or don't want to see these - messages. - -CONFIG_NO_FRAME_POINTER - If you say Y here, the resulting kernel will be slightly smaller and - faster. However, when a problem occurs with the kernel, the - information that is reported is severely limited. Most people - should say N here. - -CONFIG_DEBUG_USER - When a user program crashes due to an exception, the kernel can - print a brief message explaining what the problem was. This is - sometimes helpful for debugging but serves no purpose on a - production system. Most people should say N here. - -CONFIG_DEBUG_INFO - Say Y here to include source-level debugging information in the - `vmlinux' binary image. This is handy if you want to use gdb or - addr2line to debug the kernel. It has no impact on the in-memory - footprint of the running kernel but it can increase the amount of - time and disk space needed for compilation of the kernel. If in - doubt say N. - -CONFIG_DEBUG_LL - Say Y here to include definitions of printascii, printchar, printhex - in the kernel. This is helpful if you are debugging code that - executes before the console is initialized. - -CONFIG_NO_PGT_CACHE - Normally the kernel maintains a `quicklist' of preallocated - pagetable structures in order to increase performance. On machines - with very few pages this may however be a loss. Say Y here to - disable the pgtable cache. - -CONFIG_ARTHUR - Say Y here to include the kernel code necessary if you want to run - Acorn RISC OS/Arthur binaries under Linux. This code is still very - experimental; if this sounds frightening, say N and sleep in peace. - You can also say M here to compile this support as a module (which - will be called arthur.o). - -CONFIG_CMDLINE - On some architectures (EBSA110 and CATS), there is currently no way - for the boot loader to pass arguments to the kernel. 
For these - architectures, you should supply some command-line options at build - time by entering them here. As a minimum, you should specify the - memory size and the root device (e.g., mem=64M root=/dev/nfs). - -CONFIG_DEBUG_KERNEL - Say Y here if you are developing drivers or trying to debug and - identify kernel problems. - -CONFIG_DEBUG_SLAB - Say Y here to have the kernel do limited verification on memory - allocation as well as poisoning memory on free to catch use of freed - memory. - -CONFIG_DEBUG_SPINLOCK - Say Y here and build SMP to catch missing spinlock initialization - and certain other kinds of spinlock errors commonly made. This is - best used in conjunction with the NMI watchdog so that spinlock - deadlocks are also debuggable. - -CONFIG_DEBUG_BUGVERBOSE - Say Y here to make BUG() panics output the file name and line number - of the BUG call as well as the EIP and oops trace. This aids - debugging but costs about 70-100K of memory. - -CONFIG_ZBOOT_ROM - Say Y here if you intend to execute your compressed kernel image (zImage) - directly from ROM or flash. If unsure, say N. - -CONFIG_ZBOOT_ROM_TEXT - The base address for zImage. Unless you have special requirements, you - should not change this value. - -CONFIG_ZBOOT_ROM_BSS - The base address of 64KiB of read/write memory, which must be available - while the decompressor is running. Unless you have special requirements, - you should not change this value. - diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig index b62382271dad..31d2e5de2c32 100644 --- a/arch/arm26/Kconfig +++ b/arch/arm26/Kconfig @@ -8,15 +8,8 @@ mainmenu "Linux Kernel Configuration" config ARM bool default y - help - The ARM series is a line of low-power-consumption RISC chip designs - licensed by ARM ltd and targeted at embedded applications and - handhelds such as the Compaq IPAQ. ARM-based PCs are no longer - manufactured, but legacy ARM-based PC hardware remains popular in - Europe. 
There is an ARM Linux project with a web page at - . - -config ARCH_ARCA5K + +config ARM26 bool default y @@ -67,25 +60,25 @@ comment "Archimedes/A5000 Implementations (select only ONE)" config ARCH_ARC bool "Archimedes" - depends on ARCH_ARCA5K help - The Acorn Archimedes was an personal computer based on an 8K ARM2 - processor, released in 1987. It supported 512K of RAM and 2 800K - floppy disks. Picture and more detailed specifications at - . + Say Y to support the Acorn Archimedes. + + The Acorn Archimedes was an personal computer based on an 8MHz ARM2 + processor, released in 1987. It supported up to 16MB of RAM in + later models and floppy, harddisc, ethernet etc. config ARCH_A5K bool "A5000" - depends on ARCH_ARCA5K help - Say Y here to to support the Acorn A5000. Linux can support the + Say Y here to to support the Acorn A5000. + + Linux can support the internal IDE disk and CD-ROM interface, serial and parallel port, and the floppy drive. Note that on some A5000s the floppy is plugged into the wrong socket on the motherboard. config PAGESIZE_16 - bool "2MB physical memory" - depends on ARCH_ARCA5K + bool "2MB physical memory (broken)" help Say Y here if your Archimedes or A5000 system has only 2MB of memory, otherwise say N. The resulting kernel will not run on a @@ -186,81 +179,7 @@ config KCORE_AOUT endchoice -config BINFMT_AOUT - tristate "Kernel support for a.out binaries" - ---help--- - A.out (Assembler.OUTput) is a set of formats for libraries and - executables used in the earliest versions of UNIX. Linux used the - a.out formats QMAGIC and ZMAGIC until they were replaced with the - ELF format. - - As more and more programs are converted to ELF, the use for a.out - will gradually diminish. If you disable this option it will reduce - your kernel by one page. This is not much and by itself does not - warrant removing support. 
However its removal is a good idea if you - wish to ensure that absolutely none of your programs will use this - older executable format. If you don't know what to answer at this - point then answer Y. If someone told you "You need a kernel with - QMAGIC support" then you'll have to say Y here. You may answer M to - compile a.out support as a module and later load the module when you - want to use a program or library in a.out format. The module will be - called binfmt_aout. Saying M or N here is dangerous though, - because some crucial programs on your system might still be in A.OUT - format. - -config BINFMT_ELF - tristate "Kernel support for ELF binaries" - ---help--- - ELF (Executable and Linkable Format) is a format for libraries and - executables used across different architectures and operating - systems. Saying Y here will enable your kernel to run ELF binaries - and enlarge it by about 13 KB. ELF support under Linux has now all - but replaced the traditional Linux a.out formats (QMAGIC and ZMAGIC) - because it is portable (this does *not* mean that you will be able - to run executables from different architectures or operating systems - however) and makes building run-time libraries very easy. Many new - executables are distributed solely in ELF format. You definitely - want to say Y here. - - Information about ELF is contained in the ELF HOWTO available from - . - - If you find that after upgrading from Linux kernel 1.2 and saying Y - here, you still can't run any ELF binaries (they just crash), then - you'll have to install the newest ELF runtime libraries, including - ld.so (check the file for location and - latest version). - - If you want to compile this as a module ( = code which can be - inserted in and removed from the running kernel whenever you want), - say M here and read . The module - will be called binfmt_elf. Saying M or N here is dangerous because - some crucial programs on your system might be in ELF format. 
- -config BINFMT_MISC - tristate "Kernel support for MISC binaries" - ---help--- - If you say Y here, it will be possible to plug wrapper-driven binary - formats into the kernel. You will like this especially when you use - programs that need an interpreter to run like Java, Python or - Emacs-Lisp. It's also useful if you often run DOS executables under - the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO, available from - ). Once you have - registered such a binary class with the kernel, you can start one of - those programs simply by typing in its name at a shell prompt; Linux - will automatically feed it to the correct interpreter. - - You can do other nice things, too. Read the file - to learn how to use this - feature, and for information about how - to include Java support. - - You must say Y to "/proc file system support" (CONFIG_PROC_FS) to - use this part of the kernel. - - You may say M here for module support and later load the module when - you have use for it; the module is called binfmt_misc. If you - don't know what to answer at this point, say Y. +source "fs/Kconfig.binfmt" config PREEMPT bool "Preemptible Kernel (EXPERIMENTAL)" @@ -311,59 +230,8 @@ source "net/Kconfig" source "net/irda/Kconfig" -menu "ATA/ATAPI/MFM/RLL support" - -config IDE - tristate "ATA/ATAPI/MFM/RLL support" - ---help--- - If you say Y here, your kernel will be able to manage low cost mass - storage units such as ATA/(E)IDE and ATAPI units. The most common - cases are IDE hard drives and ATAPI CD-ROM drives. - - If your system is pure SCSI and doesn't use these interfaces, you - can say N here. - - Integrated Disk Electronics (IDE aka ATA-1) is a connecting standard - for mass storage units such as hard disks. It was designed by - Western Digital and Compaq Computer in 1984. It was then named - ST506. Quite a number of disks use the IDE interface. - - AT Attachment (ATA) is the superset of the IDE specifications. - ST506 was also called ATA-1. 
- - Fast-IDE is ATA-2 (also named Fast ATA), Enhanced IDE (EIDE) is - ATA-3. It provides support for larger disks (up to 8.4GB by means of - the LBA standard), more disks (4 instead of 2) and for other mass - storage units such as tapes and cdrom. UDMA/33 (aka UltraDMA/33) is - ATA-4 and provides faster (and more CPU friendly) transfer modes - than previous PIO (Programmed processor Input/Output) from previous - ATA/IDE standards by means of fast DMA controllers. - - ATA Packet Interface (ATAPI) is a protocol used by EIDE tape and - CD-ROM drives, similar in many respects to the SCSI protocol. - - SMART IDE (Self Monitoring, Analysis and Reporting Technology) was - designed in order to prevent data corruption and disk crash by - detecting pre hardware failure conditions (heat, access time, and - the like...). Disks built since June 1995 may follow this standard. - The kernel itself don't manage this; however there are quite a - number of user programs such as smart that can query the status of - SMART parameters disk. - - If you want to compile this driver as a module ( = code which can be - inserted in and removed from the running kernel whenever you want), - say M here and read . The module - will be called ide. - - For further information, please read . - - If unsure, say Y. - source "drivers/ide/Kconfig" -endmenu - - source "drivers/scsi/Kconfig" source "drivers/isdn/Kconfig" diff --git a/arch/arm26/config.in b/arch/arm26/config.in deleted file mode 100644 index 006a435d7591..000000000000 --- a/arch/arm26/config.in +++ /dev/null @@ -1,151 +0,0 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/config-language.txt. 
-# -mainmenu_name "Linux Kernel Configuration" - -define_bool CONFIG_ARM y -define_bool CONFIG_EISA n -define_bool CONFIG_SBUS n -define_bool CONFIG_MCA n -define_bool CONFIG_UID16 y -define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y -define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n -define_bool CONFIG_GENERIC_BUST_SPINLOCK n -define_bool CONFIG_GENERIC_ISA_DMA n - -source init/Config.in - -mainmenu_option next_comment -comment 'System Type' - -define_bool CONFIG_ARCH_ARCA5K -bool ' Archimedes' CONFIG_ARCH_ARC -bool ' A5000' CONFIG_ARCH_A5K - -# Definitions to make life easier -define_bool CONFIG_ARCH_ACORN y -define_bool CONFIG_CPU_32 n -define_bool CONFIG_CPU_26 y -bool '2MB physical memory' CONFIG_PAGESIZE_16 - -endmenu - -mainmenu_option next_comment -comment 'General setup' - -define_bool CONFIG_FIQ y - -# Compressed boot loader in ROM. Yes, we really want to ask about -# TEXT and BSS so we preserve their values in the config files. -bool 'Compressed boot loader in ROM/flash' CONFIG_ZBOOT_ROM -hex 'Compressed ROM boot loader base address' CONFIG_ZBOOT_ROM_TEXT 0 -hex 'Compressed ROM boot loader BSS address' CONFIG_ZBOOT_ROM_BSS 0 - -comment 'At least one math emulation must be selected' -define_bool CONFIG_FPE_NWFPE y -choice 'Kernel core (/proc/kcore) format' \ - "ELF CONFIG_KCORE_ELF \ - A.OUT CONFIG_KCORE_AOUT" ELF -tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF -tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC -string 'Default kernel command string' CONFIG_CMDLINE "" - -define_bool CONFIG_ALIGNMENT_TRAP n -endmenu - -source drivers/parport/Config.in -source drivers/pnp/Config.in -source drivers/block/Config.in -source drivers/md/Config.in -source drivers/acorn/block/Config.in - -if [ "$CONFIG_NET" = "y" ]; then - source net/Config.in - - mainmenu_option next_comment - comment 'Network device support' - - bool 'Network device support' CONFIG_NETDEVICES - if [ 
"$CONFIG_NETDEVICES" = "y" ]; then - source drivers/net/Config.in - fi - endmenu -fi - -mainmenu_option next_comment -comment 'ATA/ATAPI/MFM/RLL support' - -tristate 'ATA/ATAPI/MFM/RLL support' CONFIG_IDE - -if [ "$CONFIG_IDE" != "n" ]; then - source drivers/ide/Config.in -else - define_bool CONFIG_BLK_DEV_HD n -fi -endmenu - -mainmenu_option next_comment -comment 'SCSI support' - -tristate 'SCSI support' CONFIG_SCSI -endmenu - -source drivers/isdn/Config.in - -# -# input before char - char/joystick depends on it. As does USB. -# -source drivers/input/Config.in - -source drivers/char/Config.in -if [ "$CONFIG_BUSMOUSE" = "y" ]; then - define_bool CONFIG_KBDMOUSE y -fi - -source drivers/media/Config.in - -source fs/Config.in - -if [ "$CONFIG_VT" = "y" ]; then - mainmenu_option next_comment - comment 'Console drivers' - source drivers/video/Config.in - endmenu -fi - -mainmenu_option next_comment -comment 'Sound' - -tristate 'Sound card support' CONFIG_SOUND -if [ "$CONFIG_SOUND" != "n" ]; then - source sound/Config.in -fi -endmenu - -source drivers/misc/Config.in -source drivers/usb/Config.in - -mainmenu_option next_comment -comment 'Kernel hacking' - -# Always compile kernel with framepointer (until 2.4 real comes out) -# Bug reports aren't much use without this. 
-bool 'Compile kernel without frame pointer' CONFIG_NO_FRAME_POINTER -bool 'Verbose user fault messages' CONFIG_DEBUG_USER -bool 'Include debugging information in kernel binary' CONFIG_DEBUG_INFO - -bool 'Kernel debugging' CONFIG_DEBUG_KERNEL -dep_bool ' Debug memory allocations' CONFIG_DEBUG_SLAB $CONFIG_DEBUG_KERNEL -dep_bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ $CONFIG_DEBUG_KERNEL -dep_bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK $CONFIG_DEBUG_KERNEL -dep_bool ' Wait queue debugging' CONFIG_DEBUG_WAITQ $CONFIG_DEBUG_KERNEL -dep_bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE $CONFIG_DEBUG_KERNEL -dep_bool ' Verbose kernel error messages' CONFIG_DEBUG_ERRORS $CONFIG_DEBUG_KERNEL -# These options are only for real kernel hackers who want to get their hands dirty. -dep_bool ' Kernel low-level debugging functions' CONFIG_DEBUG_LL $CONFIG_DEBUG_KERNEL -endmenu - -source security/Config.in -source lib/Config.in diff --git a/arch/arm26/kernel/Makefile b/arch/arm26/kernel/Makefile index 01b44b589dc8..59a75ea9a8dd 100644 --- a/arch/arm26/kernel/Makefile +++ b/arch/arm26/kernel/Makefile @@ -6,7 +6,7 @@ ENTRY_OBJ = entry.o # Object file lists. -obj-y := arch.o compat.o dma.o entry.o irq.o \ +obj-y := compat.o dma.o entry.o irq.o \ process.o ptrace.o semaphore.o setup.o signal.o sys_arm.o \ time.o traps.o ecard.o time-acorn.o dma.o \ ecard.o fiq.o time.o diff --git a/arch/arm26/kernel/arch.c b/arch/arm26/kernel/arch.c deleted file mode 100644 index 186de4fd6be3..000000000000 --- a/arch/arm26/kernel/arch.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * linux/arch/arm/kernel/arch.c - * - * Architecture specific fixups. 
- */ -#include -#include -#include - -#include -#include -#include -#include -#include - -unsigned int vram_size; - - -unsigned int memc_ctrl_reg; -unsigned int number_mfm_drives; - -static int __init parse_tag_acorn(const struct tag *tag) -{ - memc_ctrl_reg = tag->u.acorn.memc_control_reg; - number_mfm_drives = tag->u.acorn.adfsdrives; - return 0; -} - -__tagtable(ATAG_ACORN, parse_tag_acorn); - diff --git a/arch/arm26/kernel/asm-offsets.c b/arch/arm26/kernel/asm-offsets.c index 1e8764884737..b8d0442de9b5 100644 --- a/arch/arm26/kernel/asm-offsets.c +++ b/arch/arm26/kernel/asm-offsets.c @@ -1,7 +1,7 @@ /* * Copyright (C) 1995-2001 Russell King * 2001-2002 Keith Owens - * 2003-? Ian Molton + * 2003 Ian Molton * * Generate definitions needed by assembly language modules. * This code generates raw asm output which is post-processed to extract diff --git a/arch/arm26/kernel/compat.c b/arch/arm26/kernel/compat.c index 0f8644fe6a86..e3717ab1c043 100644 --- a/arch/arm26/kernel/compat.c +++ b/arch/arm26/kernel/compat.c @@ -26,7 +26,7 @@ #include #include -#include +//#include //#include /* diff --git a/arch/arm26/kernel/dma.c b/arch/arm26/kernel/dma.c index a6ae4e23651d..b4b8c7c4bbe8 100644 --- a/arch/arm26/kernel/dma.c +++ b/arch/arm26/kernel/dma.c @@ -2,7 +2,7 @@ * linux/arch/arm/kernel/dma.c * * Copyright (C) 1995-2000 Russell King - * 2003-? Ian Molton + * 2003 Ian Molton * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/arch/arm26/kernel/ecard.c b/arch/arm26/kernel/ecard.c index d69497a92811..b57a2e6b87eb 100644 --- a/arch/arm26/kernel/ecard.c +++ b/arch/arm26/kernel/ecard.c @@ -11,9 +11,8 @@ * Find all installed expansion cards, and handle interrupts from them. * * Created from information from Acorns RiscOS3 PRMs - * - * 08-Dec-1996 RMK Added code for the 9'th expansion card - the ether - * podule slot. 
+ * 15-Jun-2003 IM Modified from ARM32 (RiscPC capable) version + * 10-Jan-1999 RMK Run loaders in a simulated RISC OS environment. * 06-May-1997 RMK Added blacklist for cards whose loader doesn't work. * 12-Sep-1997 RMK Created new handling of interrupt enables/disables * - cards can now register their own routine to control @@ -21,10 +20,7 @@ * 29-Sep-1997 RMK Expansion card interrupt hardware not being re-enabled * on reset from Linux. (Caused cards not to respond * under RiscOS without hard reset). - * 15-Feb-1998 RMK Added DMA support - * 12-Sep-1998 RMK Added EASI support - * 10-Jan-1999 RMK Run loaders in a simulated RISC OS environment. - * 17-Apr-1999 RMK Support for EASI Type C cycles. + * */ #define ECARD_C @@ -527,8 +523,7 @@ static int ecard_prints(char *buffer, ecard_t *ec) { char *start = buffer; - buffer += sprintf(buffer, " %d: %s ", ec->slot_no, - ec->type == ECARD_EASI ? "EASI" : " "); + buffer += sprintf(buffer, " %d: ", ec->slot_no); if (ec->cid.id == 0) { struct in_chunk_dir incd; @@ -674,7 +669,7 @@ ecard_probe(int slot, card_type_t type) memset(ec, 0, sizeof(ecard_t)); ec->slot_no = slot; - ec->type = type; + ec->type = type; ec->irq = NO_IRQ; ec->fiq = NO_IRQ; ec->dma = NO_DMA; @@ -770,9 +765,8 @@ static int __init ecard_init(void) printk("Probing expansion cards\n"); - for (slot = 0; slot < 8; slot ++) { - if (ecard_probe(slot, ECARD_EASI) == -ENODEV) - ecard_probe(slot, ECARD_IOC); + for (slot = 0; slot < 4; slot ++) { + ecard_probe(slot, ECARD_IOC); } irqhw = ecard_probeirqhw(); diff --git a/arch/arm26/kernel/irq.c b/arch/arm26/kernel/irq.c index 3b4965a28193..ddd2ea591afd 100644 --- a/arch/arm26/kernel/irq.c +++ b/arch/arm26/kernel/irq.c @@ -41,7 +41,7 @@ * not be set too low to prevent false triggering. Conversely, if it * is set too high, then you could miss a stuck IRQ. * - * Maybe we ought to set a timer and re-enable the IRQ at a later time? + * FIXME Maybe we ought to set a timer and re-enable the IRQ at a later time? 
*/ #define MAX_IRQ_CNT 100000 @@ -49,7 +49,6 @@ static volatile unsigned long irq_err_count; static spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED; struct irqdesc irq_desc[NR_IRQS]; -void (*init_arch_irq)(void) __initdata = NULL; /* * Dummy mask/unmask handler @@ -88,7 +87,6 @@ void disable_irq(unsigned int irq) { struct irqdesc *desc = irq_desc + irq; unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); if (!desc->depth++) desc->enabled = 0; @@ -114,7 +112,7 @@ void enable_irq(unsigned int irq) spin_lock_irqsave(&irq_controller_lock, flags); if (unlikely(!desc->depth)) { printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); + __builtin_return_address(0)); //FIXME bum addresses reported - why? } else if (!--desc->depth) { desc->probing = 0; desc->enabled = 1; @@ -187,7 +185,6 @@ __do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs) unsigned int status; spin_unlock(&irq_controller_lock); - if (!(action->flags & SA_INTERRUPT)) local_irq_enable(); @@ -700,6 +697,6 @@ void __init init_IRQ(void) for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++) *desc = bad_irq_desc; - init_arch_irq(); + arc_init_irq(); init_dma(); } diff --git a/arch/arm26/kernel/process.c b/arch/arm26/kernel/process.c index 6a4283a30199..bf8e0367be88 100644 --- a/arch/arm26/kernel/process.c +++ b/arch/arm26/kernel/process.c @@ -227,7 +227,9 @@ extern void free_page_8k(unsigned long page); #define ll_alloc_task_struct() ((struct thread_info *)get_page_8k(GFP_KERNEL)) #define ll_free_task_struct(p) free_page_8k((unsigned long)(p)) -struct thread_info *alloc_thread_info(void) +//FIXME - do we use *task param below looks like we dont, which is ok? +//FIXME - if EXTRA_TASK_STRUCT is zero we can optimise the below away permanently. *IF* its supposed to be zero. 
+struct thread_info *alloc_thread_info(struct task_struct *task) { struct thread_info *thread = NULL; diff --git a/arch/arm26/kernel/setup.c b/arch/arm26/kernel/setup.c index f4523c16ae0f..a5e07d757be7 100644 --- a/arch/arm26/kernel/setup.c +++ b/arch/arm26/kernel/setup.c @@ -30,7 +30,6 @@ #include #include -#include #include #ifndef MEM_SIZE @@ -66,6 +65,8 @@ unsigned int system_rev; unsigned int system_serial_low; unsigned int system_serial_high; unsigned int elf_hwcap; +unsigned int memc_ctrl_reg; +unsigned int number_mfm_drives; struct processor processor; @@ -147,33 +148,6 @@ static void __init setup_processor(void) cpu_proc_init(); } -static struct machine_desc * __init setup_machine(unsigned int nr) -{ - extern struct machine_desc __arch_info_begin, __arch_info_end; - struct machine_desc *list; - - /* - * locate architecture in the list of supported architectures. - */ - for (list = &__arch_info_begin; list < &__arch_info_end; list++) - if (list->nr == nr) - break; - - /* - * If the architecture type is not recognised, then we - * can co nothing... - */ - if (list >= &__arch_info_end) { - printk("Architecture configuration botched (nr %d), unable " - "to continue.\n", nr); - while (1); - } - - printk("Machine: %s\n", list->name); - - return list; -} - /* * Initial parsing of the command line. We need to pick out the * memory size. We look for mem=size@start, where start and size @@ -239,7 +213,7 @@ setup_ramdisk(int doload, int prompt, int image_start, unsigned int rd_sz) } static void __init -request_standard_resources(struct meminfo *mi, struct machine_desc *mdesc) +request_standard_resources(struct meminfo *mi) { struct resource *res; int i; @@ -274,22 +248,18 @@ request_standard_resources(struct meminfo *mi, struct machine_desc *mdesc) request_resource(res, &kernel_data); } - if (mdesc->video_start) { +/* FIXME - needed? 
if (mdesc->video_start) { video_ram.start = mdesc->video_start; video_ram.end = mdesc->video_end; request_resource(&iomem_resource, &video_ram); - } + }*/ /* * Some machines don't have the possibility of ever - * possessing lp0, lp1 or lp2 + * possessing lp1 or lp2 */ - if (mdesc->reserve_lp0) + if (0) /* FIXME - need to do this for A5k at least */ request_resource(&ioport_resource, &lp0); - if (mdesc->reserve_lp1) - request_resource(&ioport_resource, &lp1); - if (mdesc->reserve_lp2) - request_resource(&ioport_resource, &lp2); } /* @@ -359,6 +329,15 @@ static int __init parse_tag_videotext(const struct tag *tag) __tagtable(ATAG_VIDEOTEXT, parse_tag_videotext); #endif +static int __init parse_tag_acorn(const struct tag *tag) +{ + memc_ctrl_reg = tag->u.acorn.memc_control_reg; + number_mfm_drives = tag->u.acorn.adfsdrives; + return 0; +} + +__tagtable(ATAG_ACORN, parse_tag_acorn); + static int __init parse_tag_ramdisk(const struct tag *tag) { setup_ramdisk((tag->u.ramdisk.flags & 1) == 0, @@ -467,15 +446,18 @@ static struct init_tags { void __init setup_arch(char **cmdline_p) { struct tag *tags = (struct tag *)&init_tags; - struct machine_desc *mdesc; char *from = default_command_line; setup_processor(); - mdesc = setup_machine(machine_arch_type); - machine_name = mdesc->name; + if(machine_arch_type == MACH_TYPE_A5K) + machine_name = "A5000"; + else if(machine_arch_type == MACH_TYPE_ARCHIMEDES) + machine_name = "Archimedes"; + else + machine_name = "UNKNOWN"; - if (mdesc->param_offset) - tags = (struct tag *)mdesc->param_offset; //FIXME - ugly? 
+ //FIXME - this may need altering when we get ROM images working + tags = (struct tag *)0x0207c000; /* * If we have the old style parameters, convert them to @@ -501,12 +483,7 @@ void __init setup_arch(char **cmdline_p) parse_cmdline(&meminfo, cmdline_p, from); bootmem_init(&meminfo); paging_init(&meminfo); - request_standard_resources(&meminfo, mdesc); - - /* - * Set up various architecture-specific pointers - */ - init_arch_irq = mdesc->init_irq; + request_standard_resources(&meminfo); #ifdef CONFIG_VT #if defined(CONFIG_DUMMY_CONSOLE) diff --git a/arch/arm26/kernel/traps.c b/arch/arm26/kernel/traps.c index eaf86c9fedd1..d0e1bc6cf474 100644 --- a/arch/arm26/kernel/traps.c +++ b/arch/arm26/kernel/traps.c @@ -175,6 +175,11 @@ void show_trace_task(struct task_struct *tsk) } } +/* FIXME - this is probably wrong.. */ +void show_stack(struct task_struct *task, unsigned long *sp) { + dump_mem("Stack: ", (unsigned long)sp, 8192+(unsigned long)task->thread_info); +} + spinlock_t die_lock = SPIN_LOCK_UNLOCKED; /* @@ -497,7 +502,7 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs) die_if_kernel("unknown data abort code", regs, instr); } -void __bug(const char *file, int line, void *data) +volatile void __bug(const char *file, int line, void *data) { printk(KERN_CRIT"kernel BUG at %s:%d!", file, line); if (data) diff --git a/arch/arm26/lib/Makefile b/arch/arm26/lib/Makefile index d327dc7f385e..d05d6b0fd3e2 100644 --- a/arch/arm26/lib/Makefile +++ b/arch/arm26/lib/Makefile @@ -4,9 +4,7 @@ # Copyright (C) 1995-2000 Russell King # -L_TARGET := lib.a - -obj-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ +lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ copy_page.o delay.o findbit.o memchr.o memcpy.o \ memset.o memzero.o setbit.o \ @@ -16,15 +14,12 @@ obj-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ ucmpdi2.o udivdi3.o lib1funcs.o ecard.o io-acorn.o \ floppydma.o io-readsb.o 
io-writesb.o io-writesl.o \ uaccess-kernel.o uaccess-user.o io-readsw-armv3.o \ - io-writesw-armv3.o io-readsl-armv3.o - -obj-m := -obj-n := - -obj-$(CONFIG_VT)+= kbd.o + io-writesw-armv3.o io-readsl-armv3.o ecard.o \ + io-acorn.o floppydma.o -obj-y += ecard.o io-acorn.o floppydma.o +lib-n := +lib-$(CONFIG_VT)+= kbd.o csumpartialcopy.o: csumpartialcopygeneric.S csumpartialcopyuser.o: csumpartialcopygeneric.S diff --git a/arch/arm26/machine/Makefile b/arch/arm26/machine/Makefile index f1211f9c9827..84642febab44 100644 --- a/arch/arm26/machine/Makefile +++ b/arch/arm26/machine/Makefile @@ -4,7 +4,7 @@ # Object file lists. -obj-y := arch.o dma.o irq.o oldlatches.o \ +obj-y := dma.o irq.o oldlatches.o \ small_page.o extra-y := head.o diff --git a/arch/arm26/machine/arch.c b/arch/arm26/machine/arch.c deleted file mode 100644 index 410c2852afaf..000000000000 --- a/arch/arm26/machine/arch.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * linux/arch/arm26/mach-arc/arch.c - * - * Copyright (C) 1998-2001 Russell King - * Copyright (C) 2003 Ian Molton - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Architecture specific fixups. 
- */ -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -extern void arc_init_irq(void); - -#ifdef CONFIG_ARCH_ARC -MACHINE_START(ARCHIMEDES, "Acorn-Archimedes") -#elif defined(CONFIG_ARCH_A5K) -MACHINE_START(A5K, "Acorn-A5000") -#endif - MAINTAINER("Ian Molton") - BOOT_PARAMS(0x0207c000) - INITIRQ(arc_init_irq) -MACHINE_END - diff --git a/arch/arm26/machine/irq.c b/arch/arm26/machine/irq.c index 1c5f88fb2e52..158a7d29efbc 100644 --- a/arch/arm26/machine/irq.c +++ b/arch/arm26/machine/irq.c @@ -73,7 +73,6 @@ static struct irqchip arc_a_chip = { static void arc_mask_irq_b(unsigned int irq) { unsigned int val, mask; - mask = 1 << (irq & 7); val = ioc_readb(IOC_IRQMASKB); ioc_writeb(val & ~mask, IOC_IRQMASKB); @@ -94,7 +93,7 @@ static struct irqchip arc_b_chip = { .unmask = arc_unmask_irq_b, }; -/* FIXME - JMA none of these functions are used in arm26 +/* FIXME - JMA none of these functions are used in arm26 currently static void arc_mask_irq_fiq(unsigned int irq) { unsigned int val, mask; @@ -124,6 +123,7 @@ void __init arc_init_irq(void) { unsigned int irq, flags; + /* Disable all IOC interrupt sources */ ioc_writeb(0, IOC_IRQMASKA); ioc_writeb(0, IOC_IRQMASKB); ioc_writeb(0, IOC_FIQMASK); diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c index 01c772bef70e..989f08c19c62 100644 --- a/arch/arm26/mm/init.c +++ b/arch/arm26/mm/init.c @@ -32,7 +32,7 @@ #include #include -#include +//#include #include #define TABLE_SIZE PTRS_PER_PTE * sizeof(pte_t)) diff --git a/arch/arm26/mm/mm-memc.c b/arch/arm26/mm/mm-memc.c index 15ec20e2750c..6b3d3ae806f4 100644 --- a/arch/arm26/mm/mm-memc.c +++ b/arch/arm26/mm/mm-memc.c @@ -70,11 +70,9 @@ void free_pgd_slow(pgd_t *pgd) pgd_t *get_pgd_slow(struct mm_struct *mm) { - void *pg2k; pgd_t *new_pgd, *init_pgd; pmd_t *new_pmd, *init_pmd; pte_t *new_pte, *init_pte; - struct mm_struct bob; new_pgd = alloc_pgd_table(); if (!new_pgd) diff --git a/include/asm-arm26/arch.h 
b/include/asm-arm26/arch.h deleted file mode 100644 index 1011bcc6d7ac..000000000000 --- a/include/asm-arm26/arch.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * linux/include/asm-arm/mach/arch.h - * - * Copyright (C) 2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * The size of struct machine_desc - * (for assembler code) - * FIXME - I count 45... or is this padding? - */ -#define SIZEOF_MACHINE_DESC 48 - -#ifndef __ASSEMBLY__ - -struct tag; - -struct machine_desc { - int nr; /* arch no FIXME - get rid */ - const char *name; /* architecture name */ - unsigned int param_offset; /* parameter page */ - - unsigned int video_start; /* start of video RAM */ - unsigned int video_end; /* end of video RAM */ - - unsigned int reserve_lp0 :1; /* never has lp0 */ - unsigned int reserve_lp1 :1; /* never has lp1 */ - unsigned int reserve_lp2 :1; /* never has lp2 */ - unsigned int soft_reboot :1; /* soft reboot */ - void (*fixup)(struct machine_desc *, - struct tag *, char **, - struct meminfo *); - void (*map_io)(void);/* IO mapping function */ - void (*init_irq)(void); -}; - -/* - * Set of macros to define architecture features. This is built into - * a table by the linker. 
- */ -#define MACHINE_START(_type,_name) \ -const struct machine_desc __mach_desc_##_type \ - __attribute__((__section__(".arch.info"))) = { \ - nr: MACH_TYPE_##_type, \ - name: _name, - -#define MAINTAINER(n) - -#define BOOT_PARAMS(_params) \ - param_offset: _params, - -#define INITIRQ(_func) \ - init_irq: _func, - -#define MACHINE_END \ -}; - -#endif diff --git a/include/asm-arm26/bug.h b/include/asm-arm26/bug.h index a92b1bc8eb1e..a1afde728b03 100644 --- a/include/asm-arm26/bug.h +++ b/include/asm-arm26/bug.h @@ -4,7 +4,7 @@ #include #ifdef CONFIG_DEBUG_BUGVERBOSE -extern void __bug(const char *file, int line, void *data); +extern volatile void __bug(const char *file, int line, void *data); /* give file/line information */ #define BUG() __bug(__FILE__, __LINE__, NULL) @@ -18,4 +18,13 @@ extern void __bug(const char *file, int line, void *data); #endif +#define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while(0) + +#define WARN_ON(condition) do { \ + if (unlikely((condition)!=0)) { \ + printk("Badness in %s at %s:%d\n", __FUNCTION__, __FILE__, __LINE__); \ + dump_stack(); \ + } \ +} while (0) + #endif diff --git a/include/asm-arm26/bugs.h b/include/asm-arm26/bugs.h index 665ab62c27ce..e99ac2e46d7f 100644 --- a/include/asm-arm26/bugs.h +++ b/include/asm-arm26/bugs.h @@ -1,5 +1,5 @@ /* - * linux/include/asm-arm/bugs.h + * linux/include/asm-arm26/bugs.h * * Copyright (C) 1995 Russell King * diff --git a/include/asm-arm26/ecard.h b/include/asm-arm26/ecard.h index 28af14fd7f75..8318a0c1df81 100644 --- a/include/asm-arm26/ecard.h +++ b/include/asm-arm26/ecard.h @@ -26,6 +26,9 @@ #define PROD_ACORN_ETHER1 0x0003 #define PROD_ACORN_MFM 0x000b +#define MANU_CCONCEPTS 0x0009 +#define PROD_CCONCEPTS_COLOURCARD 0x0050 + #define MANU_ANT2 0x0011 #define PROD_ANT_ETHER3 0x00a4 diff --git a/include/asm-arm26/mach-types.h b/include/asm-arm26/mach-types.h index 514b8032696c..b34045b78128 100644 --- a/include/asm-arm26/mach-types.h +++ 
b/include/asm-arm26/mach-types.h @@ -1,5 +1,6 @@ /* * Unlike ARM32 this is NOT automatically generated. DONT delete it + * Instead, consider FIXME-ing it so its auto-detected. */ #ifndef __ASM_ARM_MACH_TYPE_H diff --git a/include/asm-arm26/pgalloc.h b/include/asm-arm26/pgalloc.h index bf2e19512524..6437167b1ffe 100644 --- a/include/asm-arm26/pgalloc.h +++ b/include/asm-arm26/pgalloc.h @@ -55,9 +55,9 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) * is thrown away. It just cant be zero. -IM */ -#define pmd_alloc_one(mm,addr) ((pmd_t *)2); BUG() +#define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(pmd) do { } while (0) -#define pgd_populate(mm,pmd,pte) (0) +#define pgd_populate(mm,pmd,pte) BUG() extern pgd_t *get_pgd_slow(struct mm_struct *mm); extern void free_pgd_slow(pgd_t *pgd); diff --git a/include/asm-arm26/pgtable.h b/include/asm-arm26/pgtable.h index a6ac3957b0be..3fb067eaf801 100644 --- a/include/asm-arm26/pgtable.h +++ b/include/asm-arm26/pgtable.h @@ -179,7 +179,7 @@ extern struct page *empty_zero_page; /* Is pmd_page supposed to return a pointer to a page in some arches? ours seems to * return a pointer to memory (no special alignment) */ -#define pmd_page(pmd) ((unsigned long)(pmd_val((pmd)) & ~_PMD_PRESENT)) +#define pmd_page(pmd) ((struct page *)(pmd_val((pmd)) & ~_PMD_PRESENT)) #define pmd_page_kernel(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT)) #define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) diff --git a/include/asm-arm26/statfs.h b/include/asm-arm26/statfs.h index a1eba73ded99..776dbc8f7623 100644 --- a/include/asm-arm26/statfs.h +++ b/include/asm-arm26/statfs.h @@ -1,25 +1,8 @@ #ifndef _ASMARM_STATFS_H #define _ASMARM_STATFS_H -#ifndef __KERNEL_STRICT_NAMES +//FIXME - this may not be appropriate for arm26. check it out. 
-#include - -typedef __kernel_fsid_t fsid_t; - -#endif - -struct statfs { - long f_type; - long f_bsize; - long f_blocks; - long f_bfree; - long f_bavail; - long f_files; - long f_ffree; - __kernel_fsid_t f_fsid; - long f_namelen; - long f_spare[6]; -}; +#include #endif diff --git a/include/asm-arm26/thread_info.h b/include/asm-arm26/thread_info.h index 3f298bf9206a..24c5136484fb 100644 --- a/include/asm-arm26/thread_info.h +++ b/include/asm-arm26/thread_info.h @@ -85,7 +85,7 @@ static inline struct thread_info *current_thread_info(void) //#define INIT_THREAD_SIZE (65536) #define __get_user_regs(x) (((struct pt_regs *)((unsigned long)(x) + THREAD_SIZE - 8)) - 1) -extern struct thread_info *alloc_thread_info(void); +extern struct thread_info *alloc_thread_info(struct task_struct *task); extern void free_thread_info(struct thread_info *); #define get_thread_info(ti) get_task_struct((ti)->task) -- cgit v1.2.3 From f597d1debdd1f5314a0177cb68d1d0ced960929b Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sun, 6 Jul 2003 23:34:25 -0700 Subject: [PATCH] suspend SMP-kernel with one CPU This allows suspend to work on UP machines, even if the kernel is compiled for SMP. --- kernel/suspend.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/suspend.c b/kernel/suspend.c index 843cbaf6d433..1033ee31df68 100644 --- a/kernel/suspend.c +++ b/kernel/suspend.c @@ -1203,12 +1203,12 @@ static int read_suspend_image(const char * specialfile, int noresume) void software_resume(void) { -#ifdef CONFIG_SMP - printk(KERN_WARNING "Software Suspend has a malfunctioning SMP support. Disabled :(\n"); -#else + if (num_online_cpus() > 1) { + printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. 
Disabled :(\n"); + return; + } /* We enable the possibility of machine suspend */ software_suspend_enabled = 1; -#endif if (!resume_status) return; -- cgit v1.2.3 From 62133cb1109242fde586f965cff48c85eaa7f52b Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 7 Jul 2003 01:31:32 -0700 Subject: [PATCH] tgkill patch for safe inter-thread signals This is the updated versions of the patch Ingo sent some time ago to implement a new tgkill() syscall which specifies the target thread without any possibility of ambiguity or thread ID wrap races, by passing in both the thread group _and_ the thread ID as the arguments. This is really needed since many/most people still run with limited PID ranges (maybe due to legacy apps breaking) and the PID reuse can cause problems. --- arch/i386/kernel/entry.S | 1 + include/asm-i386/unistd.h | 3 ++- kernel/signal.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5aef7a47a383..dafcae8663c8 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -876,5 +876,6 @@ ENTRY(sys_call_table) .long sys_clock_nanosleep .long sys_statfs64 .long sys_fstatfs64 + .long sys_tgkill nr_syscalls=(.-sys_call_table)/4 diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index b1bdc016eed5..f39e505e3eb4 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -275,8 +275,9 @@ #define __NR_clock_nanosleep (__NR_timer_create+8) #define __NR_statfs64 268 #define __NR_fstatfs64 269 +#define __NR_tgkill 270 -#define NR_syscalls 270 +#define NR_syscalls 271 /* user-visible error numbers are in the range -1 - -124: see */ diff --git a/kernel/signal.c b/kernel/signal.c index dbefbea7623e..7ac72191b30b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -579,8 +579,8 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s) /* * Bad permissions for sending the signal */ -static 
inline int check_kill_permission(int sig, struct siginfo *info, - struct task_struct *t) +static int check_kill_permission(int sig, struct siginfo *info, + struct task_struct *t) { int error = -EINVAL; if (sig < 0 || sig > _NSIG) @@ -2088,6 +2088,52 @@ sys_kill(int pid, int sig) return kill_something_info(sig, &info, pid); } +/** + * sys_tkill - send signal to one specific thread + * @tgid: the thread group ID of the thread + * @pid: the PID of the thread + * @sig: signal to be sent + * + * This syscall also checks the tgid and returns -ESRCH even if the PID + * exists but it's not belonging to the target process anymore. This + * method solves the problem of threads exiting and PIDs getting reused. + */ +asmlinkage long sys_tgkill(int tgid, int pid, int sig) +{ + struct siginfo info; + int error; + struct task_struct *p; + + /* This is only valid for single tasks */ + if (pid <= 0 || tgid <= 0) + return -EINVAL; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; + info.si_pid = current->tgid; + info.si_uid = current->uid; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + error = -ESRCH; + if (p && (p->tgid == tgid)) { + error = check_kill_permission(sig, &info, p); + /* + * The null signal is a permissions and process existence + * probe. No signal is actually delivered. + */ + if (!error && sig && p->sighand) { + spin_lock_irq(&p->sighand->siglock); + handle_stop_signal(sig, p); + error = specific_send_sig_info(sig, &info, p); + spin_unlock_irq(&p->sighand->siglock); + } + } + read_unlock(&tasklist_lock); + return error; +} + /* * Send a signal to only one task, even if it's a CLONE_THREAD task. */ -- cgit v1.2.3 From a35d0e110ebc80e7a9c2a10ff7437bde32650c68 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Jul 2003 02:14:10 -0700 Subject: [PATCH] make create() follow symlinks again The intent patches broke behaviour w.r.t. following symlinks when doing an open() with file creation. 
The problem occurs in open_namei() because the LOOKUP_PARENT flag is no longer set when we do the call to follow_link(). --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index 2fc6f11fe795..4bd15bdcef47 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1344,6 +1344,7 @@ do_link: * stored in nd->last.name and we will have to putname() it when we * are done. Procfs-like symlinks just set LAST_BIND. */ + nd->flags |= LOOKUP_PARENT; error = security_inode_follow_link(dentry, nd); if (error) goto exit_dput; @@ -1352,6 +1353,7 @@ do_link: dput(dentry); if (error) return error; + nd->flags &= ~LOOKUP_PARENT; if (nd->last_type == LAST_BIND) { dentry = nd->dentry; goto ok; -- cgit v1.2.3 From 4aad4269068a9e1b8ee4a334410de7a777b7c4e5 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 7 Jul 2003 09:20:38 -0700 Subject: [PATCH] 68328 DragenEngine configure updates Configuration updates for 68328 DragenEngine board. Fix up name so that it is "DragenEngine" and clean up eeprom read. --- arch/m68knommu/Kconfig | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index 758525459878..2c08a7d8b27d 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -222,21 +222,17 @@ config UCDIMM Support for the Arcturus Networks uDsimm module. config DRAGEN2 - bool "Dragen Engine II board support" + bool "DragenEngine II board support" depends on M68VZ328 help - Support for the Dragen Engine II board. + Support for the DragenEngine II board. -config HWADDR_FROMEEPROM - bool " Read ETH address from EEPROM" - depends on DRAGEN2 +config DIRECT_IO_ACCESS + bool " Allow user to access IO directly" + depends on (UCSIMM || UCDIMM || DRAGEN2) help - Use MAC address from EEPROM. 
- -config HWADDR_OFFSET - int " Offset from start of EEPROM" - default "2" - depends on HWADDR_FROMEEPROM + Disable the CPU internal registers protection in user mode, + to allow a user application to read/write them. config INIT_LCD bool " Initialize LCD" @@ -246,7 +242,7 @@ config INIT_LCD config MEMORY_RESERVE int " Memory reservation (MiB)" - depends on (UCSIMM || UCDIMM || DRAGEN2) + depends on (UCSIMM || UCDIMM) help Reserve certain memory regions on 68x328 based boards. -- cgit v1.2.3 From 0516e7088ade7a80a94302b6e2bff4ece4c6b8ef Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 7 Jul 2003 09:20:47 -0700 Subject: [PATCH] conditional ROMfs copy for SecureEdgeMP3/5307 board Conditionally copy the ROMfs filesystem on the SecureEdgeMP3/5307 target board only if using a ROMfs. --- arch/m68knommu/platform/5307/MP3/crt0_ram.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/m68knommu/platform/5307/MP3/crt0_ram.S b/arch/m68knommu/platform/5307/MP3/crt0_ram.S index 822e1a49bac9..d99883ac3bd0 100644 --- a/arch/m68knommu/platform/5307/MP3/crt0_ram.S +++ b/arch/m68knommu/platform/5307/MP3/crt0_ram.S @@ -124,6 +124,8 @@ _start: movec %d0,%CACR nop + +#ifdef CONFIG_ROMFS_FS /* * Move ROM filesystem above bss :-) */ @@ -145,6 +147,12 @@ _copy_romfs: cmp.l %a0, %a2 /* Check if at end */ bne _copy_romfs +#else /* CONFIG_ROMFS_FS */ + lea.l _ebss, %a1 + move.l %a1, _ramstart +#endif /* CONFIG_ROMFS_FS */ + + /* * Zero out the bss region. 
*/ -- cgit v1.2.3 From b9e5cd60a3dd807a7826a107fe91a724d6670dc8 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 7 Jul 2003 09:21:02 -0700 Subject: [PATCH] DragenEngine interrupt handler to use irqreturn_t DragenEngine setup code updates: - Change interrupt handler return type to irqreturn_t - Allow configure time setting of boot parameters - Clean up warnings --- arch/m68knommu/platform/68VZ328/de2/config.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/m68knommu/platform/68VZ328/de2/config.c b/arch/m68knommu/platform/68VZ328/de2/config.c index bbba58b90de0..6673fe79aa14 100644 --- a/arch/m68knommu/platform/68VZ328/de2/config.c +++ b/arch/m68knommu/platform/68VZ328/de2/config.c @@ -36,7 +36,7 @@ #define TICKS_PER_JIFFY 41450 static void -dragen2_sched_init(void (*timer_routine) (int, void *, struct pt_regs *)) +dragen2_sched_init(irqreturn_t (*timer_routine) (int, void *, struct pt_regs *)) { /* disable timer 1 */ TCTL = 0; @@ -183,14 +183,20 @@ static void init_hardware(void) #endif } -void config_BSP(char *command, int len) +void config_BSP(char *command, int size) { printk("68VZ328 DragonBallVZ support (c) 2001 Lineo, Inc.\n"); - command[0] = '\0'; /* no specific boot option */ + +#if defined(CONFIG_BOOTPARAM) + strncpy(command, CONFIG_BOOTPARAM_STRING, size); + command[size-1] = 0; +#else + memset(command, 0, size); +#endif init_hardware(); - mach_sched_init = dragen2_sched_init; + mach_sched_init = (void *)dragen2_sched_init; mach_tick = dragen2_tick; mach_gettimeoffset = dragen2_gettimeoffset; mach_reset = dragen2_reset; -- cgit v1.2.3 From 7aa647da72e82c54b3d95ab3ba5560859abb0181 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 7 Jul 2003 09:21:25 -0700 Subject: [PATCH] conditional ROMfs copy for NETtel/5307 board Conditionally copy the ROMfs filesystem on the NETtel/5307 target board only if using a ROMfs. 
--- arch/m68knommu/platform/5307/NETtel/crt0_ram.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/m68knommu/platform/5307/NETtel/crt0_ram.S b/arch/m68knommu/platform/5307/NETtel/crt0_ram.S index 70eaf4cf7579..009a40506c06 100644 --- a/arch/m68knommu/platform/5307/NETtel/crt0_ram.S +++ b/arch/m68knommu/platform/5307/NETtel/crt0_ram.S @@ -147,6 +147,8 @@ _start: movec %d0,%CACR nop + +#ifdef CONFIG_ROMFS_FS /* * Move ROM filesystem above bss :-) */ @@ -168,6 +170,12 @@ _copy_romfs: cmp.l %a0, %a2 /* Check if at end */ bne _copy_romfs +#else /* CONFIG_ROMFS_FS */ + lea.l _ebss, %a1 + move.l %a1, _ramstart +#endif /* CONFIG_ROMFS_FS */ + + /* * Zero out the bss region. */ -- cgit v1.2.3 From b3f87eece254fe3a2f16e296ccba8fd7de7e19c3 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 7 Jul 2003 09:21:47 -0700 Subject: [PATCH] fix security_initcall in m68knommu linker script The global SECURITY_INIT macro cannot be used inside the .init section of the m68knommu linker script. It is a complete section of its own, so its components must be listed individually instead. --- arch/m68knommu/vmlinux.lds.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68knommu/vmlinux.lds.S b/arch/m68knommu/vmlinux.lds.S index fa2004ada4ba..1ab8a31ef964 100644 --- a/arch/m68knommu/vmlinux.lds.S +++ b/arch/m68knommu/vmlinux.lds.S @@ -277,7 +277,9 @@ SECTIONS { __con_initcall_start = .; *(.con_initcall.init) __con_initcall_end = .; - SECURITY_INIT + __security_initcall_start = .; + *(.security_initcall.init) + __security_initcall_end = .; . = ALIGN(4); __initramfs_start = .; *(.init.ramfs) -- cgit v1.2.3 From a3264e91e1ad176513582c841ef60d80ab028c23 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 7 Jul 2003 09:22:14 -0700 Subject: [PATCH] clean module_exit in m68knommu serial drivers Remove unused, commented-out module_exit functions from m68knommu ColdFire and 68328 serial drivers.
These drivers currently cannot be configured as modules, and they have no exit functions. --- drivers/serial/68328serial.c | 1 - drivers/serial/mcfserial.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c index 7ff8ff5dbecc..f212ae0992d1 100644 --- a/drivers/serial/68328serial.c +++ b/drivers/serial/68328serial.c @@ -1533,7 +1533,6 @@ void unregister_serial(int line) } module_init(rs68328_init); -/* DAVIDM module_exit(rs68328_fini); */ diff --git a/drivers/serial/mcfserial.c b/drivers/serial/mcfserial.c index 38bcb8765456..c6b7caa224df 100644 --- a/drivers/serial/mcfserial.c +++ b/drivers/serial/mcfserial.c @@ -1643,7 +1643,6 @@ mcfrs_init(void) } module_init(mcfrs_init); -/* DAVIDM module_exit(mcfrs_fini); */ /****************************************************************************/ /* Serial Console */ -- cgit v1.2.3 From dcc52875ac4a6e21571f07dc5b3ba709b64f39ab Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jul 2003 10:03:03 -0700 Subject: [SPARC64]: Move raid xor into library assembler file. 
--- arch/sparc64/kernel/sparc64_ksyms.c | 13 ++ arch/sparc64/lib/Makefile | 2 +- arch/sparc64/lib/xor.S | 356 +++++++++++++++++++++++++++++++++++ include/asm-sparc64/xor.h | 360 +----------------------------------- 4 files changed, 371 insertions(+), 360 deletions(-) create mode 100644 arch/sparc64/lib/xor.S diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index f2f9c9d0540c..2af4669e3396 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -116,6 +116,14 @@ extern unsigned long pfn_base; extern unsigned int sys_call_table[]; +extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_vis_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_vis_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + /* used by various drivers */ #ifdef CONFIG_SMP #ifndef CONFIG_DEBUG_SPINLOCK @@ -376,3 +384,8 @@ EXPORT_SYMBOL(ns87303_lock); EXPORT_SYMBOL_GPL(sys_call_table); EXPORT_SYMBOL(tick_ops); + +EXPORT_SYMBOL(xor_vis_2); +EXPORT_SYMBOL(xor_vis_3); +EXPORT_SYMBOL(xor_vis_4); +EXPORT_SYMBOL(xor_vis_5); diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index bc822e42b2c9..394c4e2914d1 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -10,4 +10,4 @@ lib-y := PeeCeeI.o blockops.o debuglocks.o strlen.o strncmp.o \ VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \ VIScsumcopyusr.o VISsave.o atomic.o rwlock.o bitops.o \ dec_and_lock.o U3memcpy.o U3copy_from_user.o U3copy_to_user.o \ - U3copy_in_user.o mcount.o ipcsum.o rwsem.o + U3copy_in_user.o mcount.o ipcsum.o rwsem.o xor.o diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S new file mode 100644 index 000000000000..8eeba353dee4 --- /dev/null +++ b/arch/sparc64/lib/xor.S @@ 
-0,0 +1,356 @@ +/* + * arch/sparc64/lib/xor.S + * + * High speed xor_block operation for RAID4/5 utilizing the + * UltraSparc Visual Instruction Set. + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + */ + +#include +#include +#include +#include + +/* + * Requirements: + * !(((long)dest | (long)sourceN) & (64 - 1)) && + * !(len & 127) && len >= 256 + */ + .text + .globl xor_vis_2 + .type xor_vis_2,@function +xor_vis_2: + rd %fprs, %o5 + andcc %o5, FPRS_FEF|FPRS_DU, %g0 + be,pt %icc, 0f + sethi %hi(VISenter), %g1 + jmpl %g1 + %lo(VISenter), %g7 + add %g7, 8, %g7 +0: wr %g0, FPRS_FEF, %fprs + rd %asi, %g1 + wr %g0, ASI_BLK_P, %asi + membar #LoadStore|#StoreLoad|#StoreStore + sub %o0, 128, %o0 + ldda [%o1] %asi, %f0 + ldda [%o2] %asi, %f16 + +2: ldda [%o1 + 64] %asi, %f32 + fxor %f0, %f16, %f16 + fxor %f2, %f18, %f18 + fxor %f4, %f20, %f20 + fxor %f6, %f22, %f22 + fxor %f8, %f24, %f24 + fxor %f10, %f26, %f26 + fxor %f12, %f28, %f28 + fxor %f14, %f30, %f30 + stda %f16, [%o1] %asi + ldda [%o2 + 64] %asi, %f48 + ldda [%o1 + 128] %asi, %f0 + fxor %f32, %f48, %f48 + fxor %f34, %f50, %f50 + add %o1, 128, %o1 + fxor %f36, %f52, %f52 + add %o2, 128, %o2 + fxor %f38, %f54, %f54 + subcc %o0, 128, %o0 + fxor %f40, %f56, %f56 + fxor %f42, %f58, %f58 + fxor %f44, %f60, %f60 + fxor %f46, %f62, %f62 + stda %f48, [%o1 - 64] %asi + bne,pt %xcc, 2b + ldda [%o2] %asi, %f16 + + ldda [%o1 + 64] %asi, %f32 + fxor %f0, %f16, %f16 + fxor %f2, %f18, %f18 + fxor %f4, %f20, %f20 + fxor %f6, %f22, %f22 + fxor %f8, %f24, %f24 + fxor %f10, %f26, %f26 + fxor %f12, %f28, %f28 + fxor %f14, %f30, %f30 + stda %f16, [%o1] %asi + ldda [%o2 + 64] %asi, %f48 + membar #Sync + fxor %f32, %f48, %f48 + fxor %f34, %f50, %f50 + fxor %f36, %f52, %f52 + fxor %f38, %f54, %f54 + fxor %f40, %f56, %f56 + fxor %f42, %f58, %f58 + fxor %f44, %f60, %f60 + fxor %f46, %f62, %f62 + stda %f48, [%o1 + 64] %asi + membar #Sync|#StoreStore|#StoreLoad + wr %g1, %g0, %asi + retl + wr %g0, 0, %fprs + .size 
xor_vis_2, .-xor_vis_2 + + + .globl xor_vis_3 + .type xor_vis_3,@function +xor_vis_3: + rd %fprs, %o5 + andcc %o5, FPRS_FEF|FPRS_DU, %g0 + be,pt %icc, 0f + sethi %hi(VISenter), %g1 + jmpl %g1 + %lo(VISenter), %g7 + add %g7, 8, %g7 +0: wr %g0, FPRS_FEF, %fprs + rd %asi, %g1 + wr %g0, ASI_BLK_P, %asi + membar #LoadStore|#StoreLoad|#StoreStore + sub %o0, 64, %o0 + ldda [%o1] %asi, %f0 + ldda [%o2] %asi, %f16 + +3: ldda [%o3] %asi, %f32 + fxor %f0, %f16, %f48 + fxor %f2, %f18, %f50 + add %o1, 64, %o1 + fxor %f4, %f20, %f52 + fxor %f6, %f22, %f54 + add %o2, 64, %o2 + fxor %f8, %f24, %f56 + fxor %f10, %f26, %f58 + fxor %f12, %f28, %f60 + fxor %f14, %f30, %f62 + ldda [%o1] %asi, %f0 + fxor %f48, %f32, %f48 + fxor %f50, %f34, %f50 + fxor %f52, %f36, %f52 + fxor %f54, %f38, %f54 + add %o3, 64, %o3 + fxor %f56, %f40, %f56 + fxor %f58, %f42, %f58 + subcc %o0, 64, %o0 + fxor %f60, %f44, %f60 + fxor %f62, %f46, %f62 + stda %f48, [%o1 - 64] %asi + bne,pt %xcc, 3b + ldda [%o2] %asi, %f16 + + ldda [%o3] %asi, %f32 + fxor %f0, %f16, %f48 + fxor %f2, %f18, %f50 + fxor %f4, %f20, %f52 + fxor %f6, %f22, %f54 + fxor %f8, %f24, %f56 + fxor %f10, %f26, %f58 + fxor %f12, %f28, %f60 + fxor %f14, %f30, %f62 + membar #Sync + fxor %f48, %f32, %f48 + fxor %f50, %f34, %f50 + fxor %f52, %f36, %f52 + fxor %f54, %f38, %f54 + fxor %f56, %f40, %f56 + fxor %f58, %f42, %f58 + fxor %f60, %f44, %f60 + fxor %f62, %f46, %f62 + stda %f48, [%o1] %asi + membar #Sync|#StoreStore|#StoreLoad + wr %g1, %g0, %asi + retl + wr %g0, 0, %fprs + .size xor_vis_3, .-xor_vis_3 + + + .globl xor_vis_4 + .type xor_vis_4,@function +xor_vis_4: + rd %fprs, %o5 + andcc %o5, FPRS_FEF|FPRS_DU, %g0 + be,pt %icc, 0f + sethi %hi(VISenter), %g1 + jmpl %g1 + %lo(VISenter), %g7 + add %g7, 8, %g7 +0: wr %g0, FPRS_FEF, %fprs + rd %asi, %g1 + wr %g0, ASI_BLK_P, %asi + membar #LoadStore|#StoreLoad|#StoreStore + sub %o0, 64, %o0 + ldda [%o1] %asi, %f0 + ldda [%o2] %asi, %f16 + +4: ldda [%o3] %asi, %f32 + fxor %f0, %f16, %f16 + fxor %f2, 
%f18, %f18 + add %o1, 64, %o1 + fxor %f4, %f20, %f20 + fxor %f6, %f22, %f22 + add %o2, 64, %o2 + fxor %f8, %f24, %f24 + fxor %f10, %f26, %f26 + fxor %f12, %f28, %f28 + fxor %f14, %f30, %f30 + ldda [%o4] %asi, %f48 + fxor %f16, %f32, %f32 + fxor %f18, %f34, %f34 + fxor %f20, %f36, %f36 + fxor %f22, %f38, %f38 + add %o3, 64, %o3 + fxor %f24, %f40, %f40 + fxor %f26, %f42, %f42 + fxor %f28, %f44, %f44 + fxor %f30, %f46, %f46 + ldda [%o1] %asi, %f0 + fxor %f32, %f48, %f48 + fxor %f34, %f50, %f50 + fxor %f36, %f52, %f52 + add %o4, 64, %o4 + fxor %f38, %f54, %f54 + fxor %f40, %f56, %f56 + fxor %f42, %f58, %f58 + subcc %o0, 64, %o0 + fxor %f44, %f60, %f60 + fxor %f46, %f62, %f62 + stda %f48, [%o1 - 64] %asi + bne,pt %xcc, 4b + ldda [%o2] %asi, %f16 + + ldda [%o3] %asi, %f32 + fxor %f0, %f16, %f16 + fxor %f2, %f18, %f18 + fxor %f4, %f20, %f20 + fxor %f6, %f22, %f22 + fxor %f8, %f24, %f24 + fxor %f10, %f26, %f26 + fxor %f12, %f28, %f28 + fxor %f14, %f30, %f30 + ldda [%o4] %asi, %f48 + fxor %f16, %f32, %f32 + fxor %f18, %f34, %f34 + fxor %f20, %f36, %f36 + fxor %f22, %f38, %f38 + fxor %f24, %f40, %f40 + fxor %f26, %f42, %f42 + fxor %f28, %f44, %f44 + fxor %f30, %f46, %f46 + membar #Sync + fxor %f32, %f48, %f48 + fxor %f34, %f50, %f50 + fxor %f36, %f52, %f52 + fxor %f38, %f54, %f54 + fxor %f40, %f56, %f56 + fxor %f42, %f58, %f58 + fxor %f44, %f60, %f60 + fxor %f46, %f62, %f62 + stda %f48, [%o1] %asi + membar #Sync|#StoreStore|#StoreLoad + wr %g1, %g0, %asi + retl + wr %g0, 0, %fprs + .size xor_vis_4, .-xor_vis_4 + + + .globl xor_vis_5 + .type xor_vis_5,@function +xor_vis_5: + mov %o5, %g5 + rd %fprs, %o5 + andcc %o5, FPRS_FEF|FPRS_DU, %g0 + be,pt %icc, 0f + sethi %hi(VISenter), %g1 + jmpl %g1 + %lo(VISenter), %g7 + add %g7, 8, %g7 +0: wr %g0, FPRS_FEF, %fprs + mov %g5, %o5 + rd %asi, %g1 + wr %g0, ASI_BLK_P, %asi + membar #LoadStore|#StoreLoad|#StoreStore + sub %o0, 64, %o0 + ldda [%o1] %asi, %f0 + ldda [%o2] %asi, %f16 + +5: ldda [%o3] %asi, %f32 + fxor %f0, %f16, %f48 + fxor 
%f2, %f18, %f50 + add %o1, 64, %o1 + fxor %f4, %f20, %f52 + fxor %f6, %f22, %f54 + add %o2, 64, %o2 + fxor %f8, %f24, %f56 + fxor %f10, %f26, %f58 + fxor %f12, %f28, %f60 + fxor %f14, %f30, %f62 + ldda [%o4] %asi, %f16 + fxor %f48, %f32, %f48 + fxor %f50, %f34, %f50 + fxor %f52, %f36, %f52 + fxor %f54, %f38, %f54 + add %o3, 64, %o3 + fxor %f56, %f40, %f56 + fxor %f58, %f42, %f58 + fxor %f60, %f44, %f60 + fxor %f62, %f46, %f62 + ldda [%o5] %asi, %f32 + fxor %f48, %f16, %f48 + fxor %f50, %f18, %f50 + add %o4, 64, %o4 + fxor %f52, %f20, %f52 + fxor %f54, %f22, %f54 + add %o5, 64, %o5 + fxor %f56, %f24, %f56 + fxor %f58, %f26, %f58 + fxor %f60, %f28, %f60 + fxor %f62, %f30, %f62 + ldda [%o1] %asi, %f0 + fxor %f48, %f32, %f48 + fxor %f50, %f34, %f50 + fxor %f52, %f36, %f52 + fxor %f54, %f38, %f54 + fxor %f56, %f40, %f56 + fxor %f58, %f42, %f58 + subcc %o0, 64, %o0 + fxor %f60, %f44, %f60 + fxor %f62, %f46, %f62 + stda %f48, [%o1 - 64] %asi + bne,pt %xcc, 5b + ldda [%o2] %asi, %f16 + + ldda [%o3] %asi, %f32 + fxor %f0, %f16, %f48 + fxor %f2, %f18, %f50 + fxor %f4, %f20, %f52 + fxor %f6, %f22, %f54 + fxor %f8, %f24, %f56 + fxor %f10, %f26, %f58 + fxor %f12, %f28, %f60 + fxor %f14, %f30, %f62 + ldda [%o4] %asi, %f16 + fxor %f48, %f32, %f48 + fxor %f50, %f34, %f50 + fxor %f52, %f36, %f52 + fxor %f54, %f38, %f54 + fxor %f56, %f40, %f56 + fxor %f58, %f42, %f58 + fxor %f60, %f44, %f60 + fxor %f62, %f46, %f62 + ldda [%o5] %asi, %f32 + fxor %f48, %f16, %f48 + fxor %f50, %f18, %f50 + fxor %f52, %f20, %f52 + fxor %f54, %f22, %f54 + fxor %f56, %f24, %f56 + fxor %f58, %f26, %f58 + fxor %f60, %f28, %f60 + fxor %f62, %f30, %f62 + membar #Sync + fxor %f48, %f32, %f48 + fxor %f50, %f34, %f50 + fxor %f52, %f36, %f52 + fxor %f54, %f38, %f54 + fxor %f56, %f40, %f56 + fxor %f58, %f42, %f58 + fxor %f60, %f44, %f60 + fxor %f62, %f46, %f62 + stda %f48, [%o1] %asi + membar #Sync|#StoreStore|#StoreLoad + wr %g1, %g0, %asi + retl + wr %g0, 0, %fprs + .size xor_vis_5, .-xor_vis_5 diff --git 
a/include/asm-sparc64/xor.h b/include/asm-sparc64/xor.h index 9ecc98f667d0..8b3a7e4b6062 100644 --- a/include/asm-sparc64/xor.h +++ b/include/asm-sparc64/xor.h @@ -16,15 +16,6 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* - * Requirements: - * !(((long)dest | (long)sourceN) & (64 - 1)) && - * !(len & 127) && len >= 256 - * - * It is done in pure assembly, as otherwise gcc makes it a non-leaf - * function, which is not what we want. - */ - #include #include @@ -36,356 +27,7 @@ extern void xor_vis_4(unsigned long, unsigned long *, unsigned long *, extern void xor_vis_5(unsigned long, unsigned long *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); -#define _S(x) __S(x) -#define __S(x) #x -#define DEF(x) __asm__(#x " = " _S(x)) - -DEF(FPRS_FEF); -DEF(FPRS_DU); -DEF(ASI_BLK_P); - -/* ??? We set and use %asi instead of using ASI_BLK_P directly because gas - currently does not accept symbolic constants for the ASI specifier. */ - -__asm__ (" - .text - .globl xor_vis_2 - .type xor_vis_2,@function -xor_vis_2: - rd %fprs, %o5 - andcc %o5, FPRS_FEF|FPRS_DU, %g0 - be,pt %icc, 0f - sethi %hi(VISenter), %g1 - jmpl %g1 + %lo(VISenter), %g7 - add %g7, 8, %g7 -0: wr %g0, FPRS_FEF, %fprs - rd %asi, %g1 - wr %g0, ASI_BLK_P, %asi - membar #LoadStore|#StoreLoad|#StoreStore - sub %o0, 128, %o0 - ldda [%o1] %asi, %f0 - ldda [%o2] %asi, %f16 - -2: ldda [%o1 + 64] %asi, %f32 - fxor %f0, %f16, %f16 - fxor %f2, %f18, %f18 - fxor %f4, %f20, %f20 - fxor %f6, %f22, %f22 - fxor %f8, %f24, %f24 - fxor %f10, %f26, %f26 - fxor %f12, %f28, %f28 - fxor %f14, %f30, %f30 - stda %f16, [%o1] %asi - ldda [%o2 + 64] %asi, %f48 - ldda [%o1 + 128] %asi, %f0 - fxor %f32, %f48, %f48 - fxor %f34, %f50, %f50 - add %o1, 128, %o1 - fxor %f36, %f52, %f52 - add %o2, 128, %o2 - fxor %f38, %f54, %f54 - subcc %o0, 128, %o0 - fxor %f40, %f56, %f56 - fxor %f42, %f58, %f58 - fxor %f44, %f60, %f60 - fxor %f46, %f62, %f62 - stda %f48, [%o1 - 64] %asi - bne,pt %xcc, 2b - 
ldda [%o2] %asi, %f16 - - ldda [%o1 + 64] %asi, %f32 - fxor %f0, %f16, %f16 - fxor %f2, %f18, %f18 - fxor %f4, %f20, %f20 - fxor %f6, %f22, %f22 - fxor %f8, %f24, %f24 - fxor %f10, %f26, %f26 - fxor %f12, %f28, %f28 - fxor %f14, %f30, %f30 - stda %f16, [%o1] %asi - ldda [%o2 + 64] %asi, %f48 - membar #Sync - fxor %f32, %f48, %f48 - fxor %f34, %f50, %f50 - fxor %f36, %f52, %f52 - fxor %f38, %f54, %f54 - fxor %f40, %f56, %f56 - fxor %f42, %f58, %f58 - fxor %f44, %f60, %f60 - fxor %f46, %f62, %f62 - stda %f48, [%o1 + 64] %asi - membar #Sync|#StoreStore|#StoreLoad - wr %g1, %g0, %asi - retl - wr %g0, 0, %fprs - .size xor_vis_2, .-xor_vis_2 - - - .globl xor_vis_3 - .type xor_vis_3,@function -xor_vis_3: - rd %fprs, %o5 - andcc %o5, FPRS_FEF|FPRS_DU, %g0 - be,pt %icc, 0f - sethi %hi(VISenter), %g1 - jmpl %g1 + %lo(VISenter), %g7 - add %g7, 8, %g7 -0: wr %g0, FPRS_FEF, %fprs - rd %asi, %g1 - wr %g0, ASI_BLK_P, %asi - membar #LoadStore|#StoreLoad|#StoreStore - sub %o0, 64, %o0 - ldda [%o1] %asi, %f0 - ldda [%o2] %asi, %f16 - -3: ldda [%o3] %asi, %f32 - fxor %f0, %f16, %f48 - fxor %f2, %f18, %f50 - add %o1, 64, %o1 - fxor %f4, %f20, %f52 - fxor %f6, %f22, %f54 - add %o2, 64, %o2 - fxor %f8, %f24, %f56 - fxor %f10, %f26, %f58 - fxor %f12, %f28, %f60 - fxor %f14, %f30, %f62 - ldda [%o1] %asi, %f0 - fxor %f48, %f32, %f48 - fxor %f50, %f34, %f50 - fxor %f52, %f36, %f52 - fxor %f54, %f38, %f54 - add %o3, 64, %o3 - fxor %f56, %f40, %f56 - fxor %f58, %f42, %f58 - subcc %o0, 64, %o0 - fxor %f60, %f44, %f60 - fxor %f62, %f46, %f62 - stda %f48, [%o1 - 64] %asi - bne,pt %xcc, 3b - ldda [%o2] %asi, %f16 - - ldda [%o3] %asi, %f32 - fxor %f0, %f16, %f48 - fxor %f2, %f18, %f50 - fxor %f4, %f20, %f52 - fxor %f6, %f22, %f54 - fxor %f8, %f24, %f56 - fxor %f10, %f26, %f58 - fxor %f12, %f28, %f60 - fxor %f14, %f30, %f62 - membar #Sync - fxor %f48, %f32, %f48 - fxor %f50, %f34, %f50 - fxor %f52, %f36, %f52 - fxor %f54, %f38, %f54 - fxor %f56, %f40, %f56 - fxor %f58, %f42, %f58 - fxor %f60, %f44, 
%f60 - fxor %f62, %f46, %f62 - stda %f48, [%o1] %asi - membar #Sync|#StoreStore|#StoreLoad - wr %g1, %g0, %asi - retl - wr %g0, 0, %fprs - .size xor_vis_3, .-xor_vis_3 - - - .globl xor_vis_4 - .type xor_vis_4,@function -xor_vis_4: - rd %fprs, %o5 - andcc %o5, FPRS_FEF|FPRS_DU, %g0 - be,pt %icc, 0f - sethi %hi(VISenter), %g1 - jmpl %g1 + %lo(VISenter), %g7 - add %g7, 8, %g7 -0: wr %g0, FPRS_FEF, %fprs - rd %asi, %g1 - wr %g0, ASI_BLK_P, %asi - membar #LoadStore|#StoreLoad|#StoreStore - sub %o0, 64, %o0 - ldda [%o1] %asi, %f0 - ldda [%o2] %asi, %f16 - -4: ldda [%o3] %asi, %f32 - fxor %f0, %f16, %f16 - fxor %f2, %f18, %f18 - add %o1, 64, %o1 - fxor %f4, %f20, %f20 - fxor %f6, %f22, %f22 - add %o2, 64, %o2 - fxor %f8, %f24, %f24 - fxor %f10, %f26, %f26 - fxor %f12, %f28, %f28 - fxor %f14, %f30, %f30 - ldda [%o4] %asi, %f48 - fxor %f16, %f32, %f32 - fxor %f18, %f34, %f34 - fxor %f20, %f36, %f36 - fxor %f22, %f38, %f38 - add %o3, 64, %o3 - fxor %f24, %f40, %f40 - fxor %f26, %f42, %f42 - fxor %f28, %f44, %f44 - fxor %f30, %f46, %f46 - ldda [%o1] %asi, %f0 - fxor %f32, %f48, %f48 - fxor %f34, %f50, %f50 - fxor %f36, %f52, %f52 - add %o4, 64, %o4 - fxor %f38, %f54, %f54 - fxor %f40, %f56, %f56 - fxor %f42, %f58, %f58 - subcc %o0, 64, %o0 - fxor %f44, %f60, %f60 - fxor %f46, %f62, %f62 - stda %f48, [%o1 - 64] %asi - bne,pt %xcc, 4b - ldda [%o2] %asi, %f16 - - ldda [%o3] %asi, %f32 - fxor %f0, %f16, %f16 - fxor %f2, %f18, %f18 - fxor %f4, %f20, %f20 - fxor %f6, %f22, %f22 - fxor %f8, %f24, %f24 - fxor %f10, %f26, %f26 - fxor %f12, %f28, %f28 - fxor %f14, %f30, %f30 - ldda [%o4] %asi, %f48 - fxor %f16, %f32, %f32 - fxor %f18, %f34, %f34 - fxor %f20, %f36, %f36 - fxor %f22, %f38, %f38 - fxor %f24, %f40, %f40 - fxor %f26, %f42, %f42 - fxor %f28, %f44, %f44 - fxor %f30, %f46, %f46 - membar #Sync - fxor %f32, %f48, %f48 - fxor %f34, %f50, %f50 - fxor %f36, %f52, %f52 - fxor %f38, %f54, %f54 - fxor %f40, %f56, %f56 - fxor %f42, %f58, %f58 - fxor %f44, %f60, %f60 - fxor %f46, %f62, 
%f62 - stda %f48, [%o1] %asi - membar #Sync|#StoreStore|#StoreLoad - wr %g1, %g0, %asi - retl - wr %g0, 0, %fprs - .size xor_vis_4, .-xor_vis_4 - - - .globl xor_vis_5 - .type xor_vis_5,@function -xor_vis_5: - mov %o5, %g5 - rd %fprs, %o5 - andcc %o5, FPRS_FEF|FPRS_DU, %g0 - be,pt %icc, 0f - sethi %hi(VISenter), %g1 - jmpl %g1 + %lo(VISenter), %g7 - add %g7, 8, %g7 -0: wr %g0, FPRS_FEF, %fprs - mov %g5, %o5 - rd %asi, %g1 - wr %g0, ASI_BLK_P, %asi - membar #LoadStore|#StoreLoad|#StoreStore - sub %o0, 64, %o0 - ldda [%o1] %asi, %f0 - ldda [%o2] %asi, %f16 - -5: ldda [%o3] %asi, %f32 - fxor %f0, %f16, %f48 - fxor %f2, %f18, %f50 - add %o1, 64, %o1 - fxor %f4, %f20, %f52 - fxor %f6, %f22, %f54 - add %o2, 64, %o2 - fxor %f8, %f24, %f56 - fxor %f10, %f26, %f58 - fxor %f12, %f28, %f60 - fxor %f14, %f30, %f62 - ldda [%o4] %asi, %f16 - fxor %f48, %f32, %f48 - fxor %f50, %f34, %f50 - fxor %f52, %f36, %f52 - fxor %f54, %f38, %f54 - add %o3, 64, %o3 - fxor %f56, %f40, %f56 - fxor %f58, %f42, %f58 - fxor %f60, %f44, %f60 - fxor %f62, %f46, %f62 - ldda [%o5] %asi, %f32 - fxor %f48, %f16, %f48 - fxor %f50, %f18, %f50 - add %o4, 64, %o4 - fxor %f52, %f20, %f52 - fxor %f54, %f22, %f54 - add %o5, 64, %o5 - fxor %f56, %f24, %f56 - fxor %f58, %f26, %f58 - fxor %f60, %f28, %f60 - fxor %f62, %f30, %f62 - ldda [%o1] %asi, %f0 - fxor %f48, %f32, %f48 - fxor %f50, %f34, %f50 - fxor %f52, %f36, %f52 - fxor %f54, %f38, %f54 - fxor %f56, %f40, %f56 - fxor %f58, %f42, %f58 - subcc %o0, 64, %o0 - fxor %f60, %f44, %f60 - fxor %f62, %f46, %f62 - stda %f48, [%o1 - 64] %asi - bne,pt %xcc, 5b - ldda [%o2] %asi, %f16 - - ldda [%o3] %asi, %f32 - fxor %f0, %f16, %f48 - fxor %f2, %f18, %f50 - fxor %f4, %f20, %f52 - fxor %f6, %f22, %f54 - fxor %f8, %f24, %f56 - fxor %f10, %f26, %f58 - fxor %f12, %f28, %f60 - fxor %f14, %f30, %f62 - ldda [%o4] %asi, %f16 - fxor %f48, %f32, %f48 - fxor %f50, %f34, %f50 - fxor %f52, %f36, %f52 - fxor %f54, %f38, %f54 - fxor %f56, %f40, %f56 - fxor %f58, %f42, %f58 - fxor 
%f60, %f44, %f60 - fxor %f62, %f46, %f62 - ldda [%o5] %asi, %f32 - fxor %f48, %f16, %f48 - fxor %f50, %f18, %f50 - fxor %f52, %f20, %f52 - fxor %f54, %f22, %f54 - fxor %f56, %f24, %f56 - fxor %f58, %f26, %f58 - fxor %f60, %f28, %f60 - fxor %f62, %f30, %f62 - membar #Sync - fxor %f48, %f32, %f48 - fxor %f50, %f34, %f50 - fxor %f52, %f36, %f52 - fxor %f54, %f38, %f54 - fxor %f56, %f40, %f56 - fxor %f58, %f42, %f58 - fxor %f60, %f44, %f60 - fxor %f62, %f46, %f62 - stda %f48, [%o1] %asi - membar #Sync|#StoreStore|#StoreLoad - wr %g1, %g0, %asi - retl - wr %g0, 0, %fprs - .size xor_vis_5, .-xor_vis_5 -"); +/* XXX Ugh, write cheetah versions... -DaveM */ static struct xor_block_template xor_block_VIS = { .name = "VIS", -- cgit v1.2.3 From 4731c06b9e69a72adb36baa2332d4f443f560ef1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jul 2003 11:37:57 -0700 Subject: [SPARC64]: Kill all irq_cpustat_t except __softirq_pending. --- include/asm-sparc64/hardirq.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/asm-sparc64/hardirq.h b/include/asm-sparc64/hardirq.h index 44a4e97f4793..97e43a160b3b 100644 --- a/include/asm-sparc64/hardirq.h +++ b/include/asm-sparc64/hardirq.h @@ -11,15 +11,9 @@ #include #include -/* entry.S is sensitive to the offsets of these fields */ -/* rtrap.S is sensitive to the size of this structure */ +/* rtrap.S is sensitive to the offsets of these fields */ typedef struct { unsigned int __softirq_pending; - unsigned int __unused_1; - unsigned int __unused_2; - unsigned int __unused_3; - unsigned int __syscall_count; - struct task_struct * __ksoftirqd_task; } ____cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ -- cgit v1.2.3 From 54e2e762f010209bb4fbbe0cb7d97e6dee8b4843 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jul 2003 12:15:57 -0700 Subject: [SPARC64]: Use kstat_this_cpu where possible. 
--- arch/sparc64/kernel/irq.c | 4 ++-- arch/sparc64/kernel/smp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 7902b35f01d1..c606b4721b59 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -733,7 +733,7 @@ void handler_irq(int irq, struct pt_regs *regs) #endif irq_enter(); - kstat_cpu(cpu).irqs[irq]++; + kstat_this_cpu.irqs[irq]++; /* Sliiiick... */ #ifndef CONFIG_SMP @@ -805,7 +805,7 @@ void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) int cpu = smp_processor_id(); irq_enter(); - kstat_cpu(cpu).irqs[irq]++; + kstat_this_cpu.irqs[irq]++; *(irq_work(cpu, irq)) = 0; bucket = get_ino_in_irqaction(action) + ivector_table; diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index a5504b899178..0d0ff4e20428 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1060,7 +1060,7 @@ void smp_percpu_timer_interrupt(struct pt_regs *regs) irq_enter(); if (cpu == boot_cpu_id) { - kstat_cpu(cpu).irqs[0]++; + kstat_this_cpu.irqs[0]++; timer_tick_interrupt(regs); } -- cgit v1.2.3 From fec04c7623bc6f0b5d31b29d5d0cae1695a26511 Mon Sep 17 00:00:00 2001 From: Hirofumi Ogawa Date: Mon, 7 Jul 2003 19:55:47 -0700 Subject: [PATCH] FAT maintainership --- MAINTAINERS | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6590dd21b509..9f2880a7c299 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2103,11 +2103,10 @@ L: user-mode-linux-user@lists.sourceforge.net W: http://user-mode-linux.sourceforge.net S: Maintained -VFAT FILESYSTEM: -P: Gordon Chaffee -M: chaffee@cs.berkeley.edu +FAT/VFAT/MSDOS FILESYSTEM: +P: OGAWA Hirofumi +M: hirofumi@mail.parknet.co.jp L: linux-kernel@vger.kernel.org -W: http://bmrc.berkeley.edu/people/chaffee S: Maintained VIA 82Cxxx AUDIO DRIVER -- cgit v1.2.3 From a1bafab5cbe2ce874fe5b0e012cc9ab74a672b39 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 7 
Jul 2003 22:15:27 -0700 Subject: Avoid deadlocking on thread shutdown after a vfork. --- fs/exec.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 3d9730f93e08..36a11c0eddd1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1256,10 +1256,21 @@ void format_corename(char *corename, const char *pattern, long signr) static void zap_threads (struct mm_struct *mm) { struct task_struct *g, *p; + struct task_struct *tsk = current; + struct completion *vfork_done = tsk->vfork_done; + + /* + * Make sure nobody is waiting for us to release the VM, + * otherwise we can deadlock when we wait on each other + */ + if (vfork_done) { + tsk->vfork_done = NULL; + complete(vfork_done); + } read_lock(&tasklist_lock); do_each_thread(g,p) - if (mm == p->mm && p != current) { + if (mm == p->mm && p != tsk) { force_sig_specific(SIGKILL, p); mm->core_waiters++; } -- cgit v1.2.3